diff options
Diffstat (limited to 'net')
151 files changed, 5349 insertions, 3120 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 145f5cde96c..b7486488967 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, unsigned short vid; struct net_device_stats *stats; unsigned short vlan_TCI; - unsigned short proto; + __be16 proto; /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); diff --git a/net/Kconfig b/net/Kconfig index 2bdd5623fdd..60f6f321bd7 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -140,6 +140,7 @@ config BRIDGE_NETFILTER If unsure, say N. +source "net/netfilter/Kconfig" source "net/ipv4/netfilter/Kconfig" source "net/ipv6/netfilter/Kconfig" source "net/decnet/netfilter/Kconfig" @@ -206,8 +207,6 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. -source "net/netfilter/Kconfig" - endmenu endmenu diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 4dbb5af34a5..d89056ec44d 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -21,6 +21,7 @@ #include "resources.h" #include "signaling.h" /* for WAITING and sigd_attach */ +#include "common.h" static DECLARE_MUTEX(ioctl_mutex); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 17a81ebe7e6..526d9531411 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -105,7 +105,7 @@ extern void mpc_proc_clean(void); struct mpoa_client *mpcs = NULL; /* FIXME */ static struct atm_mpoa_qos *qos_head = NULL; -static struct timer_list mpc_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(mpc_timer, NULL, 0, 0); static struct mpoa_client *find_mpc_by_itfnum(int itf) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ea43dfb774e..8e37e71e34f 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1695,16 +1695,12 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) /* These two are safe on a single CPU system as only user tasks fiddle here */ if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; - res = put_user(amount, (int __user *)argp); + res = put_user(amount, (int __user *) argp); break; } case SIOCGSTAMP: - if (sk != NULL) { - res = sock_get_timestamp(sk, argp); - break; - } - res = -EINVAL; + res = sock_get_timestamp(sk, argp); break; case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */ @@ -1874,6 +1870,7 @@ static void ax25_info_stop(struct seq_file *seq, void *v) static int ax25_info_show(struct seq_file *seq, void *v) { ax25_cb *ax25 = v; + char buf[11]; int k; @@ -1885,13 +1882,13 @@ static int ax25_info_show(struct seq_file *seq, void *v) seq_printf(seq, "%8.8lx %s %s%s ", (long) ax25, ax25->ax25_dev == NULL? "???" : ax25->ax25_dev->dev->name, - ax2asc(&ax25->source_addr), + ax2asc(buf, &ax25->source_addr), ax25->iamdigi? "*":""); - seq_printf(seq, "%s", ax2asc(&ax25->dest_addr)); + seq_printf(seq, "%s", ax2asc(buf, &ax25->dest_addr)); for (k=0; (ax25->digipeat != NULL) && (k < ax25->digipeat->ndigi); k++) { seq_printf(seq, ",%s%s", - ax2asc(&ax25->digipeat->calls[k]), + ax2asc(buf, &ax25->digipeat->calls[k]), ax25->digipeat->repeated[k]? "*":""); } @@ -1950,24 +1947,24 @@ static struct net_proto_family ax25_family_ops = { }; static struct proto_ops ax25_proto_ops = { - .family = PF_AX25, - .owner = THIS_MODULE, - .release = ax25_release, - .bind = ax25_bind, - .connect = ax25_connect, - .socketpair = sock_no_socketpair, - .accept = ax25_accept, - .getname = ax25_getname, - .poll = datagram_poll, - .ioctl = ax25_ioctl, - .listen = ax25_listen, - .shutdown = ax25_shutdown, - .setsockopt = ax25_setsockopt, - .getsockopt = ax25_getsockopt, - .sendmsg = ax25_sendmsg, - .recvmsg = ax25_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, + .family = PF_AX25, + .owner = THIS_MODULE, + .release = ax25_release, + .bind = ax25_bind, + .connect = ax25_connect, + .socketpair = sock_no_socketpair, + .accept = ax25_accept, + .getname = ax25_getname, + .poll = datagram_poll, + .ioctl = ax25_ioctl, + .listen = ax25_listen, + .shutdown = ax25_shutdown, + .setsockopt = ax25_setsockopt, + .getsockopt = ax25_getsockopt, + .sendmsg = ax25_sendmsg, + .recvmsg = ax25_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, }; /* @@ -1983,7 +1980,7 @@ static struct notifier_block ax25_dev_notifier = { .notifier_call =ax25_device_event, }; -EXPORT_SYMBOL(ax25_encapsulate); +EXPORT_SYMBOL(ax25_hard_header); EXPORT_SYMBOL(ax25_rebuild_header); EXPORT_SYMBOL(ax25_findbyuid); EXPORT_SYMBOL(ax25_find_cb); diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index f4fa6dfb846..0164a155b8c 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -36,9 +36,8 @@ ax25_address null_ax25_address = {{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00}}; /* * ax25 -> ascii conversion */ -char *ax2asc(ax25_address *a) +char *ax2asc(char *buf, ax25_address *a) { - static char buf[11]; char c, *s; int n; @@ -68,37 +67,34 @@ char *ax2asc(ax25_address *a) /* * ascii -> ax25 conversion */ -ax25_address *asc2ax(char *callsign) +void asc2ax(ax25_address *addr, char *callsign) { - static ax25_address addr; char *s; int n; for (s = callsign, n = 0; n < 6; n++) { if (*s != '\0' && *s != '-') - addr.ax25_call[n] = *s++; + addr->ax25_call[n] = *s++; else - addr.ax25_call[n] = ' '; - addr.ax25_call[n] <<= 1; - addr.ax25_call[n] &= 0xFE; + addr->ax25_call[n] = ' '; + addr->ax25_call[n] <<= 1; + addr->ax25_call[n] &= 0xFE; } if (*s++ == '\0') { - addr.ax25_call[6] = 0x00; - return &addr; + addr->ax25_call[6] = 0x00; + return; } - addr.ax25_call[6] = *s++ - '0'; + addr->ax25_call[6] = *s++ - '0'; if (*s != '\0') { - addr.ax25_call[6] *= 10; - addr.ax25_call[6] += *s++ - '0'; + addr->ax25_call[6] *= 10; + addr->ax25_call[6] += *s++ - '0'; } - addr.ax25_call[6] <<= 1; - addr.ax25_call[6] &= 0x1E; - - return &addr; + addr->ax25_call[6] <<= 1; + addr->ax25_call[6] &= 0x1E; } /* diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index bba0173e2d6..d643dac3ecc 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -47,7 +47,7 @@ #ifdef CONFIG_INET -int ax25_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) +int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { unsigned char *buff; @@ -88,7 +88,7 @@ int ax25_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short *buff++ = AX25_P_ARP; break; default: - printk(KERN_ERR "AX.25: ax25_encapsulate - wrong protocol type 0x%2.2x\n", type); + printk(KERN_ERR "AX.25: ax25_hard_header - wrong protocol type 0x%2.2x\n", type); *buff++ = 0; break; } @@ -209,7 +209,7 @@ put: #else /* INET */ -int ax25_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) +int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { return -AX25_HEADER_LEN; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index c288526da4c..26b77d97222 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -298,6 +298,8 @@ static void ax25_rt_seq_stop(struct seq_file *seq, void *v) static int ax25_rt_seq_show(struct seq_file *seq, void *v) { + char buf[11]; + if (v == SEQ_START_TOKEN) seq_puts(seq, "callsign dev mode digipeaters\n"); else { @@ -308,7 +310,7 @@ static int ax25_rt_seq_show(struct seq_file *seq, void *v) if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0) callsign = "default"; else - callsign = ax2asc(&ax25_rt->callsign); + callsign = ax2asc(buf, &ax25_rt->callsign); seq_printf(seq, "%-9s %-4s", callsign, @@ -328,7 +330,8 @@ static int ax25_rt_seq_show(struct seq_file *seq, void *v) if (ax25_rt->digipeat != NULL) for (i = 0; i < ax25_rt->digipeat->ndigi; i++) - seq_printf(seq, " %s", ax2asc(&ax25_rt->digipeat->calls[i])); + seq_printf(seq, " %s", + ax2asc(buf, &ax25_rt->digipeat->calls[i])); seq_puts(seq, "\n"); } diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index a8b3822f3ee..d53cc861586 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -168,12 +168,14 @@ static void ax25_uid_seq_stop(struct seq_file *seq, void *v) static int ax25_uid_seq_show(struct seq_file *seq, void *v) { + char buf[11]; + if (v == SEQ_START_TOKEN) seq_printf(seq, "Policy: %d\n", ax25_uid_policy); else { struct ax25_uid_assoc *pt = v; - seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(&pt->call)); + seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(buf, &pt->call)); } return 0; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d6da0939216..b61b4e8e36f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -558,6 +558,35 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } +/* Extended Inquiry Result */ +static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct inquiry_data data; + struct extended_inquiry_info *info = (struct extended_inquiry_info *) (skb->data + 1); + int num_rsp = *((__u8 *) skb->data); + + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); + + if (!num_rsp) + return; + + hci_dev_lock(hdev); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + + hci_dev_unlock(hdev); +} + /* Connect Request */ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { @@ -940,6 +969,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_inquiry_result_with_rssi_evt(hdev, skb); break; + case HCI_EV_EXTENDED_INQUIRY_RESULT: + hci_extended_inquiry_result_evt(hdev, skb); + break; + case HCI_EV_CONN_REQUEST: hci_conn_request_evt(hdev, skb); break; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 90e19eb6d3c..f49e7e938bf 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -363,6 +363,11 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr goto done; } + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + goto done; + } + write_lock_bh(&rfcomm_sk_list.lock); if (sa->rc_channel && __rfcomm_get_sock_by_addr(sa->rc_channel, &sa->rc_bdaddr)) { @@ -393,13 +398,17 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_rc)) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) - return -EBADFD; + lock_sock(sk); - if (sk->sk_type != SOCK_STREAM) - return -EINVAL; + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } - lock_sock(sk); + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + goto done; + } sk->sk_state = BT_CONNECT; bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); @@ -410,6 +419,7 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a err = bt_sock_wait_state(sk, BT_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); +done: release_sock(sk); return err; } @@ -428,6 +438,11 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) goto done; } + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + goto done; + } + if (!rfcomm_pi(sk)->channel) { bdaddr_t *src = &bt_sk(sk)->src; u8 channel; @@ -472,6 +487,11 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f goto done; } + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + goto done; + } + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 069253f830c..2d24fb400e0 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -31,7 +31,8 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sk_buff *skb) { - if (skb->len > skb->dev->mtu) + /* drop mtu oversized packets except tso */ + if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) kfree_skb(skb); else { #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2d52fee63a8..d8e36b77512 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -214,9 +214,11 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) .tos = RT_TOS(iph->tos)} }, .proto = 0}; if (!ip_route_output_key(&rt, &fl)) { - /* Bridged-and-DNAT'ed traffic doesn't - * require ip_forwarding. */ - if (((struct dst_entry *)rt)->dev == dev) { + /* - Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. + * - Deal with redirected traffic. */ + if (((struct dst_entry *)rt)->dev == dev || + rt->rt_type == RTN_LOCAL) { skb->dst = (struct dst_entry *)rt; goto bridged_dnat; } diff --git a/net/compat.c b/net/compat.c index d99ab969589..e593dace2fd 100644 --- a/net/compat.c +++ b/net/compat.c @@ -135,13 +135,14 @@ static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *ms * thus placement) of cmsg headers and length are different for * 32-bit apps. -DaveM */ -int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, +int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, unsigned char *stackbuf, int stackbuf_size) { struct compat_cmsghdr __user *ucmsg; struct cmsghdr *kcmsg, *kcmsg_base; compat_size_t ucmlen; __kernel_size_t kcmlen, tmp; + int err = -EFAULT; kcmlen = 0; kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; @@ -156,6 +157,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) + CMSG_ALIGN(sizeof(struct cmsghdr))); + tmp = CMSG_ALIGN(tmp); kcmlen += tmp; ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } @@ -167,30 +169,34 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, * until we have successfully copied over all of the data * from the user. */ - if(kcmlen > stackbuf_size) - kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL); - if(kcmsg == NULL) + if (kcmlen > stackbuf_size) + kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL); + if (kcmsg == NULL) return -ENOBUFS; /* Now copy them over neatly. */ memset(kcmsg, 0, kcmlen); ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); while(ucmsg != NULL) { - __get_user(ucmlen, &ucmsg->cmsg_len); + if (__get_user(ucmlen, &ucmsg->cmsg_len)) + goto Efault; + if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg)) + goto Einval; tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) + CMSG_ALIGN(sizeof(struct cmsghdr))); + if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp)) + goto Einval; kcmsg->cmsg_len = tmp; - __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level); - __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type); - - /* Copy over the data. */ - if(copy_from_user(CMSG_DATA(kcmsg), - CMSG_COMPAT_DATA(ucmsg), - (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))))) - goto out_free_efault; + tmp = CMSG_ALIGN(tmp); + if (__get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level) || + __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type) || + copy_from_user(CMSG_DATA(kcmsg), + CMSG_COMPAT_DATA(ucmsg), + (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))))) + goto Efault; /* Advance. */ - kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp)); + kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } @@ -199,10 +205,12 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, kmsg->msg_controllen = kcmlen; return 0; -out_free_efault: - if(kcmsg_base != (struct cmsghdr *)stackbuf) - kfree(kcmsg_base); - return -EFAULT; +Einval: + err = -EINVAL; +Efault: + if (kcmsg_base != (struct cmsghdr *)stackbuf) + sock_kfree_s(sk, kcmsg_base, kcmlen); + return err; } int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) diff --git a/net/core/dst.c b/net/core/dst.c index 334790da9f1..470c05bc4cb 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -39,8 +39,7 @@ static unsigned long dst_gc_timer_inc = DST_GC_MAX; static void dst_run_gc(unsigned long); static void ___dst_free(struct dst_entry * dst); -static struct timer_list dst_gc_timer = - TIMER_INITIALIZER(dst_run_gc, DST_GC_MIN, 0); +static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0); static void dst_run_gc(unsigned long dummy) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 289c1b5a8e4..404b761e82c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -695,7 +695,7 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return ret; } -static int ethtool_get_perm_addr(struct net_device *dev, void *useraddr) +static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) { struct ethtool_perm_addr epaddr; u8 *data; diff --git a/net/core/filter.c b/net/core/filter.c index cd91a24f972..079c2edff78 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -182,7 +182,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) A = ntohl(*(u32 *)ptr); continue; } - return 0; + break; case BPF_LD|BPF_H|BPF_ABS: k = fentry->k; load_h: @@ -191,7 +191,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) A = ntohs(*(u16 *)ptr); continue; } - return 0; + break; case BPF_LD|BPF_B|BPF_ABS: k = fentry->k; load_b: @@ -200,7 +200,7 @@ load_b: A = *(u8 *)ptr; continue; } - return 0; + break; case BPF_LD|BPF_W|BPF_LEN: A = skb->len; continue; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a1a9a7abff5..5265dfd6992 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -645,10 +645,10 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_flags = 0; npinfo->rx_np = NULL; - npinfo->poll_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&npinfo->poll_lock); npinfo->poll_owner = -1; npinfo->tries = MAX_RETRIES; - npinfo->rx_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&npinfo->rx_lock); } else npinfo = ndev->npinfo; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8eb083b6041..ef430b1e8e4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -503,7 +503,7 @@ static int pg_delay_d = 0; static int pg_clone_skb_d = 0; static int debug = 0; -static spinlock_t _thread_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(_thread_lock); static struct pktgen_thread *pktgen_threads = NULL; static char module_fname[128]; @@ -1452,8 +1452,7 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer, thread_lock(); t->control |= T_REMDEV; thread_unlock(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/8); /* Propagate thread->control */ + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ ret = count; sprintf(pg_result, "OK: rem_device_all"); goto out; @@ -1716,10 +1715,9 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); while (now < spin_until_us) { /* TODO: optimise sleeping behavior */ - if (spin_until_us - now > (1000000/HZ)+1) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } else if (spin_until_us - now > 100) { + if (spin_until_us - now > jiffies_to_usecs(1)+1) + schedule_timeout_interruptible(1); + else if (spin_until_us - now > 100) { do_softirq(); if (!pkt_dev->running) return; @@ -2449,8 +2447,7 @@ static void pktgen_run_all_threads(void) } thread_unlock(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/8); /* Propagate thread->control */ + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ pktgen_wait_all_threads_run(); } diff --git a/net/core/sock.c b/net/core/sock.c index ccd10fd6568..ac63b56e23b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -341,11 +341,11 @@ set_rcvbuf: sock_reset_flag(sk, SOCK_LINGER); else { #if (BITS_PER_LONG == 32) - if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) + if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; else #endif - sk->sk_lingertime = ling.l_linger * HZ; + sk->sk_lingertime = (unsigned int)ling.l_linger * HZ; sock_set_flag(sk, SOCK_LINGER); } break; @@ -1529,6 +1529,8 @@ EXPORT_SYMBOL(proto_register); void proto_unregister(struct proto *prot) { write_lock(&proto_list_lock); + list_del(&prot->node); + write_unlock(&proto_list_lock); if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); @@ -1550,9 +1552,6 @@ void proto_unregister(struct proto *prot) kfree(name); prot->twsk_slab = NULL; } - - list_del(&prot->node); - write_unlock(&proto_list_lock); } EXPORT_SYMBOL(proto_unregister); @@ -1719,8 +1718,8 @@ EXPORT_SYMBOL(sock_wfree); EXPORT_SYMBOL(sock_wmalloc); EXPORT_SYMBOL(sock_i_uid); EXPORT_SYMBOL(sock_i_ino); -#ifdef CONFIG_SYSCTL EXPORT_SYMBOL(sysctl_optmem_max); +#ifdef CONFIG_SYSCTL EXPORT_SYMBOL(sysctl_rmem_max); EXPORT_SYMBOL(sysctl_wmem_max); #endif diff --git a/net/core/wireless.c b/net/core/wireless.c index 5caae2399f3..d17f1583ea3 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -58,6 +58,13 @@ * o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus * Based on patch from Pavel Roskin <proski@gnu.org> : * o Fix kernel data leak to user space in private handler handling + * + * v7 - 18.3.05 - Jean II + * o Remove (struct iw_point *)->pointer from events and streams + * o Remove spy_offset from struct iw_handler_def + * o Start deprecating dev->get_wireless_stats, output a warning + * o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless + * o Don't loose INVALID/DBM flags when clearing UPDATED flags (iwstats) */ /***************************** INCLUDES *****************************/ @@ -446,10 +453,14 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev) (dev->wireless_handlers->get_wireless_stats != NULL)) return dev->wireless_handlers->get_wireless_stats(dev); - /* Old location, will be phased out in next WE */ - return (dev->get_wireless_stats ? - dev->get_wireless_stats(dev) : - (struct iw_statistics *) NULL); + /* Old location, field to be removed in next WE */ + if(dev->get_wireless_stats) { + printk(KERN_DEBUG "%s (WE) : Driver using old /proc/net/wireless support, please fix driver !\n", + dev->name); + return dev->get_wireless_stats(dev); + } + /* Not found */ + return (struct iw_statistics *) NULL; } /* ---------------------------------------------------------------- */ @@ -541,16 +552,18 @@ static __inline__ void wireless_seq_printf_stats(struct seq_file *seq, dev->name, stats->status, stats->qual.qual, stats->qual.updated & IW_QUAL_QUAL_UPDATED ? '.' : ' ', - ((__u8) stats->qual.level), + ((__s32) stats->qual.level) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), stats->qual.updated & IW_QUAL_LEVEL_UPDATED ? '.' : ' ', - ((__u8) stats->qual.noise), + ((__s32) stats->qual.noise) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), stats->qual.updated & IW_QUAL_NOISE_UPDATED ? '.' : ' ', stats->discard.nwid, stats->discard.code, stats->discard.fragment, stats->discard.retries, stats->discard.misc, stats->miss.beacon); - stats->qual.updated = 0; + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; } } @@ -593,6 +606,7 @@ static struct file_operations wireless_seq_fops = { int __init wireless_proc_init(void) { + /* Create /proc/net/wireless entry */ if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; @@ -627,9 +641,9 @@ static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr) sizeof(struct iw_statistics))) return -EFAULT; - /* Check if we need to clear the update flag */ + /* Check if we need to clear the updated flag */ if(wrq->u.data.flags != 0) - stats->qual.updated = 0; + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; return 0; } else return -EOPNOTSUPP; @@ -1161,10 +1175,11 @@ void wireless_send_event(struct net_device * dev, struct iw_event *event; /* Mallocated whole event */ int event_len; /* Its size */ int hdr_len; /* Size of the event header */ + int wrqu_off = 0; /* Offset in wrqu */ /* Don't "optimise" the following variable, it will crash */ unsigned cmd_index; /* *MUST* be unsigned */ - /* Get the description of the IOCTL */ + /* Get the description of the Event */ if(cmd <= SIOCIWLAST) { cmd_index = cmd - SIOCIWFIRST; if(cmd_index < standard_ioctl_num) @@ -1207,6 +1222,8 @@ void wireless_send_event(struct net_device * dev, /* Calculate extra_len - extra is NULL for restricted events */ if(extra != NULL) extra_len = wrqu->data.length * descr->token_size; + /* Always at an offset in wrqu */ + wrqu_off = IW_EV_POINT_OFF; #ifdef WE_EVENT_DEBUG printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len); #endif /* WE_EVENT_DEBUG */ @@ -1217,7 +1234,7 @@ void wireless_send_event(struct net_device * dev, event_len = hdr_len + extra_len; #ifdef WE_EVENT_DEBUG - printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len); + printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len); #endif /* WE_EVENT_DEBUG */ /* Create temporary buffer to hold the event */ @@ -1228,7 +1245,7 @@ void wireless_send_event(struct net_device * dev, /* Fill event */ event->len = event_len; event->cmd = cmd; - memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN); + memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); if(extra != NULL) memcpy(((char *) event) + hdr_len, extra, extra_len); @@ -1249,7 +1266,7 @@ void wireless_send_event(struct net_device * dev, * Now, the driver can delegate this task to Wireless Extensions. * It needs to use those standard spy iw_handler in struct iw_handler_def, * push data to us via wireless_spy_update() and include struct iw_spy_data - * in its private part (and advertise it in iw_handler_def->spy_offset). + * in its private part (and export it in net_device->wireless_data->spy_data). * One of the main advantage of centralising spy support here is that * it becomes much easier to improve and extend it without having to touch * the drivers. One example is the addition of the Spy-Threshold events. @@ -1266,10 +1283,7 @@ static inline struct iw_spy_data * get_spydata(struct net_device *dev) /* This is the new way */ if(dev->wireless_data) return(dev->wireless_data->spy_data); - - /* This is the old way. Doesn't work for multi-headed drivers. - * It will be removed in the next version of WE. */ - return (dev->priv + dev->wireless_handlers->spy_offset); + return NULL; } /*------------------------------------------------------------------*/ @@ -1284,10 +1298,6 @@ int iw_handler_set_spy(struct net_device * dev, struct iw_spy_data * spydata = get_spydata(dev); struct sockaddr * address = (struct sockaddr *) extra; - if(!dev->wireless_data) - /* Help user know that driver needs updating */ - printk(KERN_DEBUG "%s (WE) : Driver using old/buggy spy support, please fix driver !\n", - dev->name); /* Make sure driver is not buggy or using the old API */ if(!spydata) return -EOPNOTSUPP; @@ -1318,7 +1328,7 @@ int iw_handler_set_spy(struct net_device * dev, sizeof(struct iw_quality) * IW_MAX_SPY); #ifdef WE_SPY_DEBUG - printk(KERN_DEBUG "iw_handler_set_spy() : offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length); + printk(KERN_DEBUG "iw_handler_set_spy() : wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length); for (i = 0; i < wrqu->data.length; i++) printk(KERN_DEBUG "%02X:%02X:%02X:%02X:%02X:%02X \n", @@ -1371,7 +1381,7 @@ int iw_handler_get_spy(struct net_device * dev, sizeof(struct iw_quality) * spydata->spy_number); /* Reset updated flags. */ for(i = 0; i < spydata->spy_number; i++) - spydata->spy_stat[i].updated = 0; + spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; return 0; } @@ -1486,7 +1496,7 @@ void wireless_spy_update(struct net_device * dev, return; #ifdef WE_SPY_DEBUG - printk(KERN_DEBUG "wireless_spy_update() : offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spydata, address[0], address[1], address[2], address[3], address[4], address[5]); + printk(KERN_DEBUG "wireless_spy_update() : wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]); #endif /* WE_SPY_DEBUG */ /* Update all records that match */ diff --git a/net/dccp/Makefile b/net/dccp/Makefile index fb97bb04245..344a8da153f 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o +dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o + obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o dccp_diag-y := diag.o diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c new file mode 100644 index 00000000000..6530283eafc --- /dev/null +++ b/net/dccp/ackvec.c @@ -0,0 +1,419 @@ +/* + * net/dccp/ackvec.c + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License; + */ + +#include "ackvec.h" +#include "dccp.h" + +#include <linux/dccp.h> +#include <linux/skbuff.h> + +#include <net/sock.h> + +int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + int len = av->dccpav_vec_len + 2; + struct timeval now; + u32 elapsed_time; + unsigned char *to, *from; + + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; + + if (elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return -1; + + /* + * XXX: now we have just one ack vector sent record, so + * we have to wait for it to be cleared. + * + * Of course this is not acceptable, but this is just for + * basic testing now. + */ + if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1) + return -1; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = len; + + len = av->dccpav_vec_len; + from = av->dccpav_buf + av->dccpav_buf_head; + + /* Check if buf_head wraps */ + if (av->dccpav_buf_head + len > av->dccpav_vec_len) { + const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + from = av->dccpav_buf; + } + + memcpy(to, from, len); + /* + * From draft-ietf-dccp-spec-11.txt: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will + * equal buf_nonce. + * + * This implemention uses just one ack record for now. + */ + av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + av->dccpav_ack_ptr = av->dccpav_buf_head; + av->dccpav_ack_ackno = av->dccpav_buf_ackno; + av->dccpav_ack_nonce = av->dccpav_buf_nonce; + av->dccpav_sent_len = av->dccpav_vec_len; + + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu\n", + debug_prefix, av->dccpav_sent_len, + (unsigned long long)av->dccpav_ack_seqno, + (unsigned long long)av->dccpav_ack_ackno); + return -1; +} + +struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, + const unsigned int __nocast priority) +{ + struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); + + if (av != NULL) { + av->dccpav_buf_len = len; + av->dccpav_buf_head = + av->dccpav_buf_tail = av->dccpav_buf_len - 1; + av->dccpav_buf_ackno = + av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU; + av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; + av->dccpav_ack_ptr = 0; + av->dccpav_time.tv_sec = 0; + av->dccpav_time.tv_usec = 0; + av->dccpav_sent_len = av->dccpav_vec_len = 0; + } + + return av; +} + +void dccp_ackvec_free(struct dccp_ackvec *av) +{ + kfree(av); +} + +static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, + const unsigned int index) +{ + return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; +} + +static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, + const unsigned int index) +{ + return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; +} + +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives. + */ +static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, + const unsigned int packets, + const unsigned char state) +{ + unsigned int gap; + signed long new_head; + + if (av->dccpav_vec_len + packets > av->dccpav_buf_len) + return -ENOBUFS; + + gap = packets - 1; + new_head = av->dccpav_buf_head - packets; + + if (new_head < 0) { + if (gap > 0) { + memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, + gap + new_head + 1); + gap = -new_head; + } + new_head += av->dccpav_buf_len; + } + + av->dccpav_buf_head = new_head; + + if (gap > 0) + memset(av->dccpav_buf + av->dccpav_buf_head + 1, + DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); + + av->dccpav_buf[av->dccpav_buf_head] = state; + av->dccpav_vec_len += packets; + return 0; +} + +/* + * Implements the draft-ietf-dccp-spec-11.txt Appendix A + */ +int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in dccpav_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A: + * + * Of course, the circular buffer may overflow, either when the + * HC-Sender is sending data at a very high rate, when the + * HC-Receiver's acknowledgements are not reaching the HC-Sender, + * or when the HC-Sender is forgetting to acknowledge those acks + * (so the HC-Receiver is unable to clean up old state). In this + * case, the HC-Receiver should either compress the buffer (by + * increasing run lengths when possible), transfer its state to + * a larger buffer, or, as a last resort, drop all received + * packets, without processing them whatsoever, until its buffer + * shrinks again. + */ + + /* See if this is the first ackno being inserted */ + if (av->dccpav_vec_len == 0) { + av->dccpav_buf[av->dccpav_buf_head] = state; + av->dccpav_vec_len = 1; + } else if (after48(ackno, av->dccpav_buf_ackno)) { + const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, + ackno); + + /* + * Look if the state of this packet is the same as the + * previous ackno and if so if we can bump the head len. + */ + if (delta == 1 && + dccp_ackvec_state(av, av->dccpav_buf_head) == state && + (dccp_ackvec_len(av, av->dccpav_buf_head) < + DCCP_ACKVEC_LEN_MASK)) + av->dccpav_buf[av->dccpav_buf_head]++; + else if (dccp_ackvec_set_buf_head_state(av, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S arrives, and + * S <= buf_ackno, the HC-Receiver will scan the table + * for the byte corresponding to S. (Indexing structures + * could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); + unsigned int index = av->dccpav_buf_head; + + while (1) { + const u8 len = dccp_ackvec_len(av, index); + const u8 state = dccp_ackvec_state(av, index); + /* + * valid packets not yet in dccpav_buf have a reserved + * entry, with a len equal to 0. + */ + if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our + reserved seat! */ + dccp_pr_debug("Found %llu reserved seat!\n", + (unsigned long long)ackno); + av->dccpav_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == av->dccpav_buf_len) + index = 0; + } + } + + av->dccpav_buf_ackno = ackno; + dccp_timestamp(sk, &av->dccpav_time); +out: + dccp_pr_debug(""); + return 0; + +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost " + "packet: %llu\n", (unsigned long long)ackno); + return -EILSEQ; +} + +#ifdef CONFIG_IP_DCCP_DEBUG +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +{ + if (!dccp_debug) + return; + + printk("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long)ackno); + + while (len--) { + const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + + printk("%d,%d|", state, rl); + ++vector; + } + + printk("\n"); +} + +void dccp_ackvec_print(const struct dccp_ackvec *av) +{ + dccp_ackvector_print(av->dccpav_buf_ackno, + av->dccpav_buf + av->dccpav_buf_head, + av->dccpav_vec_len); +} +#endif + +static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) +{ + /* + * As we're keeping track of the ack vector size (dccpav_vec_len) and + * the sent ack vector size (dccpav_sent_len) we don't need + * dccpav_buf_tail at all, but keep this code here as in the future + * we'll implement a vector of ack records, as suggested in + * draft-ietf-dccp-spec-11.txt Appendix A. -acme + */ +#if 0 + av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; + if (av->dccpav_buf_tail >= av->dccpav_vec_len) + av->dccpav_buf_tail -= av->dccpav_vec_len; +#endif + av->dccpav_vec_len -= av->dccpav_sent_len; +} + +void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, + const u64 ackno) +{ + /* Check if we actually sent an ACK vector */ + if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + + if (ackno == av->dccpav_ack_seqno) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu, ACKED!\n", + debug_prefix, 1, + (unsigned long long)av->dccpav_ack_seqno, + (unsigned long long)av->dccpav_ack_ackno); + dccp_ackvec_trow_away_ack_record(av); + av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; + } +} + +static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, + struct sock *sk, u64 ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + + /* Check if we actually sent an ACK vector */ + if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + /* + * We're in the receiver half connection, so if the received an ACK + * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're + * not interested. + * + * Extra explanation with example: + * + * if we received an ACK vector with ackno 50, it can only be acking + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). + */ + /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */ + if (before48(ackno, av->dccpav_ack_seqno)) { + /* dccp_pr_debug_cat("yes\n"); */ + return; + } + /* dccp_pr_debug_cat("no\n"); */ + + i = len; + while (i--) { + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + + /* + * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, + * av->dccpav_ack_seqno, ackno); + */ + if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) { + const u8 state = (*vector & + DCCP_ACKVEC_STATE_MASK) >> 6; + /* dccp_pr_debug_cat("yes\n"); */ + + if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = + dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK vector 0, len=%d, " + "ack_seqno=%llu, ack_ackno=%llu, " + "ACKED!\n", + debug_prefix, len, + (unsigned long long) + av->dccpav_ack_seqno, + (unsigned long long) + av->dccpav_ack_ackno); + dccp_ackvec_trow_away_ack_record(av); + } + /* + * If dccpav_ack_seqno was not received, no problem + * we'll send another ACK vector. + */ + av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; + break; + } + /* dccp_pr_debug_cat("no\n"); */ + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + ++vector; + } +} + +int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len) +{ + if (len > DCCP_MAX_ACKVEC_LEN) + return -1; + + /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ + dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + len, value); + return 0; +} diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h new file mode 100644 index 00000000000..8ca51c9191f --- /dev/null +++ b/net/dccp/ackvec.h @@ -0,0 +1,133 @@ +#ifndef _ACKVEC_H +#define _ACKVEC_H +/* + * net/dccp/ackvec.h + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/config.h> +#include <linux/compiler.h> +#include <linux/time.h> +#include <linux/types.h> + +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACKVEC_LEN 253 + +#define DCCP_ACKVEC_STATE_RECEIVED 0 +#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) + +#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ + +/** struct dccp_ackvec - ack vector + * + * This data structure is the one defined in the DCCP draft + * Appendix A. + * + * @dccpav_buf_head - circular buffer head + * @dccpav_buf_tail - circular buffer tail + * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the + * buffer (i.e. %dccpav_buf_head) + * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked + * by the buffer with State 0 + * + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @dccpav_ack_seqno - the Sequence Number used for the packet + * (HC-Receiver seqno) + * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement. + * @dccpav_ack_ackno - the Acknowledgement Number used for the packet + * (HC-Sender seqno) + * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. + * + * @dccpav_buf_len - circular buffer length + * @dccpav_time - the time in usecs + * @dccpav_buf - circular buffer of acknowledgeable packets + */ +struct dccp_ackvec { + unsigned int dccpav_buf_head; + unsigned int dccpav_buf_tail; + u64 dccpav_buf_ackno; + u64 dccpav_ack_seqno; + u64 dccpav_ack_ackno; + unsigned int dccpav_ack_ptr; + unsigned int dccpav_sent_len; + unsigned int dccpav_vec_len; + unsigned int dccpav_buf_len; + struct timeval dccpav_time; + u8 dccpav_buf_nonce; + u8 dccpav_ack_nonce; + u8 dccpav_buf[0]; +}; + +struct sock; +struct sk_buff; + +#ifdef CONFIG_IP_DCCP_ACKVEC +extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, + const unsigned int __nocast priority); +extern void dccp_ackvec_free(struct dccp_ackvec *av); + +extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state); + +extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno); +extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len); + +extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return av->dccpav_sent_len != av->dccpav_vec_len; +} +#else /* CONFIG_IP_DCCP_ACKVEC */ +static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, + const unsigned int __nocast priority) +{ + return NULL; +} + +static inline void dccp_ackvec_free(struct dccp_ackvec *av) +{ +} + +static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + return -1; +} + +static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno) +{ +} + +static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len) +{ + return -1; +} + +static inline int dccp_insert_option_ackvec(const struct sock *sk, + const struct sk_buff *skb) +{ + return -1; +} + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return 0; +} +#endif /* CONFIG_IP_DCCP_ACKVEC */ +#endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 962f1e9e2f7..21e55142dcd 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -14,6 +14,7 @@ */ #include <net/sock.h> +#include <linux/compiler.h> #include <linux/dccp.h> #include <linux/list.h> #include <linux/module.h> @@ -54,6 +55,14 @@ struct ccid { struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, struct tcp_info *info); + int (*ccid_hc_rx_getsockopt)(struct sock *sk, + const int optname, int len, + u32 __user *optval, + int __user *optlen); + int (*ccid_hc_tx_getsockopt)(struct sock *sk, + const int optname, int len, + u32 __user *optval, + int __user *optlen); }; extern int ccid_register(struct ccid *ccid); @@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, if (ccid->ccid_hc_tx_get_info != NULL) ccid->ccid_hc_tx_get_info(sk, info); } + +static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, + const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + int rc = -ENOPROTOOPT; + if (ccid->ccid_hc_rx_getsockopt != NULL) + rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +} + +static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, + const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + int rc = -ENOPROTOOPT; + if (ccid->ccid_hc_tx_getsockopt != NULL) + rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +} #endif /* _CCID_H */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 7bf3b3a91e9..aa68e0ab274 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -43,12 +43,22 @@ #include "ccid3.h" /* - * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. + * Reason for maths here is to avoid 32 bit overflow when a is big. + * With this we get close to the limit. */ static inline u32 usecs_div(const u32 a, const u32 b) { - const u32 tmp = a * (USEC_PER_SEC / 10); - return b > 20 ? tmp / (b / 10) : tmp; + const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 : + a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 : + a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 : + a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 : + a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 : + a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 : + a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 : + a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 : + 100000; + const u32 tmp = a * (USEC_PER_SEC / div); + return (b >= 2 * div) ? tmp / (b / div) : tmp; } static int ccid3_debug; @@ -68,13 +78,11 @@ static struct dccp_li_hist *ccid3_li_hist; static int ccid3_init(struct sock *sk) { - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); return 0; } static void ccid3_exit(struct sock *sk) { - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); } /* TFRC sender states */ @@ -102,8 +110,7 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", @@ -144,8 +151,7 @@ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) */ static void ccid3_hc_tx_update_x(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); /* To avoid large error in calcX */ if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { @@ -159,7 +165,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk) } else { struct timeval now; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, @@ -174,9 +180,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk) static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - struct dccp_sock *dp = dccp_sk(sk); unsigned long next_tmout = 0; - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -274,20 +279,20 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, int len) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct dccp_tx_hist_entry *new_packet; struct timeval now; long delay; int rc = -ENOTCONN; - /* Check if pure ACK or Terminating*/ + BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); + /* Check if pure ACK or Terminating*/ /* * XXX: We only call this function for DATA and DATAACK, on, these * packets can have zero length, but why the comment about "pure ACK"? */ - if (hctx == NULL || len == 0 || - hctx->ccid3hctx_state == TFRC_SSTATE_TERM) + if (unlikely(len == 0)) goto out; /* See if last packet allocated was not sent */ @@ -297,23 +302,20 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, SLAB_ATOMIC); rc = -ENOBUFS; - if (new_packet == NULL) { - ccid3_pr_debug("%s, sk=%p, not enough mem to add " - "to history, send refused\n", - dccp_role(sk), sk); + if (unlikely(new_packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, not enough " + "mem to add to history, send refused\n", + __FUNCTION__, dccp_role(sk), sk); goto out; } dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); } - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: - ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", - dccp_role(sk), sk, dp->dccps_gss); - hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, @@ -321,7 +323,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; + hctx->ccid3hctx_t_ipi = TFRC_INITIAL_IPI; /* Set nominal send time for initial packet */ hctx->ccid3hctx_t_nom = now; @@ -334,7 +336,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, case TFRC_SSTATE_FBACK: delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); - ccid3_pr_debug("send_packet delay=%ld\n", delay); delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ rc = delay > 0 ? delay : 0; @@ -348,29 +349,25 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, } /* Can we send? if so add options and add to packet history */ - if (rc == 0) + if (rc == 0) { + dp->dccps_hc_tx_insert_options = 1; new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + } out: return rc; } static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; - BUG_ON(hctx == NULL); - - if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { - ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", - dccp_role(sk), sk); - return; - } + BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); - do_gettimeofday(&now); + dccp_timestamp(sk, &now); /* check if we have sent a data packet */ if (len > 0) { @@ -378,14 +375,14 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) struct dccp_tx_hist_entry *packet; packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); - if (packet == NULL) { - printk(KERN_CRIT "%s: packet doesn't exists in " - "history!\n", __FUNCTION__); + if (unlikely(packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: packet doesn't " + "exists in history!\n", __FUNCTION__); return; } - if (packet->dccphtx_sent) { - printk(KERN_CRIT "%s: no unsent packet in history!\n", - __FUNCTION__); + if (unlikely(packet->dccphtx_sent)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: no unsent packet in " + "history!\n", __FUNCTION__); return; } packet->dccphtx_tstamp = now; @@ -445,24 +442,18 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; + struct timeval now; unsigned long next_tmout; u32 t_elapsed; u32 pinv; u32 x_recv; u32 r_sample; - if (hctx == NULL) - return; - - if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { - ccid3_pr_debug("%s, sk=%p, received a packet when " - "terminating!\n", dccp_role(sk), sk); - return; - } + BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); /* we are only interested in ACKs */ if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || @@ -471,7 +462,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) opt_recv = &hctx->ccid3hctx_options_received; - t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; x_recv = opt_recv->ccid3or_receive_rate; pinv = opt_recv->ccid3or_loss_event_rate; @@ -486,19 +477,24 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* get t_recvdata from history */ packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, DCCP_SKB_CB(skb)->dccpd_ack_seq); - if (packet == NULL) { - ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't " - "exist in history!\n", - dccp_role(sk), sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + if (unlikely(packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, seqno " + "%llu(%s) does't exist in history!\n", + __FUNCTION__, dccp_role(sk), sk, + (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); return; } /* Update RTT */ - r_sample = timeval_now_delta(&packet->dccphtx_tstamp); - /* FIXME: */ - // r_sample -= usecs_to_jiffies(t_elapsed * 10); + dccp_timestamp(sk, &now); + r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); + if (unlikely(r_sample <= t_elapsed)) + LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, " + "t_elapsed=%uus\n", + __FUNCTION__, r_sample, t_elapsed); + else + r_sample -= t_elapsed; /* Update RTT estimate by * If (No feedback recv) @@ -591,11 +587,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + + BUG_ON(hctx == NULL); - if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || - sk->sk_state == DCCP_PARTOPEN)) + if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return; DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; @@ -606,12 +602,11 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, unsigned char *value) { int rc = 0; - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; - if (hctx == NULL) - return 0; + BUG_ON(hctx == NULL); opt_recv = &hctx->ccid3hctx_options_received; @@ -625,10 +620,10 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, switch (option) { case TFRC_OPT_LOSS_EVENT_RATE: - if (len != 4) { - ccid3_pr_debug("%s, sk=%p, invalid len for " - "TFRC_OPT_LOSS_EVENT_RATE\n", - dccp_role(sk), sk); + if (unlikely(len != 4)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid " + "len for TFRC_OPT_LOSS_EVENT_RATE\n", + __FUNCTION__, dccp_role(sk), sk); rc = -EINVAL; } else { opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); @@ -646,10 +641,10 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, opt_recv->ccid3or_loss_intervals_len); break; case TFRC_OPT_RECEIVE_RATE: - if (len != 4) { - ccid3_pr_debug("%s, sk=%p, invalid len for " - "TFRC_OPT_RECEIVE_RATE\n", - dccp_role(sk), sk); + if (unlikely(len != 4)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid " + "len for TFRC_OPT_RECEIVE_RATE\n", + __FUNCTION__, dccp_role(sk), sk); rc = -EINVAL; } else { opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); @@ -668,13 +663,11 @@ static int ccid3_hc_tx_init(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx; - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - - hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), - gfp_any()); - if (hctx == NULL) + dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + if (dp->dccps_hc_tx_ccid_private == NULL) return -ENOMEM; + hctx = ccid3_hc_tx_sk(sk); memset(hctx, 0, sizeof(*hctx)); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && @@ -696,9 +689,8 @@ static int ccid3_hc_tx_init(struct sock *sk) static void ccid3_hc_tx_exit(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); BUG_ON(hctx == NULL); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); @@ -738,8 +730,7 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", @@ -751,14 +742,14 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, static void ccid3_hc_rx_send_feedback(struct sock *sk) { + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *packet; struct timeval now; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: @@ -767,11 +758,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) case TFRC_RSTATE_DATA: { const u32 delta = timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_feedback); - - hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * - USEC_PER_SEC); - if (likely(delta > 1)) - hcrx->ccid3hcrx_x_recv /= delta; + hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, + delta); } break; default: @@ -782,10 +770,10 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); - if (packet == NULL) { - printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", - __FUNCTION__, dccp_role(sk), sk); - dump_stack(); + if (unlikely(packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, no data packet " + "in history!\n", + __FUNCTION__, dccp_role(sk), sk); return; } @@ -801,17 +789,18 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_pinv = ~0; else hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); } static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); u32 x_recv, pinv; - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || - sk->sk_state == DCCP_PARTOPEN)) + BUG_ON(hcrx == NULL); + + if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return; DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; @@ -837,8 +826,7 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; struct timeval tstamp = { 0, }; @@ -869,17 +857,17 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } } - if (step == 0) { - printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no " - "data packets!\n", - __FUNCTION__, dccp_role(sk), sk); + if (unlikely(step == 0)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, packet history " + "contains no data packets!\n", + __FUNCTION__, dccp_role(sk), sk); return ~0; } - if (interval == 0) { - ccid3_pr_debug("%s, sk=%p, Could not find a win_count " - "interval > 0. Defaulting to 1\n", - dccp_role(sk), sk); + if (unlikely(interval == 0)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Could not find a " + "win_count interval > 0. Defaulting to 1\n", + __FUNCTION__, dccp_role(sk), sk); interval = 1; } found: @@ -889,10 +877,9 @@ found: if (rtt == 0) rtt = 1; - delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); - x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; - if (likely(delta > 1)) - x_recv /= delta; + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); + x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); @@ -911,8 +898,7 @@ found: static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); if (seq_loss != DCCP_MAX_SEQNO + 1 && list_empty(&hcrx->ccid3hcrx_li_hist)) { @@ -924,14 +910,14 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) if (li_tail == NULL) return; li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); - } - /* FIXME: find end of interval */ + } else + LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of " + "interval\n", __FUNCTION__); } static void ccid3_hc_rx_detect_loss(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); u8 win_loss; const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, @@ -942,22 +928,19 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; - u32 p_prev; + u32 p_prev, r_sample, t_elapsed; int ins; - if (hcrx == NULL) - return; - - BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || + BUG_ON(hcrx == NULL || + !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); - opt_recv = &dp->dccps_options_received; + opt_recv = &dccp_sk(sk)->dccps_options_received; switch (DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_ACK: @@ -967,10 +950,24 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (opt_recv->dccpor_timestamp_echo == 0) break; p_prev = hcrx->ccid3hcrx_rtt; - do_gettimeofday(&now); - hcrx->ccid3hcrx_rtt = timeval_usecs(&now) - - (opt_recv->dccpor_timestamp_echo - - opt_recv->dccpor_elapsed_time) * 10; + dccp_timestamp(sk, &now); + timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); + r_sample = timeval_usecs(&now); + t_elapsed = opt_recv->dccpor_elapsed_time * 10; + + if (unlikely(r_sample <= t_elapsed)) + LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, " + "t_elapsed=%uus\n", + __FUNCTION__, r_sample, t_elapsed); + else + r_sample -= t_elapsed; + + if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) + hcrx->ccid3hcrx_rtt = r_sample; + else + hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + + r_sample / 10; + if (p_prev != hcrx->ccid3hcrx_rtt) ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", dccp_role(sk), hcrx->ccid3hcrx_rtt, @@ -978,19 +975,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) break; case DCCP_PKT_DATA: break; - default: - ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n", - dccp_role(sk), sk, - dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + default: /* We're not interested in other packet types, move along */ return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); - if (packet == NULL) { - ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " - "to history (consider it lost)!", - dccp_role(sk), sk); + if (unlikely(packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Not enough mem to " + "add rx packet to history, consider it lost!\n", + __FUNCTION__, dccp_role(sk), sk); return; } @@ -1017,7 +1011,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (ins != 0) break; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= hcrx->ccid3hcrx_rtt) { hcrx->ccid3hcrx_tstamp_last_ack = now; @@ -1056,11 +1050,11 @@ static int ccid3_hc_rx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), - gfp_any()); - if (hcrx == NULL) + dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); + if (dp->dccps_hc_rx_ccid_private == NULL) return -ENOMEM; + hcrx = ccid3_hc_rx_sk(sk); memset(hcrx, 0, sizeof(*hcrx)); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && @@ -1072,23 +1066,18 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - /* - * XXX this seems to be paranoid, need to think more about this, for - * now start with something different than zero. -acme - */ - hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5; + dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); + hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; + hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ return 0; } static void ccid3_hc_rx_exit(struct sock *sk) { + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - - if (hcrx == NULL) - return; + BUG_ON(hcrx == NULL); ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); @@ -1104,12 +1093,14 @@ static void ccid3_hc_rx_exit(struct sock *sk) static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { - const struct dccp_sock *dp = dccp_sk(sk); - const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - if (hcrx == NULL) + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) return; + BUG_ON(hcrx == NULL); + info->tcpi_ca_state = hcrx->ccid3hcrx_state; info->tcpi_options |= TCPI_OPT_TIMESTAMPS; info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; @@ -1117,16 +1108,72 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - const struct dccp_sock *dp = dccp_sk(sk); - const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - if (hctx == NULL) + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) return; + BUG_ON(hctx == NULL); + info->tcpi_rto = hctx->ccid3hctx_t_rto; info->tcpi_rtt = hctx->ccid3hctx_rtt; } +static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_RX_INFO: + if (len < sizeof(hcrx->ccid3hcrx_tfrc)) + return -EINVAL; + len = sizeof(hcrx->ccid3hcrx_tfrc); + val = &hcrx->ccid3hcrx_tfrc; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + +static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_TX_INFO: + if (len < sizeof(hctx->ccid3hctx_tfrc)) + return -EINVAL; + len = sizeof(hctx->ccid3hctx_tfrc); + val = &hctx->ccid3hctx_tfrc; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + static struct ccid ccid3 = { .ccid_id = 3, .ccid_name = "ccid3", @@ -1146,6 +1193,8 @@ static struct ccid ccid3 = { .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, + .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, + .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; module_param(ccid3_debug, int, 0444); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index ee8cbace663..0bde4583d09 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -40,6 +40,7 @@ #include <linux/list.h> #include <linux/time.h> #include <linux/types.h> +#include <linux/tfrc.h> #define TFRC_MIN_PACKET_SIZE 16 #define TFRC_STD_PACKET_SIZE 256 @@ -48,6 +49,8 @@ /* Two seconds as per CCID3 spec */ #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) +#define TFRC_INITIAL_IPI (USEC_PER_SEC / 4) + /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) @@ -91,12 +94,15 @@ struct ccid3_options_received { * @ccid3hctx_hist - Packet history */ struct ccid3_hc_tx_sock { - u32 ccid3hctx_x; - u32 ccid3hctx_x_recv; - u32 ccid3hctx_x_calc; + struct tfrc_tx_info ccid3hctx_tfrc; +#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x +#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv +#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc +#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt +#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p +#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto +#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi u16 ccid3hctx_s; - u32 ccid3hctx_rtt; - u32 ccid3hctx_p; u8 ccid3hctx_state; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; @@ -104,19 +110,19 @@ struct ccid3_hc_tx_sock { struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; struct timeval ccid3hctx_t_nom; - u32 ccid3hctx_t_rto; - u32 ccid3hctx_t_ipi; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; }; struct ccid3_hc_rx_sock { + struct tfrc_rx_info ccid3hcrx_tfrc; +#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv +#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt +#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; - unsigned long ccid3hcrx_rtt; - u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; @@ -125,13 +131,16 @@ struct ccid3_hc_rx_sock { u16 ccid3hcrx_s; u32 ccid3hcrx_pinv; u32 ccid3hcrx_elapsed_time; - u32 ccid3hcrx_x_recv; }; -#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) +static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_tx_ccid_private; +} -#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) +static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_rx_ccid_private; +} #endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index fb90a91aa93..b375ebdb7dc 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -134,6 +134,7 @@ static inline struct dccp_tx_hist_entry * static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const unsigned int __nocast prio) @@ -148,7 +149,7 @@ static inline struct dccp_rx_hist_entry * entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - do_gettimeofday(&(entry->dccphrx_tstamp)); + dccp_timestamp(sk, &entry->dccphrx_tstamp); } return entry; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 33456c0d593..5871c027f9d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -17,6 +17,7 @@ #include <net/snmp.h> #include <net/sock.h> #include <net/tcp.h> +#include "ackvec.h" #ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; @@ -258,13 +259,12 @@ extern int dccp_v4_send_reset(struct sock *sk, extern void dccp_send_close(struct sock *sk, const int active); struct dccp_skb_cb { - __u8 dccpd_type; - __u8 dccpd_reset_code; - __u8 dccpd_service; - __u8 dccpd_ccval; + __u8 dccpd_type:4; + __u8 dccpd_ccval:4; + __u8 dccpd_reset_code; + __u16 dccpd_opt_len; __u64 dccpd_seq; __u64 dccpd_ack_seq; - int dccpd_opt_len; }; #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) @@ -359,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) (dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1)); } + +static inline int dccp_ack_pending(const struct sock *sk) +{ + const struct dccp_sock *dp = dccp_sk(sk); + return dp->dccps_timestamp_echo != 0 || +#ifdef CONFIG_IP_DCCP_ACKVEC + (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || +#endif + inet_csk_ack_scheduled(sk); +} extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern void dccp_insert_option_elapsed_time(struct sock *sk, @@ -372,63 +383,7 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, extern struct socket *dccp_ctl_socket; -#define DCCP_ACKPKTS_STATE_RECEIVED 0 -#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) -#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) - -#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ -#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ - -/** struct dccp_ackpkts - acknowledgeable packets - * - * This data structure is the one defined in the DCCP draft - * Appendix A. - * - * @dccpap_buf_head - circular buffer head - * @dccpap_buf_tail - circular buffer tail - * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the - * buffer (i.e. %dccpap_buf_head) - * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked - * by the buffer with State 0 - * - * Additionally, the HC-Receiver must keep some information about the - * Ack Vectors it has recently sent. For each packet sent carrying an - * Ack Vector, it remembers four variables: - * - * @dccpap_ack_seqno - the Sequence Number used for the packet - * (HC-Receiver seqno) - * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. - * @dccpap_ack_ackno - the Acknowledgement Number used for the packet - * (HC-Sender seqno) - * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. - * - * @dccpap_buf_len - circular buffer length - * @dccpap_time - the time in usecs - * @dccpap_buf - circular buffer of acknowledgeable packets - */ -struct dccp_ackpkts { - unsigned int dccpap_buf_head; - unsigned int dccpap_buf_tail; - u64 dccpap_buf_ackno; - u64 dccpap_ack_seqno; - u64 dccpap_ack_ackno; - unsigned int dccpap_ack_ptr; - unsigned int dccpap_buf_vector_len; - unsigned int dccpap_ack_vector_len; - unsigned int dccpap_buf_len; - struct timeval dccpap_time; - u8 dccpap_buf_nonce; - u8 dccpap_ack_nonce; - u8 dccpap_buf[0]; -}; - -extern struct dccp_ackpkts * - dccp_ackpkts_alloc(unsigned int len, - const unsigned int __nocast priority); -extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); -extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); -extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, - struct sock *sk, u64 ackno); +extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); static inline suseconds_t timeval_usecs(const struct timeval *tv) { @@ -468,26 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } -/* - * Returns the difference in usecs between timeval - * passed in and current time - */ -static inline suseconds_t timeval_now_delta(const struct timeval *tv) -{ - struct timeval now; - do_gettimeofday(&now); - return timeval_delta(&now, tv); -} - -#ifdef CONFIG_IP_DCCP_DEBUG -extern void dccp_ackvector_print(const u64 ackno, - const unsigned char *vector, int len); -extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); -#else -static inline void dccp_ackvector_print(const u64 ackno, - const unsigned char *vector, - int len) { } -static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } -#endif - #endif /* _DCCP_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index ef29cef1daf..1b6b2cb1237 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -16,6 +16,7 @@ #include <net/sock.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -50,7 +51,8 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) return; } - dccp_set_state(sk, DCCP_CLOSING); + if (sk->sk_state != DCCP_CLOSING) + dccp_set_state(sk, DCCP_CLOSING); dccp_send_close(sk, 0); } @@ -59,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); if (dp->dccps_options.dccpo_send_ack_vector) - dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); } static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) @@ -163,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - /* - * FIXME: check ECN to see if we should use - * DCCP_ACKPKTS_STATE_ECN_MARKED - */ - if (dp->dccps_options.dccpo_send_ack_vector) { - struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " - "packets buffer full!\n"); - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MIN, - DCCP_RTO_MAX); - goto discard; - } - - /* - * FIXME: this activation is probably wrong, have to study more - * TCP delack machinery and how it fits into DCCP draft, but - * for now it kinda "works" 8) - */ - if (!inet_csk_ack_scheduled(sk)) { - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, - DCCP_RTO_MAX); - } - } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); @@ -383,9 +359,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, } out_invalid_packet: - return 1; /* dccp_v4_do_rcv will send a reset, but... - FIXME: the reset code should be - DCCP_RESET_CODE_PACKET_ERROR */ + /* dccp_v4_do_rcv will send a reset */ + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; + return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -432,6 +408,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_hdr *dh, unsigned len) { struct dccp_sock *dp = dccp_sk(sk); + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; int queued = 0; @@ -472,7 +449,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dh->dccph_type == DCCP_PKT_RESET) goto discard; - /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ + /* Caller (dccp_v4_do_rcv) will send Reset */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; } @@ -486,36 +464,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, skb)) goto discard; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != - DCCP_PKT_WITHOUT_ACK_SEQ) + if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); - /* - * FIXME: check ECN to see if we should use - * DCCP_ACKPKTS_STATE_ECN_MARKED - */ - if (dp->dccps_options.dccpo_send_ack_vector) { - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKPKTS_STATE_RECEIVED)) - goto discard; - /* - * FIXME: this activation is probably wrong, have to - * study more TCP delack machinery and how it fits into - * DCCP draft, but for now it kinda "works" 8) - */ - if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == - DCCP_MAX_SEQNO + 1) && - !inet_csk_ack_scheduled(sk)) { - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MIN, - DCCP_RTO_MAX); - } - } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; } /* @@ -550,8 +509,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNC); + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { dccp_rcv_closereq(sk, skb); @@ -561,8 +519,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } + if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); + goto discard; + } + switch (sk->sk_state) { case DCCP_CLOSED: + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; case DCCP_REQUESTING: diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3fc75dbee4b..40fe6afacde 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -23,6 +23,7 @@ #include <net/tcp_states.h> #include <net/xfrm.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -246,6 +247,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, dp->dccps_role = DCCP_ROLE_CLIENT; + if (dccp_service_not_initialized(sk)) + return -EPROTO; + if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -641,16 +645,12 @@ int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) skb = dccp_make_reset(sk, sk->sk_dst_cache, code); if (skb != NULL) { - const struct dccp_sock *dp = dccp_sk(sk); const struct inet_sock *inet = inet_sk(sk); err = ip_build_and_send_pkt(skb, sk, inet->saddr, inet->daddr, NULL); if (err == NET_XMIT_CN) err = 0; - - ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); } return err; @@ -665,6 +665,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } +static inline int dccp_bad_service_code(const struct sock *sk, + const __u32 service) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_service == service) + return 0; + return !dccp_list_has_service(dp->dccps_service_list, service); +} + int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -673,13 +683,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __u32 saddr = skb->nh.iph->saddr; const __u32 daddr = skb->nh.iph->daddr; + const __u32 service = dccp_hdr_request(skb)->dccph_req_service; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; struct dst_entry *dst = NULL; /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) + (RTCF_BROADCAST | RTCF_MULTICAST)) { + reset_code = DCCP_RESET_CODE_NO_CONNECTION; goto drop; + } + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } /* * TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -722,9 +741,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * dccp_create_openreq_child. */ dreq = dccp_rsk(req); - dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = service; if (dccp_v4_send_response(sk, req, dst)) goto drop_and_free; @@ -739,6 +758,7 @@ drop_and_free: __reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + dcb->dccpd_reset_code = reset_code; return -1; } @@ -1009,7 +1029,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; dccp_v4_ctl_send_reset(skb); discard: kfree_skb(skb); @@ -1094,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb) goto discard_it; dh = dccp_hdr(skb); -#if 0 - /* - * Use something like this to simulate some DATA/DATAACK loss to test - * dccp_ackpkts_add, you'll get something like this on a session that - * sends 10 DATA/DATAACK packets: - * - * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| - * - * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet - * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets - * with the same state - * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet - * - * So... - * - * 281473596467422 was received - * 281473596467421 was not received - * 281473596467420 was received - * 281473596467419 was not received - * 281473596467418 was received - * 281473596467417 was not received - * 281473596467416 was received - * 281473596467415 was not received - * 281473596467414 was received - * 281473596467413 was received (this one was the 3way handshake - * RESPONSE) - * - */ - if (dh->dccph_type == DCCP_PKT_DATA || - dh->dccph_type == DCCP_PKT_DATAACK) { - static int discard = 0; - if (discard) { - discard = 0; - goto discard_it; - } - discard = 1; - } -#endif DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; @@ -1243,13 +1224,12 @@ static int dccp_v4_init_sock(struct sock *sk) static int dccp_ctl_socket_init = 1; dccp_options_init(&dp->dccps_options); + do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackpkts = - dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_KERNEL); - - if (dp->dccps_hc_rx_ackpkts == NULL) + dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, + GFP_KERNEL); + if (dp->dccps_hc_rx_ackvec == NULL) return -ENOMEM; } @@ -1261,16 +1241,18 @@ static int dccp_v4_init_sock(struct sock *sk) * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, sk); if (dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL) { ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); - dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); - dp->dccps_hc_rx_ackpkts = NULL; + if (dp->dccps_options.dccpo_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; return -ENOMEM; } @@ -1283,6 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk) sk->sk_write_space = dccp_write_space; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; + dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; return 0; } @@ -1304,10 +1287,17 @@ static int dccp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(&dccp_hashinfo, sk); + if (dp->dccps_service_list != NULL) { + kfree(dp->dccps_service_list); + dp->dccps_service_list = NULL; + } + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); - dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); - dp->dccps_hc_rx_ackpkts = NULL; + if (dp->dccps_options.dccpo_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index ce5dff4ac22..1393461898b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -19,6 +19,7 @@ #include <net/xfrm.h> #include <net/inet_timewait_sock.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -93,21 +94,24 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct inet_connection_sock *newicsk = inet_csk(sk); struct dccp_sock *newdp = dccp_sk(newsk); - newdp->dccps_hc_rx_ackpkts = NULL; - newdp->dccps_role = DCCP_ROLE_SERVER; - newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + newdp->dccps_role = DCCP_ROLE_SERVER; + newdp->dccps_hc_rx_ackvec = NULL; + newdp->dccps_service_list = NULL; + newdp->dccps_service = dreq->dreq_service; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { - newdp->dccps_hc_rx_ackpkts = - dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_ATOMIC); + newdp->dccps_hc_rx_ackvec = + dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, + GFP_ATOMIC); /* * XXX: We're using the same CCIDs set on the parent, * i.e. sk_clone copied the master sock and left the * CCID pointers for this child, that is why we do the * __ccid_get calls. */ - if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) + if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) goto out_free; } @@ -115,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newsk) != 0 || ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { - dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); + dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); out_free: diff --git a/net/dccp/options.c b/net/dccp/options.c index 382c5894acb..0a76426c9ae 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -18,19 +18,15 @@ #include <linux/kernel.h> #include <linux/skbuff.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" -static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, - struct sock *sk, - const u64 ackno, - const unsigned char len, - const unsigned char *vector); - /* stores the default values for new connection. may be changed with sysctl */ static const struct dccp_options dccpo_default_values = { .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, - .dccpo_ccid = DCCPF_INITIAL_CCID, + .dccpo_rx_ccid = DCCPF_INITIAL_CCID, + .dccpo_tx_ccid = DCCPF_INITIAL_CCID, .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, }; @@ -72,6 +68,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; unsigned char *value; + u32 elapsed_time; memset(opt_recv, 0, sizeof(*opt_recv)); @@ -112,25 +109,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_ndp); break; case DCCPO_ACK_VECTOR_0: - if (len > DCCP_MAX_ACK_VECTOR_LEN) - goto out_invalid_option; - + case DCCPO_ACK_VECTOR_1: if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_ack_vector_len = len; - opt_recv->dccpor_ack_vector_idx = value - options; - - dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", - debug_prefix, len, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq); - dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, - value, len); - dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, - sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - len, value); + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_parse(sk, skb, opt, value, len)) + goto out_invalid_option; break; case DCCPO_TIMESTAMP: if (len != 4) @@ -139,7 +124,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - do_gettimeofday(&dp->dccps_timestamp_time); + dccp_timestamp(sk, &dp->dccps_timestamp_time); dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, @@ -159,18 +144,18 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); - if (len > 4) { - if (len == 6) - opt_recv->dccpor_elapsed_time = - ntohs(*(u16 *)(value + 4)); - else - opt_recv->dccpor_elapsed_time = - ntohl(*(u32 *)(value + 4)); - dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", - debug_prefix, - opt_recv->dccpor_elapsed_time); - } + if (len == 4) + break; + + if (len == 6) + elapsed_time = ntohs(*(u16 *)(value + 4)); + else + elapsed_time = ntohl(*(u32 *)(value + 4)); + + /* Give precedence to the biggest ELAPSED_TIME */ + if (elapsed_time > opt_recv->dccpor_elapsed_time) + opt_recv->dccpor_elapsed_time = elapsed_time; break; case DCCPO_ELAPSED_TIME: if (len != 2 && len != 4) @@ -180,14 +165,15 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) continue; if (len == 2) - opt_recv->dccpor_elapsed_time = - ntohs(*(u16 *)value); + elapsed_time = ntohs(*(u16 *)value); else - opt_recv->dccpor_elapsed_time = - ntohl(*(u32 *)value); + elapsed_time = ntohl(*(u32 *)value); + + if (elapsed_time > opt_recv->dccpor_elapsed_time) + opt_recv->dccpor_elapsed_time = elapsed_time; dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, - opt_recv->dccpor_elapsed_time); + elapsed_time); break; /* * From draft-ietf-dccp-spec-11.txt: @@ -350,89 +336,29 @@ void dccp_insert_option_elapsed_time(struct sock *sk, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) +void dccp_timestamp(const struct sock *sk, struct timeval *tv) { - struct dccp_sock *dp = dccp_sk(sk); -#ifdef CONFIG_IP_DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT TX opt: " : "server TX opt: "; -#endif - struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; - unsigned char *to, *from; + const struct dccp_sock *dp = dccp_sk(sk); - if (elapsed_time != 0) - dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + do_gettimeofday(tv); + tv->tv_sec -= dp->dccps_epoch.tv_sec; + tv->tv_usec -= dp->dccps_epoch.tv_usec; - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " - "insert ACK Vector!\n"); - return; - } - - /* - * XXX: now we have just one ack vector sent record, so - * we have to wait for it to be cleared. - * - * Of course this is not acceptable, but this is just for - * basic testing now. - */ - if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) - return; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ACK_VECTOR_0; - *to++ = len; - - len = ap->dccpap_buf_vector_len; - from = ap->dccpap_buf + ap->dccpap_buf_head; - - /* Check if buf_head wraps */ - if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { - const unsigned int tailsize = (ap->dccpap_buf_len - - ap->dccpap_buf_head); - - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - from = ap->dccpap_buf; + while (tv->tv_usec < 0) { + tv->tv_sec--; + tv->tv_usec += USEC_PER_SEC; } - - memcpy(to, from, len); - /* - * From draft-ietf-dccp-spec-11.txt: - * - * For each acknowledgement it sends, the HC-Receiver will add an - * acknowledgement record. ack_seqno will equal the HC-Receiver - * sequence number it used for the ack packet; ack_ptr will equal - * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will - * equal buf_nonce. - * - * This implemention uses just one ack record for now. - */ - ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - ap->dccpap_ack_ptr = ap->dccpap_buf_head; - ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; - ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; - ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; - - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu\n", - debug_prefix, ap->dccpap_ack_vector_len, - (unsigned long long) ap->dccpap_ack_seqno, - (unsigned long long) ap->dccpap_ack_ackno); } +EXPORT_SYMBOL_GPL(dccp_timestamp); + void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { struct timeval tv; u32 now; - do_gettimeofday(&tv); - now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; + dccp_timestamp(sk, &tv); + now = timeval_usecs(&tv) / 10; /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -450,13 +376,17 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; #endif + struct timeval now; u32 tstamp_echo; - const u32 elapsed_time = - timeval_now_delta(&dp->dccps_timestamp_time) / 10; - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 6 + elapsed_time_len; + u32 elapsed_time; + int len, elapsed_time_len; unsigned char *to; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + len = 6 + elapsed_time_len; + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " "timestamp echo!\n"); @@ -502,16 +432,20 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (!dccp_packet_without_ack(skb)) { if (dp->dccps_options.dccpo_send_ack_vector && - (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != - DCCP_MAX_SEQNO + 1)) - dccp_insert_option_ack_vector(sk, skb); - + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) + dccp_insert_option_ackvec(sk, skb); if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } - ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + if (dp->dccps_hc_rx_insert_options) { + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); + dp->dccps_hc_rx_insert_options = 0; + } + if (dp->dccps_hc_tx_insert_options) { + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + dp->dccps_hc_tx_insert_options = 0; + } /* XXX: insert other options when appropriate */ @@ -526,330 +460,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) } } } - -struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, - const unsigned int __nocast priority) -{ - struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); - - if (ap != NULL) { -#ifdef CONFIG_IP_DCCP_DEBUG - memset(ap->dccpap_buf, 0xFF, len); -#endif - ap->dccpap_buf_len = len; - ap->dccpap_buf_head = - ap->dccpap_buf_tail = - ap->dccpap_buf_len - 1; - ap->dccpap_buf_ackno = - ap->dccpap_ack_ackno = - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; - ap->dccpap_ack_ptr = 0; - ap->dccpap_time.tv_sec = 0; - ap->dccpap_time.tv_usec = 0; - ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; - } - - return ap; -} - -void dccp_ackpkts_free(struct dccp_ackpkts *ap) -{ - if (ap != NULL) { -#ifdef CONFIG_IP_DCCP_DEBUG - memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); -#endif - kfree(ap); - } -} - -static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, - const unsigned int index) -{ - return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; -} - -static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, - const unsigned int index) -{ - return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; -} - -/* - * If several packets are missing, the HC-Receiver may prefer to enter multiple - * bytes with run length 0, rather than a single byte with a larger run length; - * this simplifies table updates if one of the missing packets arrives. - */ -static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, - const unsigned int packets, - const unsigned char state) -{ - unsigned int gap; - signed long new_head; - - if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) - return -ENOBUFS; - - gap = packets - 1; - new_head = ap->dccpap_buf_head - packets; - - if (new_head < 0) { - if (gap > 0) { - memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, - gap + new_head + 1); - gap = -new_head; - } - new_head += ap->dccpap_buf_len; - } - - ap->dccpap_buf_head = new_head; - - if (gap > 0) - memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, - DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); - - ap->dccpap_buf[ap->dccpap_buf_head] = state; - ap->dccpap_buf_vector_len += packets; - return 0; -} - -/* - * Implements the draft-ietf-dccp-spec-11.txt Appendix A - */ -int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) -{ - /* - * Check at the right places if the buffer is full, if it is, tell the - * caller to start dropping packets till the HC-Sender acks our ACK - * vectors, when we will free up space in dccpap_buf. - * - * We may well decide to do buffer compression, etc, but for now lets - * just drop. - * - * From Appendix A: - * - * Of course, the circular buffer may overflow, either when the - * HC-Sender is sending data at a very high rate, when the - * HC-Receiver's acknowledgements are not reaching the HC-Sender, - * or when the HC-Sender is forgetting to acknowledge those acks - * (so the HC-Receiver is unable to clean up old state). In this - * case, the HC-Receiver should either compress the buffer (by - * increasing run lengths when possible), transfer its state to - * a larger buffer, or, as a last resort, drop all received - * packets, without processing them whatsoever, until its buffer - * shrinks again. - */ - - /* See if this is the first ackno being inserted */ - if (ap->dccpap_buf_vector_len == 0) { - ap->dccpap_buf[ap->dccpap_buf_head] = state; - ap->dccpap_buf_vector_len = 1; - } else if (after48(ackno, ap->dccpap_buf_ackno)) { - const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, - ackno); - - /* - * Look if the state of this packet is the same as the - * previous ackno and if so if we can bump the head len. - */ - if (delta == 1 && - dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && - (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < - DCCP_ACKPKTS_LEN_MASK)) - ap->dccpap_buf[ap->dccpap_buf_head]++; - else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) - return -ENOBUFS; - } else { - /* - * A.1.2. Old Packets - * - * When a packet with Sequence Number S arrives, and - * S <= buf_ackno, the HC-Receiver will scan the table - * for the byte corresponding to S. (Indexing structures - * could reduce the complexity of this scan.) - */ - u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); - unsigned int index = ap->dccpap_buf_head; - - while (1) { - const u8 len = dccp_ackpkts_len(ap, index); - const u8 state = dccp_ackpkts_state(ap, index); - /* - * valid packets not yet in dccpap_buf have a reserved - * entry, with a len equal to 0. - */ - if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && - len == 0 && delta == 0) { /* Found our - reserved seat! */ - dccp_pr_debug("Found %llu reserved seat!\n", - (unsigned long long) ackno); - ap->dccpap_buf[index] = state; - goto out; - } - /* len == 0 means one packet */ - if (delta < len + 1) - goto out_duplicate; - - delta -= len + 1; - if (++index == ap->dccpap_buf_len) - index = 0; - } - } - - ap->dccpap_buf_ackno = ackno; - do_gettimeofday(&ap->dccpap_time); -out: - dccp_pr_debug(""); - dccp_ackpkts_print(ap); - return 0; - -out_duplicate: - /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost " - "packet: %llu\n", (unsigned long long) ackno); - return -EILSEQ; -} - -#ifdef CONFIG_IP_DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, - int len) -{ - if (!dccp_debug) - return; - - printk("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long) ackno); - - while (len--) { - const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; - const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); - - printk("%d,%d|", state, rl); - ++vector; - } - - printk("\n"); -} - -void dccp_ackpkts_print(const struct dccp_ackpkts *ap) -{ - dccp_ackvector_print(ap->dccpap_buf_ackno, - ap->dccpap_buf + ap->dccpap_buf_head, - ap->dccpap_buf_vector_len); -} -#endif - -static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) -{ - /* - * As we're keeping track of the ack vector size - * (dccpap_buf_vector_len) and the sent ack vector size - * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but - * keep this code here as in the future we'll implement a vector of - * ack records, as suggested in draft-ietf-dccp-spec-11.txt - * Appendix A. -acme - */ -#if 0 - ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; - if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) - ap->dccpap_buf_tail -= ap->dccpap_buf_len; -#endif - ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; -} - -void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, - u64 ackno) -{ - /* Check if we actually sent an ACK vector */ - if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - - if (ackno == ap->dccpap_ack_seqno) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu, ACKED!\n", - debug_prefix, 1, - (unsigned long long) ap->dccpap_ack_seqno, - (unsigned long long) ap->dccpap_ack_ackno); - dccp_ackpkts_trow_away_ack_record(ap); - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - } -} - -static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, - struct sock *sk, u64 ackno, - const unsigned char len, - const unsigned char *vector) -{ - unsigned char i; - - /* Check if we actually sent an ACK vector */ - if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - /* - * We're in the receiver half connection, so if the received an ACK - * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're - * not interested. - * - * Extra explanation with example: - * - * if we received an ACK vector with ackno 50, it can only be acking - * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). - */ - /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ - if (before48(ackno, ap->dccpap_ack_seqno)) { - /* dccp_pr_debug_cat("yes\n"); */ - return; - } - /* dccp_pr_debug_cat("no\n"); */ - - i = len; - while (i--) { - const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); - u64 ackno_end_rl; - - dccp_set_seqno(&ackno_end_rl, ackno - rl); - - /* - * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, - * ap->dccpap_ack_seqno, ackno); - */ - if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & - DCCP_ACKPKTS_STATE_MASK) >> 6; - /* dccp_pr_debug_cat("yes\n"); */ - - if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = - dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK vector 0, len=%d, " - "ack_seqno=%llu, ack_ackno=%llu, " - "ACKED!\n", - debug_prefix, len, - (unsigned long long) - ap->dccpap_ack_seqno, - (unsigned long long) - ap->dccpap_ack_ackno); - dccp_ackpkts_trow_away_ack_record(ap); - } - /* - * If dccpap_ack_seqno was not received, no problem - * we'll send another ACK vector. - */ - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - break; - } - /* dccp_pr_debug_cat("no\n"); */ - - dccp_set_seqno(&ackno, ackno_end_rl - 1); - ++vector; - } -} diff --git a/net/dccp/output.c b/net/dccp/output.c index 28de157a432..4786bdcddcc 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -16,6 +16,7 @@ #include <net/sock.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -85,7 +86,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = - dcb->dccpd_service; + dp->dccps_service; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) err = dccp_wait_for_ccid(sk, skb, timeo); if (err == 0) { - const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int len = skb->len; @@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); dcb->dccpd_type = DCCP_PKT_DATAACK; - /* - * FIXME: we really should have a - * dccps_ack_pending or use icsk. - */ - } else if (inet_csk_ack_scheduled(sk) || - dp->dccps_timestamp_echo != 0 || - (dp->dccps_options.dccpo_send_ack_vector && - ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && - ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + } else if (dccp_ack_pending(sk)) dcb->dccpd_type = DCCP_PKT_DATAACK; else dcb->dccpd_type = DCCP_PKT_DATA; @@ -270,6 +262,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, struct request_sock *req) { struct dccp_hdr *dh; + struct dccp_request_sock *dreq; const int dccp_header_size = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_response); @@ -285,8 +278,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); skb->csum = 0; + dreq = dccp_rsk(req); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; + DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; dccp_insert_options(sk, skb); skb->h.raw = skb_push(skb, dccp_header_size); @@ -300,8 +294,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); + dccp_hdr_set_seq(dh, dreq->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); + dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, inet_rsk(req)->rmt_addr); @@ -397,9 +392,6 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, MAX_DCCP_HEADER); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - /* FIXME: set service to something meaningful, coming - * from userspace*/ - DCCP_SKB_CB(skb)->dccpd_service = 0; skb->csum = 0; skb_set_owner_w(skb, sk); @@ -522,7 +514,4 @@ void dccp_send_close(struct sock *sk, const int active) dccp_transmit_skb(sk, skb_clone(skb, prio)); } else dccp_transmit_skb(sk, skb); - - ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 18a0e69c9dc..a1cfd0e9e3b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name); static inline int dccp_listen_start(struct sock *sk) { - dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; + struct dccp_sock *dp = dccp_sk(sk); + + dp->dccps_role = DCCP_ROLE_LISTEN; + /* + * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) + * before calling listen() + */ + if (dccp_service_not_initialized(sk)) + return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } +static int dccp_setsockopt_service(struct sock *sk, const u32 service, + char __user *optval, int optlen) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_service_list *sl = NULL; + + if (service == DCCP_SERVICE_INVALID_VALUE || + optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) + return -EINVAL; + + if (optlen > sizeof(service)) { + sl = kmalloc(optlen, GFP_KERNEL); + if (sl == NULL) + return -ENOMEM; + + sl->dccpsl_nr = optlen / sizeof(u32) - 1; + if (copy_from_user(sl->dccpsl_list, + optval + sizeof(service), + optlen - sizeof(service)) || + dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { + kfree(sl); + return -EFAULT; + } + } + + lock_sock(sk); + dp->dccps_service = service; + + if (dp->dccps_service_list != NULL) + kfree(dp->dccps_service_list); + + dp->dccps_service_list = sl; + release_sock(sk); + return 0; +} + int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; - lock_sock(sk); + if (optname == DCCP_SOCKOPT_SERVICE) + return dccp_setsockopt_service(sk, val, optval, optlen); + lock_sock(sk); dp = dccp_sk(sk); err = 0; @@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, return err; } +static int dccp_getsockopt_service(struct sock *sk, int len, + u32 __user *optval, + int __user *optlen) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const struct dccp_service_list *sl; + int err = -ENOENT, slen = 0, total_len = sizeof(u32); + + lock_sock(sk); + if (dccp_service_not_initialized(sk)) + goto out; + + if ((sl = dp->dccps_service_list) != NULL) { + slen = sl->dccpsl_nr * sizeof(u32); + total_len += slen; + } + + err = -EINVAL; + if (total_len > len) + goto out; + + err = 0; + if (put_user(total_len, optlen) || + put_user(dp->dccps_service, optval) || + (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) + err = -EFAULT; +out: + release_sock(sk); + return err; +} + int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -248,8 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) + if (len < sizeof(int)) return -EINVAL; dp = dccp_sk(sk); @@ -257,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: val = dp->dccps_packet_size; + len = sizeof(dp->dccps_packet_size); break; + case DCCP_SOCKOPT_SERVICE: + return dccp_getsockopt_service(sk, len, + (u32 __user *)optval, optlen); + case 128 ... 191: + return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, + len, (u32 __user *)optval, optlen); + case 192 ... 255: + return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, + len, (u32 __user *)optval, optlen); default: return -ENOPROTOOPT; } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 621680f127a..348f36b529f 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1876,8 +1876,27 @@ static inline unsigned int dn_current_mss(struct sock *sk, int flags) return mss_now; } +/* + * N.B. We get the timeout wrong here, but then we always did get it + * wrong before and this is another step along the road to correcting + * it. It ought to get updated each time we pass through the routine, + * but in practise it probably doesn't matter too much for now. + */ +static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk, + unsigned long datalen, int noblock, + int *errcode) +{ + struct sk_buff *skb = sock_alloc_send_skb(sk, datalen, + noblock, errcode); + if (skb) { + skb->protocol = __constant_htons(ETH_P_DNA_RT); + skb->pkt_type = PACKET_OUTGOING; + } + return skb; +} + static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) + struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); @@ -1892,7 +1911,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, struct dn_skb_cb *cb; size_t len; unsigned char fctype; - long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + long timeo; if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT)) return -EOPNOTSUPP; @@ -1900,18 +1919,21 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, if (addr_len && (addr_len != sizeof(struct sockaddr_dn))) return -EINVAL; + lock_sock(sk); + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); /* * The only difference between stream sockets and sequenced packet * sockets is that the stream sockets always behave as if MSG_EOR * has been set. */ if (sock->type == SOCK_STREAM) { - if (flags & MSG_EOR) - return -EINVAL; + if (flags & MSG_EOR) { + err = -EINVAL; + goto out; + } flags |= MSG_EOR; } - lock_sock(sk); err = dn_check_state(sk, addr, addr_len, &timeo, flags); if (err) @@ -1980,8 +2002,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, /* * Get a suitably sized skb. + * 64 is a bit of a hack really, but its larger than any + * link-layer headers and has served us well as a good + * guess as to their real length. */ - skb = dn_alloc_send_skb(sk, &len, flags & MSG_DONTWAIT, timeo, &err); + skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER, + flags & MSG_DONTWAIT, &err); if (err) break; @@ -1991,7 +2017,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, cb = DN_SKB_CB(skb); - skb_reserve(skb, DN_MAX_NSP_DATA_HEADER); + skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER); if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { err = -EFAULT; diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index e0bebf4bbca..53633d35286 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -137,69 +137,6 @@ struct sk_buff *dn_alloc_skb(struct sock *sk, int size, int pri) } /* - * Wrapper for the above, for allocs of data skbs. We try and get the - * whole size thats been asked for (plus 11 bytes of header). If this - * fails, then we try for any size over 16 bytes for SOCK_STREAMS. - */ -struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, long timeo, int *err) -{ - int space; - int len; - struct sk_buff *skb = NULL; - - *err = 0; - - while(skb == NULL) { - if (signal_pending(current)) { - *err = sock_intr_errno(timeo); - break; - } - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - *err = EINVAL; - break; - } - - if (sk->sk_err) - break; - - len = *size + 11; - space = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); - - if (space < len) { - if ((sk->sk_socket->type == SOCK_STREAM) && - (space >= (16 + 11))) - len = space; - } - - if (space < len) { - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - if (noblock) { - *err = EWOULDBLOCK; - break; - } - - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - SOCK_SLEEP_PRE(sk) - - if ((sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc)) < - len) - schedule(); - - SOCK_SLEEP_POST(sk) - continue; - } - - if ((skb = dn_alloc_skb(sk, len, sk->sk_allocation)) == NULL) - continue; - - *size = len - 11; - } - - return skb; -} - -/* * Calculate persist timer based upon the smoothed round * trip time and the variance. Backoff according to the * nsp_backoff[] array. diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2c915f305be..3407f190afe 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,8 +117,7 @@ static struct dn_rt_hash_bucket *dn_rt_hash_table; static unsigned dn_rt_hash_mask; static struct timer_list dn_route_timer; -static struct timer_list dn_rt_flush_timer = - TIMER_INITIALIZER(dn_run_flush, 0, 0); +static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0); int decnet_dst_gc_interval = 2; static struct dst_ops dn_dst_ops = { diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig index 58ed4319e69..91b16fbf91f 100644 --- a/net/ieee80211/Kconfig +++ b/net/ieee80211/Kconfig @@ -1,6 +1,5 @@ config IEEE80211 tristate "Generic IEEE 802.11 Networking Stack" - select NET_RADIO ---help--- This option enables the hardware independent IEEE 802.11 networking stack. diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c index 05a6f2f298d..61a9d92e455 100644 --- a/net/ieee80211/ieee80211_crypt.c +++ b/net/ieee80211/ieee80211_crypt.c @@ -30,7 +30,6 @@ struct ieee80211_crypto_alg { struct ieee80211_crypto_ops *ops; }; - struct ieee80211_crypto { struct list_head algs; spinlock_t lock; @@ -38,8 +37,7 @@ struct ieee80211_crypto { static struct ieee80211_crypto *hcrypt; -void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, - int force) +void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) { struct list_head *ptr, *n; struct ieee80211_crypt_data *entry; @@ -140,7 +138,7 @@ int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops) spin_lock_irqsave(&hcrypt->lock, flags); for (ptr = hcrypt->algs.next; ptr != &hcrypt->algs; ptr = ptr->next) { struct ieee80211_crypto_alg *alg = - (struct ieee80211_crypto_alg *) ptr; + (struct ieee80211_crypto_alg *)ptr; if (alg->ops == ops) { list_del(&alg->list); del_alg = alg; @@ -158,8 +156,7 @@ int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops) return del_alg ? 0 : -1; } - -struct ieee80211_crypto_ops * ieee80211_get_crypto_ops(const char *name) +struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name) { unsigned long flags; struct list_head *ptr; @@ -171,7 +168,7 @@ struct ieee80211_crypto_ops * ieee80211_get_crypto_ops(const char *name) spin_lock_irqsave(&hcrypt->lock, flags); for (ptr = hcrypt->algs.next; ptr != &hcrypt->algs; ptr = ptr->next) { struct ieee80211_crypto_alg *alg = - (struct ieee80211_crypto_alg *) ptr; + (struct ieee80211_crypto_alg *)ptr; if (strcmp(alg->ops->name, name) == 0) { found_alg = alg; break; @@ -185,9 +182,13 @@ struct ieee80211_crypto_ops * ieee80211_get_crypto_ops(const char *name) return NULL; } - -static void * ieee80211_crypt_null_init(int keyidx) { return (void *) 1; } -static void ieee80211_crypt_null_deinit(void *priv) {} +static void *ieee80211_crypt_null_init(int keyidx) +{ + return (void *)1; +} +static void ieee80211_crypt_null_deinit(void *priv) +{ +} static struct ieee80211_crypto_ops ieee80211_crypt_null = { .name = "NULL", @@ -204,7 +205,6 @@ static struct ieee80211_crypto_ops ieee80211_crypt_null = { .owner = THIS_MODULE, }; - static int __init ieee80211_crypto_init(void) { int ret = -ENOMEM; @@ -222,11 +222,10 @@ static int __init ieee80211_crypto_init(void) kfree(hcrypt); hcrypt = NULL; } -out: + out: return ret; } - static void __exit ieee80211_crypto_deinit(void) { struct list_head *ptr, *n; @@ -237,7 +236,7 @@ static void __exit ieee80211_crypto_deinit(void) for (ptr = hcrypt->algs.next, n = ptr->next; ptr != &hcrypt->algs; ptr = n, n = ptr->next) { struct ieee80211_crypto_alg *alg = - (struct ieee80211_crypto_alg *) ptr; + (struct ieee80211_crypto_alg *)ptr; list_del(ptr); printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm " "'%s' (deinit)\n", alg->ops->name); diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index 11d15573b26..8fc13f45971 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -24,7 +24,6 @@ #include <net/ieee80211.h> - #include <linux/crypto.h> #include <asm/scatterlist.h> @@ -55,7 +54,7 @@ struct ieee80211_ccmp_data { /* scratch buffers for virt_to_page() (crypto API) */ u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN], - tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN]; + tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN]; u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; }; @@ -75,7 +74,7 @@ static void ieee80211_ccmp_aes_encrypt(struct crypto_tfm *tfm, crypto_cipher_encrypt(tfm, &dst, &src, AES_BLOCK_LEN); } -static void * ieee80211_ccmp_init(int key_idx) +static void *ieee80211_ccmp_init(int key_idx) { struct ieee80211_ccmp_data *priv; @@ -94,7 +93,7 @@ static void * ieee80211_ccmp_init(int key_idx) return priv; -fail: + fail: if (priv) { if (priv->tfm) crypto_free_tfm(priv->tfm); @@ -104,7 +103,6 @@ fail: return NULL; } - static void ieee80211_ccmp_deinit(void *priv) { struct ieee80211_ccmp_data *_priv = priv; @@ -113,19 +111,16 @@ static void ieee80211_ccmp_deinit(void *priv) kfree(priv); } - -static inline void xor_block(u8 *b, u8 *a, size_t len) +static inline void xor_block(u8 * b, u8 * a, size_t len) { int i; for (i = 0; i < len; i++) b[i] ^= a[i]; } - static void ccmp_init_blocks(struct crypto_tfm *tfm, struct ieee80211_hdr *hdr, - u8 *pn, size_t dlen, u8 *b0, u8 *auth, - u8 *s0) + u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) { u8 *pos, qc = 0; size_t aad_len; @@ -142,7 +137,7 @@ static void ccmp_init_blocks(struct crypto_tfm *tfm, if (a4_included) aad_len += 6; if (qc_included) { - pos = (u8 *) &hdr->addr4; + pos = (u8 *) & hdr->addr4; if (a4_included) pos += 6; qc = *pos & 0x0f; @@ -169,14 +164,14 @@ static void ccmp_init_blocks(struct crypto_tfm *tfm, * QC (if present) */ pos = (u8 *) hdr; - aad[0] = 0; /* aad_len >> 8 */ + aad[0] = 0; /* aad_len >> 8 */ aad[1] = aad_len & 0xff; aad[2] = pos[0] & 0x8f; aad[3] = pos[1] & 0xc7; memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN); - pos = (u8 *) &hdr->seq_ctl; + pos = (u8 *) & hdr->seq_ctl; aad[22] = pos[0] & 0x0f; - aad[23] = 0; /* all bits masked */ + aad[23] = 0; /* all bits masked */ memset(aad + 24, 0, 8); if (a4_included) memcpy(aad + 24, hdr->addr4, ETH_ALEN); @@ -196,7 +191,6 @@ static void ccmp_init_blocks(struct crypto_tfm *tfm, ieee80211_ccmp_aes_encrypt(tfm, b0, s0); } - static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_ccmp_data *key = priv; @@ -209,8 +203,7 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) u8 *s0 = key->tx_s0; if (skb_headroom(skb) < CCMP_HDR_LEN || - skb_tailroom(skb) < CCMP_MIC_LEN || - skb->len < hdr_len) + skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) return -1; data_len = skb->len - hdr_len; @@ -230,13 +223,13 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) *pos++ = key->tx_pn[5]; *pos++ = key->tx_pn[4]; *pos++ = 0; - *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */; + *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ; *pos++ = key->tx_pn[3]; *pos++ = key->tx_pn[2]; *pos++ = key->tx_pn[1]; *pos++ = key->tx_pn[0]; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; @@ -261,7 +254,6 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return 0; } - static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_ccmp_data *key = priv; @@ -280,7 +272,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; } - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; pos = skb->data + hdr_len; keyidx = pos[3]; if (!(keyidx & (1 << 5))) { @@ -364,8 +356,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return keyidx; } - -static int ieee80211_ccmp_set_key(void *key, int len, u8 *seq, void *priv) +static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) { struct ieee80211_ccmp_data *data = priv; int keyidx; @@ -395,8 +386,7 @@ static int ieee80211_ccmp_set_key(void *key, int len, u8 *seq, void *priv) return 0; } - -static int ieee80211_ccmp_get_key(void *key, int len, u8 *seq, void *priv) +static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) { struct ieee80211_ccmp_data *data = priv; @@ -419,8 +409,7 @@ static int ieee80211_ccmp_get_key(void *key, int len, u8 *seq, void *priv) return CCMP_TK_LEN; } - -static char * ieee80211_ccmp_print_stats(char *p, void *priv) +static char *ieee80211_ccmp_print_stats(char *p, void *priv) { struct ieee80211_ccmp_data *ccmp = priv; p += sprintf(p, "key[%d] alg=CCMP key_set=%d " @@ -436,7 +425,6 @@ static char * ieee80211_ccmp_print_stats(char *p, void *priv) return p; } - static struct ieee80211_crypto_ops ieee80211_crypt_ccmp = { .name = "CCMP", .init = ieee80211_ccmp_init, @@ -453,18 +441,15 @@ static struct ieee80211_crypto_ops ieee80211_crypt_ccmp = { .owner = THIS_MODULE, }; - static int __init ieee80211_crypto_ccmp_init(void) { return ieee80211_register_crypto_ops(&ieee80211_crypt_ccmp); } - static void __exit ieee80211_crypto_ccmp_exit(void) { ieee80211_unregister_crypto_ops(&ieee80211_crypt_ccmp); } - module_init(ieee80211_crypto_ccmp_init); module_exit(ieee80211_crypto_ccmp_exit); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index f91d92c6df2..d4f9164be1a 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -23,7 +23,6 @@ #include <net/ieee80211.h> - #include <linux/crypto.h> #include <asm/scatterlist.h> #include <linux/crc32.h> @@ -62,7 +61,7 @@ struct ieee80211_tkip_data { u8 rx_hdr[16], tx_hdr[16]; }; -static void * ieee80211_tkip_init(int key_idx) +static void *ieee80211_tkip_init(int key_idx) { struct ieee80211_tkip_data *priv; @@ -88,7 +87,7 @@ static void * ieee80211_tkip_init(int key_idx) return priv; -fail: + fail: if (priv) { if (priv->tfm_michael) crypto_free_tfm(priv->tfm_michael); @@ -100,7 +99,6 @@ fail: return NULL; } - static void ieee80211_tkip_deinit(void *priv) { struct ieee80211_tkip_data *_priv = priv; @@ -111,51 +109,42 @@ static void ieee80211_tkip_deinit(void *priv) kfree(priv); } - static inline u16 RotR1(u16 val) { return (val >> 1) | (val << 15); } - static inline u8 Lo8(u16 val) { return val & 0xff; } - static inline u8 Hi8(u16 val) { return val >> 8; } - static inline u16 Lo16(u32 val) { return val & 0xffff; } - static inline u16 Hi16(u32 val) { return val >> 16; } - static inline u16 Mk16(u8 hi, u8 lo) { return lo | (((u16) hi) << 8); } - -static inline u16 Mk16_le(u16 *v) +static inline u16 Mk16_le(u16 * v) { return le16_to_cpu(*v); } - -static const u16 Sbox[256] = -{ +static const u16 Sbox[256] = { 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B, @@ -190,17 +179,16 @@ static const u16 Sbox[256] = 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, }; - static inline u16 _S_(u16 v) { u16 t = Sbox[Hi8(v)]; return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8)); } - #define PHASE1_LOOP_COUNT 8 -static void tkip_mixing_phase1(u16 *TTAK, const u8 *TK, const u8 *TA, u32 IV32) +static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA, + u32 IV32) { int i, j; @@ -221,13 +209,12 @@ static void tkip_mixing_phase1(u16 *TTAK, const u8 *TK, const u8 *TA, u32 IV32) } } - -static void tkip_mixing_phase2(u8 *WEPSeed, const u8 *TK, const u16 *TTAK, +static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, u16 IV16) { /* Make temporary area overlap WEP seed so that the final copy can be * avoided on little endian hosts. */ - u16 *PPK = (u16 *) &WEPSeed[4]; + u16 *PPK = (u16 *) & WEPSeed[4]; /* Step 1 - make copy of TTAK and bring in TSC */ PPK[0] = TTAK[0]; @@ -238,15 +225,15 @@ static void tkip_mixing_phase2(u8 *WEPSeed, const u8 *TK, const u16 *TTAK, PPK[5] = TTAK[4] + IV16; /* Step 2 - 96-bit bijective mixing using S-box */ - PPK[0] += _S_(PPK[5] ^ Mk16_le((u16 *) &TK[0])); - PPK[1] += _S_(PPK[0] ^ Mk16_le((u16 *) &TK[2])); - PPK[2] += _S_(PPK[1] ^ Mk16_le((u16 *) &TK[4])); - PPK[3] += _S_(PPK[2] ^ Mk16_le((u16 *) &TK[6])); - PPK[4] += _S_(PPK[3] ^ Mk16_le((u16 *) &TK[8])); - PPK[5] += _S_(PPK[4] ^ Mk16_le((u16 *) &TK[10])); - - PPK[0] += RotR1(PPK[5] ^ Mk16_le((u16 *) &TK[12])); - PPK[1] += RotR1(PPK[0] ^ Mk16_le((u16 *) &TK[14])); + PPK[0] += _S_(PPK[5] ^ Mk16_le((u16 *) & TK[0])); + PPK[1] += _S_(PPK[0] ^ Mk16_le((u16 *) & TK[2])); + PPK[2] += _S_(PPK[1] ^ Mk16_le((u16 *) & TK[4])); + PPK[3] += _S_(PPK[2] ^ Mk16_le((u16 *) & TK[6])); + PPK[4] += _S_(PPK[3] ^ Mk16_le((u16 *) & TK[8])); + PPK[5] += _S_(PPK[4] ^ Mk16_le((u16 *) & TK[10])); + + PPK[0] += RotR1(PPK[5] ^ Mk16_le((u16 *) & TK[12])); + PPK[1] += RotR1(PPK[0] ^ Mk16_le((u16 *) & TK[14])); PPK[2] += RotR1(PPK[1]); PPK[3] += RotR1(PPK[2]); PPK[4] += RotR1(PPK[3]); @@ -257,7 +244,7 @@ static void tkip_mixing_phase2(u8 *WEPSeed, const u8 *TK, const u16 *TTAK, WEPSeed[0] = Hi8(IV16); WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F; WEPSeed[2] = Lo8(IV16); - WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((u16 *) &TK[0])) >> 1); + WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((u16 *) & TK[0])) >> 1); #ifdef __BIG_ENDIAN { @@ -281,7 +268,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) skb->len < hdr_len) return -1; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; if (!tkey->tx_phase1_done) { tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, tkey->tx_iv32); @@ -298,7 +285,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) *pos++ = rc4key[0]; *pos++ = rc4key[1]; *pos++ = rc4key[2]; - *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */; + *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ; *pos++ = tkey->tx_iv32 & 0xff; *pos++ = (tkey->tx_iv32 >> 8) & 0xff; *pos++ = (tkey->tx_iv32 >> 16) & 0xff; @@ -341,7 +328,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (skb->len < hdr_len + 8 + 4) return -1; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; pos = skb->data + hdr_len; keyidx = pos[3]; if (!(keyidx & (1 << 5))) { @@ -427,9 +414,8 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return keyidx; } - -static int michael_mic(struct ieee80211_tkip_data *tkey, u8 *key, u8 *hdr, - u8 *data, size_t data_len, u8 *mic) +static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr, + u8 * data, size_t data_len, u8 * mic) { struct scatterlist sg[2]; @@ -453,37 +439,37 @@ static int michael_mic(struct ieee80211_tkip_data *tkey, u8 *key, u8 *hdr, return 0; } -static void michael_mic_hdr(struct sk_buff *skb, u8 *hdr) +static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) { struct ieee80211_hdr *hdr11; - hdr11 = (struct ieee80211_hdr *) skb->data; + hdr11 = (struct ieee80211_hdr *)skb->data; switch (le16_to_cpu(hdr11->frame_ctl) & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { case IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ + memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ break; case IEEE80211_FCTL_FROMDS: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */ + memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */ break; case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ + memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ break; case 0: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ + memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ break; } - hdr[12] = 0; /* priority */ - hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ + hdr[12] = 0; /* priority */ + hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ } - -static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv) +static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, + void *priv) { struct ieee80211_tkip_data *tkey = priv; u8 *pos; @@ -504,11 +490,9 @@ static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, void *pri return 0; } - #if WIRELESS_EXT >= 18 static void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr *hdr, - int keyidx) + struct ieee80211_hdr *hdr, int keyidx) { union iwreq_data wrqu; struct iw_michaelmicfailure ev; @@ -524,12 +508,11 @@ static void ieee80211_michael_mic_failure(struct net_device *dev, memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN); memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = sizeof(ev); - wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *) &ev); + wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); } #elif WIRELESS_EXT >= 15 static void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr *hdr, - int keyidx) + struct ieee80211_hdr *hdr, int keyidx) { union iwreq_data wrqu; char buf[128]; @@ -542,17 +525,16 @@ static void ieee80211_michael_mic_failure(struct net_device *dev, wrqu.data.length = strlen(buf); wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); } -#else /* WIRELESS_EXT >= 15 */ +#else /* WIRELESS_EXT >= 15 */ static inline void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr *hdr, - int keyidx) + struct ieee80211_hdr *hdr, + int keyidx) { } -#endif /* WIRELESS_EXT >= 15 */ - +#endif /* WIRELESS_EXT >= 15 */ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, - int hdr_len, void *priv) + int hdr_len, void *priv) { struct ieee80211_tkip_data *tkey = priv; u8 mic[8]; @@ -566,7 +548,7 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, return -1; if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { struct ieee80211_hdr *hdr; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; printk(KERN_DEBUG "%s: Michael MIC verification failed for " "MSDU from " MAC_FMT " keyidx=%d\n", skb->dev ? skb->dev->name : "N/A", MAC_ARG(hdr->addr2), @@ -587,8 +569,7 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, return 0; } - -static int ieee80211_tkip_set_key(void *key, int len, u8 *seq, void *priv) +static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) { struct ieee80211_tkip_data *tkey = priv; int keyidx; @@ -603,10 +584,10 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 *seq, void *priv) if (len == TKIP_KEY_LEN) { memcpy(tkey->key, key, TKIP_KEY_LEN); tkey->key_set = 1; - tkey->tx_iv16 = 1; /* TSC is initialized to 1 */ + tkey->tx_iv16 = 1; /* TSC is initialized to 1 */ if (seq) { tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) | - (seq[3] << 8) | seq[2]; + (seq[3] << 8) | seq[2]; tkey->rx_iv16 = (seq[1] << 8) | seq[0]; } } else if (len == 0) @@ -617,8 +598,7 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 *seq, void *priv) return 0; } - -static int ieee80211_tkip_get_key(void *key, int len, u8 *seq, void *priv) +static int ieee80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) { struct ieee80211_tkip_data *tkey = priv; @@ -647,8 +627,7 @@ static int ieee80211_tkip_get_key(void *key, int len, u8 *seq, void *priv) return TKIP_KEY_LEN; } - -static char * ieee80211_tkip_print_stats(char *p, void *priv) +static char *ieee80211_tkip_print_stats(char *p, void *priv) { struct ieee80211_tkip_data *tkip = priv; p += sprintf(p, "key[%d] alg=TKIP key_set=%d " @@ -674,7 +653,6 @@ static char * ieee80211_tkip_print_stats(char *p, void *priv) return p; } - static struct ieee80211_crypto_ops ieee80211_crypt_tkip = { .name = "TKIP", .init = ieee80211_tkip_init, @@ -686,23 +664,20 @@ static struct ieee80211_crypto_ops ieee80211_crypt_tkip = { .set_key = ieee80211_tkip_set_key, .get_key = ieee80211_tkip_get_key, .print_stats = ieee80211_tkip_print_stats, - .extra_prefix_len = 4 + 4, /* IV + ExtIV */ - .extra_postfix_len = 8 + 4, /* MIC + ICV */ - .owner = THIS_MODULE, + .extra_prefix_len = 4 + 4, /* IV + ExtIV */ + .extra_postfix_len = 8 + 4, /* MIC + ICV */ + .owner = THIS_MODULE, }; - static int __init ieee80211_crypto_tkip_init(void) { return ieee80211_register_crypto_ops(&ieee80211_crypt_tkip); } - static void __exit ieee80211_crypto_tkip_exit(void) { ieee80211_unregister_crypto_ops(&ieee80211_crypt_tkip); } - module_init(ieee80211_crypto_tkip_init); module_exit(ieee80211_crypto_tkip_exit); diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index bec1d3470d3..b4d2514a090 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -20,7 +20,6 @@ #include <net/ieee80211.h> - #include <linux/crypto.h> #include <asm/scatterlist.h> #include <linux/crc32.h> @@ -29,7 +28,6 @@ MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Host AP crypt: WEP"); MODULE_LICENSE("GPL"); - struct prism2_wep_data { u32 iv; #define WEP_KEY_LEN 13 @@ -39,8 +37,7 @@ struct prism2_wep_data { struct crypto_tfm *tfm; }; - -static void * prism2_wep_init(int keyidx) +static void *prism2_wep_init(int keyidx) { struct prism2_wep_data *priv; @@ -62,7 +59,7 @@ static void * prism2_wep_init(int keyidx) return priv; -fail: + fail: if (priv) { if (priv->tfm) crypto_free_tfm(priv->tfm); @@ -71,7 +68,6 @@ fail: return NULL; } - static void prism2_wep_deinit(void *priv) { struct prism2_wep_data *_priv = priv; @@ -80,7 +76,6 @@ static void prism2_wep_deinit(void *priv) kfree(priv); } - /* Perform WEP encryption on given skb that has at least 4 bytes of headroom * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, * so the payload length increases with 8 bytes. @@ -143,7 +138,6 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return 0; } - /* Perform WEP decryption on given buffer. Buffer includes whole WEP part of * the frame: IV (4 bytes), encrypted payload (including SNAP header), * ICV (4 bytes). len includes both IV and ICV. @@ -202,8 +196,7 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return 0; } - -static int prism2_wep_set_key(void *key, int len, u8 *seq, void *priv) +static int prism2_wep_set_key(void *key, int len, u8 * seq, void *priv) { struct prism2_wep_data *wep = priv; @@ -216,8 +209,7 @@ static int prism2_wep_set_key(void *key, int len, u8 *seq, void *priv) return 0; } - -static int prism2_wep_get_key(void *key, int len, u8 *seq, void *priv) +static int prism2_wep_get_key(void *key, int len, u8 * seq, void *priv) { struct prism2_wep_data *wep = priv; @@ -229,16 +221,13 @@ static int prism2_wep_get_key(void *key, int len, u8 *seq, void *priv) return wep->key_len; } - -static char * prism2_wep_print_stats(char *p, void *priv) +static char *prism2_wep_print_stats(char *p, void *priv) { struct prism2_wep_data *wep = priv; - p += sprintf(p, "key[%d] alg=WEP len=%d\n", - wep->key_idx, wep->key_len); + p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); return p; } - static struct ieee80211_crypto_ops ieee80211_crypt_wep = { .name = "WEP", .init = prism2_wep_init, @@ -250,23 +239,20 @@ static struct ieee80211_crypto_ops ieee80211_crypt_wep = { .set_key = prism2_wep_set_key, .get_key = prism2_wep_get_key, .print_stats = prism2_wep_print_stats, - .extra_prefix_len = 4, /* IV */ - .extra_postfix_len = 4, /* ICV */ + .extra_prefix_len = 4, /* IV */ + .extra_postfix_len = 4, /* ICV */ .owner = THIS_MODULE, }; - static int __init ieee80211_crypto_wep_init(void) { return ieee80211_register_crypto_ops(&ieee80211_crypt_wep); } - static void __exit ieee80211_crypto_wep_exit(void) { ieee80211_unregister_crypto_ops(&ieee80211_crypt_wep); } - module_init(ieee80211_crypto_wep_init); module_exit(ieee80211_crypto_wep_exit); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 553acb2e93d..03a47343ddc 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -54,7 +54,8 @@ #include <net/ieee80211.h> MODULE_DESCRIPTION("802.11 data/management/control stack"); -MODULE_AUTHOR("Copyright (C) 2004 Intel Corporation <jketreno@linux.intel.com>"); +MODULE_AUTHOR + ("Copyright (C) 2004 Intel Corporation <jketreno@linux.intel.com>"); MODULE_LICENSE("GPL"); #define DRV_NAME "ieee80211" @@ -64,9 +65,9 @@ static inline int ieee80211_networks_allocate(struct ieee80211_device *ieee) if (ieee->networks) return 0; - ieee->networks = kmalloc( - MAX_NETWORK_COUNT * sizeof(struct ieee80211_network), - GFP_KERNEL); + ieee->networks = + kmalloc(MAX_NETWORK_COUNT * sizeof(struct ieee80211_network), + GFP_KERNEL); if (!ieee->networks) { printk(KERN_WARNING "%s: Out of memory allocating beacons\n", ieee->dev->name); @@ -94,10 +95,10 @@ static inline void ieee80211_networks_initialize(struct ieee80211_device *ieee) INIT_LIST_HEAD(&ieee->network_free_list); INIT_LIST_HEAD(&ieee->network_list); for (i = 0; i < MAX_NETWORK_COUNT; i++) - list_add_tail(&ieee->networks[i].list, &ieee->network_free_list); + list_add_tail(&ieee->networks[i].list, + &ieee->network_free_list); } - struct net_device *alloc_ieee80211(int sizeof_priv) { struct ieee80211_device *ieee; @@ -118,8 +119,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv) err = ieee80211_networks_allocate(ieee); if (err) { - IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", - err); + IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", err); goto failed; } ieee80211_networks_initialize(ieee); @@ -132,7 +132,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv) /* Default to enabling full open WEP with host based encrypt/decrypt */ ieee->host_encrypt = 1; ieee->host_decrypt = 1; - ieee->ieee802_1x = 1; /* Default to supporting 802.1x */ + ieee->ieee802_1x = 1; /* Default to supporting 802.1x */ INIT_LIST_HEAD(&ieee->crypt_deinit_list); init_timer(&ieee->crypt_deinit_timer); @@ -141,21 +141,20 @@ struct net_device *alloc_ieee80211(int sizeof_priv) spin_lock_init(&ieee->lock); - ieee->wpa_enabled = 0; - ieee->tkip_countermeasures = 0; - ieee->drop_unencrypted = 0; - ieee->privacy_invoked = 0; - ieee->ieee802_1x = 1; + ieee->wpa_enabled = 0; + ieee->tkip_countermeasures = 0; + ieee->drop_unencrypted = 0; + ieee->privacy_invoked = 0; + ieee->ieee802_1x = 1; return dev; - failed: + failed: if (dev) free_netdev(dev); return NULL; } - void free_ieee80211(struct net_device *dev) { struct ieee80211_device *ieee = netdev_priv(dev); @@ -193,7 +192,7 @@ static int show_debug_level(char *page, char **start, off_t offset, return snprintf(page, count, "0x%08X\n", ieee80211_debug_level); } -static int store_debug_level(struct file *file, const char __user *buffer, +static int store_debug_level(struct file *file, const char __user * buffer, unsigned long count, void *data) { char buf[] = "0x00000000"; @@ -264,13 +263,12 @@ static void __exit ieee80211_exit(void) module_param(debug, int, 0444); MODULE_PARM_DESC(debug, "debug output mask"); - module_exit(ieee80211_exit); module_init(ieee80211_init); #endif - -const char *escape_essid(const char *essid, u8 essid_len) { +const char *escape_essid(const char *essid, u8 essid_len) +{ static char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; const char *s = essid; char *d = escaped; @@ -280,7 +278,7 @@ const char *escape_essid(const char *essid, u8 essid_len) { return escaped; } - essid_len = min(essid_len, (u8)IW_ESSID_MAX_SIZE); + essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE); while (essid_len--) { if (*s == '\0') { *d++ = '\\'; diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index d582faa6447..f7dcd854139 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -52,11 +52,14 @@ static inline void ieee80211_monitor_rx(struct ieee80211_device *ieee, netif_rx(skb); } - /* Called only as a tasklet (software IRQ) */ -static struct ieee80211_frag_entry * -ieee80211_frag_cache_find(struct ieee80211_device *ieee, unsigned int seq, - unsigned int frag, u8 *src, u8 *dst) +static struct ieee80211_frag_entry *ieee80211_frag_cache_find(struct + ieee80211_device + *ieee, + unsigned int seq, + unsigned int frag, + u8 * src, + u8 * dst) { struct ieee80211_frag_entry *entry; int i; @@ -65,10 +68,9 @@ ieee80211_frag_cache_find(struct ieee80211_device *ieee, unsigned int seq, entry = &ieee->frag_cache[i]; if (entry->skb != NULL && time_after(jiffies, entry->first_frag_time + 2 * HZ)) { - IEEE80211_DEBUG_FRAG( - "expiring fragment cache entry " - "seq=%u last_frag=%u\n", - entry->seq, entry->last_frag); + IEEE80211_DEBUG_FRAG("expiring fragment cache entry " + "seq=%u last_frag=%u\n", + entry->seq, entry->last_frag); dev_kfree_skb_any(entry->skb); entry->skb = NULL; } @@ -84,9 +86,8 @@ ieee80211_frag_cache_find(struct ieee80211_device *ieee, unsigned int seq, } /* Called only as a tasklet (software IRQ) */ -static struct sk_buff * -ieee80211_frag_cache_get(struct ieee80211_device *ieee, - struct ieee80211_hdr *hdr) +static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, + struct ieee80211_hdr *hdr) { struct sk_buff *skb = NULL; u16 sc; @@ -101,9 +102,9 @@ ieee80211_frag_cache_get(struct ieee80211_device *ieee, /* Reserve enough space to fit maximum frame length */ skb = dev_alloc_skb(ieee->dev->mtu + sizeof(struct ieee80211_hdr) + - 8 /* LLC */ + - 2 /* alignment */ + - 8 /* WEP */ + ETH_ALEN /* WDS */); + 8 /* LLC */ + + 2 /* alignment */ + + 8 /* WEP */ + ETH_ALEN /* WDS */ ); if (skb == NULL) return NULL; @@ -135,7 +136,6 @@ ieee80211_frag_cache_get(struct ieee80211_device *ieee, return skb; } - /* Called only as a tasklet (software IRQ) */ static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee, struct ieee80211_hdr *hdr) @@ -151,9 +151,8 @@ static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee, hdr->addr1); if (entry == NULL) { - IEEE80211_DEBUG_FRAG( - "could not invalidate fragment cache " - "entry (seq=%u)\n", seq); + IEEE80211_DEBUG_FRAG("could not invalidate fragment cache " + "entry (seq=%u)\n", seq); return -1; } @@ -161,7 +160,6 @@ static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee, return 0; } - #ifdef NOT_YET /* ieee80211_rx_frame_mgtmt * @@ -201,7 +199,7 @@ ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb, return 0; } - if (ieee->iw_mode == IW_MODE_MASTER) { + if (ieee->iw_mode == IW_MODE_MASTER) { if (type != WLAN_FC_TYPE_MGMT && type != WLAN_FC_TYPE_CTRL) { printk(KERN_DEBUG "%s: unknown management frame " "(type=0x%02x, stype=0x%02x) dropped\n", @@ -219,14 +217,13 @@ ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb, } #endif - /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -static unsigned char rfc1042_header[] = -{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; +static unsigned char rfc1042_header[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; + /* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ static unsigned char bridge_tunnel_header[] = -{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; /* No encapsulation header if EtherType < 0x600 (=length) */ /* Called by ieee80211_rx_frame_decrypt */ @@ -241,7 +238,7 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee, if (skb->len < 24) return 0; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; fc = le16_to_cpu(hdr->frame_ctl); /* check that the frame is unicast frame to us */ @@ -271,7 +268,7 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee, /* Called only as a tasklet (software IRQ), by ieee80211_rx */ static inline int -ieee80211_rx_frame_decrypt(struct ieee80211_device* ieee, struct sk_buff *skb, +ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_crypt_data *crypt) { struct ieee80211_hdr *hdr; @@ -280,12 +277,11 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device* ieee, struct sk_buff *skb, if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL) return 0; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); #ifdef CONFIG_IEEE80211_CRYPT_TKIP - if (ieee->tkip_countermeasures && - strcmp(crypt->ops->name, "TKIP") == 0) { + if (ieee->tkip_countermeasures && strcmp(crypt->ops->name, "TKIP") == 0) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: TKIP countermeasures: dropped " "received packet from " MAC_FMT "\n", @@ -299,9 +295,8 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device* ieee, struct sk_buff *skb, res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv); atomic_dec(&crypt->refcnt); if (res < 0) { - IEEE80211_DEBUG_DROP( - "decryption failed (SA=" MAC_FMT - ") res=%d\n", MAC_ARG(hdr->addr2), res); + IEEE80211_DEBUG_DROP("decryption failed (SA=" MAC_FMT + ") res=%d\n", MAC_ARG(hdr->addr2), res); if (res == -2) IEEE80211_DEBUG_DROP("Decryption failed ICV " "mismatch (key %d)\n", @@ -313,11 +308,11 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device* ieee, struct sk_buff *skb, return res; } - /* Called only as a tasklet (software IRQ), by ieee80211_rx */ static inline int -ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device* ieee, struct sk_buff *skb, - int keyidx, struct ieee80211_crypt_data *crypt) +ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, + struct sk_buff *skb, int keyidx, + struct ieee80211_crypt_data *crypt) { struct ieee80211_hdr *hdr; int res, hdrlen; @@ -325,7 +320,7 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device* ieee, struct sk_buff *s if (crypt == NULL || crypt->ops->decrypt_msdu == NULL) return 0; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); atomic_inc(&crypt->refcnt); @@ -341,7 +336,6 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device* ieee, struct sk_buff *s return 0; } - /* All received frames are sent to this function. @skb contains the frame in * IEEE 802.11 format, i.e., in the format it was sent over air. * This function is called only as a tasklet (software IRQ). */ @@ -373,8 +367,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, stats = &ieee->stats; if (skb->len < 10) { - printk(KERN_INFO "%s: SKB length < 10\n", - dev->name); + printk(KERN_INFO "%s: SKB length < 10\n", dev->name); goto rx_dropped; } @@ -399,8 +392,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* Update spy records */ wireless_spy_update(dev, hdr->addr2, &wstats); } -#endif /* IW_WIRELESS_SPY */ -#endif /* WIRELESS_EXT > 15 */ +#endif /* IW_WIRELESS_SPY */ +#endif /* WIRELESS_EXT > 15 */ hostap_update_rx_stats(local->ap, hdr, rx_stats); #endif @@ -429,8 +422,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * stations that do not support WEP key mapping). */ if (!(hdr->addr1[0] & 0x01) || local->bcrx_sta_key) - (void) hostap_handle_sta_crypto(local, hdr, &crypt, - &sta); + (void)hostap_handle_sta_crypto(local, hdr, &crypt, + &sta); #endif /* allow NULL decrypt to indicate an station specific override @@ -451,13 +444,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, goto rx_dropped; } } - #ifdef NOT_YET if (type != WLAN_FC_TYPE_DATA) { if (type == WLAN_FC_TYPE_MGMT && stype == WLAN_FC_STYPE_AUTH && fc & IEEE80211_FCTL_PROTECTED && ieee->host_decrypt && - (keyidx = hostap_rx_frame_decrypt(ieee, skb, crypt)) < 0) - { + (keyidx = hostap_rx_frame_decrypt(ieee, skb, crypt)) < 0) { printk(KERN_DEBUG "%s: failed to decrypt mgmt::auth " "from " MAC_FMT "\n", dev->name, MAC_ARG(hdr->addr2)); @@ -507,9 +498,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, } if (ieee->iw_mode == IW_MODE_MASTER && !wds && - (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == IEEE80211_FCTL_FROMDS && - ieee->stadev && - memcmp(hdr->addr2, ieee->assoc_ap_addr, ETH_ALEN) == 0) { + (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == + IEEE80211_FCTL_FROMDS && ieee->stadev + && memcmp(hdr->addr2, ieee->assoc_ap_addr, ETH_ALEN) == 0) { /* Frame from BSSID of the AP for which we are a client */ skb->dev = dev = ieee->stadev; stats = hostap_get_stats(dev); @@ -521,8 +512,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, #ifdef NOT_YET if ((ieee->iw_mode == IW_MODE_MASTER || - ieee->iw_mode == IW_MODE_REPEAT) && - !from_assoc_ap) { + ieee->iw_mode == IW_MODE_REPEAT) && !from_assoc_ap) { switch (hostap_handle_sta_rx(ieee, dev, skb, rx_stats, wds != NULL)) { case AP_RX_CONTINUE_NOT_AUTHORIZED: @@ -546,11 +536,10 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, stype != IEEE80211_STYPE_DATA_CFPOLL && stype != IEEE80211_STYPE_DATA_CFACKPOLL) { if (stype != IEEE80211_STYPE_NULLFUNC) - IEEE80211_DEBUG_DROP( - "RX: dropped data frame " - "with no data (type=0x%02x, " - "subtype=0x%02x, len=%d)\n", - type, stype, skb->len); + IEEE80211_DEBUG_DROP("RX: dropped data frame " + "with no data (type=0x%02x, " + "subtype=0x%02x, len=%d)\n", + type, stype, skb->len); goto rx_dropped; } @@ -560,7 +549,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, (keyidx = ieee80211_rx_frame_decrypt(ieee, skb, crypt)) < 0) goto rx_dropped; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; /* skb: hdr + (possibly fragmented) plaintext payload */ // PR: FIXME: hostap has additional conditions in the "if" below: @@ -614,7 +603,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* this was the last fragment and the frame will be * delivered, so remove skb from fragment cache */ skb = frag_skb; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; ieee80211_frag_cache_invalidate(ieee, hdr); } @@ -624,28 +613,26 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, ieee80211_rx_frame_decrypt_msdu(ieee, skb, keyidx, crypt)) goto rx_dropped; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep) { - if (/*ieee->ieee802_1x &&*/ - ieee80211_is_eapol_frame(ieee, skb)) { + if ( /*ieee->ieee802_1x && */ + ieee80211_is_eapol_frame(ieee, skb)) { /* pass unencrypted EAPOL frames even if encryption is * configured */ } else { - IEEE80211_DEBUG_DROP( - "encryption configured, but RX " - "frame not encrypted (SA=" MAC_FMT ")\n", - MAC_ARG(hdr->addr2)); + IEEE80211_DEBUG_DROP("encryption configured, but RX " + "frame not encrypted (SA=" MAC_FMT + ")\n", MAC_ARG(hdr->addr2)); goto rx_dropped; } } if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep && !ieee80211_is_eapol_frame(ieee, skb)) { - IEEE80211_DEBUG_DROP( - "dropped unencrypted RX data " - "frame from " MAC_FMT - " (drop_unencrypted=1)\n", - MAC_ARG(hdr->addr2)); + IEEE80211_DEBUG_DROP("dropped unencrypted RX data " + "frame from " MAC_FMT + " (drop_unencrypted=1)\n", + MAC_ARG(hdr->addr2)); goto rx_dropped; } @@ -673,8 +660,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, } else if (!frame_authorized) { printk(KERN_DEBUG "%s: dropped frame from " "unauthorized port (IEEE 802.1X): " - "ethertype=0x%04x\n", - dev->name, ethertype); + "ethertype=0x%04x\n", dev->name, ethertype); goto rx_dropped; } } @@ -702,8 +688,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, #ifdef NOT_YET if (wds && ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_TODS) && - skb->len >= ETH_HLEN + ETH_ALEN) { + IEEE80211_FCTL_TODS) && skb->len >= ETH_HLEN + ETH_ALEN) { /* Non-standard frame: get addr4 from its bogus location after * the payload */ memcpy(skb->data + ETH_ALEN, @@ -716,8 +701,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, stats->rx_bytes += skb->len; #ifdef NOT_YET - if (ieee->iw_mode == IW_MODE_MASTER && !wds && - ieee->ap->bridge_packets) { + if (ieee->iw_mode == IW_MODE_MASTER && !wds && ieee->ap->bridge_packets) { if (dst[0] & 0x01) { /* copy multicast frame both to the higher layers and * to the wireless media */ @@ -743,25 +727,24 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, skb2->dev = dev; dev_queue_xmit(skb2); } - #endif if (skb) { skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); skb->dev = dev; - skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */ + skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */ netif_rx(skb); } - rx_exit: + rx_exit: #ifdef NOT_YET if (sta) hostap_handle_sta_release(sta); #endif return 1; - rx_dropped: + rx_dropped: stats->rx_dropped++; /* Returning 0 indicates to caller that we have not handled the SKB-- @@ -785,22 +768,21 @@ static inline int ieee80211_is_ofdm_rate(u8 rate) case IEEE80211_OFDM_RATE_54MB: return 1; } - return 0; + return 0; } - -static inline int ieee80211_network_init( - struct ieee80211_device *ieee, - struct ieee80211_probe_response *beacon, - struct ieee80211_network *network, - struct ieee80211_rx_stats *stats) +static inline int ieee80211_network_init(struct ieee80211_device *ieee, + struct ieee80211_probe_response + *beacon, + struct ieee80211_network *network, + struct ieee80211_rx_stats *stats) { #ifdef CONFIG_IEEE80211_DEBUG char rates_str[64]; char *p; #endif struct ieee80211_info_element *info_element; - u16 left; + u16 left; u8 i; /* Pull out fixed field data */ @@ -810,7 +792,7 @@ static inline int ieee80211_network_init( network->time_stamp[0] = beacon->time_stamp[0]; network->time_stamp[1] = beacon->time_stamp[1]; network->beacon_interval = beacon->beacon_interval; - /* Where to pull this? beacon->listen_interval;*/ + /* Where to pull this? beacon->listen_interval; */ network->listen_interval = 0x0A; network->rates_len = network->rates_ex_len = 0; network->last_associate = 0; @@ -824,18 +806,20 @@ static inline int ieee80211_network_init( } else network->flags |= NETWORK_HAS_CCK; - network->wpa_ie_len = 0; - network->rsn_ie_len = 0; + network->wpa_ie_len = 0; + network->rsn_ie_len = 0; - info_element = &beacon->info_element; + info_element = &beacon->info_element; left = stats->len - ((void *)info_element - (void *)beacon); while (left >= sizeof(struct ieee80211_info_element_hdr)) { - if (sizeof(struct ieee80211_info_element_hdr) + info_element->len > left) { - IEEE80211_DEBUG_SCAN("SCAN: parse failed: info_element->len + 2 > left : info_element->len+2=%d left=%d.\n", - info_element->len + sizeof(struct ieee80211_info_element), - left); + if (sizeof(struct ieee80211_info_element_hdr) + + info_element->len > left) { + IEEE80211_DEBUG_SCAN + ("SCAN: parse failed: info_element->len + 2 > left : info_element->len+2=%Zd left=%d.\n", + info_element->len + + sizeof(struct ieee80211_info_element), left); return 1; - } + } switch (info_element->id) { case MFIE_TYPE_SSID: @@ -846,10 +830,11 @@ static inline int ieee80211_network_init( } network->ssid_len = min(info_element->len, - (u8)IW_ESSID_MAX_SIZE); - memcpy(network->ssid, info_element->data, network->ssid_len); - if (network->ssid_len < IW_ESSID_MAX_SIZE) - memset(network->ssid + network->ssid_len, 0, + (u8) IW_ESSID_MAX_SIZE); + memcpy(network->ssid, info_element->data, + network->ssid_len); + if (network->ssid_len < IW_ESSID_MAX_SIZE) + memset(network->ssid + network->ssid_len, 0, IW_ESSID_MAX_SIZE - network->ssid_len); IEEE80211_DEBUG_SCAN("MFIE_TYPE_SSID: '%s' len=%d.\n", @@ -860,18 +845,23 @@ static inline int ieee80211_network_init( #ifdef CONFIG_IEEE80211_DEBUG p = rates_str; #endif - network->rates_len = min(info_element->len, MAX_RATES_LENGTH); + network->rates_len = + min(info_element->len, MAX_RATES_LENGTH); for (i = 0; i < network->rates_len; i++) { network->rates[i] = info_element->data[i]; #ifdef CONFIG_IEEE80211_DEBUG - p += snprintf(p, sizeof(rates_str) - (p - rates_str), "%02X ", network->rates[i]); + p += snprintf(p, + sizeof(rates_str) - (p - + rates_str), + "%02X ", network->rates[i]); #endif - if (ieee80211_is_ofdm_rate(info_element->data[i])) { + if (ieee80211_is_ofdm_rate + (info_element->data[i])) { network->flags |= NETWORK_HAS_OFDM; if (info_element->data[i] & IEEE80211_BASIC_RATE_MASK) network->flags &= - ~NETWORK_HAS_CCK; + ~NETWORK_HAS_CCK; } } @@ -883,18 +873,23 @@ static inline int ieee80211_network_init( #ifdef CONFIG_IEEE80211_DEBUG p = rates_str; #endif - network->rates_ex_len = min(info_element->len, MAX_RATES_EX_LENGTH); + network->rates_ex_len = + min(info_element->len, MAX_RATES_EX_LENGTH); for (i = 0; i < network->rates_ex_len; i++) { network->rates_ex[i] = info_element->data[i]; #ifdef CONFIG_IEEE80211_DEBUG - p += snprintf(p, sizeof(rates_str) - (p - rates_str), "%02X ", network->rates[i]); + p += snprintf(p, + sizeof(rates_str) - (p - + rates_str), + "%02X ", network->rates[i]); #endif - if (ieee80211_is_ofdm_rate(info_element->data[i])) { + if (ieee80211_is_ofdm_rate + (info_element->data[i])) { network->flags |= NETWORK_HAS_OFDM; if (info_element->data[i] & IEEE80211_BASIC_RATE_MASK) network->flags &= - ~NETWORK_HAS_CCK; + ~NETWORK_HAS_CCK; } } @@ -903,14 +898,14 @@ static inline int ieee80211_network_init( break; case MFIE_TYPE_DS_SET: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_DS_SET: %d\n", + IEEE80211_DEBUG_SCAN("MFIE_TYPE_DS_SET: %d\n", info_element->data[0]); if (stats->freq == IEEE80211_24GHZ_BAND) network->channel = info_element->data[0]; break; - case MFIE_TYPE_FH_SET: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_FH_SET: ignored\n"); + case MFIE_TYPE_FH_SET: + IEEE80211_DEBUG_SCAN("MFIE_TYPE_FH_SET: ignored\n"); break; case MFIE_TYPE_CF_SET: @@ -932,13 +927,13 @@ static inline int ieee80211_network_init( case MFIE_TYPE_GENERIC: IEEE80211_DEBUG_SCAN("MFIE_TYPE_GENERIC: %d bytes\n", info_element->len); - if (info_element->len >= 4 && + if (info_element->len >= 4 && info_element->data[0] == 0x00 && info_element->data[1] == 0x50 && info_element->data[2] == 0xf2 && info_element->data[3] == 0x01) { network->wpa_ie_len = min(info_element->len + 2, - MAX_WPA_IE_LEN); + MAX_WPA_IE_LEN); memcpy(network->wpa_ie, info_element, network->wpa_ie_len); } @@ -948,7 +943,7 @@ static inline int ieee80211_network_init( IEEE80211_DEBUG_SCAN("MFIE_TYPE_RSN: %d bytes\n", info_element->len); network->rsn_ie_len = min(info_element->len + 2, - MAX_WPA_IE_LEN); + MAX_WPA_IE_LEN); memcpy(network->rsn_ie, info_element, network->rsn_ie_len); break; @@ -956,14 +951,14 @@ static inline int ieee80211_network_init( default: IEEE80211_DEBUG_SCAN("unsupported IE %d\n", info_element->id); - break; - } + break; + } left -= sizeof(struct ieee80211_info_element_hdr) + - info_element->len; + info_element->len; info_element = (struct ieee80211_info_element *) - &info_element->data[info_element->len]; - } + &info_element->data[info_element->len]; + } network->mode = 0; if (stats->freq == IEEE80211_52GHZ_BAND) @@ -1032,10 +1027,13 @@ static inline void update_network(struct ieee80211_network *dst, /* dst->last_associate is not overwritten */ } -static inline void ieee80211_process_probe_response( - struct ieee80211_device *ieee, - struct ieee80211_probe_response *beacon, - struct ieee80211_rx_stats *stats) +static inline void ieee80211_process_probe_response(struct ieee80211_device + *ieee, + struct + ieee80211_probe_response + *beacon, + struct ieee80211_rx_stats + *stats) { struct ieee80211_network network; struct ieee80211_network *target; @@ -1045,33 +1043,35 @@ static inline void ieee80211_process_probe_response( #endif unsigned long flags; - IEEE80211_DEBUG_SCAN( - "'%s' (" MAC_FMT "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", - escape_essid(info_element->data, info_element->len), - MAC_ARG(beacon->header.addr3), - (beacon->capability & (1<<0xf)) ? '1' : '0', - (beacon->capability & (1<<0xe)) ? '1' : '0', - (beacon->capability & (1<<0xd)) ? '1' : '0', - (beacon->capability & (1<<0xc)) ? '1' : '0', - (beacon->capability & (1<<0xb)) ? '1' : '0', - (beacon->capability & (1<<0xa)) ? '1' : '0', - (beacon->capability & (1<<0x9)) ? '1' : '0', - (beacon->capability & (1<<0x8)) ? '1' : '0', - (beacon->capability & (1<<0x7)) ? '1' : '0', - (beacon->capability & (1<<0x6)) ? '1' : '0', - (beacon->capability & (1<<0x5)) ? '1' : '0', - (beacon->capability & (1<<0x4)) ? '1' : '0', - (beacon->capability & (1<<0x3)) ? '1' : '0', - (beacon->capability & (1<<0x2)) ? '1' : '0', - (beacon->capability & (1<<0x1)) ? '1' : '0', - (beacon->capability & (1<<0x0)) ? '1' : '0'); + IEEE80211_DEBUG_SCAN("'%s' (" MAC_FMT + "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", + escape_essid(info_element->data, + info_element->len), + MAC_ARG(beacon->header.addr3), + (beacon->capability & (1 << 0xf)) ? '1' : '0', + (beacon->capability & (1 << 0xe)) ? '1' : '0', + (beacon->capability & (1 << 0xd)) ? '1' : '0', + (beacon->capability & (1 << 0xc)) ? '1' : '0', + (beacon->capability & (1 << 0xb)) ? '1' : '0', + (beacon->capability & (1 << 0xa)) ? '1' : '0', + (beacon->capability & (1 << 0x9)) ? '1' : '0', + (beacon->capability & (1 << 0x8)) ? '1' : '0', + (beacon->capability & (1 << 0x7)) ? '1' : '0', + (beacon->capability & (1 << 0x6)) ? '1' : '0', + (beacon->capability & (1 << 0x5)) ? '1' : '0', + (beacon->capability & (1 << 0x4)) ? '1' : '0', + (beacon->capability & (1 << 0x3)) ? '1' : '0', + (beacon->capability & (1 << 0x2)) ? '1' : '0', + (beacon->capability & (1 << 0x1)) ? '1' : '0', + (beacon->capability & (1 << 0x0)) ? '1' : '0'); if (ieee80211_network_init(ieee, beacon, &network, stats)) { IEEE80211_DEBUG_SCAN("Dropped '%s' (" MAC_FMT ") via %s.\n", escape_essid(info_element->data, info_element->len), MAC_ARG(beacon->header.addr3), - WLAN_FC_GET_STYPE(beacon->header.frame_ctl) == + WLAN_FC_GET_STYPE(beacon->header. + frame_ctl) == IEEE80211_STYPE_PROBE_RESP ? "PROBE RESPONSE" : "BEACON"); return; @@ -1117,13 +1117,13 @@ static inline void ieee80211_process_probe_response( list_del(ieee->network_free_list.next); } - #ifdef CONFIG_IEEE80211_DEBUG IEEE80211_DEBUG_SCAN("Adding '%s' (" MAC_FMT ") via %s.\n", escape_essid(network.ssid, network.ssid_len), MAC_ARG(network.bssid), - WLAN_FC_GET_STYPE(beacon->header.frame_ctl) == + WLAN_FC_GET_STYPE(beacon->header. + frame_ctl) == IEEE80211_STYPE_PROBE_RESP ? "PROBE RESPONSE" : "BEACON"); #endif @@ -1134,7 +1134,8 @@ static inline void ieee80211_process_probe_response( escape_essid(target->ssid, target->ssid_len), MAC_ARG(target->bssid), - WLAN_FC_GET_STYPE(beacon->header.frame_ctl) == + WLAN_FC_GET_STYPE(beacon->header. + frame_ctl) == IEEE80211_STYPE_PROBE_RESP ? "PROBE RESPONSE" : "BEACON"); update_network(target, &network); @@ -1162,16 +1163,20 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, IEEE80211_DEBUG_MGMT("received PROBE RESPONSE (%d)\n", WLAN_FC_GET_STYPE(header->frame_ctl)); IEEE80211_DEBUG_SCAN("Probe response\n"); - ieee80211_process_probe_response( - ieee, (struct ieee80211_probe_response *)header, stats); + ieee80211_process_probe_response(ieee, + (struct + ieee80211_probe_response *) + header, stats); break; case IEEE80211_STYPE_BEACON: IEEE80211_DEBUG_MGMT("received BEACON (%d)\n", WLAN_FC_GET_STYPE(header->frame_ctl)); IEEE80211_DEBUG_SCAN("Beacon\n"); - ieee80211_process_probe_response( - ieee, (struct ieee80211_probe_response *)header, stats); + ieee80211_process_probe_response(ieee, + (struct + ieee80211_probe_response *) + header, stats); break; default: @@ -1184,6 +1189,5 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, } } - EXPORT_SYMBOL(ieee80211_rx_mgt); EXPORT_SYMBOL(ieee80211_rx); diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index b7ea3e25e25..c9aaff3fea1 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -45,10 +45,8 @@ #include <net/ieee80211.h> - /* - 802.11 Data Frame ,-------------------------------------------------------------------. @@ -82,7 +80,6 @@ Desc. | IV | Encrypted | ICV | `-----------------------' Total: 8 non-data bytes - 802.3 Ethernet Data Frame ,-----------------------------------------. @@ -131,7 +128,7 @@ payload of each frame is reduced to 492 bytes. static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 }; static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 }; -static inline int ieee80211_put_snap(u8 *data, u16 h_proto) +static inline int ieee80211_put_snap(u8 * data, u16 h_proto) { struct ieee80211_snap_hdr *snap; u8 *oui; @@ -149,17 +146,15 @@ static inline int ieee80211_put_snap(u8 *data, u16 h_proto) snap->oui[1] = oui[1]; snap->oui[2] = oui[2]; - *(u16 *)(data + SNAP_SIZE) = htons(h_proto); + *(u16 *) (data + SNAP_SIZE) = htons(h_proto); return SNAP_SIZE + sizeof(u16); } -static inline int ieee80211_encrypt_fragment( - struct ieee80211_device *ieee, - struct sk_buff *frag, - int hdr_len) +static inline int ieee80211_encrypt_fragment(struct ieee80211_device *ieee, + struct sk_buff *frag, int hdr_len) { - struct ieee80211_crypt_data* crypt = ieee->crypt[ieee->tx_keyidx]; + struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx]; int res; #ifdef CONFIG_IEEE80211_CRYPT_TKIP @@ -167,7 +162,7 @@ static inline int ieee80211_encrypt_fragment( if (ieee->tkip_countermeasures && crypt && crypt->ops && strcmp(crypt->ops->name, "TKIP") == 0) { - header = (struct ieee80211_hdr *) frag->data; + header = (struct ieee80211_hdr *)frag->data; if (net_ratelimit()) { printk(KERN_DEBUG "%s: TKIP countermeasures: dropped " "TX packet to " MAC_FMT "\n", @@ -200,8 +195,8 @@ static inline int ieee80211_encrypt_fragment( return 0; } - -void ieee80211_txb_free(struct ieee80211_txb *txb) { +void ieee80211_txb_free(struct ieee80211_txb *txb) +{ int i; if (unlikely(!txb)) return; @@ -216,9 +211,8 @@ static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, { struct ieee80211_txb *txb; int i; - txb = kmalloc( - sizeof(struct ieee80211_txb) + (sizeof(u8*) * nr_frags), - gfp_mask); + txb = kmalloc(sizeof(struct ieee80211_txb) + (sizeof(u8 *) * nr_frags), + gfp_mask); if (!txb) return NULL; @@ -243,8 +237,7 @@ static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, } /* SKBs are added to the ieee->tx_queue. */ -int ieee80211_xmit(struct sk_buff *skb, - struct net_device *dev) +int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_device *ieee = netdev_priv(dev); struct ieee80211_txb *txb = NULL; @@ -255,21 +248,20 @@ int ieee80211_xmit(struct sk_buff *skb, int ether_type, encrypt; int bytes, fc, hdr_len; struct sk_buff *skb_frag; - struct ieee80211_hdr header = { /* Ensure zero initialized */ + struct ieee80211_hdr header = { /* Ensure zero initialized */ .duration_id = 0, .seq_ctl = 0 }; u8 dest[ETH_ALEN], src[ETH_ALEN]; - struct ieee80211_crypt_data* crypt; + struct ieee80211_crypt_data *crypt; spin_lock_irqsave(&ieee->lock, flags); /* If there is no driver handler to take the TXB, dont' bother * creating it... */ if (!ieee->hard_start_xmit) { - printk(KERN_WARNING "%s: No xmit handler.\n", - ieee->dev->name); + printk(KERN_WARNING "%s: No xmit handler.\n", ieee->dev->name); goto success; } @@ -284,7 +276,7 @@ int ieee80211_xmit(struct sk_buff *skb, crypt = ieee->crypt[ieee->tx_keyidx]; encrypt = !(ether_type == ETH_P_PAE && ieee->ieee802_1x) && - ieee->host_encrypt && crypt && crypt->ops; + ieee->host_encrypt && crypt && crypt->ops; if (!encrypt && ieee->ieee802_1x && ieee->drop_unencrypted && ether_type != ETH_P_PAE) { @@ -294,7 +286,7 @@ int ieee80211_xmit(struct sk_buff *skb, /* Save source and destination addresses */ memcpy(&dest, skb->data, ETH_ALEN); - memcpy(&src, skb->data+ETH_ALEN, ETH_ALEN); + memcpy(&src, skb->data + ETH_ALEN, ETH_ALEN); /* Advance the SKB to the start of the payload */ skb_pull(skb, sizeof(struct ethhdr)); @@ -304,7 +296,7 @@ int ieee80211_xmit(struct sk_buff *skb, if (encrypt) fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA | - IEEE80211_FCTL_PROTECTED; + IEEE80211_FCTL_PROTECTED; else fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; @@ -327,8 +319,7 @@ int ieee80211_xmit(struct sk_buff *skb, /* Determine fragmentation size based on destination (multicast * and broadcast are not fragmented) */ - if (is_multicast_ether_addr(dest) || - is_broadcast_ether_addr(dest)) + if (is_multicast_ether_addr(dest) || is_broadcast_ether_addr(dest)) frag_size = MAX_FRAG_THRESHOLD; else frag_size = ieee->fts; @@ -345,7 +336,7 @@ int ieee80211_xmit(struct sk_buff *skb, /* Each fragment may need to have room for encryptiong pre/postfix */ if (encrypt) bytes_per_frag -= crypt->ops->extra_prefix_len + - crypt->ops->extra_postfix_len; + crypt->ops->extra_postfix_len; /* Number of fragments is the total bytes_per_frag / * payload_per_fragment */ @@ -380,19 +371,19 @@ int ieee80211_xmit(struct sk_buff *skb, /* If this is not the last fragment, then add the MOREFRAGS * bit to the frame control */ if (i != nr_frags - 1) { - frag_hdr->frame_ctl = cpu_to_le16( - fc | IEEE80211_FCTL_MOREFRAGS); + frag_hdr->frame_ctl = + cpu_to_le16(fc | IEEE80211_FCTL_MOREFRAGS); bytes = bytes_per_frag; } else { /* The last fragment takes the remaining length */ bytes = bytes_last_frag; } - /* Put a SNAP header on the first fragment */ + /* Put a SNAP header on the first fragment */ if (i == 0) { - ieee80211_put_snap( - skb_put(skb_frag, SNAP_SIZE + sizeof(u16)), - ether_type); + ieee80211_put_snap(skb_put + (skb_frag, SNAP_SIZE + sizeof(u16)), + ether_type); bytes -= SNAP_SIZE + sizeof(u16); } @@ -410,14 +401,13 @@ int ieee80211_xmit(struct sk_buff *skb, skb_put(skb_frag, 4); } - - success: + success: spin_unlock_irqrestore(&ieee->lock, flags); dev_kfree_skb_any(skb); if (txb) { - if ((*ieee->hard_start_xmit)(txb, dev) == 0) { + if ((*ieee->hard_start_xmit) (txb, dev) == 0) { stats->tx_packets++; stats->tx_bytes += txb->payload_size; return 0; @@ -427,7 +417,7 @@ int ieee80211_xmit(struct sk_buff *skb, return 0; - failed: + failed: spin_unlock_irqrestore(&ieee->lock, flags); netif_stop_queue(dev); stats->tx_errors++; diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 2cd571c525a..94882f39b07 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -29,19 +29,20 @@ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ******************************************************************************/ -#include <linux/wireless.h> -#include <linux/version.h> + #include <linux/kmod.h> #include <linux/module.h> #include <net/ieee80211.h> +#include <linux/wireless.h> + static const char *ieee80211_modes[] = { "?", "a", "b", "ab", "g", "ag", "bg", "abg" }; #define MAX_CUSTOM_LEN 64 static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, - char *start, char *stop, + char *start, char *stop, struct ieee80211_network *network) { char custom[MAX_CUSTOM_LEN]; @@ -65,29 +66,28 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, iwe.u.data.length = sizeof("<hidden>"); start = iwe_stream_add_point(start, stop, &iwe, "<hidden>"); } else { - iwe.u.data.length = min(network->ssid_len, (u8)32); + iwe.u.data.length = min(network->ssid_len, (u8) 32); start = iwe_stream_add_point(start, stop, &iwe, network->ssid); } /* Add the protocol name */ iwe.cmd = SIOCGIWNAME; - snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s", ieee80211_modes[network->mode]); + snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s", + ieee80211_modes[network->mode]); start = iwe_stream_add_event(start, stop, &iwe, IW_EV_CHAR_LEN); - /* Add mode */ - iwe.cmd = SIOCGIWMODE; - if (network->capability & - (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)) { + /* Add mode */ + iwe.cmd = SIOCGIWMODE; + if (network->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)) { if (network->capability & WLAN_CAPABILITY_ESS) iwe.u.mode = IW_MODE_MASTER; else iwe.u.mode = IW_MODE_ADHOC; - start = iwe_stream_add_event(start, stop, &iwe, - IW_EV_UINT_LEN); + start = iwe_stream_add_event(start, stop, &iwe, IW_EV_UINT_LEN); } - /* Add frequency/channel */ + /* Add frequency/channel */ iwe.cmd = SIOCGIWFREQ; /* iwe.u.freq.m = ieee80211_frequency(network->channel, network->mode); iwe.u.freq.e = 3; */ @@ -109,7 +109,7 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, max_rate = 0; p = custom; p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), " Rates (Mb/s): "); - for (i = 0, j = 0; i < network->rates_len; ) { + for (i = 0, j = 0; i < network->rates_len;) { if (j < network->rates_ex_len && ((network->rates_ex[j] & 0x7F) < (network->rates[i] & 0x7F))) @@ -132,8 +132,7 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, iwe.cmd = SIOCGIWRATE; iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; iwe.u.bitrate.value = max_rate * 500000; - start = iwe_stream_add_event(start, stop, &iwe, - IW_EV_PARAM_LEN); + start = iwe_stream_add_event(start, stop, &iwe, IW_EV_PARAM_LEN); iwe.cmd = IWEVCUSTOM; iwe.u.data.length = p - custom; @@ -163,7 +162,7 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, if (iwe.u.data.length) start = iwe_stream_add_point(start, stop, &iwe, custom); - if (ieee->wpa_enabled && network->wpa_ie_len){ + if (ieee->wpa_enabled && network->wpa_ie_len) { char buf[MAX_WPA_IE_LEN * 2 + 30]; u8 *p = buf; @@ -178,7 +177,7 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, start = iwe_stream_add_point(start, stop, &iwe, buf); } - if (ieee->wpa_enabled && network->rsn_ie_len){ + if (ieee->wpa_enabled && network->rsn_ie_len) { char buf[MAX_WPA_IE_LEN * 2 + 30]; u8 *p = buf; @@ -198,12 +197,12 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, iwe.cmd = IWEVCUSTOM; p = custom; p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), - " Last beacon: %lums ago", (jiffies - network->last_scanned) / (HZ / 100)); + " Last beacon: %lums ago", + (jiffies - network->last_scanned) / (HZ / 100)); iwe.u.data.length = p - custom; if (iwe.u.data.length) start = iwe_stream_add_point(start, stop, &iwe, custom); - return start; } @@ -228,18 +227,19 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, time_after(network->last_scanned + ieee->scan_age, jiffies)) ev = ipw2100_translate_scan(ieee, ev, stop, network); else - IEEE80211_DEBUG_SCAN( - "Not showing network '%s (" - MAC_FMT ")' due to age (%lums).\n", - escape_essid(network->ssid, - network->ssid_len), - MAC_ARG(network->bssid), - (jiffies - network->last_scanned) / (HZ / 100)); + IEEE80211_DEBUG_SCAN("Not showing network '%s (" + MAC_FMT ")' due to age (%lums).\n", + escape_essid(network->ssid, + network->ssid_len), + MAC_ARG(network->bssid), + (jiffies - + network->last_scanned) / (HZ / + 100)); } spin_unlock_irqrestore(&ieee->lock, flags); - wrqu->data.length = ev - extra; + wrqu->data.length = ev - extra; wrqu->data.flags = 0; IEEE80211_DEBUG_WX("exit: %d networks returned.\n", i); @@ -291,8 +291,8 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, if (ieee->crypt[i] != NULL) { if (key_provided) break; - ieee80211_crypt_delayed_deinit( - ieee, &ieee->crypt[i]); + ieee80211_crypt_delayed_deinit(ieee, + &ieee->crypt[i]); } } @@ -305,8 +305,6 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, goto done; } - - sec.enabled = 1; sec.flags |= SEC_ENABLED; @@ -340,8 +338,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, new_crypt = NULL; printk(KERN_WARNING "%s: could not initialize WEP: " - "load module ieee80211_crypt_wep\n", - dev->name); + "load module ieee80211_crypt_wep\n", dev->name); return -EOPNOTSUPP; } *crypt = new_crypt; @@ -358,7 +355,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, key, escape_essid(sec.keys[key], len), erq->length, len); sec.key_sizes[key] = len; - (*crypt)->ops->set_key(sec.keys[key], len, NULL, + (*crypt)->ops->set_key(sec.keys[key], len, NULL, (*crypt)->priv); sec.flags |= (1 << key); /* This ensures a key will be activated if no key is @@ -381,15 +378,15 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, /* No key data - just set the default TX key index */ if (key_provided) { - IEEE80211_DEBUG_WX( - "Setting key %d to default Tx key.\n", key); + IEEE80211_DEBUG_WX + ("Setting key %d to default Tx key.\n", key); ieee->tx_keyidx = key; sec.active_key = key; sec.flags |= SEC_ACTIVE_KEY; } } - done: + done: ieee->open_wep = !(erq->flags & IW_ENCODE_RESTRICTED); sec.auth_mode = ieee->open_wep ? WLAN_AUTH_OPEN : WLAN_AUTH_SHARED_KEY; sec.flags |= SEC_AUTH_MODE; @@ -399,7 +396,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, /* For now we just support WEP, so only set that security level... * TODO: When WPA is added this is one place that needs to change */ sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_1; /* 40 and 104 bit WEP */ + sec.level = SEC_LEVEL_1; /* 40 and 104 bit WEP */ if (ieee->set_security) ieee->set_security(dev, &sec); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bf147f8db39..a9d84f93442 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1248,11 +1248,6 @@ module_init(inet_init); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -#ifdef CONFIG_IP_FIB_TRIE -extern int fib_stat_proc_init(void); -extern void fib_stat_proc_exit(void); -#endif - static int __init ipv4_proc_init(void) { int rc = 0; @@ -1265,19 +1260,11 @@ static int __init ipv4_proc_init(void) goto out_udp; if (fib_proc_init()) goto out_fib; -#ifdef CONFIG_IP_FIB_TRIE - if (fib_stat_proc_init()) - goto out_fib_stat; -#endif if (ip_misc_proc_init()) goto out_misc; out: return rc; out_misc: -#ifdef CONFIG_IP_FIB_TRIE - fib_stat_proc_exit(); -out_fib_stat: -#endif fib_proc_exit(); out_fib: udp4_proc_exit(); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 514c85b2631..035ad2c9e1b 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -263,10 +263,8 @@ static int ah_init_state(struct xfrm_state *x) error: if (ahp) { - if (ahp->work_icv) - kfree(ahp->work_icv); - if (ahp->tfm) - crypto_free_tfm(ahp->tfm); + kfree(ahp->work_icv); + crypto_free_tfm(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -279,14 +277,10 @@ static void ah_destroy(struct xfrm_state *x) if (!ahp) return; - if (ahp->work_icv) { - kfree(ahp->work_icv); - ahp->work_icv = NULL; - } - if (ahp->tfm) { - crypto_free_tfm(ahp->tfm); - ahp->tfm = NULL; - } + kfree(ahp->work_icv); + ahp->work_icv = NULL; + crypto_free_tfm(ahp->tfm); + ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b31ffc5053d..1b5a09d1b90 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -343,22 +343,14 @@ static void esp_destroy(struct xfrm_state *x) if (!esp) return; - if (esp->conf.tfm) { - crypto_free_tfm(esp->conf.tfm); - esp->conf.tfm = NULL; - } - if (esp->conf.ivec) { - kfree(esp->conf.ivec); - esp->conf.ivec = NULL; - } - if (esp->auth.tfm) { - crypto_free_tfm(esp->auth.tfm); - esp->auth.tfm = NULL; - } - if (esp->auth.work_icv) { - kfree(esp->auth.work_icv); - esp->auth.work_icv = NULL; - } + crypto_free_tfm(esp->conf.tfm); + esp->conf.tfm = NULL; + kfree(esp->conf.ivec); + esp->conf.ivec = NULL; + crypto_free_tfm(esp->auth.tfm); + esp->auth.tfm = NULL; + kfree(esp->auth.work_icv); + esp->auth.work_icv = NULL; kfree(esp); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b2dea4e5da7..50c0519cd70 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -43,7 +43,7 @@ * 2 of the License, or (at your option) any later version. */ -#define VERSION "0.402" +#define VERSION "0.404" #include <linux/config.h> #include <asm/uaccess.h> @@ -164,7 +164,6 @@ static struct node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); -static void trie_dump_seq(struct seq_file *seq, struct trie *t); static kmem_cache_t *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; @@ -225,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) Consider a node 'n' and its parent 'tp'. If n is a leaf, every bit in its key is significant. Its presence is - necessitaded by path compression, since during a tree traversal (when + necessitated by path compression, since during a tree traversal (when searching for a leaf - unless we are doing an insertion) we will completely ignore all skipped bits we encounter. Thus we need to verify, at the end of a potentially successful search, that we have indeed been walking the @@ -837,11 +836,12 @@ static void trie_init(struct trie *t) #endif } -/* readside most use rcu_read_lock currently dump routines +/* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) +static struct leaf_info *find_leaf_info(struct leaf *l, int plen) { + struct hlist_head *head = &l->list; struct hlist_node *node; struct leaf_info *li; @@ -854,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) static inline struct list_head * get_fa_head(struct leaf *l, int plen) { - struct leaf_info *li = find_leaf_info(&l->list, plen); + struct leaf_info *li = find_leaf_info(l, plen); if (!li) return NULL; @@ -1086,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) } if (tp && tp->pos + tp->bits > 32) - printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", + printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", tp, tp->pos, tp->bits, key, plen); /* Rebalance the trie */ @@ -1249,7 +1249,7 @@ err: } -/* should be clalled with rcu_read_lock */ +/* should be called with rcu_read_lock */ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, struct fib_result *res) @@ -1591,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); l = fib_find_node(t, key); - li = find_leaf_info(&l->list, plen); + li = find_leaf_info(l, plen); list_del_rcu(&fa->fa_list); @@ -1715,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb) t->revision++; - rcu_read_lock(); for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { found += trie_flush_leaf(t, l); @@ -1723,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb) trie_leaf_remove(t, ll->key); ll = l; } - rcu_read_unlock(); if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); @@ -1834,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi i++; continue; } - if (fa->fa_info->fib_nh == NULL) { - printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } - if (fa->fa_info == NULL) { - printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } + BUG_ON(!fa->fa_info); if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -1966,563 +1955,531 @@ struct fib_table * __init fib_hash_init(int id) trie_main = t; if (id == RT_TABLE_LOCAL) - printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); + printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); return tb; } -/* Trie dump functions */ +#ifdef CONFIG_PROC_FS +/* Depth first Trie walk iterator */ +struct fib_trie_iter { + struct tnode *tnode; + struct trie *trie; + unsigned index; + unsigned depth; +}; -static void putspace_seq(struct seq_file *seq, int n) +static struct node *fib_trie_get_next(struct fib_trie_iter *iter) { - while (n--) - seq_printf(seq, " "); -} + struct tnode *tn = iter->tnode; + unsigned cindex = iter->index; + struct tnode *p; -static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) -{ - while (bits--) - seq_printf(seq, "%s", (v & (1<<bits))?"1":"0"); -} + pr_debug("get_next iter={node=%p index=%d depth=%d}\n", + iter->tnode, iter->index, iter->depth); +rescan: + while (cindex < (1<<tn->bits)) { + struct node *n = tnode_get_child(tn, cindex); -static void printnode_seq(struct seq_file *seq, int indent, struct node *n, - int pend, int cindex, int bits) -{ - putspace_seq(seq, indent); - if (IS_LEAF(n)) - seq_printf(seq, "|"); - else - seq_printf(seq, "+"); - if (bits) { - seq_printf(seq, "%d/", cindex); - printbin_seq(seq, cindex, bits); - seq_printf(seq, ": "); - } else - seq_printf(seq, "<root>: "); - seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); + if (n) { + if (IS_LEAF(n)) { + iter->tnode = tn; + iter->index = cindex + 1; + } else { + /* push down one level */ + iter->tnode = (struct tnode *) n; + iter->index = 0; + ++iter->depth; + } + return n; + } - if (IS_LEAF(n)) { - struct leaf *l = (struct leaf *)n; - struct fib_alias *fa; - int i; + ++cindex; + } - seq_printf(seq, "key=%d.%d.%d.%d\n", - n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); - - for (i = 32; i >= 0; i--) - if (find_leaf_info(&l->list, i)) { - struct list_head *fa_head = get_fa_head(l, i); - - if (!fa_head) - continue; - - if (list_empty(fa_head)) - continue; - - putspace_seq(seq, indent+2); - seq_printf(seq, "{/%d...dumping}\n", i); - - list_for_each_entry_rcu(fa, fa_head, fa_list) { - putspace_seq(seq, indent+2); - if (fa->fa_info == NULL) { - seq_printf(seq, "Error fa_info=NULL\n"); - continue; - } - if (fa->fa_info->fib_nh == NULL) { - seq_printf(seq, "Error _fib_nh=NULL\n"); - continue; - } - - seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", - fa->fa_type, - fa->fa_scope, - fa->fa_tos); - } - } - } else { - struct tnode *tn = (struct tnode *)n; - int plen = ((struct tnode *)n)->pos; - t_key prf = MASK_PFX(n->key, plen); - - seq_printf(seq, "key=%d.%d.%d.%d/%d\n", - prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); - - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos)); - printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); - seq_printf(seq, "}\n"); - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{pos=%d", tn->pos); - seq_printf(seq, " (skip=%d bits)", tn->pos - pend); - seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits)); - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children); + /* Current node exhausted, pop back up */ + p = NODE_PARENT(tn); + if (p) { + cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; + tn = p; + --iter->depth; + goto rescan; } + + /* got root? */ + return NULL; } -static void trie_dump_seq(struct seq_file *seq, struct trie *t) +static struct node *fib_trie_get_first(struct fib_trie_iter *iter, + struct trie *t) { - struct node *n; - int cindex = 0; - int indent = 1; - int pend = 0; - int depth = 0; - struct tnode *tn; + struct node *n = rcu_dereference(t->trie); - rcu_read_lock(); - n = rcu_dereference(t->trie); - seq_printf(seq, "------ trie_dump of t=%p ------\n", t); - - if (!n) { - seq_printf(seq, "------ trie is empty\n"); - - rcu_read_unlock(); - return; - } - - printnode_seq(seq, indent, n, pend, cindex, 0); - - if (!IS_TNODE(n)) { - rcu_read_unlock(); - return; + if (n && IS_TNODE(n)) { + iter->tnode = (struct tnode *) n; + iter->trie = t; + iter->index = 0; + iter->depth = 1; + return n; } + return NULL; +} - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); - indent += 3; - depth++; - - while (tn && cindex < (1 << tn->bits)) { - struct node *child = rcu_dereference(tn->child[cindex]); - if (!child) - cindex++; - else { - /* Got a child */ - printnode_seq(seq, indent, child, pend, - cindex, tn->bits); - - if (IS_LEAF(child)) - cindex++; - - else { - /* - * New tnode. Decend one level - */ - - depth++; - n = child; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); - seq_printf(seq, "\\--\n"); - indent += 3; - cindex = 0; - } - } - - /* - * Test if we are done - */ - - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ +static void trie_collect_stats(struct trie *t, struct trie_stat *s) +{ + struct node *n; + struct fib_trie_iter iter; - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - break; - } + memset(s, 0, sizeof(*s)); - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - cindex++; - tn = NODE_PARENT(tn); - pend = tn->pos + tn->bits; - indent -= 3; - depth--; + rcu_read_lock(); + for (n = fib_trie_get_first(&iter, t); n; + n = fib_trie_get_next(&iter)) { + if (IS_LEAF(n)) { + s->leaves++; + s->totdepth += iter.depth; + if (iter.depth > s->maxdepth) + s->maxdepth = iter.depth; + } else { + const struct tnode *tn = (const struct tnode *) n; + int i; + + s->tnodes++; + s->nodesizes[tn->bits]++; + for (i = 0; i < (1<<tn->bits); i++) + if (!tn->child[i]) + s->nullpointers++; } } rcu_read_unlock(); } -static struct trie_stat *trie_stat_new(void) +/* + * This outputs /proc/net/fib_triestats + */ +static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) { - struct trie_stat *s; - int i; + unsigned i, max, pointers, bytes, avdepth; - s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); - if (!s) - return NULL; + if (stat->leaves) + avdepth = stat->totdepth*100 / stat->leaves; + else + avdepth = 0; - s->totdepth = 0; - s->maxdepth = 0; - s->tnodes = 0; - s->leaves = 0; - s->nullpointers = 0; + seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); + seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); - for (i = 0; i < MAX_CHILDS; i++) - s->nodesizes[i] = 0; + seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - return s; -} + bytes = sizeof(struct leaf) * stat->leaves; + seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes); + bytes += sizeof(struct tnode) * stat->tnodes; -static struct trie_stat *trie_collect_stats(struct trie *t) -{ - struct node *n; - struct trie_stat *s = trie_stat_new(); - int cindex = 0; - int pend = 0; - int depth = 0; + max = MAX_CHILDS-1; + while (max >= 0 && stat->nodesizes[max] == 0) + max--; - if (!s) - return NULL; + pointers = 0; + for (i = 1; i <= max; i++) + if (stat->nodesizes[i] != 0) { + seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); + pointers += (1<<i) * stat->nodesizes[i]; + } + seq_putc(seq, '\n'); + seq_printf(seq, "\tPointers: %d\n", pointers); - rcu_read_lock(); - n = rcu_dereference(t->trie); + bytes += sizeof(struct node *) * pointers; + seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); + seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024); - if (!n) - return s; +#ifdef CONFIG_IP_FIB_TRIE_STATS + seq_printf(seq, "Counters:\n---------\n"); + seq_printf(seq,"gets = %d\n", t->stats.gets); + seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); + seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); + seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); + seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); + seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); +#ifdef CLEAR_STATS + memset(&(t->stats), 0, sizeof(t->stats)); +#endif +#endif /* CONFIG_IP_FIB_TRIE_STATS */ +} - if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - s->nodesizes[tn->bits]++; - depth++; - - while (tn && cindex < (1 << tn->bits)) { - struct node *ch = rcu_dereference(tn->child[cindex]); - if (ch) { - - /* Got a child */ - - if (IS_LEAF(tn->child[cindex])) { - cindex++; - - /* stats */ - if (depth > s->maxdepth) - s->maxdepth = depth; - s->totdepth += depth; - s->leaves++; - } else { - /* - * New tnode. Decend one level - */ - - s->tnodes++; - s->nodesizes[tn->bits]++; - depth++; - - n = ch; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - - cindex = 0; - } - } else { - cindex++; - s->nullpointers++; - } +static int fib_triestat_seq_show(struct seq_file *seq, void *v) +{ + struct trie_stat *stat; - /* - * Test if we are done - */ + stat = kmalloc(sizeof(*stat), GFP_KERNEL); + if (!stat) + return -ENOMEM; - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ + seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", + sizeof(struct leaf), sizeof(struct tnode)); - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - n = NULL; - break; - } + if (trie_local) { + seq_printf(seq, "Local:\n"); + trie_collect_stats(trie_local, stat); + trie_show_stats(seq, stat); + } - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - tn = NODE_PARENT(tn); - cindex++; - n = (struct node *)tn; - pend = tn->pos+tn->bits; - depth--; - } - } + if (trie_main) { + seq_printf(seq, "Main:\n"); + trie_collect_stats(trie_main, stat); + trie_show_stats(seq, stat); } + kfree(stat); - rcu_read_unlock(); - return s; + return 0; } -#ifdef CONFIG_PROC_FS - -static struct fib_alias *fib_triestat_get_first(struct seq_file *seq) +static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - return NULL; + return single_open(file, fib_triestat_seq_show, NULL); } -static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) +static struct file_operations fib_triestat_fops = { + .owner = THIS_MODULE, + .open = fib_triestat_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, + loff_t pos) { + loff_t idx = 0; + struct node *n; + + for (n = fib_trie_get_first(iter, trie_local); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } + + for (n = fib_trie_get_first(iter, trie_main); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } return NULL; } -static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) +static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) { - if (!ip_fib_main_table) - return NULL; - - if (*pos) - return fib_triestat_get_next(seq); - else + rcu_read_lock(); + if (*pos == 0) return SEQ_START_TOKEN; + return fib_trie_get_idx(seq->private, *pos - 1); } -static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct fib_trie_iter *iter = seq->private; + void *l = v; + ++*pos; if (v == SEQ_START_TOKEN) - return fib_triestat_get_first(seq); - else - return fib_triestat_get_next(seq); -} + return fib_trie_get_idx(iter, 0); -static void fib_triestat_seq_stop(struct seq_file *seq, void *v) -{ + v = fib_trie_get_next(iter); + BUG_ON(v == l); + if (v) + return v; -} + /* continue scan in next trie */ + if (iter->trie == trie_local) + return fib_trie_get_first(iter, trie_main); -/* - * This outputs /proc/net/fib_triestats - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. - */ + return NULL; +} -static void collect_and_show(struct trie *t, struct seq_file *seq) +static void fib_trie_seq_stop(struct seq_file *seq, void *v) { - int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ - int i, max, pointers; - struct trie_stat *stat; - int avdepth; - - stat = trie_collect_stats(t); - - bytes = 0; - seq_printf(seq, "trie=%p\n", t); - - if (stat) { - if (stat->leaves) - avdepth = stat->totdepth*100 / stat->leaves; - else - avdepth = 0; - seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100); - seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); + rcu_read_unlock(); +} - seq_printf(seq, "Leaves: %d\n", stat->leaves); - bytes += sizeof(struct leaf) * stat->leaves; - seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); - bytes += sizeof(struct tnode) * stat->tnodes; +static void seq_indent(struct seq_file *seq, int n) +{ + while (n-- > 0) seq_puts(seq, " "); +} - max = MAX_CHILDS-1; +static inline const char *rtn_scope(enum rt_scope_t s) +{ + static char buf[32]; - while (max >= 0 && stat->nodesizes[max] == 0) - max--; - pointers = 0; + switch(s) { + case RT_SCOPE_UNIVERSE: return "universe"; + case RT_SCOPE_SITE: return "site"; + case RT_SCOPE_LINK: return "link"; + case RT_SCOPE_HOST: return "host"; + case RT_SCOPE_NOWHERE: return "nowhere"; + default: + snprintf(buf, sizeof(buf), "scope=%d", s); + return buf; + } +} - for (i = 1; i <= max; i++) - if (stat->nodesizes[i] != 0) { - seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); - pointers += (1<<i) * stat->nodesizes[i]; - } - seq_printf(seq, "\n"); - seq_printf(seq, "Pointers: %d\n", pointers); - bytes += sizeof(struct node *) * pointers; - seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); - seq_printf(seq, "Total size: %d kB\n", bytes / 1024); +static const char *rtn_type_names[__RTN_MAX] = { + [RTN_UNSPEC] = "UNSPEC", + [RTN_UNICAST] = "UNICAST", + [RTN_LOCAL] = "LOCAL", + [RTN_BROADCAST] = "BROADCAST", + [RTN_ANYCAST] = "ANYCAST", + [RTN_MULTICAST] = "MULTICAST", + [RTN_BLACKHOLE] = "BLACKHOLE", + [RTN_UNREACHABLE] = "UNREACHABLE", + [RTN_PROHIBIT] = "PROHIBIT", + [RTN_THROW] = "THROW", + [RTN_NAT] = "NAT", + [RTN_XRESOLVE] = "XRESOLVE", +}; - kfree(stat); - } +static inline const char *rtn_type(unsigned t) +{ + static char buf[32]; -#ifdef CONFIG_IP_FIB_TRIE_STATS - seq_printf(seq, "Counters:\n---------\n"); - seq_printf(seq,"gets = %d\n", t->stats.gets); - seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); - seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); - seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); - seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); - seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); -#ifdef CLEAR_STATS - memset(&(t->stats), 0, sizeof(t->stats)); -#endif -#endif /* CONFIG_IP_FIB_TRIE_STATS */ + if (t < __RTN_MAX && rtn_type_names[t]) + return rtn_type_names[t]; + snprintf(buf, sizeof(buf), "type %d", t); + return buf; } -static int fib_triestat_seq_show(struct seq_file *seq, void *v) +/* Pretty print the trie */ +static int fib_trie_seq_show(struct seq_file *seq, void *v) { - char bf[128]; + const struct fib_trie_iter *iter = seq->private; + struct node *n = v; - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct leaf), sizeof(struct tnode)); - if (trie_local) - collect_and_show(trie_local, seq); + if (v == SEQ_START_TOKEN) + return 0; - if (trie_main) - collect_and_show(trie_main, seq); - } else { - snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400); + if (IS_TNODE(n)) { + struct tnode *tn = (struct tnode *) n; + t_key prf = ntohl(MASK_PFX(tn->key, tn->pos)); - seq_printf(seq, "%-127s\n", bf); + if (!NODE_PARENT(n)) { + if (iter->trie == trie_local) + seq_puts(seq, "<local>:\n"); + else + seq_puts(seq, "<main>:\n"); + } + seq_indent(seq, iter->depth-1); + seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", + NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, + tn->empty_children); + + } else { + struct leaf *l = (struct leaf *) n; + int i; + u32 val = ntohl(l->key); + + seq_indent(seq, iter->depth); + seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); + for (i = 32; i >= 0; i--) { + struct leaf_info *li = find_leaf_info(l, i); + if (li) { + struct fib_alias *fa; + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + seq_indent(seq, iter->depth+1); + seq_printf(seq, " /%d %s %s", i, + rtn_scope(fa->fa_scope), + rtn_type(fa->fa_type)); + if (fa->fa_tos) + seq_printf(seq, "tos =%d\n", + fa->fa_tos); + seq_putc(seq, '\n'); + } + } + } } + return 0; } -static struct seq_operations fib_triestat_seq_ops = { - .start = fib_triestat_seq_start, - .next = fib_triestat_seq_next, - .stop = fib_triestat_seq_stop, - .show = fib_triestat_seq_show, +static struct seq_operations fib_trie_seq_ops = { + .start = fib_trie_seq_start, + .next = fib_trie_seq_next, + .stop = fib_trie_seq_stop, + .show = fib_trie_seq_show, }; -static int fib_triestat_seq_open(struct inode *inode, struct file *file) +static int fib_trie_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; + struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); + + if (!s) + goto out; - rc = seq_open(file, &fib_triestat_seq_ops); + rc = seq_open(file, &fib_trie_seq_ops); if (rc) goto out_kfree; - seq = file->private_data; + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); out: return rc; out_kfree: + kfree(s); goto out; } -static struct file_operations fib_triestat_seq_fops = { - .owner = THIS_MODULE, - .open = fib_triestat_seq_open, - .read = seq_read, - .llseek = seq_lseek, +static struct file_operations fib_trie_fops = { + .owner = THIS_MODULE, + .open = fib_trie_seq_open, + .read = seq_read, + .llseek = seq_lseek, .release = seq_release_private, }; -int __init fib_stat_proc_init(void) +static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi) { - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops)) - return -ENOMEM; - return 0; -} + static unsigned type2flags[RTN_MAX + 1] = { + [7] = RTF_REJECT, [8] = RTF_REJECT, + }; + unsigned flags = type2flags[type]; -void __init fib_stat_proc_exit(void) -{ - proc_net_remove("fib_triestat"); + if (fi && fi->fib_nh->nh_gw) + flags |= RTF_GATEWAY; + if (mask == 0xFFFFFFFF) + flags |= RTF_HOST; + flags |= RTF_UP; + return flags; } -static struct fib_alias *fib_trie_get_first(struct seq_file *seq) +/* + * This outputs /proc/net/route. + * The format of the file is not supposed to be changed + * and needs to be same as fib_hash output to avoid breaking + * legacy utilities + */ +static int fib_route_seq_show(struct seq_file *seq, void *v) { - return NULL; -} + struct leaf *l = v; + int i; + char bf[128]; -static struct fib_alias *fib_trie_get_next(struct seq_file *seq) -{ - return NULL; -} + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " + "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" + "\tWindow\tIRTT"); + return 0; + } -static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (!ip_fib_main_table) - return NULL; + if (IS_TNODE(l)) + return 0; - if (*pos) - return fib_trie_get_next(seq); - else - return SEQ_START_TOKEN; -} + for (i=32; i>=0; i--) { + struct leaf_info *li = find_leaf_info(l, i); + struct fib_alias *fa; + u32 mask, prefix; -static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - if (v == SEQ_START_TOKEN) - return fib_trie_get_first(seq); - else - return fib_trie_get_next(seq); + if (!li) + continue; -} + mask = inet_make_mask(li->plen); + prefix = htonl(l->key); -static void fib_trie_seq_stop(struct seq_file *seq, void *v) -{ -} - -/* - * This outputs /proc/net/fib_trie. - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. - */ + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + const struct fib_info *fi = rcu_dereference(fa->fa_info); + unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); -static int fib_trie_seq_show(struct seq_file *seq, void *v) -{ - char bf[128]; + if (fa->fa_type == RTN_BROADCAST + || fa->fa_type == RTN_MULTICAST) + continue; - if (v == SEQ_START_TOKEN) { - if (trie_local) - trie_dump_seq(seq, trie_local); + if (fi) + snprintf(bf, sizeof(bf), + "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", + prefix, + fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_priority, + mask, + (fi->fib_advmss ? fi->fib_advmss + 40 : 0), + fi->fib_window, + fi->fib_rtt >> 3); + else + snprintf(bf, sizeof(bf), + "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + prefix, 0, flags, 0, 0, 0, + mask, 0, 0, 0); - if (trie_main) - trie_dump_seq(seq, trie_main); - } else { - snprintf(bf, sizeof(bf), - "*\t%08X\t%08X", 200, 400); - seq_printf(seq, "%-127s\n", bf); + seq_printf(seq, "%-127s\n", bf); + } } return 0; } -static struct seq_operations fib_trie_seq_ops = { - .start = fib_trie_seq_start, - .next = fib_trie_seq_next, - .stop = fib_trie_seq_stop, - .show = fib_trie_seq_show, +static struct seq_operations fib_route_seq_ops = { + .start = fib_trie_seq_start, + .next = fib_trie_seq_next, + .stop = fib_trie_seq_stop, + .show = fib_route_seq_show, }; -static int fib_trie_seq_open(struct inode *inode, struct file *file) +static int fib_route_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; + struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - rc = seq_open(file, &fib_trie_seq_ops); + if (!s) + goto out; + + rc = seq_open(file, &fib_route_seq_ops); if (rc) goto out_kfree; - seq = file->private_data; + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); out: return rc; out_kfree: + kfree(s); goto out; } -static struct file_operations fib_trie_seq_fops = { - .owner = THIS_MODULE, - .open = fib_trie_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release= seq_release_private, +static struct file_operations fib_route_fops = { + .owner = THIS_MODULE, + .open = fib_route_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, }; int __init fib_proc_init(void) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops)) - return -ENOMEM; + if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + goto out1; + + if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + goto out2; + + if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + goto out3; + return 0; + +out3: + proc_net_remove("fib_triestat"); +out2: + proc_net_remove("fib_trie"); +out1: + return -ENOMEM; } void __init fib_proc_exit(void) { proc_net_remove("fib_trie"); + proc_net_remove("fib_triestat"); + proc_net_remove("route"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 44607f4767b..70c44e4c3ce 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1603,7 +1603,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) } pmc->sources = NULL; pmc->sfmode = MCAST_EXCLUDE; - pmc->sfcount[MCAST_EXCLUDE] = 0; + pmc->sfcount[MCAST_INCLUDE] = 0; pmc->sfcount[MCAST_EXCLUDE] = 1; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f84ba9c9655..2fc3fd38924 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -100,8 +100,7 @@ DEFINE_SPINLOCK(inet_peer_unused_lock); #define PEER_MAX_CLEANUP_WORK 30 static void peer_check_expire(unsigned long dummy); -static struct timer_list peer_periodic_timer = - TIMER_INITIALIZER(peer_check_expire, 0, 0); +static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); /* Exported for sysctl_net_ipv4. */ int inet_peer_gc_mintime = 10 * HZ, diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9e6e683cc34..e7d26d9943c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -457,7 +457,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (pskb_pull(skb, ihl) == NULL) goto err; - if (pskb_trim(skb, end-offset)) + if (pskb_trim_rcsum(skb, end-offset)) goto err; /* Find out which fragments are in front and at the back of us diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index dcb7ee6c485..fc718df17b4 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -345,8 +345,7 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms) for_each_cpu(cpu) { struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - if (tfm) - crypto_free_tfm(tfm); + crypto_free_tfm(tfm); } free_percpu(tfms); } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 63e106605f2..e8674baaa8d 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -54,6 +54,7 @@ #include <linux/major.h> #include <linux/root_dev.h> #include <linux/delay.h> +#include <linux/nfs_fs.h> #include <net/arp.h> #include <net/ip.h> #include <net/ipconfig.h> @@ -1102,10 +1103,8 @@ static int __init ic_dynamic(void) #endif jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout); - while (time_before(jiffies, jiff) && !ic_got_reply) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - } + while (time_before(jiffies, jiff) && !ic_got_reply) + schedule_timeout_uninterruptible(1); #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. */ if ((ic_got_reply & IC_BOOTP) diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index e11952ea17a..f828fa2eb7d 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (s_addr==cp->caddr && s_port==cp->cport && d_port==cp->vport && d_addr==cp->vaddr && + ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && protocol==cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +/* Get reference to connection template */ +struct ip_vs_conn *ip_vs_ct_in_get +(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +{ + unsigned hash; + struct ip_vs_conn *cp; + + hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + + ct_read_lock(hash); + + list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (s_addr==cp->caddr && s_port==cp->cport && + d_port==cp->vport && d_addr==cp->vaddr && + cp->flags & IP_VS_CONN_F_TEMPLATE && + protocol==cp->protocol) { + /* HIT */ + atomic_inc(&cp->refcnt); + goto out; + } + } + cp = NULL; + + out: + ct_read_unlock(hash); + + IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", + ip_vs_proto_name(protocol), + NIPQUAD(s_addr), ntohs(s_port), + NIPQUAD(d_addr), ntohs(d_port), + cp?"hit":"not hit"); + + return cp; +} /* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. @@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so increase the inactive connection counter because it is in TCP SYNRECV state (inactive) or other protocol inacive state */ @@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so decrease the inactconns or activeconns counter */ if (cp->flags & IP_VS_CONN_F_INACTIVE) { @@ -467,7 +502,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) /* * Invalidate the connection template */ - if (ct->cport) { + if (ct->vport != 65535) { if (ip_vs_conn_unhash(ct)) { ct->dport = 65535; ct->vport = 65535; @@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void) ct_write_lock_bh(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) + if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3ac7eeca04a..981cc3244ef 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, ports[1]); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, iph->daddr, ports[1], dest->addr, dest->port, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; @@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> */ if (svc->fwmark) - ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, + ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, htonl(svc->fwmark), 0); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, snet, 0, htonl(svc->fwmark), 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 574d1f509b4..2e5ced3d806 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); for (i=0; i<m->nr_conns; i++) { + unsigned flags; + s = (struct ip_vs_sync_conn *)p; - cp = ip_vs_conn_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + flags = ntohs(s->flags); + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + cp = ip_vs_conn_in_get(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport); + else + cp = ip_vs_ct_in_get(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport); if (!cp) { cp = ip_vs_conn_new(s->protocol, s->caddr, s->cport, s->vaddr, s->vport, s->daddr, s->dport, - ntohs(s->flags), NULL); + flags, NULL); if (!cp) { IP_VS_ERR("ip_vs_conn_new failed\n"); return; @@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } else if (!cp->dest) { /* it is an entry created by the synchronization */ cp->state = ntohs(s->state); - cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; + cp->flags = flags | IP_VS_CONN_F_HASHED; } /* Note that we don't touch its state and flags if it is a normal entry. */ - if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { + if (flags & IP_VS_CONN_F_SEQ_MASK) { opt = (struct ip_vs_sync_conn_options *)&s[1]; memcpy(&cp->in_seq, opt, sizeof(*opt)); p += FULL_CONN_SIZE; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e046f552181..3cf9b451675 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -34,6 +34,7 @@ config IP_NF_CT_ACCT config IP_NF_CONNTRACK_MARK bool 'Connection mark tracking support' + depends on IP_NF_CONNTRACK help This option enables support for connection marks, used by the `CONNMARK' target and `connmark' match. Similar to the mark value @@ -50,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS IF unsure, say `N'. +config IP_NF_CONNTRACK_NETLINK + tristate 'Connection tracking netlink interface' + depends on IP_NF_CONNTRACK && NETFILTER_NETLINK + depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m + help + This option enables support for a netlink-based userspace interface + + config IP_NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on IP_NF_CONNTRACK && EXPERIMENTAL @@ -85,6 +94,25 @@ config IP_NF_IRC To compile it as a module, choose M here. If unsure, say Y. +config IP_NF_NETBIOS_NS + tristate "NetBIOS name service protocol support (EXPERIMENTAL)" + depends on IP_NF_CONNTRACK && EXPERIMENTAL + help + NetBIOS name service requests are sent as broadcast messages from an + unprivileged port and responded to with unicast messages to the + same port. This make them hard to firewall properly because connection + tracking doesn't deal with broadcasts. This helper tracks locally + originating NetBIOS name service requests and the corresponding + responses. It relies on correct IP address configuration, specifically + netmask and broadcast address. When properly configured, the output + of "ip address show" should look similar to this: + + $ ip -4 address show eth0 + 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000 + inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0 + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_TFTP tristate "TFTP protocol support" depends on IP_NF_CONNTRACK @@ -109,6 +137,22 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. +config IP_NF_PPTP + tristate 'PPTP protocol support' + help + This module adds support for PPTP (Point to Point Tunnelling + Protocol, RFC2637) conncection tracking and NAT. + + If you are running PPTP sessions over a stateful firewall or NAT + box, you may want to enable this feature. + + Please note that not all PPTP modes of operation are supported yet. + For more info, read top of the file + net/ipv4/netfilter/ip_conntrack_pptp.c + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + config IP_NF_QUEUE tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help @@ -593,6 +637,12 @@ config IP_NF_NAT_AMANDA default IP_NF_NAT if IP_NF_AMANDA=y default m if IP_NF_AMANDA=m +config IP_NF_NAT_PPTP + tristate + depends on IP_NF_NAT!=n && IP_NF_PPTP!=n + default IP_NF_NAT if IP_NF_PPTP=y + default m if IP_NF_PPTP=m + # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" @@ -754,11 +804,5 @@ config IP_NF_ARP_MANGLE Allows altering the ARP packet payload: source and destination hardware and network addresses. -config IP_NF_CONNTRACK_NETLINK - tristate 'Connection tracking netlink interface' - depends on IP_NF_CONNTRACK && NETFILTER_NETLINK - help - This option enables support for a netlink-based userspace interface - endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index a7bd38f5052..3d45d3c0283 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -6,6 +6,9 @@ ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o +ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o + # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o @@ -17,12 +20,15 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o # connection tracking helpers +obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o +obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o # NAT helpers +obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index be4c9eb3243..fa3f914117e 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); + ip_ct_refresh(ct, *pskb, master_timeout * HZ); /* No data? */ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); @@ -108,6 +108,7 @@ static int help(struct sk_buff **pskb, } exp->expectfn = NULL; + exp->flags = 0; exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; exp->tuple.src.u.tcp.port = 0; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index a0648600190..ea65dd3e517 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -197,7 +197,7 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, /* ip_conntrack_expect helper functions */ -static void unlink_expect(struct ip_conntrack_expect *exp) +void ip_ct_unlink_expect(struct ip_conntrack_expect *exp) { ASSERT_WRITE_LOCK(&ip_conntrack_lock); IP_NF_ASSERT(!timer_pending(&exp->timeout)); @@ -207,18 +207,12 @@ static void unlink_expect(struct ip_conntrack_expect *exp) ip_conntrack_expect_put(exp); } -void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp) -{ - unlink_expect(exp); - ip_conntrack_expect_put(exp); -} - static void expectation_timed_out(unsigned long ul_expect) { struct ip_conntrack_expect *exp = (void *)ul_expect; write_lock_bh(&ip_conntrack_lock); - unlink_expect(exp); + ip_ct_unlink_expect(exp); write_unlock_bh(&ip_conntrack_lock); ip_conntrack_expect_put(exp); } @@ -239,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) /* Just find a expectation corresponding to a tuple. */ struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; @@ -264,10 +258,14 @@ find_expectation(const struct ip_conntrack_tuple *tuple) master ct never got confirmed, we'd hold a reference to it and weird things would happen to future packets). */ if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) - && is_confirmed(i->master) - && del_timer(&i->timeout)) { - unlink_expect(i); - return i; + && is_confirmed(i->master)) { + if (i->flags & IP_CT_EXPECT_PERMANENT) { + atomic_inc(&i->use); + return i; + } else if (del_timer(&i->timeout)) { + ip_ct_unlink_expect(i); + return i; + } } } return NULL; @@ -284,7 +282,7 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) { if (i->master == ct && del_timer(&i->timeout)) { - unlink_expect(i); + ip_ct_unlink_expect(i); ip_conntrack_expect_put(i); } } @@ -925,7 +923,7 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp) /* choose the the oldest expectation to evict */ list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { if (expect_matches(i, exp) && del_timer(&i->timeout)) { - unlink_expect(i); + ip_ct_unlink_expect(i); write_unlock_bh(&ip_conntrack_lock); ip_conntrack_expect_put(i); return; @@ -934,6 +932,9 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp) write_unlock_bh(&ip_conntrack_lock); } +/* We don't increase the master conntrack refcount for non-fulfilled + * conntracks. During the conntrack destruction, the expectations are + * always killed before the conntrack itself */ struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me) { struct ip_conntrack_expect *new; @@ -944,17 +945,14 @@ struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me) return NULL; } new->master = me; - atomic_inc(&new->master->ct_general.use); atomic_set(&new->use, 1); return new; } void ip_conntrack_expect_put(struct ip_conntrack_expect *exp) { - if (atomic_dec_and_test(&exp->use)) { - ip_conntrack_put(exp->master); + if (atomic_dec_and_test(&exp->use)) kmem_cache_free(ip_conntrack_expect_cachep, exp); - } } static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp) @@ -982,7 +980,7 @@ static void evict_oldest_expect(struct ip_conntrack *master) list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { if (i->master == master) { if (del_timer(&i->timeout)) { - unlink_expect(i); + ip_ct_unlink_expect(i); ip_conntrack_expect_put(i); } break; @@ -1099,7 +1097,7 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { if (exp->master->helper == me && del_timer(&exp->timeout)) { - unlink_expect(exp); + ip_ct_unlink_expect(exp); ip_conntrack_expect_put(exp); } } @@ -1114,42 +1112,46 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -static inline void ct_add_counters(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ -#ifdef CONFIG_IP_NF_CT_ACCT - if (skb) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - ntohs(skb->nh.iph->tot_len); - } -#endif -} - -/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ -void ip_ct_refresh_acct(struct ip_conntrack *ct, +/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ +void __ip_ct_refresh_acct(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + unsigned long extra_jiffies, + int do_acct) { + int do_event = 0; + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + IP_NF_ASSERT(skb); + + write_lock_bh(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - ct_add_counters(ct, ctinfo, skb); + do_event = 1; } else { - write_lock_bh(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - ip_conntrack_event_cache(IPCT_REFRESH, skb); + do_event = 1; } - ct_add_counters(ct, ctinfo, skb); - write_unlock_bh(&ip_conntrack_lock); } + +#ifdef CONFIG_IP_NF_CT_ACCT + if (do_acct) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + } +#endif + + write_unlock_bh(&ip_conntrack_lock); + + /* must be unlocked when calling event cache */ + if (do_event) + ip_conntrack_event_cache(IPCT_REFRESH, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 3a2627db172..d77d6b3f5f8 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -29,9 +29,9 @@ static char *ftp_buffer; static DEFINE_SPINLOCK(ip_ftp_lock); #define MAX_PORTS 8 -static int ports[MAX_PORTS]; +static short ports[MAX_PORTS]; static int ports_c; -module_param_array(ports, int, &ports_c, 0400); +module_param_array(ports, short, &ports_c, 0400); static int loose; module_param(loose, int, 0600); @@ -421,6 +421,7 @@ static int help(struct sk_buff **pskb, { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); exp->expectfn = NULL; + exp->flags = 0; /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ @@ -449,7 +450,7 @@ out_update_nl: } static struct ip_conntrack_helper ftp[MAX_PORTS]; -static char ftp_names[MAX_PORTS][10]; +static char ftp_names[MAX_PORTS][sizeof("ftp-65535")]; /* Not __exit: called from init() */ static void fini(void) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c new file mode 100644 index 00000000000..926a6684643 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -0,0 +1,806 @@ +/* + * ip_conntrack_pptp.c - Version 3.0 + * + * Connection tracking support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * Limitations: + * - We blindly assume that control connections are always + * established in PNS->PAC direction. This is a violation + * of RFFC2673 + * - We can only support one single call within each session + * + * TODO: + * - testing of incoming PPTP calls + * + * Changes: + * 2002-02-05 - Version 1.3 + * - Call ip_conntrack_unexpect_related() from + * pptp_destroy_siblings() to destroy expectations in case + * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen + * (Philip Craig <philipc@snapgear.com>) + * - Add Version information at module loadtime + * 2002-02-10 - Version 1.6 + * - move to C99 style initializers + * - remove second expectation if first arrives + * 2004-10-22 - Version 2.0 + * - merge Mandrake's 2.6.x port with recent 2.6.x API changes + * - fix lots of linear skb assumptions from Mandrake's port + * 2005-06-10 - Version 2.1 + * - use ip_conntrack_expect_free() instead of kfree() on the + * expect's (which are from the slab for quite some time) + * 2005-06-10 - Version 3.0 + * - port helper to post-2.6.11 API changes, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * 2005-07-30 - Version 3.1 + * - port helper to 2.6.13 API changes + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <net/checksum.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +#define IP_CT_PPTP_VERSION "3.1" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); + +static DEFINE_SPINLOCK(ip_pptp_lock); + +int +(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply); + +void +(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp); + +#if 0 +/* PptpControlMessageType names */ +const char *pptp_msg_name[] = { + "UNKNOWN_MESSAGE", + "START_SESSION_REQUEST", + "START_SESSION_REPLY", + "STOP_SESSION_REQUEST", + "STOP_SESSION_REPLY", + "ECHO_REQUEST", + "ECHO_REPLY", + "OUT_CALL_REQUEST", + "OUT_CALL_REPLY", + "IN_CALL_REQUEST", + "IN_CALL_REPLY", + "IN_CALL_CONNECT", + "CALL_CLEAR_REQUEST", + "CALL_DISCONNECT_NOTIFY", + "WAN_ERROR_NOTIFY", + "SET_LINK_INFO" +}; +EXPORT_SYMBOL(pptp_msg_name); +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS + +#define PPTP_GRE_TIMEOUT (10 MINS) +#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS) + +static void pptp_expectfn(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + DEBUGP("increasing timeouts\n"); + + /* increase timeout of GRE data channel conntrack entry */ + ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; + ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; + + /* Can you see how rusty this code is, compared with the pre-2.6.11 + * one? That's what happened to my shiny newnat of 2002 ;( -HW */ + + if (!ip_nat_pptp_hook_expectfn) { + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_expect *exp_other; + + /* obviously this tuple inversion only works until you do NAT */ + invert_tuplepr(&inv_t, &exp->tuple); + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&inv_t); + + exp_other = ip_conntrack_expect_find(&inv_t); + if (exp_other) { + /* delete other expectation. */ + DEBUGP("found\n"); + ip_conntrack_unexpect_related(exp_other); + ip_conntrack_expect_put(exp_other); + } else { + DEBUGP("not found\n"); + } + } else { + /* we need more than simple inversion */ + ip_nat_pptp_hook_expectfn(ct, exp); + } +} + +static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_expect *exp; + + DEBUGP("trying to timeout ct or exp for tuple "); + DUMP_TUPLE(t); + + h = ip_conntrack_find_get(t, NULL); + if (h) { + struct ip_conntrack *sibling = tuplehash_to_ctrack(h); + DEBUGP("setting timeout of conntrack %p to 0\n", sibling); + sibling->proto.gre.timeout = 0; + sibling->proto.gre.stream_timeout = 0; + if (del_timer(&sibling->timeout)) + sibling->timeout.function((unsigned long)sibling); + ip_conntrack_put(sibling); + return 1; + } else { + exp = ip_conntrack_expect_find(t); + if (exp) { + DEBUGP("unexpect_related of expect %p\n", exp); + ip_conntrack_unexpect_related(exp); + ip_conntrack_expect_put(exp); + return 1; + } + } + + return 0; +} + + +/* timeout GRE data connections */ +static void pptp_destroy_siblings(struct ip_conntrack *ct) +{ + struct ip_conntrack_tuple t; + + /* Since ct->sibling_list has literally rusted away in 2.6.11, + * we now need another way to find out about our sibling + * contrack and expects... -HW */ + + /* try original (pns->pac) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout original pns->pac ct/exp\n"); + + /* try reply (pac->pns) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout reply pac->pns ct/exp\n"); +} + +/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ +static inline int +exp_gre(struct ip_conntrack *master, + u_int32_t seq, + __be16 callid, + __be16 peer_callid) +{ + struct ip_conntrack_tuple inv_tuple; + struct ip_conntrack_tuple exp_tuples[] = { + /* tuple in original direction, PNS->PAC */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, + .u = { .gre = { .key = peer_callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, + .u = { .gre = { .key = callid } }, + .protonum = IPPROTO_GRE + }, + }, + /* tuple in reply direction, PAC->PNS */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, + .u = { .gre = { .key = callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, + .u = { .gre = { .key = peer_callid } }, + .protonum = IPPROTO_GRE + }, + } + }; + struct ip_conntrack_expect *exp_orig, *exp_reply; + int ret = 1; + + exp_orig = ip_conntrack_expect_alloc(master); + if (exp_orig == NULL) + goto out; + + exp_reply = ip_conntrack_expect_alloc(master); + if (exp_reply == NULL) + goto out_put_orig; + + memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + + exp_orig->mask.src.ip = 0xffffffff; + exp_orig->mask.src.u.all = 0; + exp_orig->mask.dst.u.all = 0; + exp_orig->mask.dst.u.gre.key = htons(0xffff); + exp_orig->mask.dst.ip = 0xffffffff; + exp_orig->mask.dst.protonum = 0xff; + + exp_orig->master = master; + exp_orig->expectfn = pptp_expectfn; + exp_orig->flags = 0; + + exp_orig->dir = IP_CT_DIR_ORIGINAL; + + /* both expectations are identical apart from tuple */ + memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); + memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); + + exp_reply->dir = !exp_orig->dir; + + if (ip_nat_pptp_hook_exp_gre) + ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + else { + + DEBUGP("calling expect_related PNS->PAC"); + DUMP_TUPLE(&exp_orig->tuple); + + if (ip_conntrack_expect_related(exp_orig) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_put_both; + } + + DEBUGP("calling expect_related PAC->PNS"); + DUMP_TUPLE(&exp_reply->tuple); + + if (ip_conntrack_expect_related(exp_reply) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_unexpect_orig; + } + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { + DEBUGP("cannot keymap_add() exp\n"); + goto out_unexpect_both; + } + + invert_tuplepr(&inv_tuple, &exp_reply->tuple); + if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(master); + DEBUGP("cannot keymap_add() exp_inv\n"); + goto out_unexpect_both; + } + ret = 0; + } + +out_put_both: + ip_conntrack_expect_put(exp_reply); +out_put_orig: + ip_conntrack_expect_put(exp_orig); +out: + return ret; + +out_unexpect_both: + ip_conntrack_unexpect_related(exp_reply); +out_unexpect_orig: + ip_conntrack_unexpect_related(exp_orig); + goto out_put_both; +} + +static inline int +pptp_inbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg; + __be16 *cid, *pcid; + u_int32_t seq; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + + msg = ntohs(ctlh->messageType); + DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.srep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms new control session */ + if (info->sstate < PPTP_SESSION_REQUESTED) { + DEBUGP("%s without START_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->srep.resultCode == PPTP_START_OK) + info->sstate = PPTP_SESSION_CONFIRMED; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_STOP_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.strep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms end of control session */ + if (info->sstate > PPTP_SESSION_STOPREQ) { + DEBUGP("%s without STOP_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->strep.resultCode == PPTP_STOP_OK) + info->sstate = PPTP_SESSION_NONE; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_OUT_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.ocack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server accepted call, we now expect GRE frames */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->cstate != PPTP_CALL_OUT_REQ && + info->cstate != PPTP_CALL_OUT_CONF) { + DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); + break; + } + if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_NONE; + break; + } + + cid = &pptpReq->ocack.callID; + pcid = &pptpReq->ocack.peersCallID; + + info->pac_call_id = ntohs(*cid); + + if (htons(info->pns_call_id) != *pcid) { + DEBUGP("%s for unknown callid %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(*cid), ntohs(*pcid)); + + info->cstate = PPTP_CALL_OUT_CONF; + + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + break; + + case PPTP_IN_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call request */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + pcid = &pptpReq->icack.peersCallID; + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_REQ; + info->pac_call_id = ntohs(*pcid); + break; + + case PPTP_IN_CALL_CONNECT: + if (reqlen < sizeof(_pptpReq.iccon)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call established */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->sstate != PPTP_CALL_IN_REP + && info->sstate != PPTP_CALL_IN_CONF) { + DEBUGP("%s but never sent IN_CALL_REPLY\n", + pptp_msg_name[msg]); + break; + } + + pcid = &pptpReq->iccon.peersCallID; + cid = &info->pac_call_id; + + if (info->pns_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown CallID %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_CONF; + + /* we expect a GRE connection from PAC to PNS */ + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + + break; + + case PPTP_CALL_DISCONNECT_NOTIFY: + if (reqlen < sizeof(_pptpReq.disc)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms disconnect */ + cid = &pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + break; + + case PPTP_WAN_ERROR_NOTIFY: + break; + + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) + ? pptp_msg_name[msg]:pptp_msg_name[0], msg); + break; + } + + + if (ip_nat_pptp_hook_inbound) + return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; + +} + +static inline int +pptp_outbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg; + __be16 *cid, *pcid; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + + msg = ntohs(ctlh->messageType); + DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REQUEST: + /* client requests for new control session */ + if (info->sstate != PPTP_SESSION_NONE) { + DEBUGP("%s but we already have one", + pptp_msg_name[msg]); + } + info->sstate = PPTP_SESSION_REQUESTED; + break; + case PPTP_STOP_SESSION_REQUEST: + /* client requests end of control session */ + info->sstate = PPTP_SESSION_STOPREQ; + break; + + case PPTP_OUT_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.ocreq)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + /* FIXME: break; */ + } + + /* client initiating connection to server */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", + pptp_msg_name[msg]); + break; + } + info->cstate = PPTP_CALL_OUT_REQ; + /* track PNS call id */ + cid = &pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->pns_call_id = ntohs(*cid); + break; + case PPTP_IN_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* client answers incoming call */ + if (info->cstate != PPTP_CALL_IN_REQ + && info->cstate != PPTP_CALL_IN_REP) { + DEBUGP("%s without incall_req\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { + info->cstate = PPTP_CALL_NONE; + break; + } + pcid = &pptpReq->icack.peersCallID; + if (info->pac_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown call %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = ntohs(pptpReq->icack.callID); + break; + + case PPTP_CALL_CLEAR_REQUEST: + /* client requests hangup of call */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("CLEAR_CALL but no session\n"); + break; + } + /* FUTURE: iterate over all calls and check if + * call ID is valid. We don't do this without newnat, + * because we only know about last call */ + info->cstate = PPTP_CALL_CLEAR_REQ; + break; + case PPTP_SET_LINK_INFO: + break; + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0], msg); + /* unknown: no need to create GRE masq table entry */ + break; + } + + if (ip_nat_pptp_hook_outbound) + return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; +} + + +/* track caller id inside control connection, call expect_related */ +static int +conntrack_pptp_help(struct sk_buff **pskb, + struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) + +{ + struct pptp_pkt_hdr _pptph, *pptph; + struct tcphdr _tcph, *tcph; + u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + u_int32_t datalen; + int dir = CTINFO2DIR(ctinfo); + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + unsigned int nexthdr_off; + + int oldsstate, oldcstate; + int ret; + + /* don't do any tracking before tcp handshake complete */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ctinfo = %u, skipping\n", ctinfo); + return NF_ACCEPT; + } + + nexthdr_off = (*pskb)->nh.iph->ihl*4; + tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + BUG_ON(!tcph); + nexthdr_off += tcph->doff * 4; + datalen = tcplen - tcph->doff * 4; + + if (tcph->fin || tcph->rst) { + DEBUGP("RST/FIN received, timeouting GRE\n"); + /* can't do this after real newnat */ + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + } + + pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + if (!pptph) { + DEBUGP("no full PPTP header, can't track\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_pptph); + datalen -= sizeof(_pptph); + + /* if it's not a control message we can't do anything with it */ + if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { + DEBUGP("not a control packet\n"); + return NF_ACCEPT; + } + + oldsstate = info->sstate; + oldcstate = info->cstate; + + spin_lock_bh(&ip_pptp_lock); + + /* FIXME: We just blindly assume that the control connection is always + * established from PNS->PAC. However, RFC makes no guarantee */ + if (dir == IP_CT_DIR_ORIGINAL) + /* client -> server (PNS -> PAC) */ + ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + else + /* server -> client (PAC -> PNS) */ + ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + DEBUGP("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); + spin_unlock_bh(&ip_pptp_lock); + + return ret; +} + +/* control protocol helper */ +static struct ip_conntrack_helper pptp = { + .list = { NULL, NULL }, + .name = "pptp", + .me = THIS_MODULE, + .max_expected = 2, + .timeout = 5 * 60, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = __constant_htons(0xffff) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xff + } + }, + .help = conntrack_pptp_help +}; + +extern void __exit ip_ct_proto_gre_fini(void); +extern int __init ip_ct_proto_gre_init(void); + +/* ip_conntrack_pptp initialization */ +static int __init init(void) +{ + int retcode; + + retcode = ip_ct_proto_gre_init(); + if (retcode < 0) + return retcode; + + DEBUGP(" registering helper\n"); + if ((retcode = ip_conntrack_helper_register(&pptp))) { + printk(KERN_ERR "Unable to register conntrack application " + "helper for pptp: %d\n", retcode); + ip_ct_proto_gre_fini(); + return retcode; + } + + printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&pptp); + ip_ct_proto_gre_fini(); + printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_nat_pptp_hook_outbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_inbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre); +EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn); diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index 25438eec21a..15457415a4f 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -34,7 +34,7 @@ #include <linux/moduleparam.h> #define MAX_PORTS 8 -static int ports[MAX_PORTS]; +static short ports[MAX_PORTS]; static int ports_c; static int max_dcc_channels = 8; static unsigned int dcc_timeout = 300; @@ -52,7 +52,7 @@ EXPORT_SYMBOL_GPL(ip_nat_irc_hook); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); -module_param_array(ports, int, &ports_c, 0400); +module_param_array(ports, short, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); module_param(max_dcc_channels, int, 0400); MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session"); @@ -221,6 +221,7 @@ static int help(struct sk_buff **pskb, { { 0, { 0 } }, { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); exp->expectfn = NULL; + exp->flags = 0; if (ip_nat_irc_hook) ret = ip_nat_irc_hook(pskb, ctinfo, addr_beg_p - ib_ptr, @@ -239,7 +240,7 @@ static int help(struct sk_buff **pskb, } static struct ip_conntrack_helper irc_helpers[MAX_PORTS]; -static char irc_names[MAX_PORTS][10]; +static char irc_names[MAX_PORTS][sizeof("irc-65535")]; static void fini(void); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c new file mode 100644 index 00000000000..577bac22dcc --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -0,0 +1,142 @@ +/* + * NetBIOS name service broadcast connection tracking helper + * + * (c) 2005 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +/* + * This helper tracks locally originating NetBIOS name service + * requests by issuing permanent expectations (valid until + * timing out) matching all reply connections from the + * destination network. The only NetBIOS specific thing is + * actually the port number. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> + +#define NMBD_PORT 137 + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); +MODULE_LICENSE("GPL"); + +static unsigned int timeout = 3; +module_param(timeout, int, 0600); +MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); + +static int help(struct sk_buff **pskb, + struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) +{ + struct ip_conntrack_expect *exp; + struct iphdr *iph = (*pskb)->nh.iph; + struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct in_device *in_dev; + u_int32_t mask = 0; + + /* we're only interested in locally generated packets */ + if ((*pskb)->sk == NULL) + goto out; + if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) + goto out; + if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + goto out; + + rcu_read_lock(); + in_dev = __in_dev_get(rt->u.dst.dev); + if (in_dev != NULL) { + for_primary_ifa(in_dev) { + if (ifa->ifa_broadcast == iph->daddr) { + mask = ifa->ifa_mask; + break; + } + } endfor_ifa(in_dev); + } + rcu_read_unlock(); + + if (mask == 0) + goto out; + + exp = ip_conntrack_expect_alloc(ct); + if (exp == NULL) + goto out; + + exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + exp->tuple.src.u.udp.port = ntohs(NMBD_PORT); + + exp->mask.src.ip = mask; + exp->mask.src.u.udp.port = 0xFFFF; + exp->mask.dst.ip = 0xFFFFFFFF; + exp->mask.dst.u.udp.port = 0xFFFF; + exp->mask.dst.protonum = 0xFF; + + exp->expectfn = NULL; + exp->flags = IP_CT_EXPECT_PERMANENT; + + ip_conntrack_expect_related(exp); + ip_conntrack_expect_put(exp); + + ip_ct_refresh(ct, *pskb, timeout * HZ); +out: + return NF_ACCEPT; +} + +static struct ip_conntrack_helper helper = { + .name = "netbios-ns", + .tuple = { + .src = { + .u = { + .udp = { + .port = __constant_htons(NMBD_PORT), + } + } + }, + .dst = { + .protonum = IPPROTO_UDP, + }, + }, + .mask = { + .src = { + .u = { + .udp = { + .port = 0xFFFF, + } + } + }, + .dst = { + .protonum = 0xFF, + }, + }, + .max_expected = 1, + .me = THIS_MODULE, + .help = help, +}; + +static int __init init(void) +{ + helper.timeout = timeout; + return ip_conntrack_helper_register(&helper); +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&helper); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index a4e9278db4e..b08a432efcf 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; @@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; @@ -1349,8 +1349,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { if (exp->master->helper == h - && del_timer(&exp->timeout)) - __ip_ct_expect_unlink_destroy(exp); + && del_timer(&exp->timeout)) { + ip_ct_unlink_expect(exp); + ip_conntrack_expect_put(exp); + } } write_unlock(&ip_conntrack_lock); } else { @@ -1358,8 +1360,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_conntrack_lock); list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { - if (del_timer(&exp->timeout)) - __ip_ct_expect_unlink_destroy(exp); + if (del_timer(&exp->timeout)) { + ip_ct_unlink_expect(exp); + ip_conntrack_expect_put(exp); + } } write_unlock_bh(&ip_conntrack_lock); } @@ -1413,6 +1417,7 @@ ctnetlink_create_expect(struct nfattr *cda[]) } exp->expectfn = NULL; + exp->flags = 0; exp->master = ct; memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple)); memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple)); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c new file mode 100644 index 00000000000..de3cb9db6f8 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -0,0 +1,327 @@ +/* + * ip_conntrack_proto_gre.c - Version 3.0 + * + * Connection tracking protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/list.h> + +static DEFINE_RWLOCK(ip_ct_gre_lock); +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + +#include <linux/netfilter_ipv4/listhelp.h> +#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> + +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); + +/* shamelessly stolen from ip_conntrack_proto_udp.c */ +#define GRE_TIMEOUT (30*HZ) +#define GRE_STREAM_TIMEOUT (180*HZ) + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ + NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \ + NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key)) +#else +#define DEBUGP(x, args...) +#define DUMP_TUPLE_GRE(x) +#endif + +/* GRE KEYMAP HANDLING FUNCTIONS */ +static LIST_HEAD(gre_keymap_list); + +static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, + const struct ip_conntrack_tuple *t) +{ + return ((km->tuple.src.ip == t->src.ip) && + (km->tuple.dst.ip == t->dst.ip) && + (km->tuple.dst.protonum == t->dst.protonum) && + (km->tuple.dst.u.all == t->dst.u.all)); +} + +/* look up the source key for a given tuple */ +static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) +{ + struct ip_ct_gre_keymap *km; + u_int32_t key = 0; + + read_lock_bh(&ip_ct_gre_lock); + km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (km) + key = km->tuple.src.u.gre.key; + read_unlock_bh(&ip_ct_gre_lock); + + DEBUGP("lookup src key 0x%x up key for ", key); + DUMP_TUPLE_GRE(t); + + return key; +} + +/* add a single keymap entry, associate with specified master ct */ +int +ip_ct_gre_keymap_add(struct ip_conntrack *ct, + struct ip_conntrack_tuple *t, int reply) +{ + struct ip_ct_gre_keymap **exist_km, *km, *old; + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to add GRE keymap to non-pptp session\n"); + return -1; + } + + if (!reply) + exist_km = &ct->help.ct_pptp_info.keymap_orig; + else + exist_km = &ct->help.ct_pptp_info.keymap_reply; + + if (*exist_km) { + /* check whether it's a retransmission */ + old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (old == *exist_km) { + DEBUGP("retransmission\n"); + return 0; + } + + DEBUGP("trying to override keymap_%s for ct %p\n", + reply? "reply":"orig", ct); + return -EEXIST; + } + + km = kmalloc(sizeof(*km), GFP_ATOMIC); + if (!km) + return -ENOMEM; + + memcpy(&km->tuple, t, sizeof(*t)); + *exist_km = km; + + DEBUGP("adding new entry %p: ", km); + DUMP_TUPLE_GRE(&km->tuple); + + write_lock_bh(&ip_ct_gre_lock); + list_append(&gre_keymap_list, km); + write_unlock_bh(&ip_ct_gre_lock); + + return 0; +} + +/* destroy the keymap entries associated with specified master ct */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) +{ + DEBUGP("entering for ct %p\n", ct); + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to destroy GRE keymap to non-pptp session\n"); + return; + } + + write_lock_bh(&ip_ct_gre_lock); + if (ct->help.ct_pptp_info.keymap_orig) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_orig); + list_del(&ct->help.ct_pptp_info.keymap_orig->list); + kfree(ct->help.ct_pptp_info.keymap_orig); + ct->help.ct_pptp_info.keymap_orig = NULL; + } + if (ct->help.ct_pptp_info.keymap_reply) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_reply); + list_del(&ct->help.ct_pptp_info.keymap_reply->list); + kfree(ct->help.ct_pptp_info.keymap_reply); + ct->help.ct_pptp_info.keymap_reply = NULL; + } + write_unlock_bh(&ip_ct_gre_lock); +} + + +/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ + +/* invert gre part of tuple */ +static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->dst.u.gre.key = orig->src.u.gre.key; + tuple->src.u.gre.key = orig->dst.u.gre.key; + + return 1; +} + +/* gre hdr info to tuple */ +static int gre_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple) +{ + struct gre_hdr_pptp _pgrehdr, *pgrehdr; + u_int32_t srckey; + struct gre_hdr _grehdr, *grehdr; + + /* first only delinearize old RFC1701 GRE header */ + grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); + if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + /* try to behave like "ip_conntrack_proto_generic" */ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + return 1; + } + + /* PPTP header is variable length, only need up to the call_id field */ + pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); + if (!pgrehdr) + return 1; + + if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + return 0; + } + + tuple->dst.u.gre.key = pgrehdr->call_id; + srckey = gre_keymap_lookup(tuple); + tuple->src.u.gre.key = srckey; + + return 1; +} + +/* print gre part of tuple */ +static int gre_print_tuple(struct seq_file *s, + const struct ip_conntrack_tuple *tuple) +{ + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); +} + +/* print private data for conntrack */ +static int gre_print_conntrack(struct seq_file *s, + const struct ip_conntrack *ct) +{ + return seq_printf(s, "timeout=%u, stream_timeout=%u ", + (ct->proto.gre.timeout / HZ), + (ct->proto.gre.stream_timeout / HZ)); +} + +/* Returns verdict for packet, and may modify conntrack */ +static int gre_packet(struct ip_conntrack *ct, + const struct sk_buff *skb, + enum ip_conntrack_info conntrackinfo) +{ + /* If we've seen traffic both ways, this is a GRE connection. + * Extend timeout. */ + if (ct->status & IPS_SEEN_REPLY) { + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.stream_timeout); + /* Also, more likely to be important, and not a probe. */ + set_bit(IPS_ASSURED_BIT, &ct->status); + } else + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int gre_new(struct ip_conntrack *ct, + const struct sk_buff *skb) +{ + DEBUGP(": "); + DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + /* initialize to sane value. Ideally a conntrack helper + * (e.g. in case of pptp) is increasing them */ + ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; + ct->proto.gre.timeout = GRE_TIMEOUT; + + return 1; +} + +/* Called when a conntrack entry has already been removed from the hashes + * and is about to be deleted from memory */ +static void gre_destroy(struct ip_conntrack *ct) +{ + struct ip_conntrack *master = ct->master; + DEBUGP(" entering\n"); + + if (!master) + DEBUGP("no master !?!\n"); + else + ip_ct_gre_keymap_destroy(master); +} + +/* protocol helper struct */ +static struct ip_conntrack_protocol gre = { + .proto = IPPROTO_GRE, + .name = "gre", + .pkt_to_tuple = gre_pkt_to_tuple, + .invert_tuple = gre_invert_tuple, + .print_tuple = gre_print_tuple, + .print_conntrack = gre_print_conntrack, + .packet = gre_packet, + .new = gre_new, + .destroy = gre_destroy, + .me = THIS_MODULE, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif +}; + +/* ip_conntrack_proto_gre initialization */ +int __init ip_ct_proto_gre_init(void) +{ + return ip_conntrack_protocol_register(&gre); +} + +void __exit ip_ct_proto_gre_fini(void) +{ + struct list_head *pos, *n; + + /* delete all keymap entries */ + write_lock_bh(&ip_ct_gre_lock); + list_for_each_safe(pos, n, &gre_keymap_list) { + DEBUGP("deleting keymap %p at module unload time\n", pos); + list_del(pos); + kfree(pos); + } + write_unlock_bh(&ip_ct_gre_lock); + + ip_conntrack_protocol_unregister(&gre); +} + +EXPORT_SYMBOL(ip_ct_gre_keymap_add); +EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index f23ef1f88c4..1985abc59d2 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -349,6 +349,7 @@ static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, return 0; nfattr_failure: + read_unlock_bh(&tcp_lock); return -1; } #endif diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ee5895afd0c..dd476b191f4 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -989,16 +989,16 @@ EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); -EXPORT_SYMBOL(ip_ct_refresh_acct); +EXPORT_SYMBOL(__ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); -EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); -EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy); +EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); EXPORT_SYMBOL(ip_conntrack_tuple_taken); EXPORT_SYMBOL(ip_ct_gather_frags); diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index f8ff170f390..a78736b8525 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -26,9 +26,9 @@ MODULE_DESCRIPTION("tftp connection tracking helper"); MODULE_LICENSE("GPL"); #define MAX_PORTS 8 -static int ports[MAX_PORTS]; +static short ports[MAX_PORTS]; static int ports_c; -module_param_array(ports, int, &ports_c, 0400); +module_param_array(ports, short, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of tftp servers"); #if 0 @@ -75,6 +75,7 @@ static int tftp_help(struct sk_buff **pskb, exp->mask.dst.u.udp.port = 0xffff; exp->mask.dst.protonum = 0xff; exp->expectfn = NULL; + exp->flags = 0; DEBUGP("expect: "); DUMP_TUPLE(&exp->tuple); @@ -99,7 +100,7 @@ static int tftp_help(struct sk_buff **pskb, } static struct ip_conntrack_helper tftp[MAX_PORTS]; -static char tftp_names[MAX_PORTS][10]; +static char tftp_names[MAX_PORTS][sizeof("tftp-65535")]; static void fini(void) { diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1adedb743f6..c3ea891d38e 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) return ret; } +EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); +EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); #endif int __init ip_nat_init(void) diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c new file mode 100644 index 00000000000..3cdd0684d30 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -0,0 +1,401 @@ +/* + * ip_nat_pptp.c - Version 3.0 + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + * + * Changes: + * 2002-02-10 - Version 1.3 + * - Use ip_nat_mangle_tcp_packet() because of cloned skb's + * in local connections (Philip Craig <philipc@snapgear.com>) + * - add checks for magicCookie and pptp version + * - make argument list of pptp_{out,in}bound_packet() shorter + * - move to C99 style initializers + * - print version number at module loadtime + * 2003-09-22 - Version 1.5 + * - use SNATed tcp sourceport as callid, since we get called before + * TCP header is mangled (Philip Craig <philipc@snapgear.com>) + * 2004-10-22 - Version 2.0 + * - kernel 2.6.x version + * 2005-06-10 - Version 3.0 + * - kernel >= 2.6.11 version, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_rule.h> +#include <linux/netfilter_ipv4/ip_nat_helper.h> +#include <linux/netfilter_ipv4/ip_nat_pptp.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +#define IP_NAT_PPTP_VERSION "3.0" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); + + +#if 0 +extern const char *pptp_msg_name[]; +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +static void pptp_nat_expected(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + struct ip_conntrack *master = ct->master; + struct ip_conntrack_expect *other_exp; + struct ip_conntrack_tuple t; + struct ip_ct_pptp_master *ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info; + + ct_pptp_info = &master->help.ct_pptp_info; + nat_pptp_info = &master->nat.help.nat_pptp_info; + + /* And here goes the grand finale of corrosion... */ + + if (exp->dir == IP_CT_DIR_ORIGINAL) { + DEBUGP("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.protonum = IPPROTO_GRE; + } else { + DEBUGP("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + t.src.u.gre.key = + htons(master->nat.help.nat_pptp_info.pns_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + t.dst.u.gre.key = + htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.protonum = IPPROTO_GRE; + } + + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&t); + other_exp = ip_conntrack_expect_find(&t); + if (other_exp) { + ip_conntrack_unexpect_related(other_exp); + ip_conntrack_expect_put(other_exp); + DEBUGP("success\n"); + } else { + DEBUGP("not found!\n"); + } + + ip_nat_follow_master(ct, exp); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + + u_int16_t msg, *cid = NULL, new_callid; + + new_callid = htons(ct_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid = &pptpReq->ocreq.callID; + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid = &pptpReq->icreq.callID; + break; + case PPTP_CALL_CLEAR_REQUEST: + cid = &pptpReq->clrreq.callID; + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + + IP_NF_ASSERT(cid); + + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_callid)); + + /* mangle packet */ + if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_callid), + (char *)&new_callid, + sizeof(new_callid)) == 0) + return NF_DROP; + + return NF_ACCEPT; +} + +static int +pptp_exp_gre(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply) +{ + struct ip_ct_pptp_master *ct_pptp_info = + &expect_orig->master->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = + &expect_orig->master->nat.help.nat_pptp_info; + + struct ip_conntrack *ct = expect_orig->master; + + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_tuple *orig_t, *reply_t; + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation */ + orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + /* alter expectation for PNS->PAC direction */ + invert_tuplepr(&inv_t, &expect_orig->tuple); + expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id); + expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + inv_t.src.ip = reply_t->src.ip; + inv_t.dst.ip = reply_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + + if (!ip_conntrack_expect_related(expect_orig)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_orig)\n"); + return 1; + } + + /* alter expectation for PAC->PNS direction */ + invert_tuplepr(&inv_t, &expect_reply->tuple); + expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); + expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + inv_t.src.ip = orig_t->src.ip; + inv_t.dst.ip = orig_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + + if (!ip_conntrack_expect_related(expect_reply)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_reply)\n"); + ip_conntrack_unexpect_related(expect_orig); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { + DEBUGP("can't register original keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { + DEBUGP("can't register reply keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + ip_ct_gre_keymap_destroy(ct); + return 1; + } + + return 0; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) +{ + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; + + int ret = NF_ACCEPT, rv; + + new_pcid = htons(nat_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid = &pptpReq->ocack.peersCallID; + cid = &pptpReq->ocack.callID; + break; + case PPTP_IN_CALL_CONNECT: + pcid = &pptpReq->iccon.peersCallID; + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + break; + case PPTP_WAN_ERROR_NOTIFY: + pcid = &pptpReq->wanerr.peersCallID; + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid = &pptpReq->disc.callID; + break; + case PPTP_SET_LINK_INFO: + pcid = &pptpReq->setlink.peersCallID; + break; + + default: + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + IP_NF_ASSERT(pcid); + DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(*pcid), ntohs(new_pcid)); + + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)); + if (rv != NF_ACCEPT) + return rv; + + if (new_cid) { + IP_NF_ASSERT(cid); + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_cid)); + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_cid), + (char *)&new_cid, + sizeof(new_cid)); + if (rv != NF_ACCEPT) + return rv; + } + + /* check for earlier return value of 'switch' above */ + if (ret != NF_ACCEPT) + return ret; + + /* great, at least we don't need to resize packets */ + return NF_ACCEPT; +} + + +extern int __init ip_nat_proto_gre_init(void); +extern void __exit ip_nat_proto_gre_fini(void); + +static int __init init(void) +{ + int ret; + + DEBUGP("%s: registering NAT helper\n", __FILE__); + + ret = ip_nat_proto_gre_init(); + if (ret < 0) + return ret; + + BUG_ON(ip_nat_pptp_hook_outbound); + ip_nat_pptp_hook_outbound = &pptp_outbound_pkt; + + BUG_ON(ip_nat_pptp_hook_inbound); + ip_nat_pptp_hook_inbound = &pptp_inbound_pkt; + + BUG_ON(ip_nat_pptp_hook_exp_gre); + ip_nat_pptp_hook_exp_gre = &pptp_exp_gre; + + BUG_ON(ip_nat_pptp_hook_expectfn); + ip_nat_pptp_hook_expectfn = &pptp_nat_expected; + + printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + DEBUGP("cleanup_module\n" ); + + ip_nat_pptp_hook_expectfn = NULL; + ip_nat_pptp_hook_exp_gre = NULL; + ip_nat_pptp_hook_inbound = NULL; + ip_nat_pptp_hook_outbound = NULL; + + ip_nat_proto_gre_fini(); + /* Make sure noone calls it, meanwhile */ + synchronize_net(); + + printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c new file mode 100644 index 00000000000..7c128540167 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -0,0 +1,214 @@ +/* + * ip_nat_proto_gre.c - Version 2.0 + * + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_rule.h> +#include <linux/netfilter_ipv4/ip_nat_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* is key in given range between min and max */ +static int +gre_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int32_t key; + + if (maniptype == IP_NAT_MANIP_SRC) + key = tuple->src.u.gre.key; + else + key = tuple->dst.u.gre.key; + + return ntohl(key) >= ntohl(min->gre.key) + && ntohl(key) <= ntohl(max->gre.key); +} + +/* generate unique tuple ... */ +static int +gre_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t key; + u_int16_t *keyptr; + unsigned int min, i, range_size; + + if (maniptype == IP_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + DEBUGP("%p: NATing GRE PPTP\n", conntrack); + min = 1; + range_size = 0xffff; + } else { + min = ntohl(range->min.gre.key); + range_size = ntohl(range->max.gre.key) - min + 1; + } + + DEBUGP("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; i < range_size; i++, key++) { + *keyptr = htonl(min + key % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + + DEBUGP("%p: no NAT mapping\n", conntrack); + + return 0; +} + +/* manipulate a GRE packet according to maniptype */ +static int +gre_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype) +{ + struct gre_hdr *greh; + struct gre_hdr_pptp *pgreh; + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + unsigned int hdroff = iphdroff + iph->ihl*4; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8)) + return 0; + + greh = (void *)(*pskb)->data + hdroff; + pgreh = (struct gre_hdr_pptp *) greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype == IP_NAT_MANIP_DST) { + /* key manipulation is always dest */ + switch (greh->version) { + case 0: + if (!greh->key) { + DEBUGP("can't nat GRE w/o key\n"); + break; + } + if (greh->csum) { + /* FIXME: Never tested this code... */ + *(gre_csum(greh)) = + ip_nat_cheat_check(~*(gre_key(greh)), + tuple->dst.u.gre.key, + *(gre_csum(greh))); + } + *(gre_key(greh)) = tuple->dst.u.gre.key; + break; + case GRE_VERSION_PPTP: + DEBUGP("call_id -> 0x%04x\n", + ntohl(tuple->dst.u.gre.key)); + pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key)); + break; + default: + DEBUGP("can't nat unknown GRE version\n"); + return 0; + break; + } + } + return 1; +} + +/* print out a nat tuple */ +static unsigned int +gre_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.gre.key) + len += sprintf(buffer + len, "srckey=0x%x ", + ntohl(match->src.u.gre.key)); + + if (mask->dst.u.gre.key) + len += sprintf(buffer + len, "dstkey=0x%x ", + ntohl(match->src.u.gre.key)); + + return len; +} + +/* print a range of keys */ +static unsigned int +gre_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.gre.key != 0 + || range->max.gre.key != 0xFFFF) { + if (range->min.gre.key == range->max.gre.key) + return sprintf(buffer, "key 0x%x ", + ntohl(range->min.gre.key)); + else + return sprintf(buffer, "keys 0x%u-0x%u ", + ntohl(range->min.gre.key), + ntohl(range->max.gre.key)); + } else + return 0; +} + +/* nat helper struct */ +static struct ip_nat_protocol gre = { + .name = "GRE", + .protonum = IPPROTO_GRE, + .manip_pkt = gre_manip_pkt, + .in_range = gre_in_range, + .unique_tuple = gre_unique_tuple, + .print = gre_print, + .print_range = gre_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, +#endif +}; + +int __init ip_nat_proto_gre_init(void) +{ + return ip_nat_protocol_register(&gre); +} + +void __exit ip_nat_proto_gre_fini(void) +{ + ip_nat_protocol_unregister(&gre); +} diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 60d70fa41a1..cb66b8bddeb 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -255,6 +255,27 @@ alloc_null_binding(struct ip_conntrack *conntrack, return ip_nat_setup_info(conntrack, &range, hooknum); } +unsigned int +alloc_null_binding_confirmed(struct ip_conntrack *conntrack, + struct ip_nat_info *info, + unsigned int hooknum) +{ + u_int32_t ip + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip + : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); + u_int16_t all + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all + : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all); + struct ip_nat_range range + = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } }; + + DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", + conntrack, NIPQUAD(ip)); + return ip_nat_setup_info(conntrack, &range, hooknum); +} + int ip_nat_rule_find(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 89db052add8..0ff368b131f 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -123,8 +123,12 @@ ip_nat_fn(unsigned int hooknum, if (!ip_nat_initialized(ct, maniptype)) { unsigned int ret; - /* LOCAL_IN hook doesn't have a chain! */ - if (hooknum == NF_IP_LOCAL_IN) + if (unlikely(is_confirmed(ct))) + /* NAT module was loaded late */ + ret = alloc_null_binding_confirmed(ct, info, + hooknum); + else if (hooknum == NF_IP_LOCAL_IN) + /* LOCAL_IN hook doesn't have a chain! */ ret = alloc_null_binding(ct, info, hooknum); else ret = ip_nat_rule_find(pskb, hooknum, diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2d05cafec22..9bcb398fbc1 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -13,6 +13,7 @@ #include <linux/config.h> #include <linux/proc_fs.h> #include <linux/jhash.h> +#include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -30,7 +31,7 @@ #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <linux/netfilter_ipv4/ip_conntrack.h> -#define CLUSTERIP_VERSION "0.7" +#define CLUSTERIP_VERSION "0.8" #define DEBUG_CLUSTERIP @@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP"); struct clusterip_config { struct list_head list; /* list of all configs */ atomic_t refcount; /* reference count */ + atomic_t entries; /* number of entries/rules + * referencing us */ u_int32_t clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ - u_int16_t num_local_nodes; /* number of local nodes */ - u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ + unsigned long local_nodes; /* node number array */ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; /* proc dir entry */ @@ -66,8 +68,7 @@ struct clusterip_config { static LIST_HEAD(clusterip_configs); -/* clusterip_lock protects the clusterip_configs list _AND_ the configurable - * data within all structurses (num_local_nodes, local_nodes[]) */ +/* clusterip_lock protects the clusterip_configs list */ static DEFINE_RWLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS @@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir; #endif static inline void -clusterip_config_get(struct clusterip_config *c) { +clusterip_config_get(struct clusterip_config *c) +{ atomic_inc(&c->refcount); } static inline void -clusterip_config_put(struct clusterip_config *c) { - if (atomic_dec_and_test(&c->refcount)) { +clusterip_config_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->refcount)) + kfree(c); +} + +/* increase the count of entries(rules) using/referencing this config */ +static inline void +clusterip_config_entry_get(struct clusterip_config *c) +{ + atomic_inc(&c->entries); +} + +/* decrease the count of entries using/referencing this config. If last + * entry(rule) is removed, remove the config from lists, but don't free it + * yet, since proc-files could still be holding references */ +static inline void +clusterip_config_entry_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->entries)) { write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); + dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); dev_put(c->dev); - kfree(c); + + /* In case anyone still accesses the file, the open/close + * functions are also incrementing the refcount on their own, + * so it's safe to remove the entry even if it's in use. */ +#ifdef CONFIG_PROC_FS + remove_proc_entry(c->pde->name, c->pde->parent); +#endif } } - static struct clusterip_config * __clusterip_config_find(u_int32_t clusterip) { @@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip) +clusterip_config_find_get(u_int32_t clusterip, int entry) { struct clusterip_config *c; @@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip) return NULL; } atomic_inc(&c->refcount); + if (entry) + atomic_inc(&c->entries); read_unlock_bh(&clusterip_lock); return c; } +static void +clusterip_config_init_nodelist(struct clusterip_config *c, + const struct ipt_clusterip_tgt_info *i) +{ + int n; + + for (n = 0; n < i->num_local_nodes; n++) { + set_bit(i->local_nodes[n] - 1, &c->local_nodes); + } +} + static struct clusterip_config * clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, struct net_device *dev) @@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; - c->num_local_nodes = i->num_local_nodes; - memcpy(&c->local_nodes, &i->local_nodes, sizeof(&c->local_nodes)); + clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); + atomic_set(&c->entries, 1); #ifdef CONFIG_PROC_FS /* create proc dir entry */ @@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, static int clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - if (c->num_local_nodes >= CLUSTERIP_MAX_NODES - || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - - /* check if we alrady have this number in our array */ - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - write_unlock_bh(&clusterip_lock); - return 1; - } - } - c->local_nodes[c->num_local_nodes++] = nodenum; + /* check if we already have this number in our bitfield */ + if (test_and_set_bit(nodenum - 1, &c->local_nodes)) + return 1; - write_unlock_bh(&clusterip_lock); return 0; } static int clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - - if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); - memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); - c->num_local_nodes--; - write_unlock_bh(&clusterip_lock); - return 0; - } - } + if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) + return 0; - write_unlock_bh(&clusterip_lock); return 1; } @@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) static inline int clusterip_responsible(struct clusterip_config *config, u_int32_t hash) { - int i; - - read_lock_bh(&clusterip_lock); - - if (config->num_local_nodes == 0) { - read_unlock_bh(&clusterip_lock); - return 0; - } - - for (i = 0; i < config->num_local_nodes; i++) { - if (config->local_nodes[i] == hash) { - read_unlock_bh(&clusterip_lock); - return 1; - } - } - - read_unlock_bh(&clusterip_lock); - - return 0; + return test_bit(hash - 1, &config->local_nodes); } /*********************************************************************** @@ -415,8 +411,26 @@ checkentry(const char *tablename, /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr); - if (!config) { + config = clusterip_config_find_get(e->ip.dst.s_addr, 1); + if (config) { + if (cipinfo->config != NULL) { + /* Case A: This is an entry that gets reloaded, since + * it still has a cipinfo->config pointer. Simply + * increase the entry refcount and return */ + if (cipinfo->config != config) { + printk(KERN_ERR "CLUSTERIP: Reloaded entry " + "has invalid config pointer!\n"); + return 0; + } + clusterip_config_entry_get(cipinfo->config); + } else { + /* Case B: This is a new rule referring to an existing + * clusterip config. */ + cipinfo->config = config; + clusterip_config_entry_get(cipinfo->config); + } + } else { + /* Case C: This is a completely new clusterip config */ if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); return 0; @@ -443,10 +457,9 @@ checkentry(const char *tablename, } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } + cipinfo->config = config; } - cipinfo->config = config; - return 1; } @@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize) { struct ipt_clusterip_tgt_info *cipinfo = matchinfo; - /* we first remove the proc entry and then drop the reference - * count. In case anyone still accesses the file, the open/close - * functions are also incrementing the refcount on their own */ -#ifdef CONFIG_PROC_FS - remove_proc_entry(cipinfo->config->pde->name, - cipinfo->config->pde->parent); -#endif + /* if no more entries are referencing the config, remove it + * from the list and destroy the proc entry */ + clusterip_config_entry_put(cipinfo->config); + clusterip_config_put(cipinfo->config); } @@ -533,7 +543,7 @@ arp_mangle(unsigned int hook, /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip); + c = clusterip_config_find_get(payload->src_ip, 0); if (!c) return NF_ACCEPT; @@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = { #ifdef CONFIG_PROC_FS +struct clusterip_seq_position { + unsigned int pos; /* position */ + unsigned int weight; /* number of bits set == size */ + unsigned int bit; /* current bit */ + unsigned long val; /* current value */ +}; + static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { struct proc_dir_entry *pde = s->private; struct clusterip_config *c = pde->data; - unsigned int *nodeidx; - - read_lock_bh(&clusterip_lock); - if (*pos >= c->num_local_nodes) + unsigned int weight; + u_int32_t local_nodes; + struct clusterip_seq_position *idx; + + /* FIXME: possible race */ + local_nodes = c->local_nodes; + weight = hweight32(local_nodes); + if (*pos >= weight) return NULL; - nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); - if (!nodeidx) + idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); + if (!idx) return ERR_PTR(-ENOMEM); - *nodeidx = *pos; - return nodeidx; + idx->pos = *pos; + idx->weight = weight; + idx->bit = ffs(local_nodes); + idx->val = local_nodes; + clear_bit(idx->bit - 1, &idx->val); + + return idx; } static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - *pos = ++(*nodeidx); - if (*pos >= c->num_local_nodes) { + *pos = ++idx->pos; + if (*pos >= idx->weight) { kfree(v); return NULL; } - return nodeidx; + idx->bit = ffs(idx->val); + clear_bit(idx->bit - 1, &idx->val); + return idx; } static void clusterip_seq_stop(struct seq_file *s, void *v) { kfree(v); - - read_unlock_bh(&clusterip_lock); } static int clusterip_seq_show(struct seq_file *s, void *v) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - if (*nodeidx != 0) + if (idx->pos != 0) seq_putc(s, ','); - seq_printf(s, "%u", c->local_nodes[*nodeidx]); - if (*nodeidx == c->num_local_nodes-1) + seq_printf(s, "%u", idx->bit); + + if (idx->pos == idx->weight - 1) seq_putc(s, '\n'); return 0; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 2f3e181c8e9..275a174c6fe 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -90,6 +90,12 @@ masquerade_target(struct sk_buff **pskb, IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + /* Source address is 0.0.0.0 - locally generated packet that is + * probably not supposed to be masqueraded. + */ + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0) + return NF_ACCEPT; + mr = targinfo; rt = (struct rtable *)(*pskb)->dst; newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index d2e13447678..715cb613405 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -88,14 +88,18 @@ redirect_target(struct sk_buff **pskb, newdst = htonl(0x7F000001); else { struct in_device *indev; + struct in_ifaddr *ifa; - /* Device might not have an associated in_device. */ - indev = (struct in_device *)(*pskb)->dev->ip_ptr; - if (indev == NULL || indev->ifa_list == NULL) - return NF_DROP; + newdst = 0; + + rcu_read_lock(); + indev = __in_dev_get((*pskb)->dev); + if (indev && (ifa = indev->ifa_list)) + newdst = ifa->ifa_local; + rcu_read_unlock(); - /* Grab first address on interface. */ - newdst = indev->ifa_list->ifa_local; + if (!newdst) + return NF_DROP; } /* Transfer from original range. */ diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f115a84a4ac..f057025a719 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -92,10 +92,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; - if (xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0)) { - dst_release(&rt->u.dst); - rt = NULL; - } + xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); return rt; } diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index c1889f88262..0cee2862ed8 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/file.h> +#include <linux/rcupdate.h> #include <net/sock.h> #include <linux/netfilter_ipv4/ipt_owner.h> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 304bb0a1d4f..4b0d7e4d626 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (type && code) { get_user(fl->fl_icmp_type, type); - __get_user(fl->fl_icmp_code, code); + get_user(fl->fl_icmp_code, code); probed = 1; } break; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8c0b14e3bee..8549f26e249 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1760,6 +1760,7 @@ static inline int __mkroute_input(struct sk_buff *skb, goto cleanup; } + atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED if (res->fi->fib_nhs > 1) @@ -1820,7 +1821,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); if (err) return err; - atomic_set(&rth->u.dst.__refcnt, 1); /* put it into the cache */ hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); @@ -1834,8 +1834,8 @@ static inline int ip_mkroute_input(struct sk_buff *skb, u32 daddr, u32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct rtable* rth = NULL; - unsigned char hop, hopcount, lasthop; + struct rtable* rth = NULL, *rtres; + unsigned char hop, hopcount; int err = -EINVAL; unsigned int hash; @@ -1844,8 +1844,6 @@ static inline int ip_mkroute_input(struct sk_buff *skb, else hopcount = 1; - lasthop = hopcount - 1; - /* distinguish between multipath and singlepath */ if (hopcount < 2) return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, @@ -1855,6 +1853,10 @@ static inline int ip_mkroute_input(struct sk_buff *skb, for (hop = 0; hop < hopcount; hop++) { res->nh_sel = hop; + /* put reference to previous result */ + if (hop) + ip_rt_put(rtres); + /* create a routing cache entry */ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); @@ -1863,7 +1865,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); - err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + err = rt_intern_hash(hash, rth, &rtres); if (err) return err; @@ -1873,13 +1875,8 @@ static inline int ip_mkroute_input(struct sk_buff *skb, FIB_RES_NETMASK(*res), res->prefixlen, &FIB_RES_NH(*res)); - - /* only for the last hop the reference count is handled - * outside - */ - if (hop == lasthop) - atomic_set(&(skb->dst->__refcnt), 1); } + skb->dst = &rtres->u.dst; return err; #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); @@ -2208,6 +2205,7 @@ static inline int __mkroute_output(struct rtable **result, goto cleanup; } + atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED if (res->fi) { @@ -2290,8 +2288,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp, if (err == 0) { u32 tos = RT_FL_TOS(oldflp); - atomic_set(&rth->u.dst.__refcnt, 1); - hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ (oldflp->oif << 5), tos); err = rt_intern_hash(hash, rth, rp); @@ -2326,6 +2322,10 @@ static inline int ip_mkroute_output(struct rtable** rp, dev2nexthop = FIB_RES_DEV(*res); dev_hold(dev2nexthop); + /* put reference to previous result */ + if (hop) + ip_rt_put(*rp); + err = __mkroute_output(&rth, res, fl, oldflp, dev2nexthop, flags); @@ -2350,7 +2350,6 @@ static inline int ip_mkroute_output(struct rtable** rp, if (err != 0) return err; } - atomic_set(&(*rp)->u.dst.__refcnt, 1); return err; } else { return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 02fdda68718..f3f0013a958 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -552,8 +552,7 @@ new_segment: tcp_mark_push(tp, skb); goto new_segment; } - if (sk->sk_forward_alloc < copy && - !sk_stream_mem_schedule(sk, copy, 0)) + if (!sk_stream_wmem_schedule(sk, copy)) goto wait_for_memory; if (can_coalesce) { @@ -770,19 +769,23 @@ new_segment: if (off == PAGE_SIZE) { put_page(page); TCP_PAGE(sk) = page = NULL; + off = 0; } - } + } else + off = 0; + + if (copy > PAGE_SIZE - off) + copy = PAGE_SIZE - off; + + if (!sk_stream_wmem_schedule(sk, copy)) + goto wait_for_memory; if (!page) { /* Allocate new cache page. */ if (!(page = sk_stream_alloc_page(sk))) goto wait_for_memory; - off = 0; } - if (copy > PAGE_SIZE - off) - copy = PAGE_SIZE - off; - /* Time to copy data. We are close to * the end! */ err = skb_copy_to_page(sk, from, skb, page, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1afb080bdf0..a7537c7bbd0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -923,14 +923,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int flag = 0; int i; - /* So, SACKs for already sent large segments will be lost. - * Not good, but alternative is to resegment the queue. */ - if (sk->sk_route_caps & NETIF_F_TSO) { - sk->sk_route_caps &= ~NETIF_F_TSO; - sock_set_flag(sk, SOCK_NO_LARGESEND); - tp->mss_cache = tp->mss_cache; - } - if (!tp->sacked_out) tp->fackets_out = 0; prior_fackets = tp->fackets_out; @@ -978,20 +970,42 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ flag |= FLAG_DATA_LOST; sk_stream_for_retrans_queue(skb, sk) { - u8 sacked = TCP_SKB_CB(skb)->sacked; - int in_sack; + int in_sack, pcount; + u8 sacked; /* The retransmission queue is always in order, so * we can short-circuit the walk early. */ - if(!before(TCP_SKB_CB(skb)->seq, end_seq)) + if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; - fack_count += tcp_skb_pcount(skb); - in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); + pcount = tcp_skb_pcount(skb); + + if (pcount > 1 && !in_sack && + after(TCP_SKB_CB(skb)->end_seq, start_seq)) { + unsigned int pkt_len; + + in_sack = !after(start_seq, + TCP_SKB_CB(skb)->seq); + + if (!in_sack) + pkt_len = (start_seq - + TCP_SKB_CB(skb)->seq); + else + pkt_len = (end_seq - + TCP_SKB_CB(skb)->seq); + if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size)) + break; + pcount = tcp_skb_pcount(skb); + } + + fack_count += pcount; + + sacked = TCP_SKB_CB(skb)->sacked; + /* Account D-SACK for retransmitted packet. */ if ((dup_sack && in_sack) && (sacked & TCPCB_RETRANS) && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a88db28b0af..b1a63b2c6b4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newicsk->icsk_ca_ops = &tcp_reno; + newicsk->icsk_ca_ops = &tcp_init_congestion_ops; tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 75b68116682..d6e3d269e90 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -428,13 +428,15 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned * packet to the list. This won't be called frequently, I hope. * Remember, these are still headerless SKBs at this point. */ -static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now) +int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int nsize; + int nsize, old_factor; u16 flags; + BUG_ON(len >= skb->len); + nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -459,9 +461,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; - TCP_SKB_CB(buff)->sacked = - (TCP_SKB_CB(skb)->sacked & - (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL)); + TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { @@ -485,23 +485,44 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; buff->tstamp = skb->tstamp; - if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { - tp->lost_out -= tcp_skb_pcount(skb); - tp->left_out -= tcp_skb_pcount(skb); - } + old_factor = tcp_skb_pcount(skb); /* Fix up tso_factor for both original and new SKB. */ tcp_set_skb_tso_segs(sk, skb, mss_now); tcp_set_skb_tso_segs(sk, buff, mss_now); - if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { - tp->lost_out += tcp_skb_pcount(skb); - tp->left_out += tcp_skb_pcount(skb); - } + /* If this packet has been sent out already, we must + * adjust the various packet counters. + */ + if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { + int diff = old_factor - tcp_skb_pcount(skb) - + tcp_skb_pcount(buff); - if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) { - tp->lost_out += tcp_skb_pcount(buff); - tp->left_out += tcp_skb_pcount(buff); + tp->packets_out -= diff; + + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + tp->sacked_out -= diff; + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out -= diff; + + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { + tp->lost_out -= diff; + tp->left_out -= diff; + } + + if (diff > 0) { + /* Adjust Reno SACK estimate. */ + if (!tp->rx_opt.sack_ok) { + tp->sacked_out -= diff; + if ((int)tp->sacked_out < 0) + tp->sacked_out = 0; + tcp_sync_left_out(tp); + } + + tp->fackets_out -= diff; + if ((int)tp->fackets_out < 0) + tp->fackets_out = 0; + } } /* Link BUFF into the send queue. */ @@ -1350,12 +1371,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) BUG(); - - if (sk->sk_route_caps & NETIF_F_TSO) { - sk->sk_route_caps &= ~NETIF_F_TSO; - sock_set_flag(sk, SOCK_NO_LARGESEND); - } - if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return -ENOMEM; } @@ -1370,22 +1385,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -EAGAIN; if (skb->len > cur_mss) { - int old_factor = tcp_skb_pcount(skb); - int diff; - if (tcp_fragment(sk, skb, cur_mss, cur_mss)) return -ENOMEM; /* We'll try again later. */ - - /* New SKB created, account for it. */ - diff = old_factor - tcp_skb_pcount(skb) - - tcp_skb_pcount(skb->next); - tp->packets_out -= diff; - - if (diff > 0) { - tp->fackets_out -= diff; - if ((int)tp->fackets_out < 0) - tp->fackets_out = 0; - } } /* Collapse two adjacent packets if worthwhile and we can. */ @@ -1993,12 +1994,6 @@ int tcp_write_wakeup(struct sock *sk) TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; if (tcp_fragment(sk, skb, seg_size, mss)) return -1; - /* SWS override triggered forced fragmentation. - * Disable TSO, the connection is too sick. */ - if (sk->sk_route_caps & NETIF_F_TSO) { - sock_set_flag(sk, SOCK_NO_LARGESEND); - sk->sk_route_caps &= ~NETIF_F_TSO; - } } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(sk, skb, mss); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e5beca7de86..e0bd1013cb0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,7 +1141,7 @@ int udp_rcv(struct sk_buff *skb) if (ulen > len || ulen < sizeof(*uh)) goto short_packet; - if (pskb_trim(skb, ulen)) + if (pskb_trim_rcsum(skb, ulen)) goto short_packet; if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 937ad32db77..2fea3f4402a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -123,8 +123,7 @@ DEFINE_RWLOCK(addrconf_lock); static void addrconf_verify(unsigned long); -static struct timer_list addr_chk_timer = - TIMER_INITIALIZER(addrconf_verify, 0, 0); +static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); static DEFINE_SPINLOCK(addrconf_verify_lock); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); @@ -3593,10 +3592,8 @@ void __exit addrconf_cleanup(void) rtnl_unlock(); #ifdef CONFIG_IPV6_PRIVACY - if (likely(md5_tfm != NULL)) { - crypto_free_tfm(md5_tfm); - md5_tfm = NULL; - } + crypto_free_tfm(md5_tfm); + md5_tfm = NULL; #endif #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0ebfad907a0..f3629730eb1 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -401,10 +401,8 @@ static int ah6_init_state(struct xfrm_state *x) error: if (ahp) { - if (ahp->work_icv) - kfree(ahp->work_icv); - if (ahp->tfm) - crypto_free_tfm(ahp->tfm); + kfree(ahp->work_icv); + crypto_free_tfm(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -417,14 +415,10 @@ static void ah6_destroy(struct xfrm_state *x) if (!ahp) return; - if (ahp->work_icv) { - kfree(ahp->work_icv); - ahp->work_icv = NULL; - } - if (ahp->tfm) { - crypto_free_tfm(ahp->tfm); - ahp->tfm = NULL; - } + kfree(ahp->work_icv); + ahp->work_icv = NULL; + crypto_free_tfm(ahp->tfm); + ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 01468fab3d3..cc518405b3e 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,10 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; - } /* source address lookup done in ip6_dst_lookup */ @@ -390,32 +388,101 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxtclass) { + int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff; + put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); + } + if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) { u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK; put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } + + /* HbH is allowed only once */ if (np->rxopt.bits.hopopts && opt->hop) { u8 *ptr = skb->nh.raw + opt->hop; put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); } - if (np->rxopt.bits.dstopts && opt->dst0) { + + if (opt->lastopt && + (np->rxopt.bits.dstopts || np->rxopt.bits.srcrt)) { + /* + * Silly enough, but we need to reparse in order to + * report extension headers (except for HbH) + * in order. + * + * Also note that IPV6_RECVRTHDRDSTOPTS is NOT + * (and WILL NOT be) defined because + * IPV6_RECVDSTOPTS is more generic. --yoshfuji + */ + unsigned int off = sizeof(struct ipv6hdr); + u8 nexthdr = skb->nh.ipv6h->nexthdr; + + while (off <= opt->lastopt) { + unsigned len; + u8 *ptr = skb->nh.raw + off; + + switch(nexthdr) { + case IPPROTO_DSTOPTS: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + if (np->rxopt.bits.dstopts) + put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr); + break; + case IPPROTO_ROUTING: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + if (np->rxopt.bits.srcrt) + put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr); + break; + case IPPROTO_AH: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 2; + break; + default: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + break; + } + + off += len; + } + } + + /* socket options in old style */ + if (np->rxopt.bits.rxoinfo) { + struct in6_pktinfo src_info; + + src_info.ipi6_ifindex = opt->iif; + ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr); + put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxohlim) { + int hlim = skb->nh.ipv6h->hop_limit; + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); + } + if (np->rxopt.bits.ohopopts && opt->hop) { + u8 *ptr = skb->nh.raw + opt->hop; + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); + } + if (np->rxopt.bits.odstopts && opt->dst0) { u8 *ptr = skb->nh.raw + opt->dst0; - put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); } - if (np->rxopt.bits.srcrt && opt->srcrt) { + if (np->rxopt.bits.osrcrt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt); - put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr); + put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr); } - if (np->rxopt.bits.dstopts && opt->dst1) { + if (np->rxopt.bits.odstopts && opt->dst1) { u8 *ptr = skb->nh.raw + opt->dst1; - put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); } return 0; } int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit) + int *hlimit, int *tclass) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -438,6 +505,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: + case IPV6_2292PKTINFO: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -492,6 +560,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg); break; + case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { err = -EINVAL; @@ -512,7 +581,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->hopopt = hdr; break; - case IPV6_DSTOPTS: + case IPV6_2292DSTOPTS: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { err = -EINVAL; goto exit_f; @@ -536,6 +605,33 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->dst1opt = hdr; break; + case IPV6_DSTOPTS: + case IPV6_RTHDRDSTOPTS: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { + err = -EINVAL; + goto exit_f; + } + + hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg); + len = ((hdr->hdrlen + 1) << 3); + if (cmsg->cmsg_len < CMSG_LEN(len)) { + err = -EINVAL; + goto exit_f; + } + if (!capable(CAP_NET_RAW)) { + err = -EPERM; + goto exit_f; + } + if (cmsg->cmsg_type == IPV6_DSTOPTS) { + opt->opt_flen += len; + opt->dst1opt = hdr; + } else { + opt->opt_nflen += len; + opt->dst0opt = hdr; + } + break; + + case IPV6_2292RTHDR: case IPV6_RTHDR: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) { err = -EINVAL; @@ -568,7 +664,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->opt_nflen += len; opt->srcrt = rthdr; - if (opt->dst1opt) { + if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) { int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3); opt->opt_nflen += dsthdrlen; @@ -579,6 +675,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, break; + case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { err = -EINVAL; @@ -588,6 +685,24 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, *hlimit = *(int *)CMSG_DATA(cmsg); break; + case IPV6_TCLASS: + { + int tc; + + err = -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + goto exit_f; + } + + tc = *(int *)CMSG_DATA(cmsg); + if (tc < 0 || tc > 0xff) + goto exit_f; + + err = 0; + *tclass = tc; + + break; + } default: LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e8bff9d3d96..9b27460f0cc 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -276,22 +276,14 @@ static void esp6_destroy(struct xfrm_state *x) if (!esp) return; - if (esp->conf.tfm) { - crypto_free_tfm(esp->conf.tfm); - esp->conf.tfm = NULL; - } - if (esp->conf.ivec) { - kfree(esp->conf.ivec); - esp->conf.ivec = NULL; - } - if (esp->auth.tfm) { - crypto_free_tfm(esp->auth.tfm); - esp->auth.tfm = NULL; - } - if (esp->auth.work_icv) { - kfree(esp->auth.work_icv); - esp->auth.work_icv = NULL; - } + crypto_free_tfm(esp->conf.tfm); + esp->conf.tfm = NULL; + kfree(esp->conf.ivec); + esp->conf.ivec = NULL; + crypto_free_tfm(esp->auth.tfm); + esp->auth.tfm = NULL; + kfree(esp->auth.work_icv); + esp->auth.work_icv = NULL; kfree(esp); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5be6da2584e..922549581ab 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -164,6 +164,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) return -1; } + opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { @@ -243,6 +244,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) looped_back: if (hdr->segments_left == 0) { + opt->lastopt = skb->h.raw - skb->nh.raw; opt->srcrt = skb->h.raw - skb->nh.raw; skb->h.raw += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; @@ -404,8 +406,7 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr) memcpy(opt->srcrt, hdr, sizeof(*hdr)); irthdr = (struct rt0_hdr*)opt->srcrt; - /* Obsolete field, MBZ, when originated by us */ - irthdr->bitmap = 0; + irthdr->reserved = 0; opt->srcrt->segments_left = n; for (i=0; i<n; i++) memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16); @@ -459,11 +460,10 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } - if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { - __pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr)); - if (skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_NONE; - } + + if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) + goto drop; + return 1; drop: @@ -539,10 +539,15 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, struct in6_addr **daddr) { - if (opt->srcrt) + if (opt->srcrt) { ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); - if (opt->dst0opt) - ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + /* + * IPV6_RTHDRDSTOPTS is ignored + * unless IPV6_RTHDR is set (RFC3542). + */ + if (opt->dst0opt) + ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + } if (opt->hopopt) ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); } @@ -573,3 +578,97 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) } return opt2; } + +static int ipv6_renew_option(void *ohdr, + struct ipv6_opt_hdr __user *newopt, int newoptlen, + int inherit, + struct ipv6_opt_hdr **hdr, + char **p) +{ + if (inherit) { + if (ohdr) { + memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr)); + *hdr = (struct ipv6_opt_hdr *)*p; + *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr)); + } + } else { + if (newopt) { + if (copy_from_user(*p, newopt, newoptlen)) + return -EFAULT; + *hdr = (struct ipv6_opt_hdr *)*p; + if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen) + return -EINVAL; + *p += CMSG_ALIGN(newoptlen); + } + } + return 0; +} + +struct ipv6_txoptions * +ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, + int newtype, + struct ipv6_opt_hdr __user *newopt, int newoptlen) +{ + int tot_len = 0; + char *p; + struct ipv6_txoptions *opt2; + int err; + + if (newtype != IPV6_HOPOPTS && opt->hopopt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt)); + if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt)); + if (newtype != IPV6_RTHDR && opt->srcrt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt)); + if (newtype != IPV6_DSTOPTS && opt->dst1opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); + if (newopt && newoptlen) + tot_len += CMSG_ALIGN(newoptlen); + + if (!tot_len) + return NULL; + + opt2 = sock_kmalloc(sk, tot_len, GFP_ATOMIC); + if (!opt2) + return ERR_PTR(-ENOBUFS); + + memset(opt2, 0, tot_len); + + opt2->tot_len = tot_len; + p = (char *)(opt2 + 1); + + err = ipv6_renew_option(opt->hopopt, newopt, newoptlen, + newtype != IPV6_HOPOPTS, + &opt2->hopopt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->dst0opt, newopt, newoptlen, + newtype != IPV6_RTHDRDSTOPTS, + &opt2->dst0opt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->srcrt, newopt, newoptlen, + newtype != IPV6_RTHDR, + (struct ipv6_opt_hdr **)opt2->srcrt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->dst1opt, newopt, newoptlen, + newtype != IPV6_DSTOPTS, + &opt2->dst1opt, &p); + if (err) + goto out; + + opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) + + (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) + + (opt2->srcrt ? ipv6_optlen(opt2->srcrt) : 0); + opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0); + + return opt2; +out: + sock_kfree_s(sk, p, tot_len); + return ERR_PTR(err); +} + diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5176fc655ea..b7185fb3377 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -287,7 +287,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, int iif = 0; int addr_type = 0; int len; - int hlimit; + int hlimit, tclass; int err = 0; if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail) @@ -374,7 +374,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (err) goto out; if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto out_dst_release; + goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; @@ -385,6 +385,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + msg.skb = skb; msg.offset = skb->nh.raw - skb->data; @@ -400,7 +404,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), - hlimit, NULL, &fl, (struct rt6_info*)dst, + hlimit, tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { ip6_flush_pending_frames(sk); @@ -434,6 +438,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + int tclass; saddr = &skb->nh.ipv6h->daddr; @@ -464,7 +469,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (err) goto out; if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto out_dst_release; + goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; @@ -475,13 +480,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + idev = in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, NULL, &fl, + sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { @@ -496,7 +505,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) out_put: if (likely(idev != NULL)) in6_dev_put(idev); -out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(); @@ -549,7 +557,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) read_lock(&raw_v6_lock); if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, - skb->dev->ifindex))) { + IP6CB(skb)->iif))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 16af874c9e8..4fcc5a7acf6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -92,7 +92,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); static __u32 rt_sernum; -static struct timer_list ip6_fib_timer = TIMER_INITIALIZER(fib6_run_gc, 0, 0); +static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); struct fib6_walker_t fib6_walker_list = { .prev = &fib6_walker_list, diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b6c73da5ff3..f841bde30c1 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -50,7 +50,7 @@ static atomic_t fl_size = ATOMIC_INIT(0); static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(unsigned long dummy); -static struct timer_list ip6_fl_gc_timer = TIMER_INITIALIZER(ip6_fl_gc, 0, 0); +static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); /* FL hash table lock: it protects only of GC */ @@ -225,16 +225,20 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, struct ip6_flowlabel * fl, struct ipv6_txoptions * fopt) { - struct ipv6_txoptions * fl_opt = fl->opt; + struct ipv6_txoptions * fl_opt = fl ? fl->opt : NULL; - if (fopt == NULL || fopt->opt_flen == 0) - return fl_opt; + if (fopt == NULL || fopt->opt_flen == 0) { + if (!fl_opt || !fl_opt->dst0opt || fl_opt->srcrt) + return fl_opt; + } if (fl_opt != NULL) { opt_space->hopopt = fl_opt->hopopt; - opt_space->dst0opt = fl_opt->dst0opt; + opt_space->dst0opt = fl_opt->srcrt ? fl_opt->dst0opt : NULL; opt_space->srcrt = fl_opt->srcrt; opt_space->opt_nflen = fl_opt->opt_nflen; + if (fl_opt->dst0opt && !fl_opt->srcrt) + opt_space->opt_nflen -= ipv6_optlen(fl_opt->dst0opt); } else { if (fopt->opt_nflen == 0) return fopt; @@ -310,7 +314,7 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int * msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk); + err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01ef94f7c7f..2f589f24c09 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -166,7 +166,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6hdr *hdr; u8 proto = fl->proto; int seg_len = skb->len; - int hlimit; + int hlimit, tclass; u32 mtu; if (opt) { @@ -202,7 +202,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, * Fill in the IPv6 header */ - *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel; hlimit = -1; if (np) hlimit = np->hop_limit; @@ -211,6 +210,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = -1; + if (np) + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + + *(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; @@ -762,10 +769,11 @@ out_err_release: return err; } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, - unsigned int flags) +int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, + int offset, int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, + struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -803,6 +811,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse np->cork.rt = rt; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; + np->cork.tclass = tclass; inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); if (dst_allfrag(rt->u.dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; @@ -1084,7 +1093,8 @@ int ip6_push_pending_frames(struct sock *sk) skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr)); - *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000); + *(u32*)hdr = fl->fl6_flowlabel | + htonl(0x60000000 | ((int)np->cork.tclass << 20)); if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 09613729404..cf94372d1af 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -673,11 +673,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); - else + else { dst = ip6_route_output(NULL, &fl); - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0) - goto tx_err_link_failure; + if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0) + goto tx_err_link_failure; + } tdev = dst->dev; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 135383ef538..85bfbc69b2c 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -341,8 +341,7 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms) for_each_cpu(cpu) { struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - if (tfm) - crypto_free_tfm(tfm); + crypto_free_tfm(tfm); } free_percpu(tfms); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 76466af8331..8567873d0dd 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -210,39 +210,139 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; - case IPV6_PKTINFO: + case IPV6_RECVPKTINFO: np->rxopt.bits.rxinfo = valbool; retv = 0; break; + + case IPV6_2292PKTINFO: + np->rxopt.bits.rxoinfo = valbool; + retv = 0; + break; - case IPV6_HOPLIMIT: + case IPV6_RECVHOPLIMIT: np->rxopt.bits.rxhlim = valbool; retv = 0; break; - case IPV6_RTHDR: + case IPV6_2292HOPLIMIT: + np->rxopt.bits.rxohlim = valbool; + retv = 0; + break; + + case IPV6_RECVRTHDR: if (val < 0 || val > 2) goto e_inval; np->rxopt.bits.srcrt = val; retv = 0; break; - case IPV6_HOPOPTS: + case IPV6_2292RTHDR: + if (val < 0 || val > 2) + goto e_inval; + np->rxopt.bits.osrcrt = val; + retv = 0; + break; + + case IPV6_RECVHOPOPTS: np->rxopt.bits.hopopts = valbool; retv = 0; break; - case IPV6_DSTOPTS: + case IPV6_2292HOPOPTS: + np->rxopt.bits.ohopopts = valbool; + retv = 0; + break; + + case IPV6_RECVDSTOPTS: np->rxopt.bits.dstopts = valbool; retv = 0; break; + case IPV6_2292DSTOPTS: + np->rxopt.bits.odstopts = valbool; + retv = 0; + break; + + case IPV6_TCLASS: + if (val < 0 || val > 0xff) + goto e_inval; + np->tclass = val; + retv = 0; + break; + + case IPV6_RECVTCLASS: + np->rxopt.bits.rxtclass = valbool; + retv = 0; + break; + case IPV6_FLOWINFO: np->rxopt.bits.rxflow = valbool; retv = 0; break; - case IPV6_PKTOPTIONS: + case IPV6_HOPOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + { + struct ipv6_txoptions *opt; + if (optlen == 0) + optval = 0; + + /* hop-by-hop / destination options are privileged option */ + retv = -EPERM; + if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) + break; + + retv = -EINVAL; + if (optlen & 0x7 || optlen > 8 * 255) + break; + + opt = ipv6_renew_options(sk, np->opt, optname, + (struct ipv6_opt_hdr __user *)optval, + optlen); + if (IS_ERR(opt)) { + retv = PTR_ERR(opt); + break; + } + + /* routing header option needs extra check */ + if (optname == IPV6_RTHDR && opt->srcrt) { + struct ipv6_rt_hdr *rthdr = opt->srcrt; + if (rthdr->type) + goto sticky_done; + if ((rthdr->hdrlen & 1) || + (rthdr->hdrlen >> 1) != rthdr->segments_left) + goto sticky_done; + } + + retv = 0; + if (sk->sk_type == SOCK_STREAM) { + if (opt) { + struct tcp_sock *tp = tcp_sk(sk); + if (!((1 << sk->sk_state) & + (TCPF_LISTEN | TCPF_CLOSE)) + && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { + tp->ext_header_len = opt->opt_flen + opt->opt_nflen; + tcp_sync_mss(sk, tp->pmtu_cookie); + } + } + opt = xchg(&np->opt, opt); + sk_dst_reset(sk); + } else { + write_lock(&sk->sk_dst_lock); + opt = xchg(&np->opt, opt); + write_unlock(&sk->sk_dst_lock); + sk_dst_reset(sk); + } +sticky_done: + if (opt) + sock_kfree_s(sk, opt, opt->tot_len); + break; + } + + case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; @@ -276,7 +376,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk); + retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); if (retv) goto done; update: @@ -529,6 +629,17 @@ e_inval: return -EINVAL; } +int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_opt_hdr *hdr, + char __user *optval, int len) +{ + if (!hdr) + return 0; + len = min_t(int, len, ipv6_optlen(hdr)); + if (copy_to_user(optval, hdr, ipv6_optlen(hdr))) + return -EFAULT; + return len; +} + int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -567,7 +678,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, return err; } - case IPV6_PKTOPTIONS: + case IPV6_2292PKTOPTIONS: { struct msghdr msg; struct sk_buff *skb; @@ -601,6 +712,16 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxoinfo) { + struct in6_pktinfo src_info; + src_info.ipi6_ifindex = np->mcast_oif; + ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxohlim) { + int hlim = np->mcast_hops; + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); + } } len -= msg.msg_controllen; return put_user(len, optlen); @@ -625,26 +746,67 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->ipv6only; break; - case IPV6_PKTINFO: + case IPV6_RECVPKTINFO: val = np->rxopt.bits.rxinfo; break; - case IPV6_HOPLIMIT: + case IPV6_2292PKTINFO: + val = np->rxopt.bits.rxoinfo; + break; + + case IPV6_RECVHOPLIMIT: val = np->rxopt.bits.rxhlim; break; - case IPV6_RTHDR: + case IPV6_2292HOPLIMIT: + val = np->rxopt.bits.rxohlim; + break; + + case IPV6_RECVRTHDR: val = np->rxopt.bits.srcrt; break; + case IPV6_2292RTHDR: + val = np->rxopt.bits.osrcrt; + break; + case IPV6_HOPOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + { + + lock_sock(sk); + len = ipv6_getsockopt_sticky(sk, np->opt->hopopt, + optval, len); + release_sock(sk); + return put_user(len, optlen); + } + + case IPV6_RECVHOPOPTS: val = np->rxopt.bits.hopopts; break; - case IPV6_DSTOPTS: + case IPV6_2292HOPOPTS: + val = np->rxopt.bits.ohopopts; + break; + + case IPV6_RECVDSTOPTS: val = np->rxopt.bits.dstopts; break; + case IPV6_2292DSTOPTS: + val = np->rxopt.bits.odstopts; + break; + + case IPV6_TCLASS: + val = np->tclass; + break; + + case IPV6_RECVTCLASS: + val = np->rxopt.bits.rxtclass; + break; + case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 29fed6e58d0..519899fb11d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1968,7 +1968,7 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) } pmc->mca_sources = NULL; pmc->mca_sfmode = MCAST_EXCLUDE; - pmc->mca_sfcount[MCAST_EXCLUDE] = 0; + pmc->mca_sfcount[MCAST_INCLUDE] = 0; pmc->mca_sfcount[MCAST_EXCLUDE] = 1; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a7eae30f455..555a31347ed 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -447,10 +447,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } if (inc_opt) { if (dev->addr_len) @@ -539,10 +537,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); send_llinfo = dev->addr_len && !ipv6_addr_any(saddr); @@ -616,10 +612,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } len = sizeof(struct icmp6hdr); if (dev->addr_len) @@ -1353,10 +1347,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err) { - dst_release(dst); + if (err) return; - } rt = (struct rt6_info *) dst; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1cb8adb2787..2da514b16d9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1955,6 +1955,57 @@ static void __exit fini(void) #endif } +/* + * find specified header up to transport protocol header. + * If found target header, the offset to the header is set to *offset + * and return 0. otherwise, return -1. + * + * Notes: - non-1st Fragment Header isn't skipped. + * - ESP header isn't skipped. + * - The target header may be trancated. + */ +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +{ + unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; + u8 nexthdr = skb->nh.ipv6h->nexthdr; + unsigned int len = skb->len - start; + + while (nexthdr != target) { + struct ipv6_opt_hdr _hdr, *hp; + unsigned int hdrlen; + + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + return -1; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -1; + if (nexthdr == NEXTHDR_FRAGMENT) { + unsigned short _frag_off, *fp; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -1; + + if (ntohs(*fp) & ~0x7) + return -1; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hp->hdrlen + 2) << 2; + else + hdrlen = ipv6_optlen(hp); + + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *offset = start; + return 0; +} + EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); @@ -1963,6 +2014,7 @@ EXPORT_SYMBOL(ip6t_unregister_match); EXPORT_SYMBOL(ip6t_register_target); EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); +EXPORT_SYMBOL(ipv6_find_hdr); module_init(init); module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 14316c3ebde..b03e87adca9 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -100,11 +100,8 @@ static void send_reset(struct sk_buff *oldskb) dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; - if (dst->error || - xfrm_lookup(&dst, &fl, NULL, 0)) { - dst_release(dst); + if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) return; - } hh_len = (dst->dev->hard_header_len + 15)&~15; nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index d5b94f142bb..dde37793d20 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -48,92 +48,21 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_auth_hdr *ah = NULL, _ah; + struct ip_auth_hdr *ah, _ah; const struct ip6t_ah *ahinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; - /*DEBUGP("IPv6 AH entered\n");*/ - /* if (opt->auth == 0) return 0; - * It does not filled on output */ - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_ah header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) - break; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* AH -> evaluate */ - if (nexthdr == NEXTHDR_AUTH) { - temp |= MASK_AH; - break; - } - - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_ah: new pointer too large! \n"); - break; - } - } - - /* AH header not found */ - if (temp != MASK_AH) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) return 0; - if (len < sizeof(struct ip_auth_hdr)){ + ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); + if (ah == NULL) { *hotdrop = 1; return 0; } - ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); - BUG_ON(ah == NULL); + hdrlen = (ah->hdrlen + 2) << 2; DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); DEBUGP("RES %04X ", ah->reserved); diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 540925e4a7a..c450a635e54 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index e39dd236fd8..24bc0cde43a 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -48,87 +48,22 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_esp_hdr _esp, *eh = NULL; + struct ip_esp_hdr _esp, *eh; const struct ip6t_esp *espinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; /* Make sure this isn't an evil packet */ /*DEBUGP("ipv6_esp entered \n");*/ - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - int hdrlen; - - DEBUGP("ipv6_esp header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - temp |= MASK_ESP; - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_esp: new pointer too large! \n"); - break; - } - } - - /* ESP header not found */ - if (temp != MASK_ESP) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) return 0; - if (len < sizeof(struct ip_esp_hdr)) { + eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); + if (eh == NULL) { *hotdrop = 1; return 0; } - eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); - BUG_ON(eh == NULL); - DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi)); return (eh != NULL) diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 4bfa30a9bc8..085d5f8eea2 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -48,90 +48,18 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct frag_hdr _frag, *fh = NULL; + struct frag_hdr _frag, *fh; const struct ip6t_frag *fraginfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; - unsigned int hdrlen = 0; - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_frag header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* FRAG -> evaluate */ - if (nexthdr == NEXTHDR_FRAGMENT) { - temp |= MASK_FRAGMENT; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_frag: new pointer too large! \n"); - break; - } - } - - /* FRAG header not found */ - if ( temp != MASK_FRAGMENT ) return 0; - - if (len < sizeof(struct frag_hdr)){ - *hotdrop = 1; - return 0; - } - fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); - BUG_ON(fh == NULL); + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) + return 0; + + fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); + if (fh == NULL){ + *hotdrop = 1; + return 0; + } DEBUGP("INFO %04X ", fh->frag_off); DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 27f3650d127..1d09485111d 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 9b91decbfdd..4de4cdad4b7 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/file.h> +#include <linux/rcupdate.h> #include <net/sock.h> #include <linux/netfilter_ipv6/ip6t_owner.h> diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index a9526b773d2..beb2fd5cebb 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -50,98 +50,29 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipv6_rt_hdr _route, *rh = NULL; + struct ipv6_rt_hdr _route, *rh; const struct ip6t_rt *rtinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; struct in6_addr *ap, _addr; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) + return 0; - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_rt header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* ROUTING -> evaluate */ - if (nexthdr == NEXTHDR_ROUTING) { - temp |= MASK_ROUTING; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_rt: new pointer is too large! \n"); - break; - } - } - - /* ROUTING header not found */ - if ( temp != MASK_ROUTING ) return 0; - - if (len < (int)sizeof(struct ipv6_rt_hdr)){ + rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); + if (rh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(rh); + if (skb->len - ptr < hdrlen){ /* Pcket smaller than its length field */ return 0; } - rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); - BUG_ON(rh == NULL); - DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); DEBUGP("TYPE %04X ", rh->type); DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); @@ -161,8 +92,8 @@ match(const struct sk_buff *skb, ((rtinfo->hdrlen == hdrlen) ^ !!(rtinfo->invflags & IP6T_RT_INV_LEN)))); DEBUGP("res %02X %02X %02X ", - (rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->bitmap, - !((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->bitmap))); + (rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->reserved, + !((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->reserved))); ret = (rh != NULL) && @@ -179,12 +110,12 @@ match(const struct sk_buff *skb, !!(rtinfo->invflags & IP6T_RT_INV_TYP))); if (ret && (rtinfo->flags & IP6T_RT_RES)) { - u_int32_t *bp, _bitmap; - bp = skb_header_pointer(skb, - ptr + offsetof(struct rt0_hdr, bitmap), - sizeof(_bitmap), &_bitmap); + u_int32_t *rp, _reserved; + rp = skb_header_pointer(skb, + ptr + offsetof(struct rt0_hdr, reserved), + sizeof(_reserved), &_reserved); - ret = (*bp == 0); + ret = (*rp == 0); } DEBUGP("#%d ",rtinfo->addrnr); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 7a5863298f3..a1265a320b1 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -166,7 +166,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (sk == NULL) goto out; - sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, skb->dev->ifindex); + sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { delivered = 1; @@ -178,7 +178,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) rawv6_rcv(sk, clone); } sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, - skb->dev->ifindex); + IP6CB(skb)->iif); } out: read_unlock(&raw_v6_lock); @@ -627,7 +627,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (type && code) { get_user(fl->fl_icmp_type, type); - __get_user(fl->fl_icmp_code, code); + get_user(fl->fl_icmp_code, code); probed = 1; } break; @@ -655,6 +655,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct flowi fl; int addr_len = msg->msg_namelen; int hlimit = -1; + int tclass = -1; u16 proto; int err; @@ -740,7 +741,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit); + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -755,8 +756,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if (opt == NULL) opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = fl6_merge_options(&opt_space, flowlabel, opt); fl.proto = proto; rawv6_probe_proto_opt(&fl, msg); @@ -782,10 +782,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; - } if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl.fl6_dst)) @@ -798,6 +796,12 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, hlimit = ipv6_get_hoplimit(dst->dev); } + if (tclass < 0) { + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + } + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -806,8 +810,9 @@ back_from_confirm: err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); } else { lock_sock(sk); - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, - hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags); + err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, + len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, + msg->msg_flags); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 9d9e04344c7..e4fe9ee484d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -479,12 +479,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto err; - if (end-offset < skb->len) { - if (pskb_trim(skb, end - offset)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } + + if (pskb_trim_rcsum(skb, end - offset)) + goto err; /* Find out which fragments are in front and at the back of us * in the chain of fragments so far. We must know where to put diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 794734f1d23..80643e6b346 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -632,10 +632,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto failure; - } if (saddr == NULL) { saddr = &fl.fl6_src; @@ -849,7 +847,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, if (dst == NULL) { opt = np->opt; if (opt == NULL && - np->rxopt.bits.srcrt == 2 && + np->rxopt.bits.osrcrt == 2 && treq->pktopts) { struct sk_buff *pktopts = treq->pktopts; struct inet6_skb_parm *rxopt = IP6CB(pktopts); @@ -888,7 +886,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, } done: - dst_release(dst); if (opt && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); return err; @@ -915,11 +912,10 @@ static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); if (np->rxopt.all) { - if ((opt->hop && np->rxopt.bits.hopopts) || - ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) && - np->rxopt.bits.rxflow) || - (opt->srcrt && np->rxopt.bits.srcrt) || - ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts)) + if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || + ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) || + (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || + ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) return 1; } return 0; @@ -1001,10 +997,8 @@ static void tcp_v6_send_reset(struct sk_buff *skb) /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { - if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) { - dst_release(buff->dst); + if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) return; - } ip6_xmit(NULL, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); @@ -1068,10 +1062,8 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.fl_ip_sport = t1->source; if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { - if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) { - dst_release(buff->dst); + if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) return; - } ip6_xmit(NULL, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); return; @@ -1190,8 +1182,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb->h.th); treq->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || - np->rxopt.bits.rxhlim) { + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); treq->pktopts = skb; } @@ -1288,7 +1280,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (np->rxopt.bits.srcrt == 2 && + if (np->rxopt.bits.osrcrt == 2 && opt == NULL && treq->pktopts) { struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts); if (rxopt->srcrt) @@ -1544,9 +1536,9 @@ ipv6_pktoptions: tp = tcp_sk(sk); if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - if (np->rxopt.bits.rxinfo) + if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = inet6_iif(opt_skb); - if (np->rxopt.bits.rxhlim) + if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); @@ -1734,7 +1726,6 @@ static int tcp_v6_rebuild_header(struct sock *sk) if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; - dst_release(dst); return err; } @@ -1787,7 +1778,6 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; - dst_release(dst); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 390d750449c..6001948600f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -405,9 +405,8 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, continue; if (!ipv6_addr_any(&np->rcv_saddr)) { - if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) - return s; - continue; + if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + continue; } if(!inet6_mc_check(s, loc_addr, rmt_addr)) continue; @@ -483,7 +482,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) } if (ulen < skb->len) { - if (__pskb_trim(skb, ulen)) + if (pskb_trim_rcsum(skb, ulen)) goto discard; saddr = &skb->nh.ipv6h->saddr; daddr = &skb->nh.ipv6h->daddr; @@ -637,8 +636,10 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int addr_len = msg->msg_namelen; int ulen = len; int hlimit = -1; + int tclass = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; + int connected = 0; /* destination address check */ if (sin6) { @@ -748,6 +749,7 @@ do_udp_sendmsg: fl->fl_ip_dport = inet->dport; daddr = &np->daddr; fl->fl6_flowlabel = np->flow_label; + connected = 1; } if (!fl->oif) @@ -758,7 +760,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit); + err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -770,11 +772,11 @@ do_udp_sendmsg: } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; + connected = 0; } if (opt == NULL) opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = fl6_merge_options(&opt_space, flowlabel, opt); fl->proto = IPPROTO_UDP; ipv6_addr_copy(&fl->fl6_dst, daddr); @@ -788,10 +790,13 @@ do_udp_sendmsg: ipv6_addr_copy(&final, &fl->fl6_dst); ipv6_addr_copy(&fl->fl6_dst, rt0->addr); final_p = &final; + connected = 0; } - if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) + if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { fl->oif = np->mcast_oif; + connected = 0; + } err = ip6_dst_lookup(sk, &dst, fl); if (err) @@ -799,10 +804,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl->fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) goto out; - } if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl->fl6_dst)) @@ -815,6 +818,12 @@ do_udp_sendmsg: hlimit = ipv6_get_hoplimit(dst->dev); } + if (tclass < 0) { + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + } + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -834,15 +843,16 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), - hlimit, opt, fl, (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, fl, + (struct rt6_info*)dst, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); - if (dst) + if (dst && connected) ip6_dst_store(sk, dst, ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? &np->daddr : NULL); diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 5d1e61168eb..6f20b4206e0 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -567,10 +567,8 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) self->tty = NULL; if (self->blocked_open) { - if (self->close_delay) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(self->close_delay); - } + if (self->close_delay) + schedule_timeout_interruptible(self->close_delay); wake_up_interruptible(&self->open_wait); } @@ -863,8 +861,7 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) spin_lock_irqsave(&self->spinlock, flags); while (self->tx_skb && self->tx_skb->len) { spin_unlock_irqrestore(&self->spinlock, flags); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(poll_time); + schedule_timeout_interruptible(poll_time); spin_lock_irqsave(&self->spinlock, flags); if (signal_pending(current)) break; diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c index 343c5d4a1a1..ca7d358dab5 100644 --- a/net/irda/irlan/irlan_filter.c +++ b/net/irda/irlan/irlan_filter.c @@ -27,6 +27,7 @@ #include <linux/seq_file.h> #include <net/irda/irlan_common.h> +#include <net/irda/irlan_filter.h> /* * Function irlan_filter_request (self, skb) diff --git a/net/irda/qos.c b/net/irda/qos.c index df732d56cc5..ddfb5c502a9 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -37,6 +37,7 @@ #include <net/irda/parameters.h> #include <net/irda/qos.h> #include <net/irda/irlap.h> +#include <net/irda/irlap_frame.h> /* * Maximum values of the baud rate we negociate with the other end. diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e089f17bb80..49a3900e3d3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -344,14 +344,14 @@ static void nfnetlink_rcv(struct sock *sk, int len) } while(nfnl && nfnl->sk_receive_queue.qlen); } -void __exit nfnetlink_exit(void) +static void __exit nfnetlink_exit(void) { printk("Removing netfilter NETLINK layer.\n"); sock_release(nfnl->sk_socket); return; } -int __init nfnetlink_init(void) +static int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e3a5285329a..f81fe8c52e9 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -76,17 +76,6 @@ typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); static DEFINE_RWLOCK(instances_lock); -u_int64_t htonll(u_int64_t in) -{ - u_int64_t out; - int i; - - for (i = 0; i < sizeof(u_int64_t); i++) - ((u_int8_t *)&out)[sizeof(u_int64_t)-1] = ((u_int8_t *)&in)[i]; - - return out; -} - #define INSTANCE_BUCKETS 16 static struct hlist_head instance_table[INSTANCE_BUCKETS]; @@ -382,6 +371,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: + if (entry->skb->ip_summed == CHECKSUM_HW && + (*errp = skb_checksum_help(entry->skb, + entry->info->outdev == NULL))) { + spin_unlock_bh(&queue->lock); + return NULL; + } if (queue->copy_range == 0 || queue->copy_range > entry->skb->len) data_len = entry->skb->len; @@ -497,8 +492,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entry->skb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = htonll(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec); - ts.usec = htonll(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); + ts.sec = cpu_to_be64(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec); + ts.usec = cpu_to_be64(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } @@ -647,7 +642,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) if (!skb_make_writable(&e->skb, data_len)) return -ENOMEM; memcpy(e->skb->data, data, data_len); - + e->skb->ip_summed = CHECKSUM_NONE; return 0; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 62435ffc618..a64e1d5ce3c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -398,24 +398,13 @@ static int netlink_create(struct socket *sock, int protocol) if (nl_table[protocol].registered && try_module_get(nl_table[protocol].module)) module = nl_table[protocol].module; - else - err = -EPROTONOSUPPORT; groups = nl_table[protocol].groups; netlink_unlock_table(); - if (err || (err = __netlink_create(sock, protocol) < 0)) + if ((err = __netlink_create(sock, protocol) < 0)) goto out_module; nlk = nlk_sk(sock->sk); - - nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL); - if (nlk->groups == NULL) { - err = -ENOMEM; - goto out_module; - } - memset(nlk->groups, 0, NLGRPSZ(groups)); - nlk->ngroups = groups; - nlk->module = module; out: return err; @@ -534,6 +523,29 @@ netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) nlk->subscriptions = subscriptions; } +static int netlink_alloc_groups(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + unsigned int groups; + int err = 0; + + netlink_lock_table(); + groups = nl_table[sk->sk_protocol].groups; + if (!nl_table[sk->sk_protocol].registered) + err = -ENOENT; + netlink_unlock_table(); + + if (err) + return err; + + nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL); + if (nlk->groups == NULL) + return -ENOMEM; + memset(nlk->groups, 0, NLGRPSZ(groups)); + nlk->ngroups = groups; + return 0; +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; @@ -545,8 +557,15 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return -EINVAL; /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_RECV)) - return -EPERM; + if (nladdr->nl_groups) { + if (!netlink_capable(sock, NL_NONROOT_RECV)) + return -EPERM; + if (nlk->groups == NULL) { + err = netlink_alloc_groups(sk); + if (err) + return err; + } + } if (nlk->pid) { if (nladdr->nl_pid != nlk->pid) @@ -559,7 +578,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return err; } - if (!nladdr->nl_groups && !(u32)nlk->groups[0]) + if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) return 0; netlink_table_grab(); @@ -620,7 +639,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { nladdr->nl_pid = nlk->pid; - nladdr->nl_groups = nlk->groups[0]; + nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; } return 0; } @@ -976,6 +995,11 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (!netlink_capable(sock, NL_NONROOT_RECV)) return -EPERM; + if (nlk->groups == NULL) { + err = netlink_alloc_groups(sk); + if (err) + return err; + } if (!val || val - 1 >= nlk->ngroups) return -EINVAL; netlink_table_grab(); @@ -1483,8 +1507,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) s, s->sk_protocol, nlk->pid, - nlk->flags & NETLINK_KERNEL_SOCKET ? - 0 : (unsigned int)nlk->groups[0], + nlk->groups ? (u32)nlk->groups[0] : 0, atomic_read(&s->sk_rmem_alloc), atomic_read(&s->sk_wmem_alloc), nlk->cb, diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4b53de98211..e5d82d711ca 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -56,6 +56,7 @@ int sysctl_netrom_transport_requested_window_size = NR_DEFAULT_WINDOW; int sysctl_netrom_transport_no_activity_timeout = NR_DEFAULT_IDLE; int sysctl_netrom_routing_control = NR_DEFAULT_ROUTING; int sysctl_netrom_link_fails_count = NR_DEFAULT_FAILS; +int sysctl_netrom_reset_circuit = NR_DEFAULT_RESET; static unsigned short circuit = 0x101; @@ -908,17 +909,17 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) if (frametype != NR_CONNREQ) { /* * Here it would be nice to be able to send a reset but - * NET/ROM doesn't have one. The following hack would - * have been a way to extend the protocol but apparently - * it kills BPQ boxes... :-( + * NET/ROM doesn't have one. We've tried to extend the protocol + * by sending NR_CONNACK | NR_CHOKE_FLAGS replies but that + * apparently kills BPQ boxes... :-( + * So now we try to follow the established behaviour of + * G8PZT's Xrouter which is sending packets with command type 7 + * as an extension of the protocol. */ -#if 0 - /* - * Never reply to a CONNACK/CHOKE. - */ - if (frametype != NR_CONNACK || flags != NR_CHOKE_FLAG) - nr_transmit_refusal(skb, 1); -#endif + if (sysctl_netrom_reset_circuit && + (frametype != NR_RESET || flags != 0)) + nr_transmit_reset(skb, 1); + return 0; } @@ -1187,9 +1188,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } case SIOCGSTAMP: - ret = -EINVAL; - if (sk != NULL) - ret = sock_get_timestamp(sk, argp); + ret = sock_get_timestamp(sk, argp); release_sock(sk); return ret; @@ -1261,6 +1260,7 @@ static int nr_info_show(struct seq_file *seq, void *v) struct net_device *dev; struct nr_sock *nr; const char *devname; + char buf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1276,11 +1276,11 @@ static int nr_info_show(struct seq_file *seq, void *v) else devname = dev->name; - seq_printf(seq, "%-9s ", ax2asc(&nr->user_addr)); - seq_printf(seq, "%-9s ", ax2asc(&nr->dest_addr)); + seq_printf(seq, "%-9s ", ax2asc(buf, &nr->user_addr)); + seq_printf(seq, "%-9s ", ax2asc(buf, &nr->dest_addr)); seq_printf(seq, "%-9s %-3s %02X/%02X %02X/%02X %2d %3d %3d %3d %3lu/%03lu %2lu/%02lu %3lu/%03lu %3lu/%03lu %2d/%02d %3d %5d %5d %ld\n", - ax2asc(&nr->source_addr), + ax2asc(buf, &nr->source_addr), devname, nr->my_index, nr->my_id, @@ -1392,8 +1392,7 @@ static int __init nr_proto_init(void) struct net_device *dev; sprintf(name, "nr%d", i); - dev = alloc_netdev(sizeof(struct net_device_stats), name, - nr_setup); + dev = alloc_netdev(sizeof(struct nr_private), name, nr_setup); if (!dev) { printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); goto fail; diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 263da4c2649..4e66eef9a03 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -47,7 +47,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = netdev_priv(dev); if (!netif_running(dev)) { - stats->rx_errors++; + stats->rx_dropped++; return 0; } @@ -71,15 +71,10 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) static int nr_rebuild_header(struct sk_buff *skb) { - struct net_device *dev = skb->dev; - struct net_device_stats *stats = netdev_priv(dev); - struct sk_buff *skbn; unsigned char *bp = skb->data; - int len; - if (arp_find(bp + 7, skb)) { + if (arp_find(bp + 7, skb)) return 1; - } bp[6] &= ~AX25_CBIT; bp[6] &= ~AX25_EBIT; @@ -90,27 +85,7 @@ static int nr_rebuild_header(struct sk_buff *skb) bp[6] |= AX25_EBIT; bp[6] |= AX25_SSSID_SPARE; - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { - kfree_skb(skb); - return 1; - } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - kfree_skb(skb); - - len = skbn->len; - - if (!nr_route_frame(skbn, NULL)) { - kfree_skb(skbn); - stats->tx_errors++; - } - - stats->tx_packets++; - stats->tx_bytes += len; - - return 1; + return 0; } #else @@ -185,15 +160,27 @@ static int nr_close(struct net_device *dev) static int nr_xmit(struct sk_buff *skb, struct net_device *dev) { - struct net_device_stats *stats = netdev_priv(dev); - dev_kfree_skb(skb); - stats->tx_errors++; + struct nr_private *nr = netdev_priv(dev); + struct net_device_stats *stats = &nr->stats; + unsigned int len = skb->len; + + if (!nr_route_frame(skb, NULL)) { + kfree_skb(skb); + stats->tx_errors++; + return 0; + } + + stats->tx_packets++; + stats->tx_bytes += len; + return 0; } static struct net_device_stats *nr_get_stats(struct net_device *dev) { - return netdev_priv(dev); + struct nr_private *nr = netdev_priv(dev); + + return &nr->stats; } void nr_setup(struct net_device *dev) @@ -208,12 +195,11 @@ void nr_setup(struct net_device *dev) dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_NETROM; - dev->tx_queue_len = 40; dev->rebuild_header = nr_rebuild_header; dev->set_mac_address = nr_set_mac_address; /* New-style flags. */ - dev->flags = 0; + dev->flags = IFF_NOARP; dev->get_stats = nr_get_stats; } diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 64b81a79690..004e8599b8f 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -98,6 +98,11 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, nr_disconnect(sk, ECONNREFUSED); break; + case NR_RESET: + if (sysctl_netrom_reset_circuit); + nr_disconnect(sk, ECONNRESET); + break; + default: break; } @@ -124,6 +129,11 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, nr_disconnect(sk, 0); break; + case NR_RESET: + if (sysctl_netrom_reset_circuit); + nr_disconnect(sk, ECONNRESET); + break; + default: break; } @@ -254,6 +264,11 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype } break; + case NR_RESET: + if (sysctl_netrom_reset_circuit); + nr_disconnect(sk, ECONNRESET); + break; + default: break; } diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 165b2abce11..e856ae1b360 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -17,7 +17,7 @@ static void nr_loopback_timer(unsigned long); static struct sk_buff_head loopback_queue; -static struct timer_list loopback_timer = TIMER_INITIALIZER(nr_loopback_timer, 0, 0); +static DEFINE_TIMER(loopback_timer, nr_loopback_timer, 0, 0); void __init nr_loopback_init(void) { diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 7a86b36cba5..b3b9097c87c 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -881,6 +881,7 @@ static void nr_node_stop(struct seq_file *seq, void *v) static int nr_node_show(struct seq_file *seq, void *v) { + char buf[11]; int i; if (v == SEQ_START_TOKEN) @@ -890,7 +891,7 @@ static int nr_node_show(struct seq_file *seq, void *v) struct nr_node *nr_node = v; nr_node_lock(nr_node); seq_printf(seq, "%-9s %-7s %d %d", - ax2asc(&nr_node->callsign), + ax2asc(buf, &nr_node->callsign), (nr_node->mnemonic[0] == '\0') ? "*" : nr_node->mnemonic, nr_node->which + 1, nr_node->count); @@ -964,6 +965,7 @@ static void nr_neigh_stop(struct seq_file *seq, void *v) static int nr_neigh_show(struct seq_file *seq, void *v) { + char buf[11]; int i; if (v == SEQ_START_TOKEN) @@ -973,7 +975,7 @@ static int nr_neigh_show(struct seq_file *seq, void *v) seq_printf(seq, "%05d %-9s %-4s %3d %d %3d %3d", nr_neigh->number, - ax2asc(&nr_neigh->callsign), + ax2asc(buf, &nr_neigh->callsign), nr_neigh->dev ? nr_neigh->dev->name : "???", nr_neigh->quality, nr_neigh->locked, @@ -983,7 +985,7 @@ static int nr_neigh_show(struct seq_file *seq, void *v) if (nr_neigh->digipeat != NULL) { for (i = 0; i < nr_neigh->digipeat->ndigi; i++) seq_printf(seq, " %s", - ax2asc(&nr_neigh->digipeat->calls[i])); + ax2asc(buf, &nr_neigh->digipeat->calls[i])); } seq_puts(seq, "\n"); diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 587bed2674b..bcb9946b4f5 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -210,10 +210,9 @@ void nr_write_internal(struct sock *sk, int frametype) } /* - * This routine is called when a Connect Acknowledge with the Choke Flag - * set is needed to refuse a connection. + * This routine is called to send an error reply. */ -void nr_transmit_refusal(struct sk_buff *skb, int mine) +void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) { struct sk_buff *skbn; unsigned char *dptr; @@ -254,7 +253,7 @@ void nr_transmit_refusal(struct sk_buff *skb, int mine) *dptr++ = 0; } - *dptr++ = NR_CONNACK | NR_CHOKE_FLAG; + *dptr++ = cmdflags; *dptr++ = 0; if (!nr_route_frame(skbn, NULL)) diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index c9ed50382ea..6bb8dda849d 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -30,6 +30,7 @@ static int min_idle[] = {0 * HZ}; static int max_idle[] = {65535 * HZ}; static int min_route[] = {0}, max_route[] = {1}; static int min_fails[] = {1}, max_fails[] = {10}; +static int min_reset[] = {0}, max_reset[] = {1}; static struct ctl_table_header *nr_table_header; @@ -155,6 +156,17 @@ static ctl_table nr_table[] = { .extra1 = &min_fails, .extra2 = &max_fails }, + { + .ctl_name = NET_NETROM_RESET, + .procname = "reset", + .data = &sysctl_netrom_reset_circuit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_reset, + .extra2 = &max_reset + }, { .ctl_name = 0 } }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba997095f08..ee865d88183 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -36,6 +36,11 @@ * Michal Ostrowski : Module initialization cleanup. * Ulises Alonso : Frame number limit removal and * packet_set_ring memory leak. + * Eric Biederman : Allow for > 8 byte hardware addresses. + * The convention is that longer addresses + * will simply extend the hardware address + * byte arrays at the end of sockaddr_ll + * and packet_mreq. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -161,7 +166,17 @@ struct packet_mclist int count; unsigned short type; unsigned short alen; - unsigned char addr[8]; + unsigned char addr[MAX_ADDR_LEN]; +}; +/* identical to struct packet_mreq except it has + * a longer address field. + */ +struct packet_mreq_max +{ + int mr_ifindex; + unsigned short mr_type; + unsigned short mr_alen; + unsigned char mr_address[MAX_ADDR_LEN]; }; #endif #ifdef CONFIG_PACKET_MMAP @@ -716,6 +731,8 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; + if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) + goto out; ifindex = saddr->sll_ifindex; proto = saddr->sll_protocol; addr = saddr->sll_addr; @@ -744,6 +761,12 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (dev->hard_header) { int res; err = -EINVAL; + if (saddr) { + if (saddr->sll_halen != dev->addr_len) + goto out_free; + if (saddr->sll_hatype != dev->type) + goto out_free; + } res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (sock->type != SOCK_DGRAM) { skb->tail = skb->data; @@ -1045,6 +1068,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; + struct sockaddr_ll *sll; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1057,16 +1081,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, #endif /* - * If the address length field is there to be filled in, we fill - * it in now. - */ - - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sizeof(struct sockaddr_ll); - - /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it * in the protocol layers. @@ -1087,6 +1101,17 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; /* + * If the address length field is there to be filled in, we fill + * it in now. + */ + + sll = (struct sockaddr_ll*)skb->cb; + if (sock->type == SOCK_PACKET) + msg->msg_namelen = sizeof(struct sockaddr_pkt); + else + msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + + /* * You lose any data beyond the buffer you gave. If it worries a * user program they can ask the device for its MTU anyway. */ @@ -1166,7 +1191,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } - *uaddr_len = sizeof(*sll); + *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; return 0; } @@ -1199,7 +1224,7 @@ static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, i } } -static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq) +static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml, *i; @@ -1249,7 +1274,7 @@ done: return err; } -static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq) +static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_mclist *ml, **mlp; @@ -1315,11 +1340,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { - struct packet_mreq mreq; - if (optlen<sizeof(mreq)) + struct packet_mreq_max mreq; + int len = optlen; + memset(&mreq, 0, sizeof(mreq)); + if (len < sizeof(struct packet_mreq)) return -EINVAL; - if (copy_from_user(&mreq,optval,sizeof(mreq))) + if (len > sizeof(mreq)) + len = sizeof(mreq); + if (copy_from_user(&mreq,optval,len)) return -EFAULT; + if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) + return -EINVAL; if (optname == PACKET_ADD_MEMBERSHIP) ret = packet_mc_add(sk, &mreq); else @@ -1535,8 +1566,7 @@ static unsigned int packet_poll(struct file * file, struct socket *sock, static void packet_mm_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; - struct inode *inode = file->f_dentry->d_inode; - struct socket * sock = SOCKET_I(inode); + struct socket * sock = file->private_data; struct sock *sk = sock->sk; if (sk) @@ -1546,8 +1576,7 @@ static void packet_mm_open(struct vm_area_struct *vma) static void packet_mm_close(struct vm_area_struct *vma) { struct file *file = vma->vm_file; - struct inode *inode = file->f_dentry->d_inode; - struct socket * sock = SOCKET_I(inode); + struct socket * sock = file->private_data; struct sock *sk = sock->sk; if (sk) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index c6e59f84c3a..5acb1680524 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1243,7 +1243,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); if (amount < 0) amount = 0; - return put_user(amount, (unsigned int __user *)argp); + return put_user(amount, (unsigned int __user *) argp); } case TIOCINQ: { @@ -1252,13 +1252,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) /* These two are safe on a single CPU system as only user tasks fiddle here */ if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; - return put_user(amount, (unsigned int __user *)argp); + return put_user(amount, (unsigned int __user *) argp); } case SIOCGSTAMP: - if (sk != NULL) - return sock_get_timestamp(sk, (struct timeval __user *)argp); - return -EINVAL; + return sock_get_timestamp(sk, (struct timeval __user *) argp); case SIOCGIFADDR: case SIOCSIFADDR: @@ -1363,6 +1361,8 @@ static void rose_info_stop(struct seq_file *seq, void *v) static int rose_info_show(struct seq_file *seq, void *v) { + char buf[11]; + if (v == SEQ_START_TOKEN) seq_puts(seq, "dest_addr dest_call src_addr src_call dev lci neigh st vs vr va t t1 t2 t3 hb idle Snd-Q Rcv-Q inode\n"); @@ -1380,12 +1380,12 @@ static int rose_info_show(struct seq_file *seq, void *v) seq_printf(seq, "%-10s %-9s ", rose2asc(&rose->dest_addr), - ax2asc(&rose->dest_call)); + ax2asc(buf, &rose->dest_call)); if (ax25cmp(&rose->source_call, &null_ax25_address) == 0) callsign = "??????-?"; else - callsign = ax2asc(&rose->source_call); + callsign = ax2asc(buf, &rose->source_call); seq_printf(seq, "%-10s %-9s %-5s %3.3X %05d %d %d %d %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %ld\n", diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index a8ed9a1d09f..d297af737d1 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -149,6 +149,6 @@ void rose_setup(struct net_device *dev) dev->set_mac_address = rose_set_mac_address; /* New-style flags. */ - dev->flags = 0; + dev->flags = IFF_NOARP; dev->get_stats = rose_get_stats; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 4510cd7613e..e556d92c0bc 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -851,6 +851,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) unsigned char cause, diagnostic; struct net_device *dev; int len, res = 0; + char buf[11]; #if 0 if (call_in_firewall(PF_ROSE, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) @@ -876,7 +877,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (rose_neigh == NULL) { printk("rose_route : unknown neighbour or device %s\n", - ax2asc(&ax25->dest_addr)); + ax2asc(buf, &ax25->dest_addr)); goto out; } @@ -1178,6 +1179,7 @@ static void rose_neigh_stop(struct seq_file *seq, void *v) static int rose_neigh_show(struct seq_file *seq, void *v) { + char buf[11]; int i; if (v == SEQ_START_TOKEN) @@ -1189,7 +1191,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v) /* if (!rose_neigh->loopback) { */ seq_printf(seq, "%05d %-9s %-4s %3d %3d %3s %3s %3lu %3lu", rose_neigh->number, - (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(&rose_neigh->callsign), + (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), rose_neigh->dev ? rose_neigh->dev->name : "???", rose_neigh->count, rose_neigh->use, @@ -1200,7 +1202,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v) if (rose_neigh->digipeat != NULL) { for (i = 0; i < rose_neigh->digipeat->ndigi; i++) - seq_printf(seq, " %s", ax2asc(&rose_neigh->digipeat->calls[i])); + seq_printf(seq, " %s", ax2asc(buf, &rose_neigh->digipeat->calls[i])); } seq_puts(seq, "\n"); @@ -1260,6 +1262,8 @@ static void rose_route_stop(struct seq_file *seq, void *v) static int rose_route_show(struct seq_file *seq, void *v) { + char buf[11]; + if (v == SEQ_START_TOKEN) seq_puts(seq, "lci address callsign neigh <-> lci address callsign neigh\n"); @@ -1271,7 +1275,7 @@ static int rose_route_show(struct seq_file *seq, void *v) "%3.3X %-10s %-9s %05d ", rose_route->lci1, rose2asc(&rose_route->src_addr), - ax2asc(&rose_route->src_call), + ax2asc(buf, &rose_route->src_call), rose_route->neigh1->number); else seq_puts(seq, @@ -1282,7 +1286,7 @@ static int rose_route_show(struct seq_file *seq, void *v) "%3.3X %-10s %-9s %05d\n", rose_route->lci2, rose2asc(&rose_route->dest_addr), - ax2asc(&rose_route->dest_call), + ax2asc(buf, &rose_route->dest_call), rose_route->neigh2->number); else seq_puts(seq, diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index a29a3a960fd..36a77944622 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -337,13 +337,13 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN); memcpy(callsign, p + 12, l - 10); callsign[l - 10] = '\0'; - facilities->source_call = *asc2ax(callsign); + asc2ax(&facilities->source_call, callsign); } if (*p == FAC_CCITT_SRC_NSAP) { memcpy(&facilities->dest_addr, p + 7, ROSE_ADDR_LEN); memcpy(callsign, p + 12, l - 10); callsign[l - 10] = '\0'; - facilities->dest_call = *asc2ax(callsign); + asc2ax(&facilities->dest_call, callsign); } p += l + 2; n += l + 2; @@ -400,6 +400,7 @@ static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) { unsigned char *p = buffer + 1; char *callsign; + char buf[11]; int len, nb; /* National Facilities */ @@ -456,7 +457,7 @@ static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) *p++ = FAC_CCITT_DEST_NSAP; - callsign = ax2asc(&rose->dest_call); + callsign = ax2asc(buf, &rose->dest_call); *p++ = strlen(callsign) + 10; *p++ = (strlen(callsign) + 9) * 2; /* ??? */ @@ -471,7 +472,7 @@ static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) *p++ = FAC_CCITT_SRC_NSAP; - callsign = ax2asc(&rose->source_call); + callsign = ax2asc(buf, &rose->source_call); *p++ = strlen(callsign) + 10; *p++ = (strlen(callsign) + 9) * 2; /* ??? */ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 737681cb9a9..31570b9a6e9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1194,7 +1194,7 @@ EXPORT_SYMBOL(psched_time_base); * with 32-bit get_cycles(). Safe up to 4GHz CPU. */ static void psched_tick(unsigned long); -static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0); +static DEFINE_TIMER(psched_timer, psched_tick, 0, 0); static void psched_tick(unsigned long dummy) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index e47ac0d1a6d..e22ccd65596 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -193,8 +193,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) sctp_unhash_endpoint(ep); /* Free up the HMAC transform. */ - if (sctp_sk(ep->base.sk)->hmac) - sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac); + sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 86073df418f..505c7de10c5 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2414,6 +2414,17 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); chunk->subh.shutdown_hdr = sdh; + /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT + * When a peer sends a SHUTDOWN, SCTP delivers this notification to + * inform the application that it should cease sending data. + */ + ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC); + if (!ev) { + disposition = SCTP_DISPOSITION_NOMEM; + goto out; + } + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + /* Upon the reception of the SHUTDOWN, the peer endpoint shall * - enter the SHUTDOWN-RECEIVED state, * - stop accepting new data from its SCTP user @@ -2439,17 +2450,6 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN, SCTP_U32(chunk->subh.shutdown_hdr->cum_tsn_ack)); - /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT - * When a peer sends a SHUTDOWN, SCTP delivers this notification to - * inform the application that it should cease sending data. - */ - ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC); - if (!ev) { - disposition = SCTP_DISPOSITION_NOMEM; - goto out; - } - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - out: return disposition; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4454afe4727..91ec8c93691 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4194,8 +4194,7 @@ out: sctp_release_sock(sk); return err; cleanup: - if (tfm) - sctp_crypto_free_tfm(tfm); + sctp_crypto_free_tfm(tfm); goto out; } diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index dc4893474f1..75b28dd634f 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -42,6 +42,7 @@ */ #include <net/sctp/structs.h> +#include <net/sctp/sctp.h> #include <linux/sysctl.h> static ctl_handler sctp_sysctl_jiffies_ms; diff --git a/net/socket.c b/net/socket.c index 94fe638b4d7..f9264472377 100644 --- a/net/socket.c +++ b/net/socket.c @@ -667,7 +667,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, } iocb->private = x; x->kiocb = iocb; - sock = SOCKET_I(iocb->ki_filp->f_dentry->d_inode); + sock = iocb->ki_filp->private_data; x->async_msg.msg_name = NULL; x->async_msg.msg_namelen = 0; @@ -709,7 +709,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, } iocb->private = x; x->kiocb = iocb; - sock = SOCKET_I(iocb->ki_filp->f_dentry->d_inode); + sock = iocb->ki_filp->private_data; x->async_msg.msg_name = NULL; x->async_msg.msg_namelen = 0; @@ -732,7 +732,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, struct socket *sock; int flags; - sock = SOCKET_I(file->f_dentry->d_inode); + sock = file->private_data; flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; if (more) @@ -741,14 +741,14 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, return sock->ops->sendpage(sock, page, offset, size, flags); } -static int sock_readv_writev(int type, struct inode * inode, +static int sock_readv_writev(int type, struct file * file, const struct iovec * iov, long count, size_t size) { struct msghdr msg; struct socket *sock; - sock = SOCKET_I(inode); + sock = file->private_data; msg.msg_name = NULL; msg.msg_namelen = 0; @@ -775,7 +775,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *vector, int i; for (i = 0 ; i < count ; i++) tot_len += vector[i].iov_len; - return sock_readv_writev(VERIFY_WRITE, file->f_dentry->d_inode, + return sock_readv_writev(VERIFY_WRITE, file, vector, count, tot_len); } @@ -786,7 +786,7 @@ static ssize_t sock_writev(struct file *file, const struct iovec *vector, int i; for (i = 0 ; i < count ; i++) tot_len += vector[i].iov_len; - return sock_readv_writev(VERIFY_READ, file->f_dentry->d_inode, + return sock_readv_writev(VERIFY_READ, file, vector, count, tot_len); } @@ -840,7 +840,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) void __user *argp = (void __user *)arg; int pid, err; - sock = SOCKET_I(file->f_dentry->d_inode); + sock = file->private_data; if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { err = dev_ioctl(cmd, argp); } else @@ -939,13 +939,13 @@ static unsigned int sock_poll(struct file *file, poll_table * wait) /* * We can't return errors to poll, so it's either yes or no. */ - sock = SOCKET_I(file->f_dentry->d_inode); + sock = file->private_data; return sock->ops->poll(file, sock, wait); } static int sock_mmap(struct file * file, struct vm_area_struct * vma) { - struct socket *sock = SOCKET_I(file->f_dentry->d_inode); + struct socket *sock = file->private_data; return sock->ops->mmap(file, sock, vma); } @@ -995,7 +995,7 @@ static int sock_fasync(int fd, struct file *filp, int on) return -ENOMEM; } - sock = SOCKET_I(filp->f_dentry->d_inode); + sock = filp->private_data; if ((sk=sock->sk) == NULL) { kfree(fna); @@ -1745,10 +1745,11 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) goto out_freeiov; ctl_len = msg_sys.msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { - err = cmsghdr_from_user_compat_to_kern(&msg_sys, ctl, sizeof(ctl)); + err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl)); if (err) goto out_freeiov; ctl_buf = msg_sys.msg_control; + ctl_len = msg_sys.msg_controllen; } else if (ctl_len) { if (ctl_len > sizeof(ctl)) { @@ -1861,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag if (err < 0) goto out_freeiov; } - err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); + err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), + COMPAT_FLAGS(msg)); if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 5a7265aeaf8..ee6ae74cd1b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -160,7 +160,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, " unsupported checksum %d", cksumtype); goto out; } - if (!(tfm = crypto_alloc_tfm(cksumname, 0))) + if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP))) goto out; cksum->len = crypto_tfm_alg_digestsize(tfm); if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) @@ -199,8 +199,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, crypto_digest_final(tfm, cksum->data); code = 0; out: - if (tfm) - crypto_free_tfm(tfm); + crypto_free_tfm(tfm); return code; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index cf726510df8..606a8a82caf 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -185,12 +185,9 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; - if (kctx->seq) - crypto_free_tfm(kctx->seq); - if (kctx->enc) - crypto_free_tfm(kctx->enc); - if (kctx->mech_used.data) - kfree(kctx->mech_used.data); + crypto_free_tfm(kctx->seq); + crypto_free_tfm(kctx->enc); + kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index dad05994c3e..6c97d61baa9 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -214,14 +214,10 @@ static void gss_delete_sec_context_spkm3(void *internal_ctx) { struct spkm3_ctx *sctx = internal_ctx; - if(sctx->derived_integ_key) - crypto_free_tfm(sctx->derived_integ_key); - if(sctx->derived_conf_key) - crypto_free_tfm(sctx->derived_conf_key); - if(sctx->share_key.data) - kfree(sctx->share_key.data); - if(sctx->mech_used.data) - kfree(sctx->mech_used.data); + crypto_free_tfm(sctx->derived_integ_key); + crypto_free_tfm(sctx->derived_conf_key); + kfree(sctx->share_key.data); + kfree(sctx->mech_used.data); kfree(sctx); } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5c8fe3bfc49..e3308195374 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -250,6 +250,7 @@ out: } static struct cache_detail rsi_cache = { + .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, .hash_table = rsi_table, .name = "auth.rpcsec.init", @@ -436,6 +437,7 @@ out: } static struct cache_detail rsc_cache = { + .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, .hash_table = rsc_table, .name = "auth.rpcsec.context", @@ -1074,7 +1076,9 @@ gss_svc_init(void) void gss_svc_shutdown(void) { - cache_unregister(&rsc_cache); - cache_unregister(&rsi_cache); + if (cache_unregister(&rsc_cache)) + printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n"); + if (cache_unregister(&rsi_cache)) + printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n"); svc_auth_unregister(RPC_AUTH_GSS); } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 900f5bc7e33..f509e999276 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -177,7 +177,7 @@ void cache_register(struct cache_detail *cd) cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); if (cd->proc_ent) { struct proc_dir_entry *p; - cd->proc_ent->owner = THIS_MODULE; + cd->proc_ent->owner = cd->owner; cd->channel_ent = cd->content_ent = NULL; p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, @@ -185,7 +185,7 @@ void cache_register(struct cache_detail *cd) cd->flush_ent = p; if (p) { p->proc_fops = &cache_flush_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } @@ -195,7 +195,7 @@ void cache_register(struct cache_detail *cd) cd->channel_ent = p; if (p) { p->proc_fops = &cache_file_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } } @@ -205,7 +205,7 @@ void cache_register(struct cache_detail *cd) cd->content_ent = p; if (p) { p->proc_fops = &content_file_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } } diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 9b67dc19944..4979f226e28 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -35,13 +35,13 @@ static int rpc_proc_show(struct seq_file *seq, void *v) { int i, j; seq_printf(seq, - "net %d %d %d %d\n", + "net %u %u %u %u\n", statp->netcnt, statp->netudpcnt, statp->nettcpcnt, statp->nettcpconn); seq_printf(seq, - "rpc %d %d %d\n", + "rpc %u %u %u\n", statp->rpccnt, statp->rpcretrans, statp->rpcauthrefresh); @@ -50,10 +50,10 @@ static int rpc_proc_show(struct seq_file *seq, void *v) { const struct rpc_version *vers = prog->version[i]; if (!vers) continue; - seq_printf(seq, "proc%d %d", + seq_printf(seq, "proc%u %u", vers->number, vers->nrprocs); for (j = 0; j < vers->nrprocs; j++) - seq_printf(seq, " %d", + seq_printf(seq, " %u", vers->procs[j].p_count); seq_putc(seq, '\n'); } @@ -83,13 +83,13 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { int i, j; seq_printf(seq, - "net %d %d %d %d\n", + "net %u %u %u %u\n", statp->netcnt, statp->netudpcnt, statp->nettcpcnt, statp->nettcpconn); seq_printf(seq, - "rpc %d %d %d %d %d\n", + "rpc %u %u %u %u %u\n", statp->rpccnt, statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt, statp->rpcbadfmt, @@ -99,9 +99,9 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { for (i = 0; i < prog->pg_nvers; i++) { if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc)) continue; - seq_printf(seq, "proc%d %d", i, vers->vs_nproc); + seq_printf(seq, "proc%d %u", i, vers->vs_nproc); for (j = 0; j < vers->vs_nproc; j++, proc++) - seq_printf(seq, " %d", proc->pc_count); + seq_printf(seq, " %u", proc->pc_count); seq_putc(seq, '\n'); } } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 62a07349527..ed48ff022d3 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -176,8 +176,10 @@ cleanup_sunrpc(void) { unregister_rpc_pipefs(); rpc_destroy_mempool(); - cache_unregister(&auth_domain_cache); - cache_unregister(&ip_map_cache); + if (cache_unregister(&auth_domain_cache)) + printk(KERN_ERR "sunrpc: failed to unregister auth_domain cache\n"); + if (cache_unregister(&ip_map_cache)) + printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); #ifdef RPC_DEBUG rpc_unregister_sysctl(); #endif diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index bde8147ef2d..dda4f0c6351 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -143,6 +143,7 @@ static void auth_domain_drop(struct cache_head *item, struct cache_detail *cd) struct cache_detail auth_domain_cache = { + .owner = THIS_MODULE, .hash_size = DN_HASHMAX, .hash_table = auth_domain_table, .name = "auth.domain", diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d6baf6fdf8a..cac2e774dd8 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -242,6 +242,7 @@ static int ip_map_show(struct seq_file *m, struct cache_detail ip_map_cache = { + .owner = THIS_MODULE, .hash_size = IP_HASHMAX, .hash_table = ip_table, .name = "auth.unix.ip", diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 05fe2e73553..30ec3efc48a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -512,15 +512,14 @@ svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) static void svc_udp_data_ready(struct sock *sk, int count) { - struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); + struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - if (!svsk) - goto out; - dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", - svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); - set_bit(SK_DATA, &svsk->sk_flags); - svc_sock_enqueue(svsk); - out: + if (svsk) { + dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", + svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); + set_bit(SK_DATA, &svsk->sk_flags); + svc_sock_enqueue(svsk); + } if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); } @@ -540,7 +539,7 @@ svc_write_space(struct sock *sk) } if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { - printk(KERN_WARNING "RPC svc_write_space: some sleeping on %p\n", + dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); wake_up_interruptible(sk->sk_sleep); } @@ -692,31 +691,29 @@ svc_udp_init(struct svc_sock *svsk) static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) { - struct svc_sock *svsk; + struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; dprintk("svc: socket %p TCP (listen) state change %d\n", - sk, sk->sk_state); + sk, sk->sk_state); - if (sk->sk_state != TCP_LISTEN) { - /* - * This callback may called twice when a new connection - * is established as a child socket inherits everything - * from a parent LISTEN socket. - * 1) data_ready method of the parent socket will be called - * when one of child sockets become ESTABLISHED. - * 2) data_ready method of the child socket may be called - * when it receives data before the socket is accepted. - * In case of 2, we should ignore it silently. - */ - goto out; - } - if (!(svsk = (struct svc_sock *) sk->sk_user_data)) { - printk("svc: socket %p: no user data\n", sk); - goto out; + /* + * This callback may called twice when a new connection + * is established as a child socket inherits everything + * from a parent LISTEN socket. + * 1) data_ready method of the parent socket will be called + * when one of child sockets become ESTABLISHED. + * 2) data_ready method of the child socket may be called + * when it receives data before the socket is accepted. + * In case of 2, we should ignore it silently. + */ + if (sk->sk_state == TCP_LISTEN) { + if (svsk) { + set_bit(SK_CONN, &svsk->sk_flags); + svc_sock_enqueue(svsk); + } else + printk("svc: socket %p: no user data\n", sk); } - set_bit(SK_CONN, &svsk->sk_flags); - svc_sock_enqueue(svsk); - out: + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep); } @@ -727,18 +724,17 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) static void svc_tcp_state_change(struct sock *sk) { - struct svc_sock *svsk; + struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", - sk, sk->sk_state, sk->sk_user_data); + sk, sk->sk_state, sk->sk_user_data); - if (!(svsk = (struct svc_sock *) sk->sk_user_data)) { + if (!svsk) printk("svc: socket %p: no user data\n", sk); - goto out; + else { + set_bit(SK_CLOSE, &svsk->sk_flags); + svc_sock_enqueue(svsk); } - set_bit(SK_CLOSE, &svsk->sk_flags); - svc_sock_enqueue(svsk); - out: if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep); } @@ -746,15 +742,14 @@ svc_tcp_state_change(struct sock *sk) static void svc_tcp_data_ready(struct sock *sk, int count) { - struct svc_sock * svsk; + struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; dprintk("svc: socket %p TCP data ready (svsk %p)\n", - sk, sk->sk_user_data); - if (!(svsk = (struct svc_sock *)(sk->sk_user_data))) - goto out; - set_bit(SK_DATA, &svsk->sk_flags); - svc_sock_enqueue(svsk); - out: + sk, sk->sk_user_data); + if (svsk) { + set_bit(SK_DATA, &svsk->sk_flags); + svc_sock_enqueue(svsk); + } if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); } @@ -1170,8 +1165,7 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) while (rqstp->rq_arghi < pages) { struct page *p = alloc_page(GFP_KERNEL); if (!p) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ/2); + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); continue; } rqstp->rq_argpages[rqstp->rq_arghi++] = p; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 83c8135e176..fda737d77ed 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -765,8 +765,8 @@ restart: switch (policy->action) { case XFRM_POLICY_BLOCK: /* Prohibit the flow */ - xfrm_pol_put(policy); - return -EPERM; + err = -EPERM; + goto error; case XFRM_POLICY_ALLOW: if (policy->xfrm_nr == 0) { @@ -782,8 +782,8 @@ restart: */ dst = xfrm_find_bundle(fl, policy, family); if (IS_ERR(dst)) { - xfrm_pol_put(policy); - return PTR_ERR(dst); + err = PTR_ERR(dst); + goto error; } if (dst) |