diff options
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/Kconfig | 74 | ||||
-rw-r--r-- | net/netfilter/Makefile | 8 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 1538 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_ftp.c | 698 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_l3proto_generic.c | 98 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_generic.c | 85 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_sctp.c | 670 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_tcp.c | 1162 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_udp.c | 216 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_standalone.c | 869 |
10 files changed, 5418 insertions, 0 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8296b38bf27..a84f9221e5f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,3 +1,6 @@ +menu "Core Netfilter Configuration" + depends on NET && NETFILTER + config NETFILTER_NETLINK tristate "Netfilter netlink interface" help @@ -22,3 +25,74 @@ config NETFILTER_NETLINK_LOG and is also scheduled to replace the old syslog-based ipt_LOG and ip6t_LOG modules. +config NF_CONNTRACK + tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && IP_NF_CONNTRACK=n + default n + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + Layer 3 independent connection tracking is experimental scheme + which generalize ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CT_ACCT + bool "Connection tracking flow accounting" + depends on NF_CONNTRACK + help + If this option is enabled, the connection tracking code will + keep per-flow packet and byte counters. + + Those counters can be used for flow-based accounting or the + `connbytes' match. + + If unsure, say `N'. + +config NF_CONNTRACK_MARK + bool 'Connection mark tracking support' + depends on NF_CONNTRACK + help + This option enables support for connection marks, used by the + `CONNMARK' target and `connmark' match. Similar to the mark value + of packets, but this mark value is kept in the conntrack session + instead of the individual packets. + +config NF_CONNTRACK_EVENTS + bool "Connection tracking events" + depends on NF_CONNTRACK + help + If this option is enabled, the connection tracking code will + provide a notifier chain that can be used by other kernel code + to get notified aboutchanges in the connection tracking state. + + If unsure, say `N'. + +config NF_CT_PROTO_SCTP + tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)' + depends on EXPERIMENTAL && NF_CONNTRACK + default n + help + With this option enabled, the layer 3 independent connection + tracking code will be able to do state tracking on SCTP connections. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config NF_CONNTRACK_FTP + tristate "FTP support on new connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + Tracking FTP connections is problematic: special helpers are + required for tracking them, and doing masquerading and other forms + of Network Address Translation on them. + + This is FTP support on Layer 3 independent connection tracking. + Layer 3 independent connection tracking is experimental scheme + which generalize ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + +endmenu diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b3b44f8b415..55f019ad2c0 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -5,3 +5,11 @@ obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o + +nf_conntrack-objs := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o + +obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o +obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o + +# SCTP protocol connection tracking +obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c new file mode 100644 index 00000000000..9a67c796b38 --- /dev/null +++ b/net/netfilter/nf_conntrack_core.c @@ -0,0 +1,1538 @@ +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> + * - new API and handling of conntrack/nat helpers + * - now capable of multiple expectations for one master + * 16 Jul 2002: Harald Welte <laforge@gnumonks.org> + * - add usage/reference counts to ip_conntrack_expect + * - export ip_conntrack[_expect]_{find_get,put} functions + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol denendent part. + * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - add support various size of conntrack structures. + * + * Derived from net/ipv4/netfilter/ip_conntrack_core.c + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/vmalloc.h> +#include <linux/stddef.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/jhash.h> +#include <linux/err.h> +#include <linux/percpu.h> +#include <linux/moduleparam.h> +#include <linux/notifier.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/socket.h> + +/* This rwlock protects the main hash table, protocol/helper/expected + registrations, conntrack timers*/ +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <linux/netfilter_ipv4/listhelp.h> + +#define NF_CONNTRACK_VERSION "0.4.1" + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DEFINE_RWLOCK(nf_conntrack_lock); + +/* nf_conntrack_standalone needs this */ +atomic_t nf_conntrack_count = ATOMIC_INIT(0); + +void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; +LIST_HEAD(nf_conntrack_expect_list); +struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; +struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; +static LIST_HEAD(helpers); +unsigned int nf_conntrack_htable_size = 0; +int nf_conntrack_max; +struct list_head *nf_conntrack_hash; +static kmem_cache_t *nf_conntrack_expect_cachep; +struct nf_conn nf_conntrack_untracked; +unsigned int nf_ct_log_invalid; +static LIST_HEAD(unconfirmed); +static int nf_conntrack_vmalloc; + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +struct notifier_block *nf_conntrack_chain; +struct notifier_block *nf_conntrack_expect_chain; + +DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); + +/* deliver cached events and clear cache entry - must be called with locally + * disabled softirqs */ +static inline void +__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) +{ + DEBUGP("ecache: delivering events for %p\n", ecache->ct); + if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) + && ecache->events) + notifier_call_chain(&nf_conntrack_chain, ecache->events, + ecache->ct); + + ecache->events = 0; + nf_ct_put(ecache->ct); + ecache->ct = NULL; +} + +/* Deliver all cached events for a particular conntrack. This is called + * by code prior to async packet handling for freeing the skb */ +void nf_ct_deliver_cached_events(const struct nf_conn *ct) +{ + struct nf_conntrack_ecache *ecache; + + local_bh_disable(); + ecache = &__get_cpu_var(nf_conntrack_ecache); + if (ecache->ct == ct) + __nf_ct_deliver_cached_events(ecache); + local_bh_enable(); +} + +/* Deliver cached events for old pending events, if current conntrack != old */ +void __nf_ct_event_cache_init(struct nf_conn *ct) +{ + struct nf_conntrack_ecache *ecache; + + /* take care of delivering potentially old events */ + ecache = &__get_cpu_var(nf_conntrack_ecache); + BUG_ON(ecache->ct == ct); + if (ecache->ct) + __nf_ct_deliver_cached_events(ecache); + /* initialize for this conntrack/packet */ + ecache->ct = ct; + nf_conntrack_get(&ct->ct_general); +} + +/* flush the event cache - touches other CPU's data and must not be called + * while packets are still passing through the code */ +static void nf_ct_event_cache_flush(void) +{ + struct nf_conntrack_ecache *ecache; + int cpu; + + for_each_cpu(cpu) { + ecache = &per_cpu(nf_conntrack_ecache, cpu); + if (ecache->ct) + nf_ct_put(ecache->ct); + } +} +#else +static inline void nf_ct_event_cache_flush(void) {} +#endif /* CONFIG_NF_CONNTRACK_EVENTS */ + +DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); +EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); + +/* + * This scheme offers various size of "struct nf_conn" dependent on + * features(helper, nat, ...) + */ + +#define NF_CT_FEATURES_NAMELEN 256 +static struct { + /* name of slab cache. printed in /proc/slabinfo */ + char *name; + + /* size of slab cache */ + size_t size; + + /* slab cache pointer */ + kmem_cache_t *cachep; + + /* allocated slab cache + modules which uses this slab cache */ + int use; + + /* Initialization */ + int (*init_conntrack)(struct nf_conn *, u_int32_t); + +} nf_ct_cache[NF_CT_F_NUM]; + +/* protect members of nf_ct_cache except of "use" */ +DEFINE_RWLOCK(nf_ct_cache_lock); + +/* This avoids calling kmem_cache_create() with same name simultaneously */ +DECLARE_MUTEX(nf_ct_cache_mutex); + +extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; +struct nf_conntrack_protocol * +nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) +{ + if (unlikely(nf_ct_protos[l3proto] == NULL)) + return &nf_conntrack_generic_protocol; + + return nf_ct_protos[l3proto][protocol]; +} + +static int nf_conntrack_hash_rnd_initted; +static unsigned int nf_conntrack_hash_rnd; + +static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, + unsigned int size, unsigned int rnd) +{ + unsigned int a, b; + a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all), + ((tuple->src.l3num) << 16) | tuple->dst.protonum); + b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all), + (tuple->src.u.all << 16) | tuple->dst.u.all); + + return jhash_2words(a, b, rnd) % size; +} + +static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) +{ + return __hash_conntrack(tuple, nf_conntrack_htable_size, + nf_conntrack_hash_rnd); +} + +/* Initialize "struct nf_conn" which has spaces for helper */ +static int +init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features) +{ + + conntrack->help = (union nf_conntrack_help *) + (((unsigned long)conntrack->data + + (__alignof__(union nf_conntrack_help) - 1)) + & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1)))); + return 0; +} + +int nf_conntrack_register_cache(u_int32_t features, const char *name, + size_t size, + int (*init)(struct nf_conn *, u_int32_t)) +{ + int ret = 0; + char *cache_name; + kmem_cache_t *cachep; + + DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", + features, name, size); + + if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) { + DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n", + features); + return -EINVAL; + } + + down(&nf_ct_cache_mutex); + + write_lock_bh(&nf_ct_cache_lock); + /* e.g: multiple helpers are loaded */ + if (nf_ct_cache[features].use > 0) { + DEBUGP("nf_conntrack_register_cache: already resisterd.\n"); + if ((!strncmp(nf_ct_cache[features].name, name, + NF_CT_FEATURES_NAMELEN)) + && nf_ct_cache[features].size == size + && nf_ct_cache[features].init_conntrack == init) { + DEBUGP("nf_conntrack_register_cache: reusing.\n"); + nf_ct_cache[features].use++; + ret = 0; + } else + ret = -EBUSY; + + write_unlock_bh(&nf_ct_cache_lock); + up(&nf_ct_cache_mutex); + return ret; + } + write_unlock_bh(&nf_ct_cache_lock); + + /* + * The memory space for name of slab cache must be alive until + * cache is destroyed. + */ + cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC); + if (cache_name == NULL) { + DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n"); + ret = -ENOMEM; + goto out_up_mutex; + } + + if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN) + >= NF_CT_FEATURES_NAMELEN) { + printk("nf_conntrack_register_cache: name too long\n"); + ret = -EINVAL; + goto out_free_name; + } + + cachep = kmem_cache_create(cache_name, size, 0, 0, + NULL, NULL); + if (!cachep) { + printk("nf_conntrack_register_cache: Can't create slab cache " + "for the features = 0x%x\n", features); + ret = -ENOMEM; + goto out_free_name; + } + + write_lock_bh(&nf_ct_cache_lock); + nf_ct_cache[features].use = 1; + nf_ct_cache[features].size = size; + nf_ct_cache[features].init_conntrack = init; + nf_ct_cache[features].cachep = cachep; + nf_ct_cache[features].name = cache_name; + write_unlock_bh(&nf_ct_cache_lock); + + goto out_up_mutex; + +out_free_name: + kfree(cache_name); +out_up_mutex: + up(&nf_ct_cache_mutex); + return ret; +} + +/* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */ +void nf_conntrack_unregister_cache(u_int32_t features) +{ + kmem_cache_t *cachep; + char *name; + + /* + * This assures that kmem_cache_create() isn't called before destroying + * slab cache. + */ + DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features); + down(&nf_ct_cache_mutex); + + write_lock_bh(&nf_ct_cache_lock); + if (--nf_ct_cache[features].use > 0) { + write_unlock_bh(&nf_ct_cache_lock); + up(&nf_ct_cache_mutex); + return; + } + cachep = nf_ct_cache[features].cachep; + name = nf_ct_cache[features].name; + nf_ct_cache[features].cachep = NULL; + nf_ct_cache[features].name = NULL; + nf_ct_cache[features].init_conntrack = NULL; + nf_ct_cache[features].size = 0; + write_unlock_bh(&nf_ct_cache_lock); + + synchronize_net(); + + kmem_cache_destroy(cachep); + kfree(name); + + up(&nf_ct_cache_mutex); +} + +int +nf_ct_get_tuple(const struct sk_buff *skb, + unsigned int nhoff, + unsigned int dataoff, + u_int16_t l3num, + u_int8_t protonum, + struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_protocol *protocol) +{ + NF_CT_TUPLE_U_BLANK(tuple); + + tuple->src.l3num = l3num; + if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) + return 0; + + tuple->dst.protonum = protonum; + tuple->dst.dir = IP_CT_DIR_ORIGINAL; + + return protocol->pkt_to_tuple(skb, dataoff, tuple); +} + +int +nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_protocol *protocol) +{ + NF_CT_TUPLE_U_BLANK(inverse); + + inverse->src.l3num = orig->src.l3num; + if (l3proto->invert_tuple(inverse, orig) == 0) + return 0; + + inverse->dst.dir = !orig->dst.dir; + + inverse->dst.protonum = orig->dst.protonum; + return protocol->invert_tuple(inverse, orig); +} + +/* nf_conntrack_expect helper functions */ +static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +{ + ASSERT_WRITE_LOCK(&nf_conntrack_lock); + NF_CT_ASSERT(!timer_pending(&exp_timeout)); + list_del(&exp->list); + NF_CT_STAT_INC(expect_delete); + exp->master->expecting--; + nf_conntrack_expect_put(exp); +} + +static void expectation_timed_out(unsigned long ul_expect) +{ + struct nf_conntrack_expect *exp = (void *)ul_expect; + + write_lock_bh(&nf_conntrack_lock); + nf_ct_unlink_expect(exp); + write_unlock_bh(&nf_conntrack_lock); + nf_conntrack_expect_put(exp); +} + +/* If an expectation for this connection is found, it gets delete from + * global list then returned. */ +static struct nf_conntrack_expect * +find_expectation(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *i; + + list_for_each_entry(i, &nf_conntrack_expect_list, list) { + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). */ + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) + && nf_ct_is_confirmed(i->master)) { + if (i->flags & NF_CT_EXPECT_PERMANENT) { + atomic_inc(&i->use); + return i; + } else if (del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + return i; + } + } + } + return NULL; +} + +/* delete all expectations for this conntrack */ +static void remove_expectations(struct nf_conn *ct) +{ + struct nf_conntrack_expect *i, *tmp; + + /* Optimization: most connection never expect any others. */ + if (ct->expecting == 0) + return; + + list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { + if (i->master == ct && del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_conntrack_expect_put(i); + } + } +} + +static void +clean_from_lists(struct nf_conn *ct) +{ + unsigned int ho, hr; + + DEBUGP("clean_from_lists(%p)\n", ct); + ASSERT_WRITE_LOCK(&nf_conntrack_lock); + + ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + + /* Destroy all pending expectations */ + remove_expectations(ct); +} + +static void +destroy_conntrack(struct nf_conntrack *nfct) +{ + struct nf_conn *ct = (struct nf_conn *)nfct; + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_protocol *proto; + + DEBUGP("destroy_conntrack(%p)\n", ct); + NF_CT_ASSERT(atomic_read(&nfct->use) == 0); + NF_CT_ASSERT(!timer_pending(&ct->timeout)); + + nf_conntrack_event(IPCT_DESTROY, ct); + set_bit(IPS_DYING_BIT, &ct->status); + + /* To make sure we don't get any weird locking issues here: + * destroy_conntrack() MUST NOT be called with a write lock + * to nf_conntrack_lock!!! -HW */ + l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); + if (l3proto && l3proto->destroy) + l3proto->destroy(ct); + + proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + if (proto && proto->destroy) + proto->destroy(ct); + + if (nf_conntrack_destroyed) + nf_conntrack_destroyed(ct); + + write_lock_bh(&nf_conntrack_lock); + /* Expectations will have been removed in clean_from_lists, + * except TFTP can create an expectation on the first packet, + * before connection is in the list, so we need to clean here, + * too. */ + remove_expectations(ct); + + /* We overload first tuple to link into unconfirmed list. */ + if (!nf_ct_is_confirmed(ct)) { + BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list)); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + } + + NF_CT_STAT_INC(delete); + write_unlock_bh(&nf_conntrack_lock); + + if (ct->master) + nf_ct_put(ct->master); + + DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); + nf_conntrack_free(ct); +} + +static void death_by_timeout(unsigned long ul_conntrack) +{ + struct nf_conn *ct = (void *)ul_conntrack; + + write_lock_bh(&nf_conntrack_lock); + /* Inside lock so preempt is disabled on module removal path. + * Otherwise we can get spurious warnings. */ + NF_CT_STAT_INC(delete_list); + clean_from_lists(ct); + write_unlock_bh(&nf_conntrack_lock); + nf_ct_put(ct); +} + +static inline int +conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + ASSERT_READ_LOCK(&nf_conntrack_lock); + return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack + && nf_ct_tuple_equal(tuple, &i->tuple); +} + +static struct nf_conntrack_tuple_hash * +__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + struct nf_conntrack_tuple_hash *h; + unsigned int hash = hash_conntrack(tuple); + + ASSERT_READ_LOCK(&nf_conntrack_lock); + list_for_each_entry(h, &nf_conntrack_hash[hash], list) { + if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + NF_CT_STAT_INC(found); + return h; + } + NF_CT_STAT_INC(searched); + } + + return NULL; +} + +/* Find a connection corresponding to a tuple. */ +struct nf_conntrack_tuple_hash * +nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + struct nf_conntrack_tuple_hash *h; + + read_lock_bh(&nf_conntrack_lock); + h = __nf_conntrack_find(tuple, ignored_conntrack); + if (h) + atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); + read_unlock_bh(&nf_conntrack_lock); + + return h; +} + +/* Confirm a connection given skb; places it in hash table */ +int +__nf_conntrack_confirm(struct sk_buff **pskb) +{ + unsigned int hash, repl_hash; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + ct = nf_ct_get(*pskb, &ctinfo); + + /* ipt_REJECT uses nf_conntrack_attach to attach related + ICMP/TCP RST packets in other direction. Actual packet + which created connection will be IP_CT_NEW or for an + expected connection, IP_CT_RELATED. */ + if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + /* We're not in hash table, and we refuse to set up related + connections for unconfirmed conns. But packet copies and + REJECT will give spurious warnings here. */ + /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ + + /* No external references means noone else could have + confirmed us. */ + NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); + DEBUGP("Confirming conntrack %p\n", ct); + + write_lock_bh(&nf_conntrack_lock); + + /* See if there's one in the list already, including reverse: + NAT could have grabbed it without realizing, since we're + not in the hash. If there is, we lost race. */ + if (!LIST_FIND(&nf_conntrack_hash[hash], + conntrack_tuple_cmp, + struct nf_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) + && !LIST_FIND(&nf_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct nf_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + list_prepend(&nf_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + list_prepend(&nf_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY]); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + NF_CT_STAT_INC(insert); + write_unlock_bh(&nf_conntrack_lock); + if (ct->helper) + nf_conntrack_event_cache(IPCT_HELPER, *pskb); +#ifdef CONFIG_NF_NAT_NEEDED + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_NATINFO, *pskb); +#endif + nf_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); + return NF_ACCEPT; + } + + NF_CT_STAT_INC(insert_failed); + write_unlock_bh(&nf_conntrack_lock); + return NF_DROP; +} + +/* Returns true if a connection correspondings to the tuple (required + for NAT). */ +int +nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + struct nf_conntrack_tuple_hash *h; + + read_lock_bh(&nf_conntrack_lock); + h = __nf_conntrack_find(tuple, ignored_conntrack); + read_unlock_bh(&nf_conntrack_lock); + + return h != NULL; +} + +/* There's a small race here where we may free a just-assured + connection. Too bad: we're in trouble anyway. */ +static inline int unreplied(const struct nf_conntrack_tuple_hash *i) +{ + return !(test_bit(IPS_ASSURED_BIT, + &nf_ct_tuplehash_to_ctrack(i)->status)); +} + +static int early_drop(struct list_head *chain) +{ + /* Traverse backwards: gives us oldest, which is roughly LRU */ + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct = NULL; + int dropped = 0; + + read_lock_bh(&nf_conntrack_lock); + h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); + if (h) { + ct = nf_ct_tuplehash_to_ctrack(h); + atomic_inc(&ct->ct_general.use); + } + read_unlock_bh(&nf_conntrack_lock); + + if (!ct) + return dropped; + + if (del_timer(&ct->timeout)) { + death_by_timeout((unsigned long)ct); + dropped = 1; + NF_CT_STAT_INC(early_drop); + } + nf_ct_put(ct); + return dropped; +} + +static inline int helper_cmp(const struct nf_conntrack_helper *i, + const struct nf_conntrack_tuple *rtuple) +{ + return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); +} + +static struct nf_conntrack_helper * +nf_ct_find_helper(const struct nf_conntrack_tuple *tuple) +{ + return LIST_FIND(&helpers, helper_cmp, + struct nf_conntrack_helper *, + tuple); +} + +static struct nf_conn * +__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl, + const struct nf_conntrack_l3proto *l3proto) +{ + struct nf_conn *conntrack = NULL; + u_int32_t features = 0; + + if (!nf_conntrack_hash_rnd_initted) { + get_random_bytes(&nf_conntrack_hash_rnd, 4); + nf_conntrack_hash_rnd_initted = 1; + } + + if (nf_conntrack_max + && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) { + unsigned int hash = hash_conntrack(orig); + /* Try dropping from this hash chain. */ + if (!early_drop(&nf_conntrack_hash[hash])) { + if (net_ratelimit()) + printk(KERN_WARNING + "nf_conntrack: table full, dropping" + " packet.\n"); + return ERR_PTR(-ENOMEM); + } + } + + /* find features needed by this conntrack. */ + features = l3proto->get_features(orig); + read_lock_bh(&nf_conntrack_lock); + if (nf_ct_find_helper(repl) != NULL) + features |= NF_CT_F_HELP; + read_unlock_bh(&nf_conntrack_lock); + + DEBUGP("nf_conntrack_alloc: features=0x%x\n", features); + + read_lock_bh(&nf_ct_cache_lock); + + if (!nf_ct_cache[features].use) { + DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n", + features); + goto out; + } + + conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC); + if (conntrack == NULL) { + DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n"); + goto out; + } + + memset(conntrack, 0, nf_ct_cache[features].size); + conntrack->features = features; + if (nf_ct_cache[features].init_conntrack && + nf_ct_cache[features].init_conntrack(conntrack, features) < 0) { + DEBUGP("nf_conntrack_alloc: failed to init\n"); + kmem_cache_free(nf_ct_cache[features].cachep, conntrack); + conntrack = NULL; + goto out; + } + + atomic_set(&conntrack->ct_general.use, 1); + conntrack->ct_general.destroy = destroy_conntrack; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; + /* Don't set timer yet: wait for confirmation */ + init_timer(&conntrack->timeout); + conntrack->timeout.data = (unsigned long)conntrack; + conntrack->timeout.function = death_by_timeout; + + atomic_inc(&nf_conntrack_count); +out: + read_unlock_bh(&nf_ct_cache_lock); + return conntrack; +} + +struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) +{ + struct nf_conntrack_l3proto *l3proto; + + l3proto = nf_ct_find_l3proto(orig->src.l3num); + return __nf_conntrack_alloc(orig, repl, l3proto); +} + +void nf_conntrack_free(struct nf_conn *conntrack) +{ + u_int32_t features = conntrack->features; + NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM); + DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features, + conntrack); + kmem_cache_free(nf_ct_cache[features].cachep, conntrack); + atomic_dec(&nf_conntrack_count); +} + +/* Allocate a new conntrack: we return -ENOMEM if classification + failed due to stress. Otherwise it really is unclassifiable. */ +static struct nf_conntrack_tuple_hash * +init_conntrack(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_l3proto *l3proto, + struct nf_conntrack_protocol *protocol, + struct sk_buff *skb, + unsigned int dataoff) +{ + struct nf_conn *conntrack; + struct nf_conntrack_tuple repl_tuple; + struct nf_conntrack_expect *exp; + + if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + + conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto); + if (conntrack == NULL || IS_ERR(conntrack)) { + DEBUGP("Can't allocate conntrack.\n"); + return (struct nf_conntrack_tuple_hash *)conntrack; + } + + if (!protocol->new(conntrack, skb, dataoff)) { + nf_conntrack_free(conntrack); + DEBUGP("init conntrack: can't track with proto module\n"); + return NULL; + } + + write_lock_bh(&nf_conntrack_lock); + exp = find_expectation(tuple); + + if (exp) { + DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", + conntrack, exp); + /* Welcome, Mr. Bond. We've been expecting you... */ + __set_bit(IPS_EXPECTED_BIT, &conntrack->status); + conntrack->master = exp->master; +#ifdef CONFIG_NF_CONNTRACK_MARK + conntrack->mark = exp->master->mark; +#endif + nf_conntrack_get(&conntrack->master->ct_general); + NF_CT_STAT_INC(expect_new); + } else { + conntrack->helper = nf_ct_find_helper(&repl_tuple); + + NF_CT_STAT_INC(new); + } + + /* Overload tuple linked list to put us in unconfirmed list. */ + list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); + + write_unlock_bh(&nf_conntrack_lock); + + if (exp) { + if (exp->expectfn) + exp->expectfn(conntrack, exp); + nf_conntrack_expect_put(exp); + } + + return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; +} + +/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +static inline struct nf_conn * +resolve_normal_ct(struct sk_buff *skb, + unsigned int dataoff, + u_int16_t l3num, + u_int8_t protonum, + struct nf_conntrack_l3proto *l3proto, + struct nf_conntrack_protocol *proto, + int *set_reply, + enum ip_conntrack_info *ctinfo) +{ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; + + if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data), + dataoff, l3num, protonum, &tuple, l3proto, + proto)) { + DEBUGP("resolve_normal_ct: Can't get tuple\n"); + return NULL; + } + + /* look for tuple match */ + h = nf_conntrack_find_get(&tuple, NULL); + if (!h) { + h = init_conntrack(&tuple, l3proto, proto, skb, dataoff); + if (!h) + return NULL; + if (IS_ERR(h)) + return (void *)h; + } + ct = nf_ct_tuplehash_to_ctrack(h); + + /* It exists; we have (non-exclusive) reference. */ + if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { + *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + /* Please set reply bit if this packet OK */ + *set_reply = 1; + } else { + /* Once we've had two way comms, always ESTABLISHED. */ + if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + DEBUGP("nf_conntrack_in: normal packet for %p\n", ct); + *ctinfo = IP_CT_ESTABLISHED; + } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { + DEBUGP("nf_conntrack_in: related packet for %p\n", ct); + *ctinfo = IP_CT_RELATED; + } else { + DEBUGP("nf_conntrack_in: new packet for %p\n", ct); + *ctinfo = IP_CT_NEW; + } + *set_reply = 0; + } + skb->nfct = &ct->ct_general; + skb->nfctinfo = *ctinfo; + return ct; +} + +unsigned int +nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_protocol *proto; + unsigned int dataoff; + u_int8_t protonum; + int set_reply = 0; + int ret; + + /* Previously seen (loopback or untracked)? Ignore. */ + if ((*pskb)->nfct) { + NF_CT_STAT_INC(ignore); + return NF_ACCEPT; + } + + l3proto = nf_ct_find_l3proto((u_int16_t)pf); + if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { + DEBUGP("not prepared to track yet or error occured\n"); + return -ret; + } + + proto = nf_ct_find_proto((u_int16_t)pf, protonum); + + /* It may be an special packet, error, unclean... + * inverse of the return code tells to the netfilter + * core what to do with the packet. */ + if (proto->error != NULL && + (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { + NF_CT_STAT_INC(error); + NF_CT_STAT_INC(invalid); + return -ret; + } + + ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto, + &set_reply, &ctinfo); + if (!ct) { + /* Not valid part of a connection */ + NF_CT_STAT_INC(invalid); + return NF_ACCEPT; + } + + if (IS_ERR(ct)) { + /* Too stressed to deal. */ + NF_CT_STAT_INC(drop); + return NF_DROP; + } + + NF_CT_ASSERT((*pskb)->nfct); + + ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); + if (ret < 0) { + /* Invalid: inverse of the return code tells + * the netfilter core what to do */ + DEBUGP("nf_conntrack_in: Can't track with proto module\n"); + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + NF_CT_STAT_INC(invalid); + return -ret; + } + + if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_STATUS, *pskb); + + return ret; +} + +int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig) +{ + return nf_ct_invert_tuple(inverse, orig, + nf_ct_find_l3proto(orig->src.l3num), + nf_ct_find_proto(orig->src.l3num, + orig->dst.protonum)); +} + +/* Would two expected things clash? */ +static inline int expect_clash(const struct nf_conntrack_expect *a, + const struct nf_conntrack_expect *b) +{ + /* Part covered by intersection of masks must be unequal, + otherwise they clash */ + struct nf_conntrack_tuple intersect_mask; + int count; + + intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; + intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; + intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; + intersect_mask.dst.protonum = a->mask.dst.protonum + & b->mask.dst.protonum; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + intersect_mask.src.u3.all[count] = + a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; + } + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + intersect_mask.dst.u3.all[count] = + a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; + } + + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); +} + +static inline int expect_matches(const struct nf_conntrack_expect *a, + const struct nf_conntrack_expect *b) +{ + return a->master == b->master + && nf_ct_tuple_equal(&a->tuple, &b->tuple) + && nf_ct_tuple_equal(&a->mask, &b->mask); +} + +/* Generally a bad idea to call this: could have matched already. */ +void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_expect *i; + + write_lock_bh(&nf_conntrack_lock); + /* choose the the oldest expectation to evict */ + list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { + if (expect_matches(i, exp) && del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + write_unlock_bh(&nf_conntrack_lock); + nf_conntrack_expect_put(i); + return; + } + } + write_unlock_bh(&nf_conntrack_lock); +} + +/* We don't increase the master conntrack refcount for non-fulfilled + * conntracks. During the conntrack destruction, the expectations are + * always killed before the conntrack itself */ +struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) +{ + struct nf_conntrack_expect *new; + + new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); + if (!new) { + DEBUGP("expect_related: OOM allocating expect\n"); + return NULL; + } + new->master = me; + atomic_set(&new->use, 1); + return new; +} + +void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) +{ + if (atomic_dec_and_test(&exp->use)) + kmem_cache_free(nf_conntrack_expect_cachep, exp); +} + +static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) +{ + atomic_inc(&exp->use); + exp->master->expecting++; + list_add(&exp->list, &nf_conntrack_expect_list); + + init_timer(&exp->timeout); + exp->timeout.data = (unsigned long)exp; + exp->timeout.function = expectation_timed_out; + exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; + add_timer(&exp->timeout); + + atomic_inc(&exp->use); + NF_CT_STAT_INC(expect_create); +} + +/* Race with expectations being used means we could have none to find; OK. */ +static void evict_oldest_expect(struct nf_conn *master) +{ + struct nf_conntrack_expect *i; + + list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { + if (i->master == master) { + if (del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_conntrack_expect_put(i); + } + break; + } + } +} + +static inline int refresh_timer(struct nf_conntrack_expect *i) +{ + if (!del_timer(&i->timeout)) + return 0; + + i->timeout.expires = jiffies + i->master->helper->timeout*HZ; + add_timer(&i->timeout); + return 1; +} + +int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) +{ + struct nf_conntrack_expect *i; + int ret; + + DEBUGP("nf_conntrack_expect_related %p\n", related_to); + DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple); + DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask); + + write_lock_bh(&nf_conntrack_lock); + list_for_each_entry(i, &nf_conntrack_expect_list, list) { + if (expect_matches(i, expect)) { + /* Refresh timer: if it's dying, ignore.. */ + if (refresh_timer(i)) { + ret = 0; + goto out; + } + } else if (expect_clash(i, expect)) { + ret = -EBUSY; + goto out; + } + } + /* Will be over limit? */ + if (expect->master->helper->max_expected && + expect->master->expecting >= expect->master->helper->max_expected) + evict_oldest_expect(expect->master); + + nf_conntrack_expect_insert(expect); + nf_conntrack_expect_event(IPEXP_NEW, expect); + ret = 0; +out: + write_unlock_bh(&nf_conntrack_lock); + return ret; +} + +/* Alter reply tuple (maybe alter helper). This is for NAT, and is + implicitly racy: see __nf_conntrack_confirm */ +void nf_conntrack_alter_reply(struct nf_conn *conntrack, + const struct nf_conntrack_tuple *newreply) +{ + write_lock_bh(&nf_conntrack_lock); + /* Should be unconfirmed, so not in hash table yet */ + NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack)); + + DEBUGP("Altering reply tuple of %p to ", conntrack); + NF_CT_DUMP_TUPLE(newreply); + + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; + if (!conntrack->master && conntrack->expecting == 0) + conntrack->helper = nf_ct_find_helper(newreply); + write_unlock_bh(&nf_conntrack_lock); +} + +int nf_conntrack_helper_register(struct nf_conntrack_helper *me) +{ + int ret; + BUG_ON(me->timeout == 0); + + ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", + sizeof(struct nf_conn) + + sizeof(union nf_conntrack_help) + + __alignof__(union nf_conntrack_help), + init_conntrack_for_helper); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_helper_reigster: Unable to create slab cache for conntracks\n"); + return ret; + } + write_lock_bh(&nf_conntrack_lock); + list_prepend(&helpers, me); + write_unlock_bh(&nf_conntrack_lock); + + return 0; +} + +static inline int unhelp(struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_helper *me) +{ + if (nf_ct_tuplehash_to_ctrack(i)->helper == me) { + nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i)); + nf_ct_tuplehash_to_ctrack(i)->helper = NULL; + } + return 0; +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ + unsigned int i; + struct nf_conntrack_expect *exp, *tmp; + + /* Need write lock here, to delete helper. */ + write_lock_bh(&nf_conntrack_lock); + LIST_DELETE(&helpers, me); + + /* Get rid of expectations */ + list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { + if (exp->master->helper == me && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_conntrack_expect_put(exp); + } + } + + /* Get rid of expecteds, set helpers to NULL. */ + LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); + for (i = 0; i < nf_conntrack_htable_size; i++) + LIST_FIND_W(&nf_conntrack_hash[i], unhelp, + struct nf_conntrack_tuple_hash *, me); + write_unlock_bh(&nf_conntrack_lock); + + /* Someone could be still looking at the helper in a bh. */ + synchronize_net(); +} + +/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ +void __nf_ct_refresh_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies, + int do_acct) +{ + int event = 0; + + NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); + NF_CT_ASSERT(skb); + + write_lock_bh(&nf_conntrack_lock); + + /* If not in hash table, timer will not be active yet */ + if (!nf_ct_is_confirmed(ct)) { + ct->timeout.expires = extra_jiffies; + event = IPCT_REFRESH; + } else { + /* Need del_timer for race avoidance (may already be dying). */ + if (del_timer(&ct->timeout)) { + ct->timeout.expires = jiffies + extra_jiffies; + add_timer(&ct->timeout); + event = IPCT_REFRESH; + } + } + +#ifdef CONFIG_NF_CT_ACCT + if (do_acct) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + skb->len - (unsigned int)(skb->nh.raw - skb->data); + if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) + || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) + event |= IPCT_COUNTER_FILLING; + } +#endif + + write_unlock_bh(&nf_conntrack_lock); + + /* must be unlocked when calling event cache */ + if (event) + nf_conntrack_event_cache(event, skb); +} + +/* Used by ipt_REJECT and ip6t_REJECT. */ +void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + /* This ICMP is in reverse direction to the packet which caused it */ + ct = nf_ct_get(skb, &ctinfo); + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) + ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + else + ctinfo = IP_CT_RELATED; + + /* Attach to new skbuff, and increment count */ + nskb->nfct = &ct->ct_general; + nskb->nfctinfo = ctinfo; + nf_conntrack_get(nskb->nfct); +} + +static inline int +do_iter(const struct nf_conntrack_tuple_hash *i, + int (*iter)(struct nf_conn *i, void *data), + void *data) +{ + return iter(nf_ct_tuplehash_to_ctrack(i), data); +} + +/* Bring out ya dead! */ +static struct nf_conntrack_tuple_hash * +get_next_corpse(int (*iter)(struct nf_conn *i, void *data), + void *data, unsigned int *bucket) +{ + struct nf_conntrack_tuple_hash *h = NULL; + + write_lock_bh(&nf_conntrack_lock); + for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, + struct nf_conntrack_tuple_hash *, iter, data); + if (h) + break; + } + if (!h) + h = LIST_FIND_W(&unconfirmed, do_iter, + struct nf_conntrack_tuple_hash *, iter, data); + if (h) + atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); + write_unlock_bh(&nf_conntrack_lock); + + return h; +} + +void +nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) +{ + struct nf_conntrack_tuple_hash *h; + unsigned int bucket = 0; + + while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + /* Time to push up daises... */ + if (del_timer(&ct->timeout)) + death_by_timeout((unsigned long)ct); + /* ... else the timer will get him soon. */ + + nf_ct_put(ct); + } +} + +static int kill_all(struct nf_conn *i, void *data) +{ + return 1; +} + +static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) +{ + if (vmalloced) + vfree(hash); + else + free_pages((unsigned long)hash, + get_order(sizeof(struct list_head) * size)); +} + +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. */ +void nf_conntrack_cleanup(void) +{ + int i; + + /* This makes sure all current packets have passed through + netfilter framework. Roll on, two-stage module + delete... */ + synchronize_net(); + + nf_ct_event_cache_flush(); + i_see_dead_people: + nf_ct_iterate_cleanup(kill_all, NULL); + if (atomic_read(&nf_conntrack_count) != 0) { + schedule(); + goto i_see_dead_people; + } + + for (i = 0; i < NF_CT_F_NUM; i++) { + if (nf_ct_cache[i].use == 0) + continue; + + NF_CT_ASSERT(nf_ct_cache[i].use == 1); + nf_ct_cache[i].use = 1; + nf_conntrack_unregister_cache(i); + } + kmem_cache_destroy(nf_conntrack_expect_cachep); + free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_conntrack_htable_size); +} + +static struct list_head *alloc_hashtable(int size, int *vmalloced) +{ + struct list_head *hash; + unsigned int i; + + *vmalloced = 0; + hash = (void*)__get_free_pages(GFP_KERNEL, + get_order(sizeof(struct list_head) + * size)); + if (!hash) { + *vmalloced = 1; + printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); + hash = vmalloc(sizeof(struct list_head) * size); + } + + if (hash) + for (i = 0; i < size; i++) + INIT_LIST_HEAD(&hash[i]); + + return hash; +} + +int set_hashsize(const char *val, struct kernel_param *kp) +{ + int i, bucket, hashsize, vmalloced; + int old_vmalloced, old_size; + int rnd; + struct list_head *hash, *old_hash; + struct nf_conntrack_tuple_hash *h; + + /* On boot, we can set this without any fancy locking. */ + if (!nf_conntrack_htable_size) + return param_set_uint(val, kp); + + hashsize = simple_strtol(val, NULL, 0); + if (!hashsize) + return -EINVAL; + + hash = alloc_hashtable(hashsize, &vmalloced); + if (!hash) + return -ENOMEM; + + /* We have to rehahs for the new table anyway, so we also can + * use a newrandom seed */ + get_random_bytes(&rnd, 4); + + write_lock_bh(&nf_conntrack_lock); + for (i = 0; i < nf_conntrack_htable_size; i++) { + while (!list_empty(&nf_conntrack_hash[i])) { + h = list_entry(nf_conntrack_hash[i].next, + struct nf_conntrack_tuple_hash, list); + list_del(&h->list); + bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + list_add_tail(&h->list, &hash[bucket]); + } + } + old_size = nf_conntrack_htable_size; + old_vmalloced = nf_conntrack_vmalloc; + old_hash = nf_conntrack_hash; + + nf_conntrack_htable_size = hashsize; + nf_conntrack_vmalloc = vmalloced; + nf_conntrack_hash = hash; + nf_conntrack_hash_rnd = rnd; + write_unlock_bh(&nf_conntrack_lock); + + free_conntrack_hash(old_hash, old_vmalloced, old_size); + return 0; +} + +module_param_call(hashsize, set_hashsize, param_get_uint, + &nf_conntrack_htable_size, 0600); + +int __init nf_conntrack_init(void) +{ + unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB + * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ + if (!nf_conntrack_htable_size) { + nf_conntrack_htable_size + = (((num_physpages << PAGE_SHIFT) / 16384) + / sizeof(struct list_head)); + if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) + nf_conntrack_htable_size = 8192; + if (nf_conntrack_htable_size < 16) + nf_conntrack_htable_size = 16; + } + nf_conntrack_max = 8 * nf_conntrack_htable_size; + + printk("nf_conntrack version %s (%u buckets, %d max)\n", + NF_CONNTRACK_VERSION, nf_conntrack_htable_size, + nf_conntrack_max); + + nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size, + &nf_conntrack_vmalloc); + if (!nf_conntrack_hash) { + printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); + goto err_out; + } + + ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic", + sizeof(struct nf_conn), NULL); + if (ret < 0) { + printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + goto err_free_hash; + } + + nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL, NULL); + if (!nf_conntrack_expect_cachep) { + printk(KERN_ERR "Unable to create nf_expect slab cache\n"); + goto err_free_conntrack_slab; + } + + /* Don't NEED lock here, but good form anyway. */ + write_lock_bh(&nf_conntrack_lock); + for (i = 0; i < PF_MAX; i++) + nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto; + write_unlock_bh(&nf_conntrack_lock); + + /* Set up fake conntrack: + - to never be deleted, not in any hashes */ + atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + /* - and look it like as a confirmed connection */ + set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); + + return ret; + +err_free_conntrack_slab: + nf_conntrack_unregister_cache(NF_CT_F_BASIC); +err_free_hash: + free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_conntrack_htable_size); +err_out: + return -ENOMEM; +} diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c new file mode 100644 index 00000000000..65080e269f2 --- /dev/null +++ b/net/netfilter/nf_conntrack_ftp.c @@ -0,0 +1,698 @@ +/* FTP extension for connection tracking. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - enable working with Layer 3 protocol independent connection tracking. + * - track EPRT and EPSV commands with IPv6 address. + * + * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/ctype.h> +#include <net/checksum.h> +#include <net/tcp.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_ftp.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); +MODULE_DESCRIPTION("ftp connection tracking helper"); + +/* This is slow, but it's simple. --RR */ +static char *ftp_buffer; + +static DEFINE_SPINLOCK(nf_ftp_lock); + +#define MAX_PORTS 8 +static u_int16_t ports[MAX_PORTS]; +static unsigned int ports_c; +module_param_array(ports, ushort, &ports_c, 0400); + +static int loose; +module_param(loose, int, 0600); + +unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + enum ip_ct_ftp_type type, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp, + u32 *seq); +EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); +static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); +static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, + char); + +static struct ftp_search { + enum ip_conntrack_dir dir; + const char *pattern; + size_t plen; + char skip; + char term; + enum ip_ct_ftp_type ftptype; + int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); +} search[] = { + { + IP_CT_DIR_ORIGINAL, + "PORT", sizeof("PORT") - 1, ' ', '\r', + IP_CT_FTP_PORT, + try_rfc959, + }, + { + IP_CT_DIR_REPLY, + "227 ", sizeof("227 ") - 1, '(', ')', + IP_CT_FTP_PASV, + try_rfc959, + }, + { + IP_CT_DIR_ORIGINAL, + "EPRT", sizeof("EPRT") - 1, ' ', '\r', + IP_CT_FTP_EPRT, + try_eprt, + }, + { + IP_CT_DIR_REPLY, + "229 ", sizeof("229 ") - 1, '(', ')', + IP_CT_FTP_EPSV, + try_epsv_response, + }, +}; + +/* This code is based on inet_pton() in glibc-2.2.4 */ +static int +get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term) +{ + static const char xdigits[] = "0123456789abcdef"; + u_int8_t tmp[16], *tp, *endp, *colonp; + int ch, saw_xdigit; + u_int32_t val; + size_t clen = 0; + + tp = memset(tmp, '\0', sizeof(tmp)); + endp = tp + sizeof(tmp); + colonp = NULL; + + /* Leading :: requires some special handling. */ + if (*src == ':'){ + if (*++src != ':') { + DEBUGP("invalid \":\" at the head of addr\n"); + return 0; + } + clen++; + } + + saw_xdigit = 0; + val = 0; + while ((clen < dlen) && (*src != term)) { + const char *pch; + + ch = tolower(*src++); + clen++; + + pch = strchr(xdigits, ch); + if (pch != NULL) { + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) + return 0; + + saw_xdigit = 1; + continue; + } + if (ch != ':') { + DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch); + return 0; + } + + if (!saw_xdigit) { + if (colonp) { + DEBUGP("invalid location of \"::\".\n"); + return 0; + } + colonp = tp; + continue; + } else if (*src == term) { + DEBUGP("trancated IPv6 addr\n"); + return 0; + } + + if (tp + 2 > endp) + return 0; + *tp++ = (u_int8_t) (val >> 8) & 0xff; + *tp++ = (u_int8_t) val & 0xff; + + saw_xdigit = 0; + val = 0; + continue; + } + if (saw_xdigit) { + if (tp + 2 > endp) + return 0; + *tp++ = (u_int8_t) (val >> 8) & 0xff; + *tp++ = (u_int8_t) val & 0xff; + } + if (colonp != NULL) { + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. + */ + const int n = tp - colonp; + int i; + + if (tp == endp) + return 0; + + for (i = 1; i <= n; i++) { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp || (*src != term)) + return 0; + + memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr)); + return clen; +} + +static int try_number(const char *data, size_t dlen, u_int32_t array[], + int array_size, char sep, char term) +{ + u_int32_t i, len; + + memset(array, 0, sizeof(array[0])*array_size); + + /* Keep data pointing at next char. */ + for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) { + if (*data >= '0' && *data <= '9') { + array[i] = array[i]*10 + *data - '0'; + } + else if (*data == sep) + i++; + else { + /* Unexpected character; true if it's the + terminator and we're finished. */ + if (*data == term && i == array_size - 1) + return len; + + DEBUGP("Char %u (got %u nums) `%u' unexpected\n", + len, i, *data); + return 0; + } + } + DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep); + + return 0; +} + +/* Returns 0, or length of numbers: 192,168,1,1,5,6 */ +static int try_rfc959(const char *data, size_t dlen, + struct nf_conntrack_man *cmd, char term) +{ + int length; + u_int32_t array[6]; + + length = try_number(data, dlen, array, 6, ',', term); + if (length == 0) + return 0; + + cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | + (array[2] << 8) | array[3]); + cmd->u.tcp.port = htons((array[4] << 8) | array[5]); + return length; +} + +/* Grab port: number up to delimiter */ +static int get_port(const char *data, int start, size_t dlen, char delim, + u_int16_t *port) +{ + u_int16_t tmp_port = 0; + int i; + + for (i = start; i < dlen; i++) { + /* Finished? */ + if (data[i] == delim) { + if (tmp_port == 0) + break; + *port = htons(tmp_port); + DEBUGP("get_port: return %d\n", tmp_port); + return i + 1; + } + else if (data[i] >= '0' && data[i] <= '9') + tmp_port = tmp_port*10 + data[i] - '0'; + else { /* Some other crap */ + DEBUGP("get_port: invalid char.\n"); + break; + } + } + return 0; +} + +/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ +static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, + char term) +{ + char delim; + int length; + + /* First character is delimiter, then "1" for IPv4 or "2" for IPv6, + then delimiter again. */ + if (dlen <= 3) { + DEBUGP("EPRT: too short\n"); + return 0; + } + delim = data[0]; + if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { + DEBUGP("try_eprt: invalid delimitter.\n"); + return 0; + } + + if ((cmd->l3num == PF_INET && data[1] != '1') || + (cmd->l3num == PF_INET6 && data[1] != '2')) { + DEBUGP("EPRT: invalid protocol number.\n"); + return 0; + } + + DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim); + + if (data[1] == '1') { + u_int32_t array[4]; + + /* Now we have IP address. */ + length = try_number(data + 3, dlen - 3, array, 4, '.', delim); + if (length != 0) + cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) + | (array[2] << 8) | array[3]); + } else { + /* Now we have IPv6 address. */ + length = get_ipv6_addr(data + 3, dlen - 3, + (struct in6_addr *)cmd->u3.ip6, delim); + } + + if (length == 0) + return 0; + DEBUGP("EPRT: Got IP address!\n"); + /* Start offset includes initial "|1|", and trailing delimiter */ + return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port); +} + +/* Returns 0, or length of numbers: |||6446| */ +static int try_epsv_response(const char *data, size_t dlen, + struct nf_conntrack_man *cmd, char term) +{ + char delim; + + /* Three delimiters. */ + if (dlen <= 3) return 0; + delim = data[0]; + if (isdigit(delim) || delim < 33 || delim > 126 + || data[1] != delim || data[2] != delim) + return 0; + + return get_port(data, 3, dlen, delim, &cmd->u.tcp.port); +} + +/* Return 1 for match, 0 for accept, -1 for partial. */ +static int find_pattern(const char *data, size_t dlen, + const char *pattern, size_t plen, + char skip, char term, + unsigned int *numoff, + unsigned int *numlen, + struct nf_conntrack_man *cmd, + int (*getnum)(const char *, size_t, + struct nf_conntrack_man *, char)) +{ + size_t i; + + DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen); + if (dlen == 0) + return 0; + + if (dlen <= plen) { + /* Short packet: try for partial? */ + if (strnicmp(data, pattern, dlen) == 0) + return -1; + else return 0; + } + + if (strnicmp(data, pattern, plen) != 0) { +#if 0 + size_t i; + + DEBUGP("ftp: string mismatch\n"); + for (i = 0; i < plen; i++) { + DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", + i, data[i], data[i], + pattern[i], pattern[i]); + } +#endif + return 0; + } + + DEBUGP("Pattern matches!\n"); + /* Now we've found the constant string, try to skip + to the 'skip' character */ + for (i = plen; data[i] != skip; i++) + if (i == dlen - 1) return -1; + + /* Skip over the last character */ + i++; + + DEBUGP("Skipped up to `%c'!\n", skip); + + *numoff = i; + *numlen = getnum(data + i, dlen - i, cmd, term); + if (!*numlen) + return -1; + + DEBUGP("Match succeeded!\n"); + return 1; +} + +/* Look up to see if we're just after a \n. */ +static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) +{ + unsigned int i; + + for (i = 0; i < info->seq_aft_nl_num[dir]; i++) + if (info->seq_aft_nl[dir][i] == seq) + return 1; + return 0; +} + +/* We don't update if it's older than what we have. */ +static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, + struct sk_buff *skb) +{ + unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; + + /* Look for oldest: if we find exact match, we're done. */ + for (i = 0; i < info->seq_aft_nl_num[dir]; i++) { + if (info->seq_aft_nl[dir][i] == nl_seq) + return; + + if (oldest == info->seq_aft_nl_num[dir] + || before(info->seq_aft_nl[dir][i], oldest)) + oldest = i; + } + + if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { + info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + } else if (oldest != NUM_SEQ_TO_REMEMBER) { + info->seq_aft_nl[dir][oldest] = nl_seq; + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + } +} + +static int help(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + struct tcphdr _tcph, *th; + char *fb_ptr; + int ret; + u32 seq; + int dir = CTINFO2DIR(ctinfo); + unsigned int matchlen, matchoff; + struct ip_ct_ftp_master *ct_ftp_info = &ct->help->ct_ftp_info; + struct nf_conntrack_expect *exp; + struct nf_conntrack_man cmd = {}; + + unsigned int i; + int found = 0, ends_in_nl; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); + return NF_ACCEPT; + } + + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return NF_ACCEPT; + + dataoff = protoff + th->doff * 4; + /* No data? */ + if (dataoff >= (*pskb)->len) { + DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, + (*pskb)->len); + return NF_ACCEPT; + } + datalen = (*pskb)->len - dataoff; + + spin_lock_bh(&nf_ftp_lock); + fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer); + BUG_ON(fb_ptr == NULL); + + ends_in_nl = (fb_ptr[datalen - 1] == '\n'); + seq = ntohl(th->seq) + datalen; + + /* Look up to see if we're just after a \n. */ + if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { + /* Now if this ends in \n, update ftp info. */ + DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", + ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", + ct_ftp_info->seq_aft_nl[dir][0], + ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)", + ct_ftp_info->seq_aft_nl[dir][1]); + ret = NF_ACCEPT; + goto out_update_nl; + } + + /* Initialize IP/IPv6 addr to expected address (it's not mentioned + in EPSV responses) */ + cmd.l3num = ct->tuplehash[dir].tuple.src.l3num; + memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, + sizeof(cmd.u3.all)); + + for (i = 0; i < ARRAY_SIZE(search); i++) { + if (search[i].dir != dir) continue; + + found = find_pattern(fb_ptr, datalen, + search[i].pattern, + search[i].plen, + search[i].skip, + search[i].term, + &matchoff, &matchlen, + &cmd, + search[i].getnum); + if (found) break; + } + if (found == -1) { + /* We don't usually drop packets. After all, this is + connection tracking, not packet filtering. + However, it is necessary for accurate tracking in + this case. */ + if (net_ratelimit()) + printk("conntrack_ftp: partial %s %u+%u\n", + search[i].pattern, + ntohl(th->seq), datalen); + ret = NF_DROP; + goto out; + } else if (found == 0) { /* No match */ + ret = NF_ACCEPT; + goto out_update_nl; + } + + DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", + (int)matchlen, fb_ptr + matchoff, + matchlen, ntohl(th->seq) + matchoff); + + exp = nf_conntrack_expect_alloc(ct); + if (exp == NULL) { + ret = NF_DROP; + goto out; + } + + /* We refer to the reverse direction ("!dir") tuples here, + * because we're expecting something in the other direction. + * Doesn't matter unless NAT is happening. */ + exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3; + + /* Update the ftp info */ + if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) && + memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, + sizeof(cmd.u3.all))) { + /* Enrico Scholz's passive FTP to partially RNAT'd ftp + server: it really wants us to connect to a + different IP address. Simply don't record it for + NAT. */ + if (cmd.l3num == PF_INET) { + DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n", + NIPQUAD(cmd.u3.ip), + NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); + } else { + DEBUGP("conntrack_ftp: NOT RECORDING: %x:%x:%x:%x:%x:%x:%x:%x != %x:%x:%x:%x:%x:%x:%x:%x\n", + NIP6(*((struct in6_addr *)cmd.u3.ip6)), + NIP6(*((struct in6_addr *)ct->tuplehash[dir] + .tuple.src.u3.ip6))); + } + + /* Thanks to Cristiano Lincoln Mattos + <lincoln@cesar.org.br> for reporting this potential + problem (DMZ machines opening holes to internal + networks, or the packet filter itself). */ + if (!loose) { + ret = NF_ACCEPT; + goto out_put_expect; + } + memcpy(&exp->tuple.dst.u3, &cmd.u3.all, + sizeof(exp->tuple.dst.u3)); + } + + exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3; + exp->tuple.src.l3num = cmd.l3num; + exp->tuple.src.u.tcp.port = 0; + exp->tuple.dst.u.tcp.port = cmd.u.tcp.port; + exp->tuple.dst.protonum = IPPROTO_TCP; + + exp->mask = (struct nf_conntrack_tuple) + { .src = { .l3num = 0xFFFF, + .u = { .tcp = { 0 }}, + }, + .dst = { .protonum = 0xFF, + .u = { .tcp = { 0xFFFF }}, + }, + }; + if (cmd.l3num == PF_INET) { + exp->mask.src.u3.ip = 0xFFFFFFFF; + exp->mask.dst.u3.ip = 0xFFFFFFFF; + } else { + memset(exp->mask.src.u3.ip6, 0xFF, + sizeof(exp->mask.src.u3.ip6)); + memset(exp->mask.dst.u3.ip6, 0xFF, + sizeof(exp->mask.src.u3.ip6)); + } + + exp->expectfn = NULL; + exp->flags = 0; + + /* Now, NAT might want to mangle the packet, and register the + * (possibly changed) expectation itself. */ + if (nf_nat_ftp_hook) + ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, + matchoff, matchlen, exp, &seq); + else { + /* Can't expect this? Best to drop packet now. */ + if (nf_conntrack_expect_related(exp) != 0) + ret = NF_DROP; + else + ret = NF_ACCEPT; + } + +out_put_expect: + nf_conntrack_expect_put(exp); + +out_update_nl: + /* Now if this ends in \n, update ftp info. Seq may have been + * adjusted by NAT code. */ + if (ends_in_nl) + update_nl_seq(seq, ct_ftp_info, dir, *pskb); + out: + spin_unlock_bh(&nf_ftp_lock); + return ret; +} + +static struct nf_conntrack_helper ftp[MAX_PORTS][2]; +static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")]; + +/* don't make this __exit, since it's called from __init ! */ +static void fini(void) +{ + int i, j; + for (i = 0; i < ports_c; i++) { + for (j = 0; j < 2; j++) { + if (ftp[i][j].me == NULL) + continue; + + DEBUGP("nf_ct_ftp: unregistering helper for pf: %d " + "port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); + nf_conntrack_helper_unregister(&ftp[i][j]); + } + } + + kfree(ftp_buffer); +} + +static int __init init(void) +{ + int i, j = -1, ret = 0; + char *tmpname; + + ftp_buffer = kmalloc(65536, GFP_KERNEL); + if (!ftp_buffer) + return -ENOMEM; + + if (ports_c == 0) + ports[ports_c++] = FTP_PORT; + + /* FIXME should be configurable whether IPv4 and IPv6 FTP connections + are tracked or not - YK */ + for (i = 0; i < ports_c; i++) { + memset(&ftp[i], 0, sizeof(struct nf_conntrack_helper)); + + ftp[i][0].tuple.src.l3num = PF_INET; + ftp[i][1].tuple.src.l3num = PF_INET6; + for (j = 0; j < 2; j++) { + ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); + ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; + ftp[i][j].mask.src.u.tcp.port = 0xFFFF; + ftp[i][j].mask.dst.protonum = 0xFF; + ftp[i][j].max_expected = 1; + ftp[i][j].timeout = 5 * 60; /* 5 Minutes */ + ftp[i][j].me = THIS_MODULE; + ftp[i][j].help = help; + tmpname = &ftp_names[i][j][0]; + if (ports[i] == FTP_PORT) + sprintf(tmpname, "ftp"); + else + sprintf(tmpname, "ftp-%d", ports[i]); + ftp[i][j].name = tmpname; + + DEBUGP("nf_ct_ftp: registering helper for pf: %d " + "port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); + ret = nf_conntrack_helper_register(&ftp[i][j]); + if (ret) { + printk("nf_ct_ftp: failed to register helper " + " for pf: %d port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); + fini(); + return ret; + } + } + } + + return 0; +} + +module_init(init); +module_exit(fini); diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c new file mode 100644 index 00000000000..7de4f06c63c --- /dev/null +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -0,0 +1,98 @@ +/* + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * + * Based largely upon the original ip_conntrack code which + * had the following copyright information: + * + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Author: + * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/icmp.h> +#include <linux/sysctl.h> +#include <net/ip.h> + +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); + +static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple) +{ + memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); + memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); + + return 1; +} + +static int generic_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); + memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); + + return 1; +} + +static int generic_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return 0; +} + +static int generic_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +static int +generic_prepare(struct sk_buff **pskb, unsigned int hooknum, + unsigned int *dataoff, u_int8_t *protonum) +{ + /* Never track !!! */ + return -NF_ACCEPT; +} + + +static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple) + +{ + return NF_CT_F_BASIC; +} + +struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = { + .l3proto = PF_UNSPEC, + .name = "unknown", + .pkt_to_tuple = generic_pkt_to_tuple, + .invert_tuple = generic_invert_tuple, + .print_tuple = generic_print_tuple, + .print_conntrack = generic_print_conntrack, + .prepare = generic_prepare, + .get_features = generic_get_features, + .me = THIS_MODULE, +}; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c new file mode 100644 index 00000000000..36425f6c833 --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -0,0 +1,85 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - enable working with L3 protocol independent connection tracking. + * + * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack_protocol.h> + +unsigned long nf_ct_generic_timeout = 600*HZ; + +static int generic_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +static int generic_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int generic_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return 0; +} + +/* Print out the private part of the conntrack. */ +static int generic_print_conntrack(struct seq_file *s, + const struct nf_conn *state) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_generic_timeout); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int new(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + return 1; +} + +struct nf_conntrack_protocol nf_conntrack_generic_protocol = +{ + .l3proto = PF_UNSPEC, + .proto = 0, + .name = "unknown", + .pkt_to_tuple = generic_pkt_to_tuple, + .invert_tuple = generic_invert_tuple, + .print_tuple = generic_print_tuple, + .print_conntrack = generic_print_conntrack, + .packet = packet, + .new = new, +}; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c new file mode 100644 index 00000000000..3a600f77b4e --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -0,0 +1,670 @@ +/* + * Connection tracking protocol helper module for SCTP. + * + * SCTP is defined in RFC 2960. References to various sections in this code + * are to this RFC. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - enable working with L3 protocol independent connection tracking. + * + * Derived from net/ipv4/ip_conntrack_sctp.c + */ + +/* + * Added support for proc manipulation of timeouts. + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/sctp.h> +#include <linux/string.h> +#include <linux/seq_file.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_protocol.h> + +#if 0 +#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__) +#else +#define DEBUGP(format, args...) +#endif + +/* Protects conntrack->proto.sctp */ +static DEFINE_RWLOCK(sctp_lock); + +/* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. --RR + + And so for me for SCTP :D -Kiran */ + +static const char *sctp_conntrack_names[] = { + "NONE", + "CLOSED", + "COOKIE_WAIT", + "COOKIE_ECHOED", + "ESTABLISHED", + "SHUTDOWN_SENT", + "SHUTDOWN_RECD", + "SHUTDOWN_ACK_SENT", +}; + +#define SECS * HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + +static unsigned long nf_ct_sctp_timeout_closed = 10 SECS; +static unsigned long nf_ct_sctp_timeout_cookie_wait = 3 SECS; +static unsigned long nf_ct_sctp_timeout_cookie_echoed = 3 SECS; +static unsigned long nf_ct_sctp_timeout_established = 5 DAYS; +static unsigned long nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; +static unsigned long nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; +static unsigned long nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; + +static unsigned long * sctp_timeouts[] += { NULL, /* SCTP_CONNTRACK_NONE */ + &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ + &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ + &nf_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */ + &nf_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */ + &nf_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */ + &nf_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */ + &nf_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */ + }; + +#define sNO SCTP_CONNTRACK_NONE +#define sCL SCTP_CONNTRACK_CLOSED +#define sCW SCTP_CONNTRACK_COOKIE_WAIT +#define sCE SCTP_CONNTRACK_COOKIE_ECHOED +#define sES SCTP_CONNTRACK_ESTABLISHED +#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT +#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD +#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT +#define sIV SCTP_CONNTRACK_MAX + +/* + These are the descriptions of the states: + +NOTE: These state names are tantalizingly similar to the states of an +SCTP endpoint. But the interpretation of the states is a little different, +considering that these are the states of the connection and not of an end +point. Please note the subtleties. -Kiran + +NONE - Nothing so far. +COOKIE WAIT - We have seen an INIT chunk in the original direction, or also + an INIT_ACK chunk in the reply direction. +COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. +ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. +SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. +SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin. +SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite + to that of the SHUTDOWN chunk. +CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of + the SHUTDOWN chunk. Connection is closed. +*/ + +/* TODO + - I have assumed that the first INIT is in the original direction. + This messes things when an INIT comes in the reply direction in CLOSED + state. + - Check the error type in the reply dir before transitioning from +cookie echoed to closed. + - Sec 5.2.4 of RFC 2960 + - Multi Homing support. +*/ + +/* SCTP conntrack state transitions */ +static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { + { +/* ORIGINAL */ +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ +/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} + }, + { +/* REPLY */ +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} + } +}; + +static int sctp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + sctp_sctphdr_t _hdr, *hp; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + /* Actually only need first 8 bytes. */ + hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + if (hp == NULL) + return 0; + + tuple->src.u.sctp.port = hp->source; + tuple->dst.u.sctp.port = hp->dest; + return 1; +} + +static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + tuple->src.u.sctp.port = orig->dst.u.sctp.port; + tuple->dst.u.sctp.port = orig->src.u.sctp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int sctp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.sctp.port), + ntohs(tuple->dst.u.sctp.port)); +} + +/* Print out the private part of the conntrack. */ +static int sctp_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + enum sctp_conntrack state; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + read_lock_bh(&sctp_lock); + state = conntrack->proto.sctp.state; + read_unlock_bh(&sctp_lock); + + return seq_printf(s, "%s ", sctp_conntrack_names[state]); +} + +#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ +for (offset = dataoff + sizeof(sctp_sctphdr_t), count = 0; \ + offset < skb->len && \ + (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ + offset += (htons(sch->length) + 3) & ~3, count++) + +/* Some validity checks to make sure the chunks are fine */ +static int do_basic_checks(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + char *map) +{ + u_int32_t offset, count; + sctp_chunkhdr_t _sch, *sch; + int flag; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + flag = 0; + + for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { + DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type); + + if (sch->type == SCTP_CID_INIT + || sch->type == SCTP_CID_INIT_ACK + || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { + flag = 1; + } + + /* Cookie Ack/Echo chunks not the first OR + Init / Init Ack / Shutdown compl chunks not the only chunks */ + if ((sch->type == SCTP_CID_COOKIE_ACK + || sch->type == SCTP_CID_COOKIE_ECHO + || flag) + && count !=0 ) { + DEBUGP("Basic checks failed\n"); + return 1; + } + + if (map) { + set_bit(sch->type, (void *)map); + } + } + + DEBUGP("Basic checks passed\n"); + return 0; +} + +static int new_state(enum ip_conntrack_dir dir, + enum sctp_conntrack cur_state, + int chunk_type) +{ + int i; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + DEBUGP("Chunk type: %d\n", chunk_type); + + switch (chunk_type) { + case SCTP_CID_INIT: + DEBUGP("SCTP_CID_INIT\n"); + i = 0; break; + case SCTP_CID_INIT_ACK: + DEBUGP("SCTP_CID_INIT_ACK\n"); + i = 1; break; + case SCTP_CID_ABORT: + DEBUGP("SCTP_CID_ABORT\n"); + i = 2; break; + case SCTP_CID_SHUTDOWN: + DEBUGP("SCTP_CID_SHUTDOWN\n"); + i = 3; break; + case SCTP_CID_SHUTDOWN_ACK: + DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); + i = 4; break; + case SCTP_CID_ERROR: + DEBUGP("SCTP_CID_ERROR\n"); + i = 5; break; + case SCTP_CID_COOKIE_ECHO: + DEBUGP("SCTP_CID_COOKIE_ECHO\n"); + i = 6; break; + case SCTP_CID_COOKIE_ACK: + DEBUGP("SCTP_CID_COOKIE_ACK\n"); + i = 7; break; + case SCTP_CID_SHUTDOWN_COMPLETE: + DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); + i = 8; break; + default: + /* Other chunks like DATA, SACK, HEARTBEAT and + its ACK do not cause a change in state */ + DEBUGP("Unknown chunk type, Will stay in %s\n", + sctp_conntrack_names[cur_state]); + return cur_state; + } + + DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", + dir, sctp_conntrack_names[cur_state], chunk_type, + sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); + + return sctp_conntracks[dir][i][cur_state]; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int sctp_packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + enum sctp_conntrack newconntrack, oldsctpstate; + sctp_sctphdr_t _sctph, *sh; + sctp_chunkhdr_t _sch, *sch; + u_int32_t offset, count; + char map[256 / sizeof (char)] = {0}; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); + if (sh == NULL) + return -1; + + if (do_basic_checks(conntrack, skb, dataoff, map) != 0) + return -1; + + /* Check the verification tag (Sec 8.5) */ + if (!test_bit(SCTP_CID_INIT, (void *)map) + && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) + && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) + && !test_bit(SCTP_CID_ABORT, (void *)map) + && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) + && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { + DEBUGP("Verification tag check failed\n"); + return -1; + } + + oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; + for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { + write_lock_bh(&sctp_lock); + + /* Special cases of Verification tag check (Sec 8.5.1) */ + if (sch->type == SCTP_CID_INIT) { + /* Sec 8.5.1 (A) */ + if (sh->vtag != 0) { + write_unlock_bh(&sctp_lock); + return -1; + } + } else if (sch->type == SCTP_CID_ABORT) { + /* Sec 8.5.1 (B) */ + if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) + && !(sh->vtag == conntrack->proto.sctp.vtag + [1 - CTINFO2DIR(ctinfo)])) { + write_unlock_bh(&sctp_lock); + return -1; + } + } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { + /* Sec 8.5.1 (C) */ + if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) + && !(sh->vtag == conntrack->proto.sctp.vtag + [1 - CTINFO2DIR(ctinfo)] + && (sch->flags & 1))) { + write_unlock_bh(&sctp_lock); + return -1; + } + } else if (sch->type == SCTP_CID_COOKIE_ECHO) { + /* Sec 8.5.1 (D) */ + if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { + write_unlock_bh(&sctp_lock); + return -1; + } + } + + oldsctpstate = conntrack->proto.sctp.state; + newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type); + + /* Invalid */ + if (newconntrack == SCTP_CONNTRACK_MAX) { + DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", + CTINFO2DIR(ctinfo), sch->type, oldsctpstate); + write_unlock_bh(&sctp_lock); + return -1; + } + + /* If it is an INIT or an INIT ACK note down the vtag */ + if (sch->type == SCTP_CID_INIT + || sch->type == SCTP_CID_INIT_ACK) { + sctp_inithdr_t _inithdr, *ih; + + ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), + sizeof(_inithdr), &_inithdr); + if (ih == NULL) { + write_unlock_bh(&sctp_lock); + return -1; + } + DEBUGP("Setting vtag %x for dir %d\n", + ih->init_tag, !CTINFO2DIR(ctinfo)); + conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag; + } + + conntrack->proto.sctp.state = newconntrack; + if (oldsctpstate != newconntrack) + nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + write_unlock_bh(&sctp_lock); + } + + nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]); + + if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED + && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY + && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { + DEBUGP("Setting assured bit\n"); + set_bit(IPS_ASSURED_BIT, &conntrack->status); + nf_conntrack_event_cache(IPCT_STATUS, skb); + } + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + enum sctp_conntrack newconntrack; + sctp_sctphdr_t _sctph, *sh; + sctp_chunkhdr_t _sch, *sch; + u_int32_t offset, count; + char map[256 / sizeof (char)] = {0}; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); + if (sh == NULL) + return 0; + + if (do_basic_checks(conntrack, skb, dataoff, map) != 0) + return 0; + + /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ + if ((test_bit (SCTP_CID_ABORT, (void *)map)) + || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)) + || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) { + return 0; + } + + newconntrack = SCTP_CONNTRACK_MAX; + for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { + /* Don't need lock here: this conntrack not in circulation yet */ + newconntrack = new_state(IP_CT_DIR_ORIGINAL, + SCTP_CONNTRACK_NONE, sch->type); + + /* Invalid: delete conntrack */ + if (newconntrack == SCTP_CONNTRACK_MAX) { + DEBUGP("nf_conntrack_sctp: invalid new deleting.\n"); + return 0; + } + + /* Copy the vtag into the state info */ + if (sch->type == SCTP_CID_INIT) { + if (sh->vtag == 0) { + sctp_inithdr_t _inithdr, *ih; + + ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), + sizeof(_inithdr), &_inithdr); + if (ih == NULL) + return 0; + + DEBUGP("Setting vtag %x for new conn\n", + ih->init_tag); + + conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = + ih->init_tag; + } else { + /* Sec 8.5.1 (A) */ + return 0; + } + } + /* If it is a shutdown ack OOTB packet, we expect a return + shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ + else { + DEBUGP("Setting vtag %x for new conn OOTB\n", + sh->vtag); + conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; + } + + conntrack->proto.sctp.state = newconntrack; + } + + return 1; +} + +struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = { + .l3proto = PF_INET, + .proto = IPPROTO_SCTP, + .name = "sctp", + .pkt_to_tuple = sctp_pkt_to_tuple, + .invert_tuple = sctp_invert_tuple, + .print_tuple = sctp_print_tuple, + .print_conntrack = sctp_print_conntrack, + .packet = sctp_packet, + .new = sctp_new, + .destroy = NULL, + .me = THIS_MODULE +}; + +struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = { + .l3proto = PF_INET6, + .proto = IPPROTO_SCTP, + .name = "sctp", + .pkt_to_tuple = sctp_pkt_to_tuple, + .invert_tuple = sctp_invert_tuple, + .print_tuple = sctp_print_tuple, + .print_conntrack = sctp_print_conntrack, + .packet = sctp_packet, + .new = sctp_new, + .destroy = NULL, + .me = THIS_MODULE +}; + +#ifdef CONFIG_SYSCTL +static ctl_table nf_ct_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, + .procname = "nf_conntrack_sctp_timeout_closed", + .data = &nf_ct_sctp_timeout_closed, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, + .procname = "nf_conntrack_sctp_timeout_cookie_wait", + .data = &nf_ct_sctp_timeout_cookie_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, + .procname = "nf_conntrack_sctp_timeout_cookie_echoed", + .data = &nf_ct_sctp_timeout_cookie_echoed, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, + .procname = "nf_conntrack_sctp_timeout_established", + .data = &nf_ct_sctp_timeout_established, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, + .procname = "nf_conntrack_sctp_timeout_shutdown_sent", + .data = &nf_ct_sctp_timeout_shutdown_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, + .procname = "nf_conntrack_sctp_timeout_shutdown_recd", + .data = &nf_ct_sctp_timeout_shutdown_recd, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, + .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", + .data = &nf_ct_sctp_timeout_shutdown_ack_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_netfilter_table[] = { + { + .ctl_name = NET_NETFILTER, + .procname = "netfilter", + .mode = 0555, + .child = nf_ct_sysctl_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = nf_ct_netfilter_table, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *nf_ct_sysctl_header; +#endif + +int __init init(void) +{ + int ret; + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4); + if (ret) { + printk("nf_conntrack_proto_sctp4: protocol register failed\n"); + goto out; + } + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6); + if (ret) { + printk("nf_conntrack_proto_sctp6: protocol register failed\n"); + goto cleanup_sctp4; + } + +#ifdef CONFIG_SYSCTL + nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); + if (nf_ct_sysctl_header == NULL) { + printk("nf_conntrack_proto_sctp: can't register to sysctl.\n"); + goto cleanup; + } +#endif + + return ret; + +#ifdef CONFIG_SYSCTL + cleanup: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); +#endif + cleanup_sctp4: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); + out: + DEBUGP("SCTP conntrack module loading %s\n", + ret ? "failed": "succeeded"); + return ret; +} + +void __exit fini(void) +{ + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(nf_ct_sysctl_header); +#endif + DEBUGP("SCTP conntrack module unloaded\n"); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Kiran Kumar Immidi"); +MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c new file mode 100644 index 00000000000..83d90dd624f --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -0,0 +1,1162 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>: + * - Real stateful connection tracking + * - Modified state transitions table + * - Window scaling support added + * - SACK support added + * + * Willy Tarreau: + * - State table bugfixes + * - More robust state changes + * - Tuning timer parameters + * + * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - genelized Layer 3 protocol part. + * + * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c + * + * version 2.2 + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/ipv6.h> +#include <net/ip6_checksum.h> + +#include <net/tcp.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_protocol.h> + +#if 0 +#define DEBUGP printk +#define DEBUGP_VARS +#else +#define DEBUGP(format, args...) +#endif + +/* Protects conntrack->proto.tcp */ +static DEFINE_RWLOCK(tcp_lock); + +/* "Be conservative in what you do, + be liberal in what you accept from others." + If it's non-zero, we mark only out of window RST segments as INVALID. */ +int nf_ct_tcp_be_liberal = 0; + +/* When connection is picked up from the middle, how many packets are required + to pass in each direction when we assume we are in sync - if any side uses + window scaling, we lost the game. + If it is set to zero, we disable picking up already established + connections. */ +int nf_ct_tcp_loose = 3; + +/* Max number of the retransmitted packets without receiving an (acceptable) + ACK from the destination. If this number is reached, a shorter timer + will be started. */ +int nf_ct_tcp_max_retrans = 3; + + /* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. --RR */ + +static const char *tcp_conntrack_names[] = { + "NONE", + "SYN_SENT", + "SYN_RECV", + "ESTABLISHED", + "FIN_WAIT", + "CLOSE_WAIT", + "LAST_ACK", + "TIME_WAIT", + "CLOSE", + "LISTEN" +}; + +#define SECS * HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + +unsigned long nf_ct_tcp_timeout_syn_sent = 2 MINS; +unsigned long nf_ct_tcp_timeout_syn_recv = 60 SECS; +unsigned long nf_ct_tcp_timeout_established = 5 DAYS; +unsigned long nf_ct_tcp_timeout_fin_wait = 2 MINS; +unsigned long nf_ct_tcp_timeout_close_wait = 60 SECS; +unsigned long nf_ct_tcp_timeout_last_ack = 30 SECS; +unsigned long nf_ct_tcp_timeout_time_wait = 2 MINS; +unsigned long nf_ct_tcp_timeout_close = 10 SECS; + +/* RFC1122 says the R2 limit should be at least 100 seconds. + Linux uses 15 packets as limit, which corresponds + to ~13-30min depending on RTO. */ +unsigned long nf_ct_tcp_timeout_max_retrans = 5 MINS; + +static unsigned long * tcp_timeouts[] += { NULL, /* TCP_CONNTRACK_NONE */ + &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ + &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ + &nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ + &nf_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */ + &nf_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */ + &nf_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */ + &nf_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ + &nf_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ + NULL, /* TCP_CONNTRACK_LISTEN */ + }; + +#define sNO TCP_CONNTRACK_NONE +#define sSS TCP_CONNTRACK_SYN_SENT +#define sSR TCP_CONNTRACK_SYN_RECV +#define sES TCP_CONNTRACK_ESTABLISHED +#define sFW TCP_CONNTRACK_FIN_WAIT +#define sCW TCP_CONNTRACK_CLOSE_WAIT +#define sLA TCP_CONNTRACK_LAST_ACK +#define sTW TCP_CONNTRACK_TIME_WAIT +#define sCL TCP_CONNTRACK_CLOSE +#define sLI TCP_CONNTRACK_LISTEN +#define sIV TCP_CONNTRACK_MAX +#define sIG TCP_CONNTRACK_IGNORE + +/* What TCP flags are set from RST/SYN/FIN/ACK. */ +enum tcp_bit_set { + TCP_SYN_SET, + TCP_SYNACK_SET, + TCP_FIN_SET, + TCP_ACK_SET, + TCP_RST_SET, + TCP_NONE_SET, +}; + +/* + * The TCP state transition table needs a few words... + * + * We are the man in the middle. All the packets go through us + * but might get lost in transit to the destination. + * It is assumed that the destinations can't receive segments + * we haven't seen. + * + * The checked segment is in window, but our windows are *not* + * equivalent with the ones of the sender/receiver. We always + * try to guess the state of the current sender. + * + * The meaning of the states are: + * + * NONE: initial state + * SYN_SENT: SYN-only packet seen + * SYN_RECV: SYN-ACK packet seen + * ESTABLISHED: ACK packet seen + * FIN_WAIT: FIN packet seen + * CLOSE_WAIT: ACK seen (after FIN) + * LAST_ACK: FIN seen (after FIN) + * TIME_WAIT: last ACK seen + * CLOSE: closed connection + * + * LISTEN state is not used. + * + * Packets marked as IGNORED (sIG): + * if they may be either invalid or valid + * and the receiver may send back a connection + * closing RST or a SYN/ACK. + * + * Packets marked as INVALID (sIV): + * if they are invalid + * or we do not support the request (simultaneous open) + */ +static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { + { +/* ORIGINAL */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, +/* + * sNO -> sSS Initialize a new connection + * sSS -> sSS Retransmitted SYN + * sSR -> sIG Late retransmitted SYN? + * sES -> sIG Error: SYNs in window outside the SYN_SENT state + * are errors. Receiver will reply with RST + * and close the connection. + * Or we are not in sync and hold a dead connection. + * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sSS Reopened connection (RFC 1122). + * sCL -> sSS + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* + * A SYN/ACK from the client is always invalid: + * - either it tries to set up a simultaneous open, which is + * not supported; + * - or the firewall has just been inserted between the two hosts + * during the session set-up. The SYN will be retransmitted + * by the true client (or it'll time out). + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, +/* + * sNO -> sIV Too late and no reason to do anything... + * sSS -> sIV Client migth not send FIN in this state: + * we enforce waiting for a SYN/ACK reply first. + * sSR -> sFW Close started. + * sES -> sFW + * sFW -> sLA FIN seen in both directions, waiting for + * the last ACK. + * Migth be a retransmitted FIN as well... + * sCW -> sLA + * sLA -> sLA Retransmitted FIN. Remain in the same state. + * sTW -> sTW + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* + * sNO -> sES Assumed. + * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. + * sSR -> sES Established state is reached. + * sES -> sES :-) + * sFW -> sCW Normal close request answered by ACK. + * sCW -> sCW + * sLA -> sTW Last ACK detected. + * sTW -> sTW Retransmitted last ACK. Remain in the same state. + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + }, + { +/* REPLY */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* + * sNO -> sIV Never reached. + * sSS -> sIV Simultaneous open, not supported + * sSR -> sIV Simultaneous open, not supported. + * sES -> sIV Server may not initiate a connection. + * sFW -> sIV + * sCW -> sIV + * sLA -> sIV + * sTW -> sIV Reopened connection, but server may not do it. + * sCL -> sIV + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, +/* + * sSS -> sSR Standard open. + * sSR -> sSR Retransmitted SYN/ACK. + * sES -> sIG Late retransmitted SYN/ACK? + * sFW -> sIG Might be SYN/ACK answering ignored SYN + * sCW -> sIG + * sLA -> sIG + * sTW -> sIG + * sCL -> sIG + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, +/* + * sSS -> sIV Server might not send FIN in this state. + * sSR -> sFW Close started. + * sES -> sFW + * sFW -> sLA FIN seen in both directions. + * sCW -> sLA + * sLA -> sLA Retransmitted FIN. + * sTW -> sTW + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* + * sSS -> sIV Might be a half-open connection. + * sSR -> sSR Might answer late resent SYN. + * sES -> sES :-) + * sFW -> sCW Normal close request answered by ACK. + * sCW -> sCW + * sLA -> sTW Last ACK detected. + * sTW -> sTW Retransmitted last ACK. + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + } +}; + +static int tcp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct tcphdr _hdr, *hp; + + /* Actually only need first 8 bytes. */ + hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + if (hp == NULL) + return 0; + + tuple->src.u.tcp.port = hp->source; + tuple->dst.u.tcp.port = hp->dest; + + return 1; +} + +static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u.tcp.port = orig->dst.u.tcp.port; + tuple->dst.u.tcp.port = orig->src.u.tcp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int tcp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.tcp.port), + ntohs(tuple->dst.u.tcp.port)); +} + +/* Print out the private part of the conntrack. */ +static int tcp_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + enum tcp_conntrack state; + + read_lock_bh(&tcp_lock); + state = conntrack->proto.tcp.state; + read_unlock_bh(&tcp_lock); + + return seq_printf(s, "%s ", tcp_conntrack_names[state]); +} + +static unsigned int get_conntrack_index(const struct tcphdr *tcph) +{ + if (tcph->rst) return TCP_RST_SET; + else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); + else if (tcph->fin) return TCP_FIN_SET; + else if (tcph->ack) return TCP_ACK_SET; + else return TCP_NONE_SET; +} + +/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering + in IP Filter' by Guido van Rooij. + + http://www.nluug.nl/events/sane2000/papers.html + http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz + + The boundaries and the conditions are changed according to RFC793: + the packet must intersect the window (i.e. segments may be + after the right or before the left edge) and thus receivers may ACK + segments after the right edge of the window. + + td_maxend = max(sack + max(win,1)) seen in reply packets + td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets + td_maxwin += seq + len - sender.td_maxend + if seq + len > sender.td_maxend + td_end = max(seq + len) seen in sent packets + + I. Upper bound for valid data: seq <= sender.td_maxend + II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin + III. Upper bound for valid ack: sack <= receiver.td_end + IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW + + where sack is the highest right edge of sack block found in the packet. + + The upper bound limit for a valid ack is not ignored - + we doesn't have to deal with fragments. +*/ + +static inline __u32 segment_seq_plus_len(__u32 seq, + size_t len, + unsigned int dataoff, + struct tcphdr *tcph) +{ + /* XXX Should I use payload length field in IP/IPv6 header ? + * - YK */ + return (seq + len - dataoff - tcph->doff*4 + + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); +} + +/* Fixme: what about big packets? */ +#define MAXACKWINCONST 66000 +#define MAXACKWINDOW(sender) \ + ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \ + : MAXACKWINCONST) + +/* + * Simplified tcp_parse_options routine from tcp_input.c + */ +static void tcp_options(const struct sk_buff *skb, + unsigned int dataoff, + struct tcphdr *tcph, + struct ip_ct_tcp_state *state) +{ + unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; + unsigned char *ptr; + int length = (tcph->doff*4) - sizeof(struct tcphdr); + + if (!length) + return; + + ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), + length, buff); + BUG_ON(ptr == NULL); + + state->td_scale = + state->flags = 0; + + while (length > 0) { + int opcode=*ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize=*ptr++; + if (opsize < 2) /* "silly options" */ + return; + if (opsize > length) + break; /* don't parse partial options */ + + if (opcode == TCPOPT_SACK_PERM + && opsize == TCPOLEN_SACK_PERM) + state->flags |= IP_CT_TCP_FLAG_SACK_PERM; + else if (opcode == TCPOPT_WINDOW + && opsize == TCPOLEN_WINDOW) { + state->td_scale = *(u_int8_t *)ptr; + + if (state->td_scale > 14) { + /* See RFC1323 */ + state->td_scale = 14; + } + state->flags |= + IP_CT_TCP_FLAG_WINDOW_SCALE; + } + ptr += opsize - 2; + length -= opsize; + } + } +} + +static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, + struct tcphdr *tcph, __u32 *sack) +{ + unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; + unsigned char *ptr; + int length = (tcph->doff*4) - sizeof(struct tcphdr); + __u32 tmp; + + if (!length) + return; + + ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), + length, buff); + BUG_ON(ptr == NULL); + + /* Fast path for timestamp-only option */ + if (length == TCPOLEN_TSTAMP_ALIGNED*4 + && *(__u32 *)ptr == + __constant_ntohl((TCPOPT_NOP << 24) + | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) + | TCPOLEN_TIMESTAMP)) + return; + + while (length > 0) { + int opcode = *ptr++; + int opsize, i; + + switch (opcode) { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2) /* "silly options" */ + return; + if (opsize > length) + break; /* don't parse partial options */ + + if (opcode == TCPOPT_SACK + && opsize >= (TCPOLEN_SACK_BASE + + TCPOLEN_SACK_PERBLOCK) + && !((opsize - TCPOLEN_SACK_BASE) + % TCPOLEN_SACK_PERBLOCK)) { + for (i = 0; + i < (opsize - TCPOLEN_SACK_BASE); + i += TCPOLEN_SACK_PERBLOCK) { + memcpy(&tmp, (__u32 *)(ptr + i) + 1, + sizeof(__u32)); + tmp = ntohl(tmp); + + if (after(tmp, *sack)) + *sack = tmp; + } + return; + } + ptr += opsize - 2; + length -= opsize; + } + } +} + +static int tcp_in_window(struct ip_ct_tcp *state, + enum ip_conntrack_dir dir, + unsigned int index, + const struct sk_buff *skb, + unsigned int dataoff, + struct tcphdr *tcph, + int pf) +{ + struct ip_ct_tcp_state *sender = &state->seen[dir]; + struct ip_ct_tcp_state *receiver = &state->seen[!dir]; + __u32 seq, ack, sack, end, win, swin; + int res; + + /* + * Get the required data from the packet. + */ + seq = ntohl(tcph->seq); + ack = sack = ntohl(tcph->ack_seq); + win = ntohs(tcph->window); + end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); + + if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) + tcp_sack(skb, dataoff, tcph, &sack); + + DEBUGP("tcp_in_window: START\n"); + DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "seq=%u ack=%u sack=%u win=%u end=%u\n", + NIPQUAD(iph->saddr), ntohs(tcph->source), + NIPQUAD(iph->daddr), ntohs(tcph->dest), + seq, ack, sack, win, end); + DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + if (sender->td_end == 0) { + /* + * Initialize sender data. + */ + if (tcph->syn && tcph->ack) { + /* + * Outgoing SYN-ACK in reply to a SYN. + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(skb, dataoff, tcph, sender); + /* + * RFC 1323: + * Both sides must send the Window Scale option + * to enable window scaling in either direction. + */ + if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE + && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) + sender->td_scale = + receiver->td_scale = 0; + } else { + /* + * We are in the middle of a connection, + * its history is lost for us. + * Let's try to use the data from the packet. + */ + sender->td_end = end; + sender->td_maxwin = (win == 0 ? 1 : win); + sender->td_maxend = end + sender->td_maxwin; + } + } else if (((state->state == TCP_CONNTRACK_SYN_SENT + && dir == IP_CT_DIR_ORIGINAL) + || (state->state == TCP_CONNTRACK_SYN_RECV + && dir == IP_CT_DIR_REPLY)) + && after(end, sender->td_end)) { + /* + * RFC 793: "if a TCP is reinitialized ... then it need + * not wait at all; it must only be sure to use sequence + * numbers larger than those recently used." + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(skb, dataoff, tcph, sender); + } + + if (!(tcph->ack)) { + /* + * If there is no ACK, just pretend it was set and OK. + */ + ack = sack = receiver->td_end; + } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == + (TCP_FLAG_ACK|TCP_FLAG_RST)) + && (ack == 0)) { + /* + * Broken TCP stacks, that set ACK in RST packets as well + * with zero ack value. + */ + ack = sack = receiver->td_end; + } + + if (seq == end + && (!tcph->rst + || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) + /* + * Packets contains no data: we assume it is valid + * and check the ack value only. + * However RST segments are always validated by their + * SEQ number, except when seq == 0 (reset sent answering + * SYN. + */ + seq = end = sender->td_end; + + DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "seq=%u ack=%u sack =%u win=%u end=%u\n", + NIPQUAD(iph->saddr), ntohs(tcph->source), + NIPQUAD(iph->daddr), ntohs(tcph->dest), + seq, ack, sack, win, end); + DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", + before(seq, sender->td_maxend + 1), + after(end, sender->td_end - receiver->td_maxwin - 1), + before(sack, receiver->td_end + 1), + after(ack, receiver->td_end - MAXACKWINDOW(sender))); + + if (sender->loose || receiver->loose || + (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + /* + * Take into account window scaling (RFC 1323). + */ + if (!tcph->syn) + win <<= sender->td_scale; + + /* + * Update sender data. + */ + swin = win + (sack - ack); + if (sender->td_maxwin < swin) + sender->td_maxwin = swin; + if (after(end, sender->td_end)) + sender->td_end = end; + /* + * Update receiver data. + */ + if (after(end, sender->td_maxend)) + receiver->td_maxwin += end - sender->td_maxend; + if (after(sack + win, receiver->td_maxend - 1)) { + receiver->td_maxend = sack + win; + if (win == 0) + receiver->td_maxend++; + } + + /* + * Check retransmissions. + */ + if (index == TCP_ACK_SET) { + if (state->last_dir == dir + && state->last_seq == seq + && state->last_ack == ack + && state->last_end == end) + state->retrans++; + else { + state->last_dir = dir; + state->last_seq = seq; + state->last_ack = ack; + state->last_end = end; + state->retrans = 0; + } + } + /* + * Close the window of disabled window tracking :-) + */ + if (sender->loose) + sender->loose--; + + res = 1; + } else { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: %s ", + before(seq, sender->td_maxend + 1) ? + after(end, sender->td_end - receiver->td_maxwin - 1) ? + before(sack, receiver->td_end + 1) ? + after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" + : "ACK is under the lower bound (possible overly delayed ACK)" + : "ACK is over the upper bound (ACKed data not seen yet)" + : "SEQ is under the lower bound (already ACKed data retransmitted)" + : "SEQ is over the upper bound (over the window of the receiver)"); + + res = nf_ct_tcp_be_liberal; + } + + DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " + "receiver end=%u maxend=%u maxwin=%u\n", + res, sender->td_end, sender->td_maxend, sender->td_maxwin, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin); + + return res; +} + +#ifdef CONFIG_IP_NF_NAT_NEEDED +/* Update sender->td_end after NAT successfully mangled the packet */ +/* Caller must linearize skb at tcp header. */ +void nf_conntrack_tcp_update(struct sk_buff *skb, + unsigned int dataoff, + struct nf_conn *conntrack, + int dir) +{ + struct tcphdr *tcph = (void *)skb->data + dataoff; + __u32 end; +#ifdef DEBUGP_VARS + struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; + struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; +#endif + + end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); + + write_lock_bh(&tcp_lock); + /* + * We have to worry for the ack in the reply packet only... + */ + if (after(end, conntrack->proto.tcp.seen[dir].td_end)) + conntrack->proto.tcp.seen[dir].td_end = end; + conntrack->proto.tcp.last_end = end; + write_unlock_bh(&tcp_lock); + DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); +} + +#endif + +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 +#define TH_ECE 0x40 +#define TH_CWR 0x80 + +/* table of valid flag combinations - ECE and CWR are always valid */ +static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = +{ + [TH_SYN] = 1, + [TH_SYN|TH_ACK] = 1, + [TH_SYN|TH_ACK|TH_PUSH] = 1, + [TH_RST] = 1, + [TH_RST|TH_ACK] = 1, + [TH_RST|TH_ACK|TH_PUSH] = 1, + [TH_FIN|TH_ACK] = 1, + [TH_ACK] = 1, + [TH_ACK|TH_PUSH] = 1, + [TH_ACK|TH_URG] = 1, + [TH_ACK|TH_URG|TH_PUSH] = 1, + [TH_FIN|TH_ACK|TH_PUSH] = 1, + [TH_FIN|TH_ACK|TH_URG] = 1, + [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1, +}; + +/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ +static int tcp_error(struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, + unsigned int hooknum, + int(*csum)(const struct sk_buff *,unsigned int)) +{ + struct tcphdr _tcph, *th; + unsigned int tcplen = skb->len - dataoff; + u_int8_t tcpflags; + + /* Smaller that minimal TCP header? */ + th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); + if (th == NULL) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: short packet "); + return -NF_ACCEPT; + } + + /* Not whole TCP header or malformed packet */ + if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: truncated/malformed packet "); + return -NF_ACCEPT; + } + + /* Checksum invalid? Ignore. + * We skip checking packets on the outgoing path + * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. + */ + /* FIXME: Source route IP option packets --RR */ + if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || + (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) + && skb->ip_summed != CHECKSUM_UNNECESSARY + && csum(skb, dataoff)) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: bad TCP checksum "); + return -NF_ACCEPT; + } + + /* Check TCP flags. */ + tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); + if (!tcp_valid_flags[tcpflags]) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid TCP flag combination "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} + +static int csum4(const struct sk_buff *skb, unsigned int dataoff) +{ + return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->len - dataoff, IPPROTO_TCP, + skb->ip_summed == CHECKSUM_HW ? skb->csum + : skb_checksum(skb, dataoff, + skb->len - dataoff, 0)); +} + +static int csum6(const struct sk_buff *skb, unsigned int dataoff) +{ + return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, + skb->len - dataoff, IPPROTO_TCP, + skb->ip_summed == CHECKSUM_HW ? skb->csum + : skb_checksum(skb, dataoff, skb->len - dataoff, + 0)); +} + +static int tcp_error4(struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum4); +} + +static int tcp_error6(struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum6); +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int tcp_packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + enum tcp_conntrack new_state, old_state; + enum ip_conntrack_dir dir; + struct tcphdr *th, _tcph; + unsigned long timeout; + unsigned int index; + + th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); + BUG_ON(th == NULL); + + write_lock_bh(&tcp_lock); + old_state = conntrack->proto.tcp.state; + dir = CTINFO2DIR(ctinfo); + index = get_conntrack_index(th); + new_state = tcp_conntracks[dir][index][old_state]; + + switch (new_state) { + case TCP_CONNTRACK_IGNORE: + /* Either SYN in ORIGINAL + * or SYN/ACK in REPLY. */ + if (index == TCP_SYNACK_SET + && conntrack->proto.tcp.last_index == TCP_SYN_SET + && conntrack->proto.tcp.last_dir != dir + && ntohl(th->ack_seq) == + conntrack->proto.tcp.last_end) { + /* This SYN/ACK acknowledges a SYN that we earlier + * ignored as invalid. This means that the client and + * the server are both in sync, while the firewall is + * not. We kill this session and block the SYN/ACK so + * that the client cannot but retransmit its SYN and + * thus initiate a clean new session. + */ + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: killing out of sync session "); + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long) + conntrack); + return -NF_DROP; + } + conntrack->proto.tcp.last_index = index; + conntrack->proto.tcp.last_dir = dir; + conntrack->proto.tcp.last_seq = ntohl(th->seq); + conntrack->proto.tcp.last_end = + segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); + + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid packed ignored "); + return NF_ACCEPT; + case TCP_CONNTRACK_MAX: + /* Invalid packet */ + DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", + dir, get_conntrack_index(th), + old_state); + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid state "); + return -NF_ACCEPT; + case TCP_CONNTRACK_SYN_SENT: + if (old_state < TCP_CONNTRACK_TIME_WAIT) + break; + if ((conntrack->proto.tcp.seen[dir].flags & + IP_CT_TCP_FLAG_CLOSE_INIT) + || after(ntohl(th->seq), + conntrack->proto.tcp.seen[dir].td_end)) { + /* Attempt to reopen a closed connection. + * Delete this connection and look up again. */ + write_unlock_bh(&tcp_lock); + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long) + conntrack); + return -NF_REPEAT; + } + case TCP_CONNTRACK_CLOSE: + if (index == TCP_RST_SET + && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_SYN_SET + && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { + /* RST sent to invalid SYN we had let trough + * SYN was in window then, tear down connection. + * We skip window checking, because packet might ACK + * segments we ignored in the SYN. */ + goto in_window; + } + /* Just fall trough */ + default: + /* Keep compilers happy. */ + break; + } + + if (!tcp_in_window(&conntrack->proto.tcp, dir, index, + skb, dataoff, th, pf)) { + write_unlock_bh(&tcp_lock); + return -NF_ACCEPT; + } + in_window: + /* From now on we have got in-window packets */ + conntrack->proto.tcp.last_index = index; + + DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest), + (th->syn ? 1 : 0), (th->ack ? 1 : 0), + (th->fin ? 1 : 0), (th->rst ? 1 : 0), + old_state, new_state); + + conntrack->proto.tcp.state = new_state; + if (old_state != new_state + && (new_state == TCP_CONNTRACK_FIN_WAIT + || new_state == TCP_CONNTRACK_CLOSE)) + conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; + timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans + && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans + ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; + write_unlock_bh(&tcp_lock); + + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + if (new_state != old_state) + nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + + if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { + /* If only reply is a RST, we can consider ourselves not to + have an established connection: this is a fairly common + problem case, so we can delete the conntrack + immediately. --RR */ + if (th->rst) { + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long) + conntrack); + return NF_ACCEPT; + } + } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) + && (old_state == TCP_CONNTRACK_SYN_RECV + || old_state == TCP_CONNTRACK_ESTABLISHED) + && new_state == TCP_CONNTRACK_ESTABLISHED) { + /* Set ASSURED if we see see valid ack in ESTABLISHED + after SYN_RECV or a valid answer for a picked up + connection. */ + set_bit(IPS_ASSURED_BIT, &conntrack->status); + nf_conntrack_event_cache(IPCT_STATUS, skb); + } + nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int tcp_new(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff) +{ + enum tcp_conntrack new_state; + struct tcphdr *th, _tcph; +#ifdef DEBUGP_VARS + struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; + struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; +#endif + + th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); + BUG_ON(th == NULL); + + /* Don't need lock here: this conntrack not in circulation yet */ + new_state + = tcp_conntracks[0][get_conntrack_index(th)] + [TCP_CONNTRACK_NONE]; + + /* Invalid: delete conntrack */ + if (new_state >= TCP_CONNTRACK_MAX) { + DEBUGP("nf_ct_tcp: invalid new deleting.\n"); + return 0; + } + + if (new_state == TCP_CONNTRACK_SYN_SENT) { + /* SYN packet */ + conntrack->proto.tcp.seen[0].td_end = + segment_seq_plus_len(ntohl(th->seq), skb->len, + dataoff, th); + conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); + if (conntrack->proto.tcp.seen[0].td_maxwin == 0) + conntrack->proto.tcp.seen[0].td_maxwin = 1; + conntrack->proto.tcp.seen[0].td_maxend = + conntrack->proto.tcp.seen[0].td_end; + + tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); + conntrack->proto.tcp.seen[1].flags = 0; + conntrack->proto.tcp.seen[0].loose = + conntrack->proto.tcp.seen[1].loose = 0; + } else if (nf_ct_tcp_loose == 0) { + /* Don't try to pick up connections. */ + return 0; + } else { + /* + * We are in the middle of a connection, + * its history is lost for us. + * Let's try to use the data from the packet. + */ + conntrack->proto.tcp.seen[0].td_end = + segment_seq_plus_len(ntohl(th->seq), skb->len, + dataoff, th); + conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); + if (conntrack->proto.tcp.seen[0].td_maxwin == 0) + conntrack->proto.tcp.seen[0].td_maxwin = 1; + conntrack->proto.tcp.seen[0].td_maxend = + conntrack->proto.tcp.seen[0].td_end + + conntrack->proto.tcp.seen[0].td_maxwin; + conntrack->proto.tcp.seen[0].td_scale = 0; + + /* We assume SACK. Should we assume window scaling too? */ + conntrack->proto.tcp.seen[0].flags = + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; + conntrack->proto.tcp.seen[0].loose = + conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose; + } + + conntrack->proto.tcp.seen[1].td_end = 0; + conntrack->proto.tcp.seen[1].td_maxend = 0; + conntrack->proto.tcp.seen[1].td_maxwin = 1; + conntrack->proto.tcp.seen[1].td_scale = 0; + + /* tcp_packet will set them */ + conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; + conntrack->proto.tcp.last_index = TCP_NONE_SET; + + DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + return 1; +} + +struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = +{ + .l3proto = PF_INET, + .proto = IPPROTO_TCP, + .name = "tcp", + .pkt_to_tuple = tcp_pkt_to_tuple, + .invert_tuple = tcp_invert_tuple, + .print_tuple = tcp_print_tuple, + .print_conntrack = tcp_print_conntrack, + .packet = tcp_packet, + .new = tcp_new, + .error = tcp_error4, +}; + +struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = +{ + .l3proto = PF_INET6, + .proto = IPPROTO_TCP, + .name = "tcp", + .pkt_to_tuple = tcp_pkt_to_tuple, + .invert_tuple = tcp_invert_tuple, + .print_tuple = tcp_print_tuple, + .print_conntrack = tcp_print_conntrack, + .packet = tcp_packet, + .new = tcp_new, + .error = tcp_error6, +}; + +EXPORT_SYMBOL(nf_conntrack_protocol_tcp4); +EXPORT_SYMBOL(nf_conntrack_protocol_tcp6); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c new file mode 100644 index 00000000000..3cae7ce420d --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -0,0 +1,216 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - enable working with Layer 3 protocol independent connection tracking. + * + * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/udp.h> +#include <linux/seq_file.h> +#include <linux/skbuff.h> +#include <linux/ipv6.h> +#include <net/ip6_checksum.h> +#include <net/checksum.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_conntrack_protocol.h> + +unsigned long nf_ct_udp_timeout = 30*HZ; +unsigned long nf_ct_udp_timeout_stream = 180*HZ; + +static int udp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct udphdr _hdr, *hp; + + /* Actually only need first 8 bytes. */ + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return 0; + + tuple->src.u.udp.port = hp->source; + tuple->dst.u.udp.port = hp->dest; + + return 1; +} + +static int udp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u.udp.port = orig->dst.u.udp.port; + tuple->dst.u.udp.port = orig->src.u.udp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int udp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); +} + +/* Print out the private part of the conntrack. */ +static int udp_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, and may modify conntracktype */ +static int udp_packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + /* If we've seen traffic both ways, this is some kind of UDP + stream. Extend timeout. */ + if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { + nf_ct_refresh_acct(conntrack, ctinfo, skb, + nf_ct_udp_timeout_stream); + /* Also, more likely to be important, and not a probe */ + if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + nf_conntrack_event_cache(IPCT_STATUS, skb); + } else + nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int udp_new(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + return 1; +} + +static int udp_error(struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, + unsigned int hooknum, + int (*csum)(const struct sk_buff *, unsigned int)) +{ + unsigned int udplen = skb->len - dataoff; + struct udphdr _hdr, *hdr; + + /* Header is too small? */ + hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hdr == NULL) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udp: short packet "); + return -NF_ACCEPT; + } + + /* Truncated/malformed packets */ + if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udp: truncated/malformed packet "); + return -NF_ACCEPT; + } + + /* Packet with no checksum */ + if (!hdr->check) + return NF_ACCEPT; + + /* Checksum invalid? Ignore. + * We skip checking packets on the outgoing path + * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. + * FIXME: Source route IP option packets --RR */ + if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || + (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) + && skb->ip_summed != CHECKSUM_UNNECESSARY + && csum(skb, dataoff)) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udp: bad UDP checksum "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} + +static int csum4(const struct sk_buff *skb, unsigned int dataoff) +{ + return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->len - dataoff, IPPROTO_UDP, + skb->ip_summed == CHECKSUM_HW ? skb->csum + : skb_checksum(skb, dataoff, + skb->len - dataoff, 0)); +} + +static int csum6(const struct sk_buff *skb, unsigned int dataoff) +{ + return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, + skb->len - dataoff, IPPROTO_UDP, + skb->ip_summed == CHECKSUM_HW ? skb->csum + : skb_checksum(skb, dataoff, skb->len - dataoff, + 0)); +} + +static int udp_error4(struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum4); +} + +static int udp_error6(struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum6); +} + +struct nf_conntrack_protocol nf_conntrack_protocol_udp4 = +{ + .l3proto = PF_INET, + .proto = IPPROTO_UDP, + .name = "udp", + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .print_conntrack = udp_print_conntrack, + .packet = udp_packet, + .new = udp_new, + .error = udp_error4, +}; + +struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = +{ + .l3proto = PF_INET6, + .proto = IPPROTO_UDP, + .name = "udp", + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .print_conntrack = udp_print_conntrack, + .packet = udp_packet, + .new = udp_new, + .error = udp_error6, +}; + +EXPORT_SYMBOL(nf_conntrack_protocol_udp4); +EXPORT_SYMBOL(nf_conntrack_protocol_udp6); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c new file mode 100644 index 00000000000..45224db4fe2 --- /dev/null +++ b/net/netfilter/nf_conntrack_standalone.c @@ -0,0 +1,869 @@ +/* This file contains all the functions required for the standalone + nf_conntrack module. + + These are not required by the compatibility layer. +*/ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/percpu.h> +#include <linux/netdevice.h> +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter_ipv4/listhelp.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +MODULE_LICENSE("GPL"); + +extern atomic_t nf_conntrack_count; +DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); + +static int kill_l3proto(struct nf_conn *i, void *data) +{ + return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == + ((struct nf_conntrack_l3proto *)data)->l3proto); +} + +static int kill_proto(struct nf_conn *i, void *data) +{ + struct nf_conntrack_protocol *proto; + proto = (struct nf_conntrack_protocol *)data; + return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == + proto->proto) && + (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == + proto->l3proto); +} + +#ifdef CONFIG_PROC_FS +static int +print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_l3proto *l3proto, + struct nf_conntrack_protocol *proto) +{ + return l3proto->print_tuple(s, tuple) || proto->print_tuple(s, tuple); +} + +#ifdef CONFIG_NF_CT_ACCT +static unsigned int +seq_print_counters(struct seq_file *s, + const struct ip_conntrack_counter *counter) +{ + return seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)counter->packets, + (unsigned long long)counter->bytes); +} +#else +#define seq_print_counters(x, y) 0 +#endif + +struct ct_iter_state { + unsigned int bucket; +}; + +static struct list_head *ct_get_first(struct seq_file *seq) +{ + struct ct_iter_state *st = seq->private; + + for (st->bucket = 0; + st->bucket < nf_conntrack_htable_size; + st->bucket++) { + if (!list_empty(&nf_conntrack_hash[st->bucket])) + return nf_conntrack_hash[st->bucket].next; + } + return NULL; +} + +static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) +{ + struct ct_iter_state *st = seq->private; + + head = head->next; + while (head == &nf_conntrack_hash[st->bucket]) { + if (++st->bucket >= nf_conntrack_htable_size) + return NULL; + head = nf_conntrack_hash[st->bucket].next; + } + return head; +} + +static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) +{ + struct list_head *head = ct_get_first(seq); + + if (head) + while (pos && (head = ct_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *ct_seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&nf_conntrack_lock); + return ct_get_idx(seq, *pos); +} + +static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return ct_get_next(s, v); +} + +static void ct_seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&nf_conntrack_lock); +} + +/* return 0 on success, 1 in case of error */ +static int ct_seq_show(struct seq_file *s, void *v) +{ + const struct nf_conntrack_tuple_hash *hash = v; + const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash); + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_protocol *proto; + + ASSERT_READ_LOCK(&nf_conntrack_lock); + NF_CT_ASSERT(conntrack); + + /* we only want to print DIR_ORIGINAL */ + if (NF_CT_DIRECTION(hash)) + return 0; + + l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src.l3num); + + NF_CT_ASSERT(l3proto); + proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src.l3num, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + NF_CT_ASSERT(proto); + + if (seq_printf(s, "%-8s %u %-8s %u %ld ", + l3proto->name, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, + proto->name, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, + timer_pending(&conntrack->timeout) + ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0) + return -ENOSPC; + + if (l3proto->print_conntrack(s, conntrack)) + return -ENOSPC; + + if (proto->print_conntrack(s, conntrack)) + return -ENOSPC; + + if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + l3proto, proto)) + return -ENOSPC; + + if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) + return -ENOSPC; + + if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) + if (seq_printf(s, "[UNREPLIED] ")) + return -ENOSPC; + + if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, + l3proto, proto)) + return -ENOSPC; + + if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) + return -ENOSPC; + + if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) + if (seq_printf(s, "[ASSURED] ")) + return -ENOSPC; + +#if defined(CONFIG_NF_CONNTRACK_MARK) + if (seq_printf(s, "mark=%u ", conntrack->mark)) + return -ENOSPC; +#endif + + if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) + return -ENOSPC; + + return 0; +} + +static struct seq_operations ct_seq_ops = { + .start = ct_seq_start, + .next = ct_seq_next, + .stop = ct_seq_stop, + .show = ct_seq_show +}; + +static int ct_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + struct ct_iter_state *st; + int ret; + + st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); + if (st == NULL) + return -ENOMEM; + ret = seq_open(file, &ct_seq_ops); + if (ret) + goto out_free; + seq = file->private_data; + seq->private = st; + memset(st, 0, sizeof(struct ct_iter_state)); + return ret; +out_free: + kfree(st); + return ret; +} + +static struct file_operations ct_file_ops = { + .owner = THIS_MODULE, + .open = ct_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +/* expects */ +static void *exp_seq_start(struct seq_file *s, loff_t *pos) +{ + struct list_head *e = &nf_conntrack_expect_list; + loff_t i; + + /* strange seq_file api calls stop even if we fail, + * thus we need to grab lock since stop unlocks */ + read_lock_bh(&nf_conntrack_lock); + + if (list_empty(e)) + return NULL; + + for (i = 0; i <= *pos; i++) { + e = e->next; + if (e == &nf_conntrack_expect_list) + return NULL; + } + return e; +} + +static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct list_head *e = v; + + ++*pos; + e = e->next; + + if (e == &nf_conntrack_expect_list) + return NULL; + + return e; +} + +static void exp_seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&nf_conntrack_lock); +} + +static int exp_seq_show(struct seq_file *s, void *v) +{ + struct nf_conntrack_expect *expect = v; + + if (expect->timeout.function) + seq_printf(s, "%ld ", timer_pending(&expect->timeout) + ? (long)(expect->timeout.expires - jiffies)/HZ : 0); + else + seq_printf(s, "- "); + seq_printf(s, "l3proto = %u proto=%u ", + expect->tuple.src.l3num, + expect->tuple.dst.protonum); + print_tuple(s, &expect->tuple, + nf_ct_find_l3proto(expect->tuple.src.l3num), + nf_ct_find_proto(expect->tuple.src.l3num, + expect->tuple.dst.protonum)); + return seq_putc(s, '\n'); +} + +static struct seq_operations exp_seq_ops = { + .start = exp_seq_start, + .next = exp_seq_next, + .stop = exp_seq_stop, + .show = exp_seq_show +}; + +static int exp_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &exp_seq_ops); +} + +static struct file_operations exp_file_ops = { + .owner = THIS_MODULE, + .open = exp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + int cpu; + + if (*pos == 0) + return SEQ_START_TOKEN; + + for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu + 1; + return &per_cpu(nf_conntrack_stat, cpu); + } + + return NULL; +} + +static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + int cpu; + + for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu + 1; + return &per_cpu(nf_conntrack_stat, cpu); + } + + return NULL; +} + +static void ct_cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int ct_cpu_seq_show(struct seq_file *seq, void *v) +{ + unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct ip_conntrack_stat *st = v; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); + return 0; + } + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x %08x %08x \n", + nr_conntracks, + st->searched, + st->found, + st->new, + st->invalid, + st->ignore, + st->delete, + st->delete_list, + st->insert, + st->insert_failed, + st->drop, + st->early_drop, + st->error, + + st->expect_new, + st->expect_create, + st->expect_delete + ); + return 0; +} + +static struct seq_operations ct_cpu_seq_ops = { + .start = ct_cpu_seq_start, + .next = ct_cpu_seq_next, + .stop = ct_cpu_seq_stop, + .show = ct_cpu_seq_show, +}; + +static int ct_cpu_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ct_cpu_seq_ops); +} + +static struct file_operations ct_cpu_seq_fops = { + .owner = THIS_MODULE, + .open = ct_cpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; +#endif /* CONFIG_PROC_FS */ + +/* Sysctl support */ + +#ifdef CONFIG_SYSCTL + +/* From nf_conntrack_core.c */ +extern int nf_conntrack_max; +extern unsigned int nf_conntrack_htable_size; + +/* From nf_conntrack_proto_tcp.c */ +extern unsigned long nf_ct_tcp_timeout_syn_sent; +extern unsigned long nf_ct_tcp_timeout_syn_recv; +extern unsigned long nf_ct_tcp_timeout_established; +extern unsigned long nf_ct_tcp_timeout_fin_wait; +extern unsigned long nf_ct_tcp_timeout_close_wait; +extern unsigned long nf_ct_tcp_timeout_last_ack; +extern unsigned long nf_ct_tcp_timeout_time_wait; +extern unsigned long nf_ct_tcp_timeout_close; +extern unsigned long nf_ct_tcp_timeout_max_retrans; +extern int nf_ct_tcp_loose; +extern int nf_ct_tcp_be_liberal; +extern int nf_ct_tcp_max_retrans; + +/* From nf_conntrack_proto_udp.c */ +extern unsigned long nf_ct_udp_timeout; +extern unsigned long nf_ct_udp_timeout_stream; + +/* From nf_conntrack_proto_generic.c */ +extern unsigned long nf_ct_generic_timeout; + +/* Log invalid packets of a given protocol */ +static int log_invalid_proto_min = 0; +static int log_invalid_proto_max = 255; + +static struct ctl_table_header *nf_ct_sysctl_header; + +static ctl_table nf_ct_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_MAX, + .procname = "nf_conntrack_max", + .data = &nf_conntrack_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_COUNT, + .procname = "nf_conntrack_count", + .data = &nf_conntrack_count, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_BUCKETS, + .procname = "nf_conntrack_buckets", + .data = &nf_conntrack_htable_size, + .maxlen = sizeof(unsigned int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, + .procname = "nf_conntrack_tcp_timeout_syn_sent", + .data = &nf_ct_tcp_timeout_syn_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, + .procname = "nf_conntrack_tcp_timeout_syn_recv", + .data = &nf_ct_tcp_timeout_syn_recv, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, + .procname = "nf_conntrack_tcp_timeout_established", + .data = &nf_ct_tcp_timeout_established, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, + .procname = "nf_conntrack_tcp_timeout_fin_wait", + .data = &nf_ct_tcp_timeout_fin_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, + .procname = "nf_conntrack_tcp_timeout_close_wait", + .data = &nf_ct_tcp_timeout_close_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, + .procname = "nf_conntrack_tcp_timeout_last_ack", + .data = &nf_ct_tcp_timeout_last_ack, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, + .procname = "nf_conntrack_tcp_timeout_time_wait", + .data = &nf_ct_tcp_timeout_time_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, + .procname = "nf_conntrack_tcp_timeout_close", + .data = &nf_ct_tcp_timeout_close, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT, + .procname = "nf_conntrack_udp_timeout", + .data = &nf_ct_udp_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, + .procname = "nf_conntrack_udp_timeout_stream", + .data = &nf_ct_udp_timeout_stream, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT, + .procname = "nf_conntrack_generic_timeout", + .data = &nf_ct_generic_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, + .procname = "nf_conntrack_log_invalid", + .data = &nf_ct_log_invalid, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &log_invalid_proto_min, + .extra2 = &log_invalid_proto_max, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, + .procname = "nf_conntrack_tcp_timeout_max_retrans", + .data = &nf_ct_tcp_timeout_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, + .procname = "nf_conntrack_tcp_loose", + .data = &nf_ct_tcp_loose, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, + .procname = "nf_conntrack_tcp_be_liberal", + .data = &nf_ct_tcp_be_liberal, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, + .procname = "nf_conntrack_tcp_max_retrans", + .data = &nf_ct_tcp_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + + { .ctl_name = 0 } +}; + +#define NET_NF_CONNTRACK_MAX 2089 + +static ctl_table nf_ct_netfilter_table[] = { + { + .ctl_name = NET_NETFILTER, + .procname = "netfilter", + .mode = 0555, + .child = nf_ct_sysctl_table, + }, + { + .ctl_name = NET_NF_CONNTRACK_MAX, + .procname = "nf_conntrack_max", + .data = &nf_conntrack_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = nf_ct_netfilter_table, + }, + { .ctl_name = 0 } +}; +EXPORT_SYMBOL(nf_ct_log_invalid); +#endif /* CONFIG_SYSCTL */ + +static int init_or_cleanup(int init) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc, *proc_exp, *proc_stat; +#endif + int ret = 0; + + if (!init) goto cleanup; + + ret = nf_conntrack_init(); + if (ret < 0) + goto cleanup_nothing; + +#ifdef CONFIG_PROC_FS + proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); + if (!proc) goto cleanup_init; + + proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440, + &exp_file_ops); + if (!proc_exp) goto cleanup_proc; + + proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); + if (!proc_stat) + goto cleanup_proc_exp; + + proc_stat->proc_fops = &ct_cpu_seq_fops; + proc_stat->owner = THIS_MODULE; +#endif +#ifdef CONFIG_SYSCTL + nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); + if (nf_ct_sysctl_header == NULL) { + printk("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; + goto cleanup_proc_stat; + } +#endif + + return ret; + + cleanup: +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(nf_ct_sysctl_header); + cleanup_proc_stat: +#endif +#ifdef CONFIG_PROC_FS + proc_net_remove("nf_conntrack_stat"); + cleanup_proc_exp: + proc_net_remove("nf_conntrack_expect"); + cleanup_proc: + proc_net_remove("nf_conntrack"); + cleanup_init: +#endif /* CNFIG_PROC_FS */ + nf_conntrack_cleanup(); + cleanup_nothing: + return ret; +} + +int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) +{ + int ret = 0; + + write_lock_bh(&nf_conntrack_lock); + if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_generic_l3proto) { + ret = -EBUSY; + goto out; + } + nf_ct_l3protos[proto->l3proto] = proto; +out: + write_unlock_bh(&nf_conntrack_lock); + + return ret; +} + +void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) +{ + write_lock_bh(&nf_conntrack_lock); + nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto; + write_unlock_bh(&nf_conntrack_lock); + + /* Somebody could be still looking at the proto in bh. */ + synchronize_net(); + + /* Remove all contrack entries for this protocol */ + nf_ct_iterate_cleanup(kill_l3proto, proto); +} + +/* FIXME: Allow NULL functions and sub in pointers to generic for + them. --RR */ +int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto) +{ + int ret = 0; + +retry: + write_lock_bh(&nf_conntrack_lock); + if (nf_ct_protos[proto->l3proto]) { + if (nf_ct_protos[proto->l3proto][proto->proto] + != &nf_conntrack_generic_protocol) { + ret = -EBUSY; + goto out_unlock; + } + } else { + /* l3proto may be loaded latter. */ + struct nf_conntrack_protocol **proto_array; + int i; + + write_unlock_bh(&nf_conntrack_lock); + + proto_array = (struct nf_conntrack_protocol **) + kmalloc(MAX_NF_CT_PROTO * + sizeof(struct nf_conntrack_protocol *), + GFP_KERNEL); + if (proto_array == NULL) { + ret = -ENOMEM; + goto out; + } + for (i = 0; i < MAX_NF_CT_PROTO; i++) + proto_array[i] = &nf_conntrack_generic_protocol; + + write_lock_bh(&nf_conntrack_lock); + if (nf_ct_protos[proto->l3proto]) { + /* bad timing, but no problem */ + write_unlock_bh(&nf_conntrack_lock); + kfree(proto_array); + } else { + nf_ct_protos[proto->l3proto] = proto_array; + write_unlock_bh(&nf_conntrack_lock); + } + + /* + * Just once because array is never freed until unloading + * nf_conntrack.ko + */ + goto retry; + } + + nf_ct_protos[proto->l3proto][proto->proto] = proto; + +out_unlock: + write_unlock_bh(&nf_conntrack_lock); +out: + return ret; +} + +void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto) +{ + write_lock_bh(&nf_conntrack_lock); + nf_ct_protos[proto->l3proto][proto->proto] + = &nf_conntrack_generic_protocol; + write_unlock_bh(&nf_conntrack_lock); + + /* Somebody could be still looking at the proto in bh. */ + synchronize_net(); + + /* Remove all contrack entries for this protocol */ + nf_ct_iterate_cleanup(kill_proto, proto); +} + +static int __init init(void) +{ + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +/* Some modules need us, but don't depend directly on any symbol. + They should call this. */ +void need_nf_conntrack(void) +{ +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +EXPORT_SYMBOL_GPL(nf_conntrack_chain); +EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain); +EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); +EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); +EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); +EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); +EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); +#endif +EXPORT_SYMBOL(nf_conntrack_l3proto_register); +EXPORT_SYMBOL(nf_conntrack_l3proto_unregister); +EXPORT_SYMBOL(nf_conntrack_protocol_register); +EXPORT_SYMBOL(nf_conntrack_protocol_unregister); +EXPORT_SYMBOL(nf_ct_invert_tuplepr); +EXPORT_SYMBOL(nf_conntrack_alter_reply); +EXPORT_SYMBOL(nf_conntrack_destroyed); +EXPORT_SYMBOL(need_nf_conntrack); +EXPORT_SYMBOL(nf_conntrack_helper_register); +EXPORT_SYMBOL(nf_conntrack_helper_unregister); +EXPORT_SYMBOL(nf_ct_iterate_cleanup); +EXPORT_SYMBOL(__nf_ct_refresh_acct); +EXPORT_SYMBOL(nf_ct_protos); +EXPORT_SYMBOL(nf_ct_find_proto); +EXPORT_SYMBOL(nf_ct_l3protos); +EXPORT_SYMBOL(nf_conntrack_expect_alloc); +EXPORT_SYMBOL(nf_conntrack_expect_put); +EXPORT_SYMBOL(nf_conntrack_expect_related); +EXPORT_SYMBOL(nf_conntrack_unexpect_related); +EXPORT_SYMBOL(nf_conntrack_tuple_taken); +EXPORT_SYMBOL(nf_conntrack_htable_size); +EXPORT_SYMBOL(nf_conntrack_lock); +EXPORT_SYMBOL(nf_conntrack_hash); +EXPORT_SYMBOL(nf_conntrack_untracked); +EXPORT_SYMBOL_GPL(nf_conntrack_find_get); +#ifdef CONFIG_IP_NF_NAT_NEEDED +EXPORT_SYMBOL(nf_conntrack_tcp_update); +#endif +EXPORT_SYMBOL(__nf_conntrack_confirm); +EXPORT_SYMBOL(nf_ct_get_tuple); +EXPORT_SYMBOL(nf_ct_invert_tuple); +EXPORT_SYMBOL(nf_conntrack_in); +EXPORT_SYMBOL(__nf_conntrack_attach); |