aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/ah4.c18
-rw-r--r--net/ipv4/esp4.c24
-rw-r--r--net/ipv4/fib_trie.c835
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inetpeer.c3
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ipcomp.c3
-rw-r--r--net/ipv4/ipconfig.c7
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c43
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c16
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c20
-rw-r--r--net/ipv4/netfilter/Kconfig56
-rw-r--r--net/ipv4/netfilter/Makefile6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c3
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c86
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c7
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c806
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c7
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c142
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c17
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c327
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c8
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c7
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c401
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c214
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c21
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c8
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c223
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c6
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c16
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c5
-rw-r--r--net/ipv4/netfilter/ipt_owner.c1
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c29
-rw-r--r--net/ipv4/tcp.c17
-rw-r--r--net/ipv4/tcp_input.c40
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c79
-rw-r--r--net/ipv4/udp.c2
42 files changed, 2759 insertions, 768 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index bf147f8db39..a9d84f93442 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1248,11 +1248,6 @@ module_init(inet_init);
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
-#ifdef CONFIG_IP_FIB_TRIE
-extern int fib_stat_proc_init(void);
-extern void fib_stat_proc_exit(void);
-#endif
-
static int __init ipv4_proc_init(void)
{
int rc = 0;
@@ -1265,19 +1260,11 @@ static int __init ipv4_proc_init(void)
goto out_udp;
if (fib_proc_init())
goto out_fib;
-#ifdef CONFIG_IP_FIB_TRIE
- if (fib_stat_proc_init())
- goto out_fib_stat;
-#endif
if (ip_misc_proc_init())
goto out_misc;
out:
return rc;
out_misc:
-#ifdef CONFIG_IP_FIB_TRIE
- fib_stat_proc_exit();
-out_fib_stat:
-#endif
fib_proc_exit();
out_fib:
udp4_proc_exit();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 514c85b2631..035ad2c9e1b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -263,10 +263,8 @@ static int ah_init_state(struct xfrm_state *x)
error:
if (ahp) {
- if (ahp->work_icv)
- kfree(ahp->work_icv);
- if (ahp->tfm)
- crypto_free_tfm(ahp->tfm);
+ kfree(ahp->work_icv);
+ crypto_free_tfm(ahp->tfm);
kfree(ahp);
}
return -EINVAL;
@@ -279,14 +277,10 @@ static void ah_destroy(struct xfrm_state *x)
if (!ahp)
return;
- if (ahp->work_icv) {
- kfree(ahp->work_icv);
- ahp->work_icv = NULL;
- }
- if (ahp->tfm) {
- crypto_free_tfm(ahp->tfm);
- ahp->tfm = NULL;
- }
+ kfree(ahp->work_icv);
+ ahp->work_icv = NULL;
+ crypto_free_tfm(ahp->tfm);
+ ahp->tfm = NULL;
kfree(ahp);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b31ffc5053d..1b5a09d1b90 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -343,22 +343,14 @@ static void esp_destroy(struct xfrm_state *x)
if (!esp)
return;
- if (esp->conf.tfm) {
- crypto_free_tfm(esp->conf.tfm);
- esp->conf.tfm = NULL;
- }
- if (esp->conf.ivec) {
- kfree(esp->conf.ivec);
- esp->conf.ivec = NULL;
- }
- if (esp->auth.tfm) {
- crypto_free_tfm(esp->auth.tfm);
- esp->auth.tfm = NULL;
- }
- if (esp->auth.work_icv) {
- kfree(esp->auth.work_icv);
- esp->auth.work_icv = NULL;
- }
+ crypto_free_tfm(esp->conf.tfm);
+ esp->conf.tfm = NULL;
+ kfree(esp->conf.ivec);
+ esp->conf.ivec = NULL;
+ crypto_free_tfm(esp->auth.tfm);
+ esp->auth.tfm = NULL;
+ kfree(esp->auth.work_icv);
+ esp->auth.work_icv = NULL;
kfree(esp);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b2dea4e5da7..50c0519cd70 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#define VERSION "0.402"
+#define VERSION "0.404"
#include <linux/config.h>
#include <asm/uaccess.h>
@@ -164,7 +164,6 @@ static struct node *resize(struct trie *t, struct tnode *tn);
static struct tnode *inflate(struct trie *t, struct tnode *tn);
static struct tnode *halve(struct trie *t, struct tnode *tn);
static void tnode_free(struct tnode *tn);
-static void trie_dump_seq(struct seq_file *seq, struct trie *t);
static kmem_cache_t *fn_alias_kmem __read_mostly;
static struct trie *trie_local = NULL, *trie_main = NULL;
@@ -225,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b)
Consider a node 'n' and its parent 'tp'.
If n is a leaf, every bit in its key is significant. Its presence is
- necessitaded by path compression, since during a tree traversal (when
+ necessitated by path compression, since during a tree traversal (when
searching for a leaf - unless we are doing an insertion) we will completely
ignore all skipped bits we encounter. Thus we need to verify, at the end of
a potentially successful search, that we have indeed been walking the
@@ -837,11 +836,12 @@ static void trie_init(struct trie *t)
#endif
}
-/* readside most use rcu_read_lock currently dump routines
+/* readside must use rcu_read_lock currently dump routines
via get_fa_head and dump */
-static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
+static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
{
+ struct hlist_head *head = &l->list;
struct hlist_node *node;
struct leaf_info *li;
@@ -854,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
static inline struct list_head * get_fa_head(struct leaf *l, int plen)
{
- struct leaf_info *li = find_leaf_info(&l->list, plen);
+ struct leaf_info *li = find_leaf_info(l, plen);
if (!li)
return NULL;
@@ -1086,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
}
if (tp && tp->pos + tp->bits > 32)
- printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
+ printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
tp, tp->pos, tp->bits, key, plen);
/* Rebalance the trie */
@@ -1249,7 +1249,7 @@ err:
}
-/* should be clalled with rcu_read_lock */
+/* should be called with rcu_read_lock */
static inline int check_leaf(struct trie *t, struct leaf *l,
t_key key, int *plen, const struct flowi *flp,
struct fib_result *res)
@@ -1591,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
l = fib_find_node(t, key);
- li = find_leaf_info(&l->list, plen);
+ li = find_leaf_info(l, plen);
list_del_rcu(&fa->fa_list);
@@ -1715,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb)
t->revision++;
- rcu_read_lock();
for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
found += trie_flush_leaf(t, l);
@@ -1723,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb)
trie_leaf_remove(t, ll->key);
ll = l;
}
- rcu_read_unlock();
if (ll && hlist_empty(&ll->list))
trie_leaf_remove(t, ll->key);
@@ -1834,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
i++;
continue;
}
- if (fa->fa_info->fib_nh == NULL) {
- printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
- i++;
- continue;
- }
- if (fa->fa_info == NULL) {
- printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
- i++;
- continue;
- }
+ BUG_ON(!fa->fa_info);
if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
@@ -1966,563 +1955,531 @@ struct fib_table * __init fib_hash_init(int id)
trie_main = t;
if (id == RT_TABLE_LOCAL)
- printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
+ printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
return tb;
}
-/* Trie dump functions */
+#ifdef CONFIG_PROC_FS
+/* Depth first Trie walk iterator */
+struct fib_trie_iter {
+ struct tnode *tnode;
+ struct trie *trie;
+ unsigned index;
+ unsigned depth;
+};
-static void putspace_seq(struct seq_file *seq, int n)
+static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
{
- while (n--)
- seq_printf(seq, " ");
-}
+ struct tnode *tn = iter->tnode;
+ unsigned cindex = iter->index;
+ struct tnode *p;
-static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
-{
- while (bits--)
- seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
-}
+ pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
+ iter->tnode, iter->index, iter->depth);
+rescan:
+ while (cindex < (1<<tn->bits)) {
+ struct node *n = tnode_get_child(tn, cindex);
-static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
- int pend, int cindex, int bits)
-{
- putspace_seq(seq, indent);
- if (IS_LEAF(n))
- seq_printf(seq, "|");
- else
- seq_printf(seq, "+");
- if (bits) {
- seq_printf(seq, "%d/", cindex);
- printbin_seq(seq, cindex, bits);
- seq_printf(seq, ": ");
- } else
- seq_printf(seq, "<root>: ");
- seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
+ if (n) {
+ if (IS_LEAF(n)) {
+ iter->tnode = tn;
+ iter->index = cindex + 1;
+ } else {
+ /* push down one level */
+ iter->tnode = (struct tnode *) n;
+ iter->index = 0;
+ ++iter->depth;
+ }
+ return n;
+ }
- if (IS_LEAF(n)) {
- struct leaf *l = (struct leaf *)n;
- struct fib_alias *fa;
- int i;
+ ++cindex;
+ }
- seq_printf(seq, "key=%d.%d.%d.%d\n",
- n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
-
- for (i = 32; i >= 0; i--)
- if (find_leaf_info(&l->list, i)) {
- struct list_head *fa_head = get_fa_head(l, i);
-
- if (!fa_head)
- continue;
-
- if (list_empty(fa_head))
- continue;
-
- putspace_seq(seq, indent+2);
- seq_printf(seq, "{/%d...dumping}\n", i);
-
- list_for_each_entry_rcu(fa, fa_head, fa_list) {
- putspace_seq(seq, indent+2);
- if (fa->fa_info == NULL) {
- seq_printf(seq, "Error fa_info=NULL\n");
- continue;
- }
- if (fa->fa_info->fib_nh == NULL) {
- seq_printf(seq, "Error _fib_nh=NULL\n");
- continue;
- }
-
- seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
- fa->fa_type,
- fa->fa_scope,
- fa->fa_tos);
- }
- }
- } else {
- struct tnode *tn = (struct tnode *)n;
- int plen = ((struct tnode *)n)->pos;
- t_key prf = MASK_PFX(n->key, plen);
-
- seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
- prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
-
- putspace_seq(seq, indent); seq_printf(seq, "| ");
- seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos));
- printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
- seq_printf(seq, "}\n");
- putspace_seq(seq, indent); seq_printf(seq, "| ");
- seq_printf(seq, "{pos=%d", tn->pos);
- seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
- seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
- putspace_seq(seq, indent); seq_printf(seq, "| ");
- seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
+ /* Current node exhausted, pop back up */
+ p = NODE_PARENT(tn);
+ if (p) {
+ cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
+ tn = p;
+ --iter->depth;
+ goto rescan;
}
+
+ /* got root? */
+ return NULL;
}
-static void trie_dump_seq(struct seq_file *seq, struct trie *t)
+static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
+ struct trie *t)
{
- struct node *n;
- int cindex = 0;
- int indent = 1;
- int pend = 0;
- int depth = 0;
- struct tnode *tn;
+ struct node *n = rcu_dereference(t->trie);
- rcu_read_lock();
- n = rcu_dereference(t->trie);
- seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
-
- if (!n) {
- seq_printf(seq, "------ trie is empty\n");
-
- rcu_read_unlock();
- return;
- }
-
- printnode_seq(seq, indent, n, pend, cindex, 0);
-
- if (!IS_TNODE(n)) {
- rcu_read_unlock();
- return;
+ if (n && IS_TNODE(n)) {
+ iter->tnode = (struct tnode *) n;
+ iter->trie = t;
+ iter->index = 0;
+ iter->depth = 1;
+ return n;
}
+ return NULL;
+}
- tn = (struct tnode *)n;
- pend = tn->pos+tn->bits;
- putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
- indent += 3;
- depth++;
-
- while (tn && cindex < (1 << tn->bits)) {
- struct node *child = rcu_dereference(tn->child[cindex]);
- if (!child)
- cindex++;
- else {
- /* Got a child */
- printnode_seq(seq, indent, child, pend,
- cindex, tn->bits);
-
- if (IS_LEAF(child))
- cindex++;
-
- else {
- /*
- * New tnode. Decend one level
- */
-
- depth++;
- n = child;
- tn = (struct tnode *)n;
- pend = tn->pos+tn->bits;
- putspace_seq(seq, indent);
- seq_printf(seq, "\\--\n");
- indent += 3;
- cindex = 0;
- }
- }
-
- /*
- * Test if we are done
- */
-
- while (cindex >= (1 << tn->bits)) {
- /*
- * Move upwards and test for root
- * pop off all traversed nodes
- */
+static void trie_collect_stats(struct trie *t, struct trie_stat *s)
+{
+ struct node *n;
+ struct fib_trie_iter iter;
- if (NODE_PARENT(tn) == NULL) {
- tn = NULL;
- break;
- }
+ memset(s, 0, sizeof(*s));
- cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
- cindex++;
- tn = NODE_PARENT(tn);
- pend = tn->pos + tn->bits;
- indent -= 3;
- depth--;
+ rcu_read_lock();
+ for (n = fib_trie_get_first(&iter, t); n;
+ n = fib_trie_get_next(&iter)) {
+ if (IS_LEAF(n)) {
+ s->leaves++;
+ s->totdepth += iter.depth;
+ if (iter.depth > s->maxdepth)
+ s->maxdepth = iter.depth;
+ } else {
+ const struct tnode *tn = (const struct tnode *) n;
+ int i;
+
+ s->tnodes++;
+ s->nodesizes[tn->bits]++;
+ for (i = 0; i < (1<<tn->bits); i++)
+ if (!tn->child[i])
+ s->nullpointers++;
}
}
rcu_read_unlock();
}
-static struct trie_stat *trie_stat_new(void)
+/*
+ * This outputs /proc/net/fib_triestats
+ */
+static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
{
- struct trie_stat *s;
- int i;
+ unsigned i, max, pointers, bytes, avdepth;
- s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
- if (!s)
- return NULL;
+ if (stat->leaves)
+ avdepth = stat->totdepth*100 / stat->leaves;
+ else
+ avdepth = 0;
- s->totdepth = 0;
- s->maxdepth = 0;
- s->tnodes = 0;
- s->leaves = 0;
- s->nullpointers = 0;
+ seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
+ seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
- for (i = 0; i < MAX_CHILDS; i++)
- s->nodesizes[i] = 0;
+ seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
- return s;
-}
+ bytes = sizeof(struct leaf) * stat->leaves;
+ seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
+ bytes += sizeof(struct tnode) * stat->tnodes;
-static struct trie_stat *trie_collect_stats(struct trie *t)
-{
- struct node *n;
- struct trie_stat *s = trie_stat_new();
- int cindex = 0;
- int pend = 0;
- int depth = 0;
+ max = MAX_CHILDS-1;
+ while (max >= 0 && stat->nodesizes[max] == 0)
+ max--;
- if (!s)
- return NULL;
+ pointers = 0;
+ for (i = 1; i <= max; i++)
+ if (stat->nodesizes[i] != 0) {
+ seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
+ pointers += (1<<i) * stat->nodesizes[i];
+ }
+ seq_putc(seq, '\n');
+ seq_printf(seq, "\tPointers: %d\n", pointers);
- rcu_read_lock();
- n = rcu_dereference(t->trie);
+ bytes += sizeof(struct node *) * pointers;
+ seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
+ seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024);
- if (!n)
- return s;
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+ seq_printf(seq, "Counters:\n---------\n");
+ seq_printf(seq,"gets = %d\n", t->stats.gets);
+ seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
+ seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
+ seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
+ seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
+ seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
+#ifdef CLEAR_STATS
+ memset(&(t->stats), 0, sizeof(t->stats));
+#endif
+#endif /* CONFIG_IP_FIB_TRIE_STATS */
+}
- if (IS_TNODE(n)) {
- struct tnode *tn = (struct tnode *)n;
- pend = tn->pos+tn->bits;
- s->nodesizes[tn->bits]++;
- depth++;
-
- while (tn && cindex < (1 << tn->bits)) {
- struct node *ch = rcu_dereference(tn->child[cindex]);
- if (ch) {
-
- /* Got a child */
-
- if (IS_LEAF(tn->child[cindex])) {
- cindex++;
-
- /* stats */
- if (depth > s->maxdepth)
- s->maxdepth = depth;
- s->totdepth += depth;
- s->leaves++;
- } else {
- /*
- * New tnode. Decend one level
- */
-
- s->tnodes++;
- s->nodesizes[tn->bits]++;
- depth++;
-
- n = ch;
- tn = (struct tnode *)n;
- pend = tn->pos+tn->bits;
-
- cindex = 0;
- }
- } else {
- cindex++;
- s->nullpointers++;
- }
+static int fib_triestat_seq_show(struct seq_file *seq, void *v)
+{
+ struct trie_stat *stat;
- /*
- * Test if we are done
- */
+ stat = kmalloc(sizeof(*stat), GFP_KERNEL);
+ if (!stat)
+ return -ENOMEM;
- while (cindex >= (1 << tn->bits)) {
- /*
- * Move upwards and test for root
- * pop off all traversed nodes
- */
+ seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
+ sizeof(struct leaf), sizeof(struct tnode));
- if (NODE_PARENT(tn) == NULL) {
- tn = NULL;
- n = NULL;
- break;
- }
+ if (trie_local) {
+ seq_printf(seq, "Local:\n");
+ trie_collect_stats(trie_local, stat);
+ trie_show_stats(seq, stat);
+ }
- cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
- tn = NODE_PARENT(tn);
- cindex++;
- n = (struct node *)tn;
- pend = tn->pos+tn->bits;
- depth--;
- }
- }
+ if (trie_main) {
+ seq_printf(seq, "Main:\n");
+ trie_collect_stats(trie_main, stat);
+ trie_show_stats(seq, stat);
}
+ kfree(stat);
- rcu_read_unlock();
- return s;
+ return 0;
}
-#ifdef CONFIG_PROC_FS
-
-static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
+static int fib_triestat_seq_open(struct inode *inode, struct file *file)
{
- return NULL;
+ return single_open(file, fib_triestat_seq_show, NULL);
}
-static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
+static struct file_operations fib_triestat_fops = {
+ .owner = THIS_MODULE,
+ .open = fib_triestat_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
+ loff_t pos)
{
+ loff_t idx = 0;
+ struct node *n;
+
+ for (n = fib_trie_get_first(iter, trie_local);
+ n; ++idx, n = fib_trie_get_next(iter)) {
+ if (pos == idx)
+ return n;
+ }
+
+ for (n = fib_trie_get_first(iter, trie_main);
+ n; ++idx, n = fib_trie_get_next(iter)) {
+ if (pos == idx)
+ return n;
+ }
return NULL;
}
-static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
+static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
{
- if (!ip_fib_main_table)
- return NULL;
-
- if (*pos)
- return fib_triestat_get_next(seq);
- else
+ rcu_read_lock();
+ if (*pos == 0)
return SEQ_START_TOKEN;
+ return fib_trie_get_idx(seq->private, *pos - 1);
}
-static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct fib_trie_iter *iter = seq->private;
+ void *l = v;
+
++*pos;
if (v == SEQ_START_TOKEN)
- return fib_triestat_get_first(seq);
- else
- return fib_triestat_get_next(seq);
-}
+ return fib_trie_get_idx(iter, 0);
-static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
-{
+ v = fib_trie_get_next(iter);
+ BUG_ON(v == l);
+ if (v)
+ return v;
-}
+ /* continue scan in next trie */
+ if (iter->trie == trie_local)
+ return fib_trie_get_first(iter, trie_main);
-/*
- * This outputs /proc/net/fib_triestats
- *
- * It always works in backward compatibility mode.
- * The format of the file is not supposed to be changed.
- */
+ return NULL;
+}
-static void collect_and_show(struct trie *t, struct seq_file *seq)
+static void fib_trie_seq_stop(struct seq_file *seq, void *v)
{
- int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
- int i, max, pointers;
- struct trie_stat *stat;
- int avdepth;
-
- stat = trie_collect_stats(t);
-
- bytes = 0;
- seq_printf(seq, "trie=%p\n", t);
-
- if (stat) {
- if (stat->leaves)
- avdepth = stat->totdepth*100 / stat->leaves;
- else
- avdepth = 0;
- seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
- seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
+ rcu_read_unlock();
+}
- seq_printf(seq, "Leaves: %d\n", stat->leaves);
- bytes += sizeof(struct leaf) * stat->leaves;
- seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
- bytes += sizeof(struct tnode) * stat->tnodes;
+static void seq_indent(struct seq_file *seq, int n)
+{
+ while (n-- > 0) seq_puts(seq, " ");
+}
- max = MAX_CHILDS-1;
+static inline const char *rtn_scope(enum rt_scope_t s)
+{
+ static char buf[32];
- while (max >= 0 && stat->nodesizes[max] == 0)
- max--;
- pointers = 0;
+ switch(s) {
+ case RT_SCOPE_UNIVERSE: return "universe";
+ case RT_SCOPE_SITE: return "site";
+ case RT_SCOPE_LINK: return "link";
+ case RT_SCOPE_HOST: return "host";
+ case RT_SCOPE_NOWHERE: return "nowhere";
+ default:
+ snprintf(buf, sizeof(buf), "scope=%d", s);
+ return buf;
+ }
+}
- for (i = 1; i <= max; i++)
- if (stat->nodesizes[i] != 0) {
- seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
- pointers += (1<<i) * stat->nodesizes[i];
- }
- seq_printf(seq, "\n");
- seq_printf(seq, "Pointers: %d\n", pointers);
- bytes += sizeof(struct node *) * pointers;
- seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
- seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
+static const char *rtn_type_names[__RTN_MAX] = {
+ [RTN_UNSPEC] = "UNSPEC",
+ [RTN_UNICAST] = "UNICAST",
+ [RTN_LOCAL] = "LOCAL",
+ [RTN_BROADCAST] = "BROADCAST",
+ [RTN_ANYCAST] = "ANYCAST",
+ [RTN_MULTICAST] = "MULTICAST",
+ [RTN_BLACKHOLE] = "BLACKHOLE",
+ [RTN_UNREACHABLE] = "UNREACHABLE",
+ [RTN_PROHIBIT] = "PROHIBIT",
+ [RTN_THROW] = "THROW",
+ [RTN_NAT] = "NAT",
+ [RTN_XRESOLVE] = "XRESOLVE",
+};
- kfree(stat);
- }
+static inline const char *rtn_type(unsigned t)
+{
+ static char buf[32];
-#ifdef CONFIG_IP_FIB_TRIE_STATS
- seq_printf(seq, "Counters:\n---------\n");
- seq_printf(seq,"gets = %d\n", t->stats.gets);
- seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
- seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
- seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
- seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
- seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
-#ifdef CLEAR_STATS
- memset(&(t->stats), 0, sizeof(t->stats));
-#endif
-#endif /* CONFIG_IP_FIB_TRIE_STATS */
+ if (t < __RTN_MAX && rtn_type_names[t])
+ return rtn_type_names[t];
+ snprintf(buf, sizeof(buf), "type %d", t);
+ return buf;
}
-static int fib_triestat_seq_show(struct seq_file *seq, void *v)
+/* Pretty print the trie */
+static int fib_trie_seq_show(struct seq_file *seq, void *v)
{
- char bf[128];
+ const struct fib_trie_iter *iter = seq->private;
+ struct node *n = v;
- if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
- sizeof(struct leaf), sizeof(struct tnode));
- if (trie_local)
- collect_and_show(trie_local, seq);
+ if (v == SEQ_START_TOKEN)
+ return 0;
- if (trie_main)
- collect_and_show(trie_main, seq);
- } else {
- snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400);
+ if (IS_TNODE(n)) {
+ struct tnode *tn = (struct tnode *) n;
+ t_key prf = ntohl(MASK_PFX(tn->key, tn->pos));
- seq_printf(seq, "%-127s\n", bf);
+ if (!NODE_PARENT(n)) {
+ if (iter->trie == trie_local)
+ seq_puts(seq, "<local>:\n");
+ else
+ seq_puts(seq, "<main>:\n");
+ }
+ seq_indent(seq, iter->depth-1);
+ seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
+ NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
+ tn->empty_children);
+
+ } else {
+ struct leaf *l = (struct leaf *) n;
+ int i;
+ u32 val = ntohl(l->key);
+
+ seq_indent(seq, iter->depth);
+ seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
+ for (i = 32; i >= 0; i--) {
+ struct leaf_info *li = find_leaf_info(l, i);
+ if (li) {
+ struct fib_alias *fa;
+ list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+ seq_indent(seq, iter->depth+1);
+ seq_printf(seq, " /%d %s %s", i,
+ rtn_scope(fa->fa_scope),
+ rtn_type(fa->fa_type));
+ if (fa->fa_tos)
+ seq_printf(seq, "tos =%d\n",
+ fa->fa_tos);
+ seq_putc(seq, '\n');
+ }
+ }
+ }
}
+
return 0;
}
-static struct seq_operations fib_triestat_seq_ops = {
- .start = fib_triestat_seq_start,
- .next = fib_triestat_seq_next,
- .stop = fib_triestat_seq_stop,
- .show = fib_triestat_seq_show,
+static struct seq_operations fib_trie_seq_ops = {
+ .start = fib_trie_seq_start,
+ .next = fib_trie_seq_next,
+ .stop = fib_trie_seq_stop,
+ .show = fib_trie_seq_show,
};
-static int fib_triestat_seq_open(struct inode *inode, struct file *file)
+static int fib_trie_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
+ struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+ if (!s)
+ goto out;
- rc = seq_open(file, &fib_triestat_seq_ops);
+ rc = seq_open(file, &fib_trie_seq_ops);
if (rc)
goto out_kfree;
- seq = file->private_data;
+ seq = file->private_data;
+ seq->private = s;
+ memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
+ kfree(s);
goto out;
}
-static struct file_operations fib_triestat_seq_fops = {
- .owner = THIS_MODULE,
- .open = fib_triestat_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
+static struct file_operations fib_trie_fops = {
+ .owner = THIS_MODULE,
+ .open = fib_trie_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
.release = seq_release_private,
};
-int __init fib_stat_proc_init(void)
+static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
{
- if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
- return -ENOMEM;
- return 0;
-}
+ static unsigned type2flags[RTN_MAX + 1] = {
+ [7] = RTF_REJECT, [8] = RTF_REJECT,
+ };
+ unsigned flags = type2flags[type];
-void __init fib_stat_proc_exit(void)
-{
- proc_net_remove("fib_triestat");
+ if (fi && fi->fib_nh->nh_gw)
+ flags |= RTF_GATEWAY;
+ if (mask == 0xFFFFFFFF)
+ flags |= RTF_HOST;
+ flags |= RTF_UP;
+ return flags;
}
-static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
+/*
+ * This outputs /proc/net/route.
+ * The format of the file is not supposed to be changed
+ * and needs to be same as fib_hash output to avoid breaking
+ * legacy utilities
+ */
+static int fib_route_seq_show(struct seq_file *seq, void *v)
{
- return NULL;
-}
+ struct leaf *l = v;
+ int i;
+ char bf[128];
-static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
-{
- return NULL;
-}
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
+ "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
+ "\tWindow\tIRTT");
+ return 0;
+ }
-static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
-{
- if (!ip_fib_main_table)
- return NULL;
+ if (IS_TNODE(l))
+ return 0;
- if (*pos)
- return fib_trie_get_next(seq);
- else
- return SEQ_START_TOKEN;
-}
+ for (i=32; i>=0; i--) {
+ struct leaf_info *li = find_leaf_info(l, i);
+ struct fib_alias *fa;
+ u32 mask, prefix;
-static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return fib_trie_get_first(seq);
- else
- return fib_trie_get_next(seq);
+ if (!li)
+ continue;
-}
+ mask = inet_make_mask(li->plen);
+ prefix = htonl(l->key);
-static void fib_trie_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-/*
- * This outputs /proc/net/fib_trie.
- *
- * It always works in backward compatibility mode.
- * The format of the file is not supposed to be changed.
- */
+ list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+ const struct fib_info *fi = rcu_dereference(fa->fa_info);
+ unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
-static int fib_trie_seq_show(struct seq_file *seq, void *v)
-{
- char bf[128];
+ if (fa->fa_type == RTN_BROADCAST
+ || fa->fa_type == RTN_MULTICAST)
+ continue;
- if (v == SEQ_START_TOKEN) {
- if (trie_local)
- trie_dump_seq(seq, trie_local);
+ if (fi)
+ snprintf(bf, sizeof(bf),
+ "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
+ fi->fib_dev ? fi->fib_dev->name : "*",
+ prefix,
+ fi->fib_nh->nh_gw, flags, 0, 0,
+ fi->fib_priority,
+ mask,
+ (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
+ fi->fib_window,
+ fi->fib_rtt >> 3);
+ else
+ snprintf(bf, sizeof(bf),
+ "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
+ prefix, 0, flags, 0, 0, 0,
+ mask, 0, 0, 0);
- if (trie_main)
- trie_dump_seq(seq, trie_main);
- } else {
- snprintf(bf, sizeof(bf),
- "*\t%08X\t%08X", 200, 400);
- seq_printf(seq, "%-127s\n", bf);
+ seq_printf(seq, "%-127s\n", bf);
+ }
}
return 0;
}
-static struct seq_operations fib_trie_seq_ops = {
- .start = fib_trie_seq_start,
- .next = fib_trie_seq_next,
- .stop = fib_trie_seq_stop,
- .show = fib_trie_seq_show,
+static struct seq_operations fib_route_seq_ops = {
+ .start = fib_trie_seq_start,
+ .next = fib_trie_seq_next,
+ .stop = fib_trie_seq_stop,
+ .show = fib_route_seq_show,
};
-static int fib_trie_seq_open(struct inode *inode, struct file *file)
+static int fib_route_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
+ struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
- rc = seq_open(file, &fib_trie_seq_ops);
+ if (!s)
+ goto out;
+
+ rc = seq_open(file, &fib_route_seq_ops);
if (rc)
goto out_kfree;
- seq = file->private_data;
+ seq = file->private_data;
+ seq->private = s;
+ memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
+ kfree(s);
goto out;
}
-static struct file_operations fib_trie_seq_fops = {
- .owner = THIS_MODULE,
- .open = fib_trie_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release= seq_release_private,
+static struct file_operations fib_route_fops = {
+ .owner = THIS_MODULE,
+ .open = fib_route_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
};
int __init fib_proc_init(void)
{
- if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
- return -ENOMEM;
+ if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
+ goto out1;
+
+ if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
+ goto out2;
+
+ if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
+ goto out3;
+
return 0;
+
+out3:
+ proc_net_remove("fib_triestat");
+out2:
+ proc_net_remove("fib_trie");
+out1:
+ return -ENOMEM;
}
void __init fib_proc_exit(void)
{
proc_net_remove("fib_trie");
+ proc_net_remove("fib_triestat");
+ proc_net_remove("route");
}
#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 44607f4767b..70c44e4c3ce 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1603,7 +1603,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
}
pmc->sources = NULL;
pmc->sfmode = MCAST_EXCLUDE;
- pmc->sfcount[MCAST_EXCLUDE] = 0;
+ pmc->sfcount[MCAST_INCLUDE] = 0;
pmc->sfcount[MCAST_EXCLUDE] = 1;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index f84ba9c9655..2fc3fd38924 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -100,8 +100,7 @@ DEFINE_SPINLOCK(inet_peer_unused_lock);
#define PEER_MAX_CLEANUP_WORK 30
static void peer_check_expire(unsigned long dummy);
-static struct timer_list peer_periodic_timer =
- TIMER_INITIALIZER(peer_check_expire, 0, 0);
+static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
/* Exported for sysctl_net_ipv4. */
int inet_peer_gc_mintime = 10 * HZ,
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9e6e683cc34..e7d26d9943c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -457,7 +457,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (pskb_pull(skb, ihl) == NULL)
goto err;
- if (pskb_trim(skb, end-offset))
+ if (pskb_trim_rcsum(skb, end-offset))
goto err;
/* Find out which fragments are in front and at the back of us
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index dcb7ee6c485..fc718df17b4 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -345,8 +345,7 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms)
for_each_cpu(cpu) {
struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
- if (tfm)
- crypto_free_tfm(tfm);
+ crypto_free_tfm(tfm);
}
free_percpu(tfms);
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 63e106605f2..e8674baaa8d 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -54,6 +54,7 @@
#include <linux/major.h>
#include <linux/root_dev.h>
#include <linux/delay.h>
+#include <linux/nfs_fs.h>
#include <net/arp.h>
#include <net/ip.h>
#include <net/ipconfig.h>
@@ -1102,10 +1103,8 @@ static int __init ic_dynamic(void)
#endif
jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout);
- while (time_before(jiffies, jiff) && !ic_got_reply) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1);
- }
+ while (time_before(jiffies, jiff) && !ic_got_reply)
+ schedule_timeout_uninterruptible(1);
#ifdef IPCONFIG_DHCP
/* DHCP isn't done until we get a DHCPACK. */
if ((ic_got_reply & IC_BOOTP)
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index e11952ea17a..f828fa2eb7d 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (s_addr==cp->caddr && s_port==cp->cport &&
d_port==cp->vport && d_addr==cp->vaddr &&
+ ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
protocol==cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
@@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get
return cp;
}
+/* Get reference to connection template */
+struct ip_vs_conn *ip_vs_ct_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+ unsigned hash;
+ struct ip_vs_conn *cp;
+
+ hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+
+ ct_read_lock(hash);
+
+ list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (s_addr==cp->caddr && s_port==cp->cport &&
+ d_port==cp->vport && d_addr==cp->vaddr &&
+ cp->flags & IP_VS_CONN_F_TEMPLATE &&
+ protocol==cp->protocol) {
+ /* HIT */
+ atomic_inc(&cp->refcnt);
+ goto out;
+ }
+ }
+ cp = NULL;
+
+ out:
+ ct_read_unlock(hash);
+
+ IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+ ip_vs_proto_name(protocol),
+ NIPQUAD(s_addr), ntohs(s_port),
+ NIPQUAD(d_addr), ntohs(d_port),
+ cp?"hit":"not hit");
+
+ return cp;
+}
/*
* Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
@@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
atomic_read(&dest->refcnt));
/* Update the connection counters */
- if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
/* It is a normal connection, so increase the inactive
connection counter because it is in TCP SYNRECV
state (inactive) or other protocol inacive state */
@@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
atomic_read(&dest->refcnt));
/* Update the connection counters */
- if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
/* It is a normal connection, so decrease the inactconns
or activeconns counter */
if (cp->flags & IP_VS_CONN_F_INACTIVE) {
@@ -467,7 +502,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
/*
* Invalidate the connection template
*/
- if (ct->cport) {
+ if (ct->vport != 65535) {
if (ip_vs_conn_unhash(ct)) {
ct->dport = 65535;
ct->vport = 65535;
@@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void)
ct_write_lock_bh(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT))
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 3ac7eeca04a..981cc3244ef 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
if (ports[1] == svc->port) {
/* Check if a template already exists */
if (svc->port != FTPPORT)
- ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+ ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, ports[1]);
else
- ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+ ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, 0);
if (!ct || !ip_vs_check_template(ct)) {
@@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
iph->daddr,
ports[1],
dest->addr, dest->port,
- 0,
+ IP_VS_CONN_F_TEMPLATE,
dest);
else
ct = ip_vs_conn_new(iph->protocol,
snet, 0,
iph->daddr, 0,
dest->addr, 0,
- 0,
+ IP_VS_CONN_F_TEMPLATE,
dest);
if (ct == NULL)
return NULL;
@@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
*/
if (svc->fwmark)
- ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0,
+ ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
htonl(svc->fwmark), 0);
else
- ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+ ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, 0);
if (!ct || !ip_vs_check_template(ct)) {
@@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
snet, 0,
htonl(svc->fwmark), 0,
dest->addr, 0,
- 0,
+ IP_VS_CONN_F_TEMPLATE,
dest);
else
ct = ip_vs_conn_new(iph->protocol,
snet, 0,
iph->daddr, 0,
dest->addr, 0,
- 0,
+ IP_VS_CONN_F_TEMPLATE,
dest);
if (ct == NULL)
return NULL;
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 574d1f509b4..2e5ced3d806 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
for (i=0; i<m->nr_conns; i++) {
+ unsigned flags;
+
s = (struct ip_vs_sync_conn *)p;
- cp = ip_vs_conn_in_get(s->protocol,
- s->caddr, s->cport,
- s->vaddr, s->vport);
+ flags = ntohs(s->flags);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ cp = ip_vs_conn_in_get(s->protocol,
+ s->caddr, s->cport,
+ s->vaddr, s->vport);
+ else
+ cp = ip_vs_ct_in_get(s->protocol,
+ s->caddr, s->cport,
+ s->vaddr, s->vport);
if (!cp) {
cp = ip_vs_conn_new(s->protocol,
s->caddr, s->cport,
s->vaddr, s->vport,
s->daddr, s->dport,
- ntohs(s->flags), NULL);
+ flags, NULL);
if (!cp) {
IP_VS_ERR("ip_vs_conn_new failed\n");
return;
@@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
} else if (!cp->dest) {
/* it is an entry created by the synchronization */
cp->state = ntohs(s->state);
- cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED;
+ cp->flags = flags | IP_VS_CONN_F_HASHED;
} /* Note that we don't touch its state and flags
if it is a normal entry. */
- if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) {
+ if (flags & IP_VS_CONN_F_SEQ_MASK) {
opt = (struct ip_vs_sync_conn_options *)&s[1];
memcpy(&cp->in_seq, opt, sizeof(*opt));
p += FULL_CONN_SIZE;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index e046f552181..3cf9b451675 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -34,6 +34,7 @@ config IP_NF_CT_ACCT
config IP_NF_CONNTRACK_MARK
bool 'Connection mark tracking support'
+ depends on IP_NF_CONNTRACK
help
This option enables support for connection marks, used by the
`CONNMARK' target and `connmark' match. Similar to the mark value
@@ -50,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS
IF unsure, say `N'.
+config IP_NF_CONNTRACK_NETLINK
+ tristate 'Connection tracking netlink interface'
+ depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+ depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
+ help
+ This option enables support for a netlink-based userspace interface
+
+
config IP_NF_CT_PROTO_SCTP
tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -85,6 +94,25 @@ config IP_NF_IRC
To compile it as a module, choose M here. If unsure, say Y.
+config IP_NF_NETBIOS_NS
+ tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
+ depends on IP_NF_CONNTRACK && EXPERIMENTAL
+ help
+ NetBIOS name service requests are sent as broadcast messages from an
+ unprivileged port and responded to with unicast messages to the
+ same port. This make them hard to firewall properly because connection
+ tracking doesn't deal with broadcasts. This helper tracks locally
+ originating NetBIOS name service requests and the corresponding
+ responses. It relies on correct IP address configuration, specifically
+ netmask and broadcast address. When properly configured, the output
+ of "ip address show" should look similar to this:
+
+ $ ip -4 address show eth0
+ 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
+ inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_NF_TFTP
tristate "TFTP protocol support"
depends on IP_NF_CONNTRACK
@@ -109,6 +137,22 @@ config IP_NF_AMANDA
To compile it as a module, choose M here. If unsure, say Y.
+config IP_NF_PPTP
+ tristate 'PPTP protocol support'
+ help
+ This module adds support for PPTP (Point to Point Tunnelling
+ Protocol, RFC2637) conncection tracking and NAT.
+
+ If you are running PPTP sessions over a stateful firewall or NAT
+ box, you may want to enable this feature.
+
+ Please note that not all PPTP modes of operation are supported yet.
+ For more info, read top of the file
+ net/ipv4/netfilter/ip_conntrack_pptp.c
+
+ If you want to compile it as a module, say M here and read
+ Documentation/modules.txt. If unsure, say `N'.
+
config IP_NF_QUEUE
tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
help
@@ -593,6 +637,12 @@ config IP_NF_NAT_AMANDA
default IP_NF_NAT if IP_NF_AMANDA=y
default m if IP_NF_AMANDA=m
+config IP_NF_NAT_PPTP
+ tristate
+ depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
+ default IP_NF_NAT if IP_NF_PPTP=y
+ default m if IP_NF_PPTP=m
+
# mangle + specific targets
config IP_NF_MANGLE
tristate "Packet mangling"
@@ -754,11 +804,5 @@ config IP_NF_ARP_MANGLE
Allows altering the ARP packet payload: source and destination
hardware and network addresses.
-config IP_NF_CONNTRACK_NETLINK
- tristate 'Connection tracking netlink interface'
- depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
- help
- This option enables support for a netlink-based userspace interface
-
endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index a7bd38f5052..3d45d3c0283 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -6,6 +6,9 @@
ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
+ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
+
# connection tracking
obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
@@ -17,12 +20,15 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
# connection tracking helpers
+obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
+obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
# NAT helpers
+obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index be4c9eb3243..fa3f914117e 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb,
/* increase the UDP timeout of the master connection as replies from
* Amanda clients to the server can be quite delayed */
- ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
+ ip_ct_refresh(ct, *pskb, master_timeout * HZ);
/* No data? */
dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
@@ -108,6 +108,7 @@ static int help(struct sk_buff **pskb,
}
exp->expectfn = NULL;
+ exp->flags = 0;
exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
exp->tuple.src.u.tcp.port = 0;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index a0648600190..ea65dd3e517 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -197,7 +197,7 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
/* ip_conntrack_expect helper functions */
-static void unlink_expect(struct ip_conntrack_expect *exp)
+void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
{
ASSERT_WRITE_LOCK(&ip_conntrack_lock);
IP_NF_ASSERT(!timer_pending(&exp->timeout));
@@ -207,18 +207,12 @@ static void unlink_expect(struct ip_conntrack_expect *exp)
ip_conntrack_expect_put(exp);
}
-void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp)
-{
- unlink_expect(exp);
- ip_conntrack_expect_put(exp);
-}
-
static void expectation_timed_out(unsigned long ul_expect)
{
struct ip_conntrack_expect *exp = (void *)ul_expect;
write_lock_bh(&ip_conntrack_lock);
- unlink_expect(exp);
+ ip_ct_unlink_expect(exp);
write_unlock_bh(&ip_conntrack_lock);
ip_conntrack_expect_put(exp);
}
@@ -239,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
/* Just find a expectation corresponding to a tuple. */
struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
+ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
{
struct ip_conntrack_expect *i;
@@ -264,10 +258,14 @@ find_expectation(const struct ip_conntrack_tuple *tuple)
master ct never got confirmed, we'd hold a reference to it
and weird things would happen to future packets). */
if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
- && is_confirmed(i->master)
- && del_timer(&i->timeout)) {
- unlink_expect(i);
- return i;
+ && is_confirmed(i->master)) {
+ if (i->flags & IP_CT_EXPECT_PERMANENT) {
+ atomic_inc(&i->use);
+ return i;
+ } else if (del_timer(&i->timeout)) {
+ ip_ct_unlink_expect(i);
+ return i;
+ }
}
}
return NULL;
@@ -284,7 +282,7 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct)
list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
if (i->master == ct && del_timer(&i->timeout)) {
- unlink_expect(i);
+ ip_ct_unlink_expect(i);
ip_conntrack_expect_put(i);
}
}
@@ -925,7 +923,7 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
/* choose the the oldest expectation to evict */
list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
if (expect_matches(i, exp) && del_timer(&i->timeout)) {
- unlink_expect(i);
+ ip_ct_unlink_expect(i);
write_unlock_bh(&ip_conntrack_lock);
ip_conntrack_expect_put(i);
return;
@@ -934,6 +932,9 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
write_unlock_bh(&ip_conntrack_lock);
}
+/* We don't increase the master conntrack refcount for non-fulfilled
+ * conntracks. During the conntrack destruction, the expectations are
+ * always killed before the conntrack itself */
struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
{
struct ip_conntrack_expect *new;
@@ -944,17 +945,14 @@ struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
return NULL;
}
new->master = me;
- atomic_inc(&new->master->ct_general.use);
atomic_set(&new->use, 1);
return new;
}
void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
- if (atomic_dec_and_test(&exp->use)) {
- ip_conntrack_put(exp->master);
+ if (atomic_dec_and_test(&exp->use))
kmem_cache_free(ip_conntrack_expect_cachep, exp);
- }
}
static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
@@ -982,7 +980,7 @@ static void evict_oldest_expect(struct ip_conntrack *master)
list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
if (i->master == master) {
if (del_timer(&i->timeout)) {
- unlink_expect(i);
+ ip_ct_unlink_expect(i);
ip_conntrack_expect_put(i);
}
break;
@@ -1099,7 +1097,7 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
/* Get rid of expectations */
list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
if (exp->master->helper == me && del_timer(&exp->timeout)) {
- unlink_expect(exp);
+ ip_ct_unlink_expect(exp);
ip_conntrack_expect_put(exp);
}
}
@@ -1114,42 +1112,46 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
synchronize_net();
}
-static inline void ct_add_counters(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb)
-{
-#ifdef CONFIG_IP_NF_CT_ACCT
- if (skb) {
- ct->counters[CTINFO2DIR(ctinfo)].packets++;
- ct->counters[CTINFO2DIR(ctinfo)].bytes +=
- ntohs(skb->nh.iph->tot_len);
- }
-#endif
-}
-
-/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
-void ip_ct_refresh_acct(struct ip_conntrack *ct,
+/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
+void __ip_ct_refresh_acct(struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
- unsigned long extra_jiffies)
+ unsigned long extra_jiffies,
+ int do_acct)
{
+ int do_event = 0;
+
IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
+ IP_NF_ASSERT(skb);
+
+ write_lock_bh(&ip_conntrack_lock);
/* If not in hash table, timer will not be active yet */
if (!is_confirmed(ct)) {
ct->timeout.expires = extra_jiffies;
- ct_add_counters(ct, ctinfo, skb);
+ do_event = 1;
} else {
- write_lock_bh(&ip_conntrack_lock);
/* Need del_timer for race avoidance (may already be dying). */
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
- ip_conntrack_event_cache(IPCT_REFRESH, skb);
+ do_event = 1;
}
- ct_add_counters(ct, ctinfo, skb);
- write_unlock_bh(&ip_conntrack_lock);
}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+ if (do_acct) {
+ ct->counters[CTINFO2DIR(ctinfo)].packets++;
+ ct->counters[CTINFO2DIR(ctinfo)].bytes +=
+ ntohs(skb->nh.iph->tot_len);
+ }
+#endif
+
+ write_unlock_bh(&ip_conntrack_lock);
+
+ /* must be unlocked when calling event cache */
+ if (do_event)
+ ip_conntrack_event_cache(IPCT_REFRESH, skb);
}
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 3a2627db172..d77d6b3f5f8 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -29,9 +29,9 @@ static char *ftp_buffer;
static DEFINE_SPINLOCK(ip_ftp_lock);
#define MAX_PORTS 8
-static int ports[MAX_PORTS];
+static short ports[MAX_PORTS];
static int ports_c;
-module_param_array(ports, int, &ports_c, 0400);
+module_param_array(ports, short, &ports_c, 0400);
static int loose;
module_param(loose, int, 0600);
@@ -421,6 +421,7 @@ static int help(struct sk_buff **pskb,
{ 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
exp->expectfn = NULL;
+ exp->flags = 0;
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
@@ -449,7 +450,7 @@ out_update_nl:
}
static struct ip_conntrack_helper ftp[MAX_PORTS];
-static char ftp_names[MAX_PORTS][10];
+static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
/* Not __exit: called from init() */
static void fini(void)
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
new file mode 100644
index 00000000000..926a6684643
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -0,0 +1,806 @@
+/*
+ * ip_conntrack_pptp.c - Version 3.0
+ *
+ * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * Limitations:
+ * - We blindly assume that control connections are always
+ * established in PNS->PAC direction. This is a violation
+ * of RFFC2673
+ * - We can only support one single call within each session
+ *
+ * TODO:
+ * - testing of incoming PPTP calls
+ *
+ * Changes:
+ * 2002-02-05 - Version 1.3
+ * - Call ip_conntrack_unexpect_related() from
+ * pptp_destroy_siblings() to destroy expectations in case
+ * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
+ * (Philip Craig <philipc@snapgear.com>)
+ * - Add Version information at module loadtime
+ * 2002-02-10 - Version 1.6
+ * - move to C99 style initializers
+ * - remove second expectation if first arrives
+ * 2004-10-22 - Version 2.0
+ * - merge Mandrake's 2.6.x port with recent 2.6.x API changes
+ * - fix lots of linear skb assumptions from Mandrake's port
+ * 2005-06-10 - Version 2.1
+ * - use ip_conntrack_expect_free() instead of kfree() on the
+ * expect's (which are from the slab for quite some time)
+ * 2005-06-10 - Version 3.0
+ * - port helper to post-2.6.11 API changes,
+ * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
+ * 2005-07-30 - Version 3.1
+ * - port helper to 2.6.13 API changes
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+#define IP_CT_PPTP_VERSION "3.1"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
+
+static DEFINE_SPINLOCK(ip_pptp_lock);
+
+int
+(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq);
+
+int
+(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq);
+
+int
+(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
+ struct ip_conntrack_expect *expect_reply);
+
+void
+(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *exp);
+
+#if 0
+/* PptpControlMessageType names */
+const char *pptp_msg_name[] = {
+ "UNKNOWN_MESSAGE",
+ "START_SESSION_REQUEST",
+ "START_SESSION_REPLY",
+ "STOP_SESSION_REQUEST",
+ "STOP_SESSION_REPLY",
+ "ECHO_REQUEST",
+ "ECHO_REPLY",
+ "OUT_CALL_REQUEST",
+ "OUT_CALL_REPLY",
+ "IN_CALL_REQUEST",
+ "IN_CALL_REPLY",
+ "IN_CALL_CONNECT",
+ "CALL_CLEAR_REQUEST",
+ "CALL_DISCONNECT_NOTIFY",
+ "WAN_ERROR_NOTIFY",
+ "SET_LINK_INFO"
+};
+EXPORT_SYMBOL(pptp_msg_name);
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define SECS *HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+
+#define PPTP_GRE_TIMEOUT (10 MINS)
+#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
+
+static void pptp_expectfn(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *exp)
+{
+ DEBUGP("increasing timeouts\n");
+
+ /* increase timeout of GRE data channel conntrack entry */
+ ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
+ ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
+
+ /* Can you see how rusty this code is, compared with the pre-2.6.11
+ * one? That's what happened to my shiny newnat of 2002 ;( -HW */
+
+ if (!ip_nat_pptp_hook_expectfn) {
+ struct ip_conntrack_tuple inv_t;
+ struct ip_conntrack_expect *exp_other;
+
+ /* obviously this tuple inversion only works until you do NAT */
+ invert_tuplepr(&inv_t, &exp->tuple);
+ DEBUGP("trying to unexpect other dir: ");
+ DUMP_TUPLE(&inv_t);
+
+ exp_other = ip_conntrack_expect_find(&inv_t);
+ if (exp_other) {
+ /* delete other expectation. */
+ DEBUGP("found\n");
+ ip_conntrack_unexpect_related(exp_other);
+ ip_conntrack_expect_put(exp_other);
+ } else {
+ DEBUGP("not found\n");
+ }
+ } else {
+ /* we need more than simple inversion */
+ ip_nat_pptp_hook_expectfn(ct, exp);
+ }
+}
+
+static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
+{
+ struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack_expect *exp;
+
+ DEBUGP("trying to timeout ct or exp for tuple ");
+ DUMP_TUPLE(t);
+
+ h = ip_conntrack_find_get(t, NULL);
+ if (h) {
+ struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
+ DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
+ sibling->proto.gre.timeout = 0;
+ sibling->proto.gre.stream_timeout = 0;
+ if (del_timer(&sibling->timeout))
+ sibling->timeout.function((unsigned long)sibling);
+ ip_conntrack_put(sibling);
+ return 1;
+ } else {
+ exp = ip_conntrack_expect_find(t);
+ if (exp) {
+ DEBUGP("unexpect_related of expect %p\n", exp);
+ ip_conntrack_unexpect_related(exp);
+ ip_conntrack_expect_put(exp);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+
+/* timeout GRE data connections */
+static void pptp_destroy_siblings(struct ip_conntrack *ct)
+{
+ struct ip_conntrack_tuple t;
+
+ /* Since ct->sibling_list has literally rusted away in 2.6.11,
+ * we now need another way to find out about our sibling
+ * contrack and expects... -HW */
+
+ /* try original (pns->pac) tuple */
+ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
+ t.dst.protonum = IPPROTO_GRE;
+ t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+ t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+
+ if (!destroy_sibling_or_exp(&t))
+ DEBUGP("failed to timeout original pns->pac ct/exp\n");
+
+ /* try reply (pac->pns) tuple */
+ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
+ t.dst.protonum = IPPROTO_GRE;
+ t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+ t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+
+ if (!destroy_sibling_or_exp(&t))
+ DEBUGP("failed to timeout reply pac->pns ct/exp\n");
+}
+
+/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
+static inline int
+exp_gre(struct ip_conntrack *master,
+ u_int32_t seq,
+ __be16 callid,
+ __be16 peer_callid)
+{
+ struct ip_conntrack_tuple inv_tuple;
+ struct ip_conntrack_tuple exp_tuples[] = {
+ /* tuple in original direction, PNS->PAC */
+ { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
+ .u = { .gre = { .key = peer_callid } }
+ },
+ .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
+ .u = { .gre = { .key = callid } },
+ .protonum = IPPROTO_GRE
+ },
+ },
+ /* tuple in reply direction, PAC->PNS */
+ { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
+ .u = { .gre = { .key = callid } }
+ },
+ .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
+ .u = { .gre = { .key = peer_callid } },
+ .protonum = IPPROTO_GRE
+ },
+ }
+ };
+ struct ip_conntrack_expect *exp_orig, *exp_reply;
+ int ret = 1;
+
+ exp_orig = ip_conntrack_expect_alloc(master);
+ if (exp_orig == NULL)
+ goto out;
+
+ exp_reply = ip_conntrack_expect_alloc(master);
+ if (exp_reply == NULL)
+ goto out_put_orig;
+
+ memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
+
+ exp_orig->mask.src.ip = 0xffffffff;
+ exp_orig->mask.src.u.all = 0;
+ exp_orig->mask.dst.u.all = 0;
+ exp_orig->mask.dst.u.gre.key = htons(0xffff);
+ exp_orig->mask.dst.ip = 0xffffffff;
+ exp_orig->mask.dst.protonum = 0xff;
+
+ exp_orig->master = master;
+ exp_orig->expectfn = pptp_expectfn;
+ exp_orig->flags = 0;
+
+ exp_orig->dir = IP_CT_DIR_ORIGINAL;
+
+ /* both expectations are identical apart from tuple */
+ memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
+ memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
+
+ exp_reply->dir = !exp_orig->dir;
+
+ if (ip_nat_pptp_hook_exp_gre)
+ ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
+ else {
+
+ DEBUGP("calling expect_related PNS->PAC");
+ DUMP_TUPLE(&exp_orig->tuple);
+
+ if (ip_conntrack_expect_related(exp_orig) != 0) {
+ DEBUGP("cannot expect_related()\n");
+ goto out_put_both;
+ }
+
+ DEBUGP("calling expect_related PAC->PNS");
+ DUMP_TUPLE(&exp_reply->tuple);
+
+ if (ip_conntrack_expect_related(exp_reply) != 0) {
+ DEBUGP("cannot expect_related()\n");
+ goto out_unexpect_orig;
+ }
+
+ /* Add GRE keymap entries */
+ if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
+ DEBUGP("cannot keymap_add() exp\n");
+ goto out_unexpect_both;
+ }
+
+ invert_tuplepr(&inv_tuple, &exp_reply->tuple);
+ if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
+ ip_ct_gre_keymap_destroy(master);
+ DEBUGP("cannot keymap_add() exp_inv\n");
+ goto out_unexpect_both;
+ }
+ ret = 0;
+ }
+
+out_put_both:
+ ip_conntrack_expect_put(exp_reply);
+out_put_orig:
+ ip_conntrack_expect_put(exp_orig);
+out:
+ return ret;
+
+out_unexpect_both:
+ ip_conntrack_unexpect_related(exp_reply);
+out_unexpect_orig:
+ ip_conntrack_unexpect_related(exp_orig);
+ goto out_put_both;
+}
+
+static inline int
+pptp_inbound_pkt(struct sk_buff **pskb,
+ struct tcphdr *tcph,
+ unsigned int nexthdr_off,
+ unsigned int datalen,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct PptpControlHeader _ctlh, *ctlh;
+ unsigned int reqlen;
+ union pptp_ctrl_union _pptpReq, *pptpReq;
+ struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+ u_int16_t msg;
+ __be16 *cid, *pcid;
+ u_int32_t seq;
+
+ ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ if (!ctlh) {
+ DEBUGP("error during skb_header_pointer\n");
+ return NF_ACCEPT;
+ }
+ nexthdr_off += sizeof(_ctlh);
+ datalen -= sizeof(_ctlh);
+
+ reqlen = datalen;
+ if (reqlen > sizeof(*pptpReq))
+ reqlen = sizeof(*pptpReq);
+ pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+ if (!pptpReq) {
+ DEBUGP("error during skb_header_pointer\n");
+ return NF_ACCEPT;
+ }
+
+ msg = ntohs(ctlh->messageType);
+ DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
+
+ switch (msg) {
+ case PPTP_START_SESSION_REPLY:
+ if (reqlen < sizeof(_pptpReq.srep)) {
+ DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+ break;
+ }
+
+ /* server confirms new control session */
+ if (info->sstate < PPTP_SESSION_REQUESTED) {
+ DEBUGP("%s without START_SESS_REQUEST\n",
+ pptp_msg_name[msg]);
+ break;
+ }
+ if (pptpReq->srep.resultCode == PPTP_START_OK)
+ info->sstate = PPTP_SESSION_CONFIRMED;
+ else
+ info->sstate = PPTP_SESSION_ERROR;
+ break;
+
+ case PPTP_STOP_SESSION_REPLY:
+ if (reqlen < sizeof(_pptpReq.strep)) {
+ DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+ break;
+ }
+
+ /* server confirms end of control session */
+ if (info->sstate > PPTP_SESSION_STOPREQ) {
+ DEBUGP("%s without STOP_SESS_REQUEST\n",
+ pptp_msg_name[msg]);
+ break;
+ }
+ if (pptpReq->strep.resultCode == PPTP_STOP_OK)
+ info->sstate = PPTP_SESSION_NONE;
+ else
+ info->sstate = PPTP_SESSION_ERROR;
+ break;
+
+ case PPTP_OUT_CALL_REPLY:
+ if (reqlen < sizeof(_pptpReq.ocack)) {
+ DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+ break;
+ }
+
+ /* server accepted call, we now expect GRE frames */
+ if (info->sstate != PPTP_SESSION_CONFIRMED) {
+ DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+ break;
+ }
+ if (info->cstate != PPTP_CALL_OUT_REQ &&
+ info->cstate != PPTP_CALL_OUT_CONF) {
+ DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
+ break;
+ }
+ if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
+ info->cstate = PPTP_CALL_NONE;
+ break;
+ }
+
+ cid = &pptpReq->ocack.callID;
+ pcid = &pptpReq->ocack.peersCallID;
+
+ info->pac_call_id = ntohs(*cid);
+
+ if (htons(info->pns_call_id) != *pcid) {
+ DEBUGP("%s for unknown callid %u\n",
+ pptp_msg_name[msg], ntohs(*pcid));
+ break;
+ }
+
+ DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+ ntohs(*cid), ntohs(*pcid));
+
+ info->cstate = PPTP_CALL_OUT_CONF;
+
+ seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
+ + sizeof(struct PptpControlHeader)
+ + ((void *)pcid - (void *)pptpReq);
+
+ if (exp_gre(ct, seq, *cid, *pcid) != 0)
+ printk("ip_conntrack_pptp: error during exp_gre\n");
+ break;
+
+ case PPTP_IN_CALL_REQUEST:
+ if (reqlen < sizeof(_pptpReq.icack)) {
+ DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+ break;
+ }
+
+ /* server tells us about incoming call request */
+ if (info->sstate != PPTP_SESSION_CONFIRMED) {
+ DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+ break;
+ }
+ pcid = &pptpReq->icack.peersCallID;
+ DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+ info->cstate = PPTP_CALL_IN_REQ;
+ info->pac_call_id = ntohs(*pcid);
+ break;
+
+ case PPTP_IN_CALL_CONNECT:
+ if (reqlen < sizeof(_pptpReq.iccon)) {
+ DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+ break;
+ }
+
+ /* server tells us about incoming call established */
+ if (info->sstate != PPTP_SESSION_CONFIRMED) {
+ DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+ break;
+ }
+ if (info->sstate != PPTP_CALL_IN_REP
+ && info->sstate != PPTP_CALL_IN_CONF) {
+ DEBUGP("%s but never sent IN_CALL_REPLY\n",
+ pptp_msg_name[msg]);
+ break;
+ }
+
+ pcid = &pptpReq->iccon.peersCallID;
+ cid = &info->pac_call_id;
+
+ if (info->pns_call_id != ntohs(*pcid)) {
+ DEBUGP("%s for unknown CallID %u\n",
+ pptp_msg_name[msg], ntohs(*pcid));
+ break;
+ }
+
+ DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+ info->cstate = PPTP_CALL_IN_CONF;
+
+ /* we expect a GRE connection from PAC to PNS */
+ seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
+ + sizeof(struct PptpControlHeader)
+ + ((void *)pcid - (void *)pptpReq);
+
+ if (exp_gre(ct, seq, *cid, *pcid) != 0)
+ printk("ip_conntrack_pptp: error during exp_gre\n");
+
+ break;
+
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ if (reqlen < sizeof(_pptpReq.disc)) {
+ DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+ break;
+ }
+
+ /* server confirms disconnect */
+ cid = &pptpReq->disc.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+ info->cstate = PPTP_CALL_NONE;
+
+ /* untrack this call id, unexpect GRE packets */
+ pptp_destroy_siblings(ct);
+ break;
+
+ case PPTP_WAN_ERROR_NOTIFY:
+ break;
+
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* I don't have to explain these ;) */
+ break;
+ default:
+ DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
+ ? pptp_msg_name[msg]:pptp_msg_name[0], msg);
+ break;
+ }
+
+
+ if (ip_nat_pptp_hook_inbound)
+ return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
+ pptpReq);
+
+ return NF_ACCEPT;
+
+}
+
+static inline int
+pptp_outbound_pkt(struct sk_buff **pskb,
+ struct tcphdr *tcph,
+ unsigned int nexthdr_off,
+ unsigned int datalen,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct PptpControlHeader _ctlh, *ctlh;
+ unsigned int reqlen;
+ union pptp_ctrl_union _pptpReq, *pptpReq;
+ struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+ u_int16_t msg;
+ __be16 *cid, *pcid;
+
+ ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ if (!ctlh)
+ return NF_ACCEPT;
+ nexthdr_off += sizeof(_ctlh);
+ datalen -= sizeof(_ctlh);
+
+ reqlen = datalen;
+ if (reqlen > sizeof(*pptpReq))
+ reqlen = sizeof(*pptpReq);
+ pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+ if (!pptpReq)
+ return NF_ACCEPT;
+
+ msg = ntohs(ctlh->messageType);
+ DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
+
+ switch (msg) {
+ case PPTP_START_SESSION_REQUEST:
+ /* client requests for new control session */
+ if (info->sstate != PPTP_SESSION_NONE) {
+ DEBUGP("%s but we already have one",
+ pptp_msg_name[msg]);
+ }
+ info->sstate = PPTP_SESSION_REQUESTED;
+ break;
+ case PPTP_STOP_SESSION_REQUEST:
+ /* client requests end of control session */
+ info->sstate = PPTP_SESSION_STOPREQ;
+ break;
+
+ case PPTP_OUT_CALL_REQUEST:
+ if (reqlen < sizeof(_pptpReq.ocreq)) {
+ DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+ /* FIXME: break; */
+ }
+
+ /* client initiating connection to server */
+ if (info->sstate != PPTP_SESSION_CONFIRMED) {
+ DEBUGP("%s but no session\n",
+ pptp_msg_name[msg]);
+ break;
+ }
+ info->cstate = PPTP_CALL_OUT_REQ;
+ /* track PNS call id */
+ cid = &pptpReq->ocreq.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+ info->pns_call_id = ntohs(*cid);
+ break;
+ case PPTP_IN_CALL_REPLY:
+ if (reqlen < sizeof(_pptpReq.icack)) {
+ DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+ break;
+ }
+
+ /* client answers incoming call */
+ if (info->cstate != PPTP_CALL_IN_REQ
+ && info->cstate != PPTP_CALL_IN_REP) {
+ DEBUGP("%s without incall_req\n",
+ pptp_msg_name[msg]);
+ break;
+ }
+ if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
+ info->cstate = PPTP_CALL_NONE;
+ break;
+ }
+ pcid = &pptpReq->icack.peersCallID;
+ if (info->pac_call_id != ntohs(*pcid)) {
+ DEBUGP("%s for unknown call %u\n",
+ pptp_msg_name[msg], ntohs(*pcid));
+ break;
+ }
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+ /* part two of the three-way handshake */
+ info->cstate = PPTP_CALL_IN_REP;
+ info->pns_call_id = ntohs(pptpReq->icack.callID);
+ break;
+
+ case PPTP_CALL_CLEAR_REQUEST:
+ /* client requests hangup of call */
+ if (info->sstate != PPTP_SESSION_CONFIRMED) {
+ DEBUGP("CLEAR_CALL but no session\n");
+ break;
+ }
+ /* FUTURE: iterate over all calls and check if
+ * call ID is valid. We don't do this without newnat,
+ * because we only know about last call */
+ info->cstate = PPTP_CALL_CLEAR_REQ;
+ break;
+ case PPTP_SET_LINK_INFO:
+ break;
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* I don't have to explain these ;) */
+ break;
+ default:
+ DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
+ pptp_msg_name[msg]:pptp_msg_name[0], msg);
+ /* unknown: no need to create GRE masq table entry */
+ break;
+ }
+
+ if (ip_nat_pptp_hook_outbound)
+ return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
+ pptpReq);
+
+ return NF_ACCEPT;
+}
+
+
+/* track caller id inside control connection, call expect_related */
+static int
+conntrack_pptp_help(struct sk_buff **pskb,
+ struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
+
+{
+ struct pptp_pkt_hdr _pptph, *pptph;
+ struct tcphdr _tcph, *tcph;
+ u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
+ u_int32_t datalen;
+ int dir = CTINFO2DIR(ctinfo);
+ struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+ unsigned int nexthdr_off;
+
+ int oldsstate, oldcstate;
+ int ret;
+
+ /* don't do any tracking before tcp handshake complete */
+ if (ctinfo != IP_CT_ESTABLISHED
+ && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
+ DEBUGP("ctinfo = %u, skipping\n", ctinfo);
+ return NF_ACCEPT;
+ }
+
+ nexthdr_off = (*pskb)->nh.iph->ihl*4;
+ tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
+ BUG_ON(!tcph);
+ nexthdr_off += tcph->doff * 4;
+ datalen = tcplen - tcph->doff * 4;
+
+ if (tcph->fin || tcph->rst) {
+ DEBUGP("RST/FIN received, timeouting GRE\n");
+ /* can't do this after real newnat */
+ info->cstate = PPTP_CALL_NONE;
+
+ /* untrack this call id, unexpect GRE packets */
+ pptp_destroy_siblings(ct);
+ }
+
+ pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
+ if (!pptph) {
+ DEBUGP("no full PPTP header, can't track\n");
+ return NF_ACCEPT;
+ }
+ nexthdr_off += sizeof(_pptph);
+ datalen -= sizeof(_pptph);
+
+ /* if it's not a control message we can't do anything with it */
+ if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
+ ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
+ DEBUGP("not a control packet\n");
+ return NF_ACCEPT;
+ }
+
+ oldsstate = info->sstate;
+ oldcstate = info->cstate;
+
+ spin_lock_bh(&ip_pptp_lock);
+
+ /* FIXME: We just blindly assume that the control connection is always
+ * established from PNS->PAC. However, RFC makes no guarantee */
+ if (dir == IP_CT_DIR_ORIGINAL)
+ /* client -> server (PNS -> PAC) */
+ ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+ ctinfo);
+ else
+ /* server -> client (PAC -> PNS) */
+ ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+ ctinfo);
+ DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
+ oldsstate, info->sstate, oldcstate, info->cstate);
+ spin_unlock_bh(&ip_pptp_lock);
+
+ return ret;
+}
+
+/* control protocol helper */
+static struct ip_conntrack_helper pptp = {
+ .list = { NULL, NULL },
+ .name = "pptp",
+ .me = THIS_MODULE,
+ .max_expected = 2,
+ .timeout = 5 * 60,
+ .tuple = { .src = { .ip = 0,
+ .u = { .tcp = { .port =
+ __constant_htons(PPTP_CONTROL_PORT) } }
+ },
+ .dst = { .ip = 0,
+ .u = { .all = 0 },
+ .protonum = IPPROTO_TCP
+ }
+ },
+ .mask = { .src = { .ip = 0,
+ .u = { .tcp = { .port = __constant_htons(0xffff) } }
+ },
+ .dst = { .ip = 0,
+ .u = { .all = 0 },
+ .protonum = 0xff
+ }
+ },
+ .help = conntrack_pptp_help
+};
+
+extern void __exit ip_ct_proto_gre_fini(void);
+extern int __init ip_ct_proto_gre_init(void);
+
+/* ip_conntrack_pptp initialization */
+static int __init init(void)
+{
+ int retcode;
+
+ retcode = ip_ct_proto_gre_init();
+ if (retcode < 0)
+ return retcode;
+
+ DEBUGP(" registering helper\n");
+ if ((retcode = ip_conntrack_helper_register(&pptp))) {
+ printk(KERN_ERR "Unable to register conntrack application "
+ "helper for pptp: %d\n", retcode);
+ ip_ct_proto_gre_fini();
+ return retcode;
+ }
+
+ printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ ip_conntrack_helper_unregister(&pptp);
+ ip_ct_proto_gre_fini();
+ printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
+}
+
+module_init(init);
+module_exit(fini);
+
+EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
+EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
+EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
+EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index 25438eec21a..15457415a4f 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -34,7 +34,7 @@
#include <linux/moduleparam.h>
#define MAX_PORTS 8
-static int ports[MAX_PORTS];
+static short ports[MAX_PORTS];
static int ports_c;
static int max_dcc_channels = 8;
static unsigned int dcc_timeout = 300;
@@ -52,7 +52,7 @@ EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
MODULE_LICENSE("GPL");
-module_param_array(ports, int, &ports_c, 0400);
+module_param_array(ports, short, &ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
module_param(max_dcc_channels, int, 0400);
MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
@@ -221,6 +221,7 @@ static int help(struct sk_buff **pskb,
{ { 0, { 0 } },
{ 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
exp->expectfn = NULL;
+ exp->flags = 0;
if (ip_nat_irc_hook)
ret = ip_nat_irc_hook(pskb, ctinfo,
addr_beg_p - ib_ptr,
@@ -239,7 +240,7 @@ static int help(struct sk_buff **pskb,
}
static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
-static char irc_names[MAX_PORTS][10];
+static char irc_names[MAX_PORTS][sizeof("irc-65535")];
static void fini(void);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
new file mode 100644
index 00000000000..577bac22dcc
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -0,0 +1,142 @@
+/*
+ * NetBIOS name service broadcast connection tracking helper
+ *
+ * (c) 2005 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+/*
+ * This helper tracks locally originating NetBIOS name service
+ * requests by issuing permanent expectations (valid until
+ * timing out) matching all reply connections from the
+ * destination network. The only NetBIOS specific thing is
+ * actually the port number.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+
+#define NMBD_PORT 137
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+
+static unsigned int timeout = 3;
+module_param(timeout, int, 0600);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+static int help(struct sk_buff **pskb,
+ struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
+{
+ struct ip_conntrack_expect *exp;
+ struct iphdr *iph = (*pskb)->nh.iph;
+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
+ struct in_device *in_dev;
+ u_int32_t mask = 0;
+
+ /* we're only interested in locally generated packets */
+ if ((*pskb)->sk == NULL)
+ goto out;
+ if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+ goto out;
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ goto out;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get(rt->u.dst.dev);
+ if (in_dev != NULL) {
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_broadcast == iph->daddr) {
+ mask = ifa->ifa_mask;
+ break;
+ }
+ } endfor_ifa(in_dev);
+ }
+ rcu_read_unlock();
+
+ if (mask == 0)
+ goto out;
+
+ exp = ip_conntrack_expect_alloc(ct);
+ if (exp == NULL)
+ goto out;
+
+ exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ exp->tuple.src.u.udp.port = ntohs(NMBD_PORT);
+
+ exp->mask.src.ip = mask;
+ exp->mask.src.u.udp.port = 0xFFFF;
+ exp->mask.dst.ip = 0xFFFFFFFF;
+ exp->mask.dst.u.udp.port = 0xFFFF;
+ exp->mask.dst.protonum = 0xFF;
+
+ exp->expectfn = NULL;
+ exp->flags = IP_CT_EXPECT_PERMANENT;
+
+ ip_conntrack_expect_related(exp);
+ ip_conntrack_expect_put(exp);
+
+ ip_ct_refresh(ct, *pskb, timeout * HZ);
+out:
+ return NF_ACCEPT;
+}
+
+static struct ip_conntrack_helper helper = {
+ .name = "netbios-ns",
+ .tuple = {
+ .src = {
+ .u = {
+ .udp = {
+ .port = __constant_htons(NMBD_PORT),
+ }
+ }
+ },
+ .dst = {
+ .protonum = IPPROTO_UDP,
+ },
+ },
+ .mask = {
+ .src = {
+ .u = {
+ .udp = {
+ .port = 0xFFFF,
+ }
+ }
+ },
+ .dst = {
+ .protonum = 0xFF,
+ },
+ },
+ .max_expected = 1,
+ .me = THIS_MODULE,
+ .help = help,
+};
+
+static int __init init(void)
+{
+ helper.timeout = timeout;
+ return ip_conntrack_helper_register(&helper);
+}
+
+static void __exit fini(void)
+{
+ ip_conntrack_helper_unregister(&helper);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index a4e9278db4e..b08a432efcf 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- exp = ip_conntrack_expect_find_get(&tuple);
+ exp = ip_conntrack_expect_find(&tuple);
if (!exp)
return -ENOENT;
@@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
/* bump usage count to 2 */
- exp = ip_conntrack_expect_find_get(&tuple);
+ exp = ip_conntrack_expect_find(&tuple);
if (!exp)
return -ENOENT;
@@ -1349,8 +1349,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
list) {
if (exp->master->helper == h
- && del_timer(&exp->timeout))
- __ip_ct_expect_unlink_destroy(exp);
+ && del_timer(&exp->timeout)) {
+ ip_ct_unlink_expect(exp);
+ ip_conntrack_expect_put(exp);
+ }
}
write_unlock(&ip_conntrack_lock);
} else {
@@ -1358,8 +1360,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
write_lock_bh(&ip_conntrack_lock);
list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
list) {
- if (del_timer(&exp->timeout))
- __ip_ct_expect_unlink_destroy(exp);
+ if (del_timer(&exp->timeout)) {
+ ip_ct_unlink_expect(exp);
+ ip_conntrack_expect_put(exp);
+ }
}
write_unlock_bh(&ip_conntrack_lock);
}
@@ -1413,6 +1417,7 @@ ctnetlink_create_expect(struct nfattr *cda[])
}
exp->expectfn = NULL;
+ exp->flags = 0;
exp->master = ct;
memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
new file mode 100644
index 00000000000..de3cb9db6f8
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -0,0 +1,327 @@
+/*
+ * ip_conntrack_proto_gre.c - Version 3.0
+ *
+ * Connection tracking protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/list.h>
+
+static DEFINE_RWLOCK(ip_ct_gre_lock);
+#define ASSERT_READ_LOCK(x)
+#define ASSERT_WRITE_LOCK(x)
+
+#include <linux/netfilter_ipv4/listhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
+
+/* shamelessly stolen from ip_conntrack_proto_udp.c */
+#define GRE_TIMEOUT (30*HZ)
+#define GRE_STREAM_TIMEOUT (180*HZ)
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
+#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
+ NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
+ NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
+#else
+#define DEBUGP(x, args...)
+#define DUMP_TUPLE_GRE(x)
+#endif
+
+/* GRE KEYMAP HANDLING FUNCTIONS */
+static LIST_HEAD(gre_keymap_list);
+
+static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
+ const struct ip_conntrack_tuple *t)
+{
+ return ((km->tuple.src.ip == t->src.ip) &&
+ (km->tuple.dst.ip == t->dst.ip) &&
+ (km->tuple.dst.protonum == t->dst.protonum) &&
+ (km->tuple.dst.u.all == t->dst.u.all));
+}
+
+/* look up the source key for a given tuple */
+static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t)
+{
+ struct ip_ct_gre_keymap *km;
+ u_int32_t key = 0;
+
+ read_lock_bh(&ip_ct_gre_lock);
+ km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
+ struct ip_ct_gre_keymap *, t);
+ if (km)
+ key = km->tuple.src.u.gre.key;
+ read_unlock_bh(&ip_ct_gre_lock);
+
+ DEBUGP("lookup src key 0x%x up key for ", key);
+ DUMP_TUPLE_GRE(t);
+
+ return key;
+}
+
+/* add a single keymap entry, associate with specified master ct */
+int
+ip_ct_gre_keymap_add(struct ip_conntrack *ct,
+ struct ip_conntrack_tuple *t, int reply)
+{
+ struct ip_ct_gre_keymap **exist_km, *km, *old;
+
+ if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
+ DEBUGP("refusing to add GRE keymap to non-pptp session\n");
+ return -1;
+ }
+
+ if (!reply)
+ exist_km = &ct->help.ct_pptp_info.keymap_orig;
+ else
+ exist_km = &ct->help.ct_pptp_info.keymap_reply;
+
+ if (*exist_km) {
+ /* check whether it's a retransmission */
+ old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
+ struct ip_ct_gre_keymap *, t);
+ if (old == *exist_km) {
+ DEBUGP("retransmission\n");
+ return 0;
+ }
+
+ DEBUGP("trying to override keymap_%s for ct %p\n",
+ reply? "reply":"orig", ct);
+ return -EEXIST;
+ }
+
+ km = kmalloc(sizeof(*km), GFP_ATOMIC);
+ if (!km)
+ return -ENOMEM;
+
+ memcpy(&km->tuple, t, sizeof(*t));
+ *exist_km = km;
+
+ DEBUGP("adding new entry %p: ", km);
+ DUMP_TUPLE_GRE(&km->tuple);
+
+ write_lock_bh(&ip_ct_gre_lock);
+ list_append(&gre_keymap_list, km);
+ write_unlock_bh(&ip_ct_gre_lock);
+
+ return 0;
+}
+
+/* destroy the keymap entries associated with specified master ct */
+void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
+{
+ DEBUGP("entering for ct %p\n", ct);
+
+ if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
+ DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
+ return;
+ }
+
+ write_lock_bh(&ip_ct_gre_lock);
+ if (ct->help.ct_pptp_info.keymap_orig) {
+ DEBUGP("removing %p from list\n",
+ ct->help.ct_pptp_info.keymap_orig);
+ list_del(&ct->help.ct_pptp_info.keymap_orig->list);
+ kfree(ct->help.ct_pptp_info.keymap_orig);
+ ct->help.ct_pptp_info.keymap_orig = NULL;
+ }
+ if (ct->help.ct_pptp_info.keymap_reply) {
+ DEBUGP("removing %p from list\n",
+ ct->help.ct_pptp_info.keymap_reply);
+ list_del(&ct->help.ct_pptp_info.keymap_reply->list);
+ kfree(ct->help.ct_pptp_info.keymap_reply);
+ ct->help.ct_pptp_info.keymap_reply = NULL;
+ }
+ write_unlock_bh(&ip_ct_gre_lock);
+}
+
+
+/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
+
+/* invert gre part of tuple */
+static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_tuple *orig)
+{
+ tuple->dst.u.gre.key = orig->src.u.gre.key;
+ tuple->src.u.gre.key = orig->dst.u.gre.key;
+
+ return 1;
+}
+
+/* gre hdr info to tuple */
+static int gre_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple)
+{
+ struct gre_hdr_pptp _pgrehdr, *pgrehdr;
+ u_int32_t srckey;
+ struct gre_hdr _grehdr, *grehdr;
+
+ /* first only delinearize old RFC1701 GRE header */
+ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
+ if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+ /* try to behave like "ip_conntrack_proto_generic" */
+ tuple->src.u.all = 0;
+ tuple->dst.u.all = 0;
+ return 1;
+ }
+
+ /* PPTP header is variable length, only need up to the call_id field */
+ pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
+ if (!pgrehdr)
+ return 1;
+
+ if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
+ DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
+ return 0;
+ }
+
+ tuple->dst.u.gre.key = pgrehdr->call_id;
+ srckey = gre_keymap_lookup(tuple);
+ tuple->src.u.gre.key = srckey;
+
+ return 1;
+}
+
+/* print gre part of tuple */
+static int gre_print_tuple(struct seq_file *s,
+ const struct ip_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ ntohs(tuple->src.u.gre.key),
+ ntohs(tuple->dst.u.gre.key));
+}
+
+/* print private data for conntrack */
+static int gre_print_conntrack(struct seq_file *s,
+ const struct ip_conntrack *ct)
+{
+ return seq_printf(s, "timeout=%u, stream_timeout=%u ",
+ (ct->proto.gre.timeout / HZ),
+ (ct->proto.gre.stream_timeout / HZ));
+}
+
+/* Returns verdict for packet, and may modify conntrack */
+static int gre_packet(struct ip_conntrack *ct,
+ const struct sk_buff *skb,
+ enum ip_conntrack_info conntrackinfo)
+{
+ /* If we've seen traffic both ways, this is a GRE connection.
+ * Extend timeout. */
+ if (ct->status & IPS_SEEN_REPLY) {
+ ip_ct_refresh_acct(ct, conntrackinfo, skb,
+ ct->proto.gre.stream_timeout);
+ /* Also, more likely to be important, and not a probe. */
+ set_bit(IPS_ASSURED_BIT, &ct->status);
+ } else
+ ip_ct_refresh_acct(ct, conntrackinfo, skb,
+ ct->proto.gre.timeout);
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int gre_new(struct ip_conntrack *ct,
+ const struct sk_buff *skb)
+{
+ DEBUGP(": ");
+ DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+
+ /* initialize to sane value. Ideally a conntrack helper
+ * (e.g. in case of pptp) is increasing them */
+ ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
+ ct->proto.gre.timeout = GRE_TIMEOUT;
+
+ return 1;
+}
+
+/* Called when a conntrack entry has already been removed from the hashes
+ * and is about to be deleted from memory */
+static void gre_destroy(struct ip_conntrack *ct)
+{
+ struct ip_conntrack *master = ct->master;
+ DEBUGP(" entering\n");
+
+ if (!master)
+ DEBUGP("no master !?!\n");
+ else
+ ip_ct_gre_keymap_destroy(master);
+}
+
+/* protocol helper struct */
+static struct ip_conntrack_protocol gre = {
+ .proto = IPPROTO_GRE,
+ .name = "gre",
+ .pkt_to_tuple = gre_pkt_to_tuple,
+ .invert_tuple = gre_invert_tuple,
+ .print_tuple = gre_print_tuple,
+ .print_conntrack = gre_print_conntrack,
+ .packet = gre_packet,
+ .new = gre_new,
+ .destroy = gre_destroy,
+ .me = THIS_MODULE,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+ .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
+ .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
+#endif
+};
+
+/* ip_conntrack_proto_gre initialization */
+int __init ip_ct_proto_gre_init(void)
+{
+ return ip_conntrack_protocol_register(&gre);
+}
+
+void __exit ip_ct_proto_gre_fini(void)
+{
+ struct list_head *pos, *n;
+
+ /* delete all keymap entries */
+ write_lock_bh(&ip_ct_gre_lock);
+ list_for_each_safe(pos, n, &gre_keymap_list) {
+ DEBUGP("deleting keymap %p at module unload time\n", pos);
+ list_del(pos);
+ kfree(pos);
+ }
+ write_unlock_bh(&ip_ct_gre_lock);
+
+ ip_conntrack_protocol_unregister(&gre);
+}
+
+EXPORT_SYMBOL(ip_ct_gre_keymap_add);
+EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index f23ef1f88c4..1985abc59d2 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -349,6 +349,7 @@ static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
return 0;
nfattr_failure:
+ read_unlock_bh(&tcp_lock);
return -1;
}
#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index ee5895afd0c..dd476b191f4 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -989,16 +989,16 @@ EXPORT_SYMBOL(need_ip_conntrack);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(ip_ct_refresh_acct);
+EXPORT_SYMBOL(__ip_ct_refresh_acct);
EXPORT_SYMBOL(ip_conntrack_expect_alloc);
EXPORT_SYMBOL(ip_conntrack_expect_put);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
+EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_unexpect_related);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
-EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy);
+EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index f8ff170f390..a78736b8525 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -26,9 +26,9 @@ MODULE_DESCRIPTION("tftp connection tracking helper");
MODULE_LICENSE("GPL");
#define MAX_PORTS 8
-static int ports[MAX_PORTS];
+static short ports[MAX_PORTS];
static int ports_c;
-module_param_array(ports, int, &ports_c, 0400);
+module_param_array(ports, short, &ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#if 0
@@ -75,6 +75,7 @@ static int tftp_help(struct sk_buff **pskb,
exp->mask.dst.u.udp.port = 0xffff;
exp->mask.dst.protonum = 0xff;
exp->expectfn = NULL;
+ exp->flags = 0;
DEBUGP("expect: ");
DUMP_TUPLE(&exp->tuple);
@@ -99,7 +100,7 @@ static int tftp_help(struct sk_buff **pskb,
}
static struct ip_conntrack_helper tftp[MAX_PORTS];
-static char tftp_names[MAX_PORTS][10];
+static char tftp_names[MAX_PORTS][sizeof("tftp-65535")];
static void fini(void)
{
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1adedb743f6..c3ea891d38e 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
return ret;
}
+EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
+EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
#endif
int __init ip_nat_init(void)
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
new file mode 100644
index 00000000000..3cdd0684d30
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -0,0 +1,401 @@
+/*
+ * ip_nat_pptp.c - Version 3.0
+ *
+ * NAT support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * TODO: - NAT to a unique tuple, not to TCP source port
+ * (needs netfilter tuple reservation)
+ *
+ * Changes:
+ * 2002-02-10 - Version 1.3
+ * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
+ * in local connections (Philip Craig <philipc@snapgear.com>)
+ * - add checks for magicCookie and pptp version
+ * - make argument list of pptp_{out,in}bound_packet() shorter
+ * - move to C99 style initializers
+ * - print version number at module loadtime
+ * 2003-09-22 - Version 1.5
+ * - use SNATed tcp sourceport as callid, since we get called before
+ * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
+ * 2004-10-22 - Version 2.0
+ * - kernel 2.6.x version
+ * 2005-06-10 - Version 3.0
+ * - kernel >= 2.6.11 version,
+ * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_pptp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+#define IP_NAT_PPTP_VERSION "3.0"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
+
+
+#if 0
+extern const char *pptp_msg_name[];
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
+ __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static void pptp_nat_expected(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *exp)
+{
+ struct ip_conntrack *master = ct->master;
+ struct ip_conntrack_expect *other_exp;
+ struct ip_conntrack_tuple t;
+ struct ip_ct_pptp_master *ct_pptp_info;
+ struct ip_nat_pptp *nat_pptp_info;
+
+ ct_pptp_info = &master->help.ct_pptp_info;
+ nat_pptp_info = &master->nat.help.nat_pptp_info;
+
+ /* And here goes the grand finale of corrosion... */
+
+ if (exp->dir == IP_CT_DIR_ORIGINAL) {
+ DEBUGP("we are PNS->PAC\n");
+ /* therefore, build tuple for PAC->PNS */
+ t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+ t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
+ t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+ t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
+ t.dst.protonum = IPPROTO_GRE;
+ } else {
+ DEBUGP("we are PAC->PNS\n");
+ /* build tuple for PNS->PAC */
+ t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+ t.src.u.gre.key =
+ htons(master->nat.help.nat_pptp_info.pns_call_id);
+ t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+ t.dst.u.gre.key =
+ htons(master->nat.help.nat_pptp_info.pac_call_id);
+ t.dst.protonum = IPPROTO_GRE;
+ }
+
+ DEBUGP("trying to unexpect other dir: ");
+ DUMP_TUPLE(&t);
+ other_exp = ip_conntrack_expect_find(&t);
+ if (other_exp) {
+ ip_conntrack_unexpect_related(other_exp);
+ ip_conntrack_expect_put(other_exp);
+ DEBUGP("success\n");
+ } else {
+ DEBUGP("not found!\n");
+ }
+
+ ip_nat_follow_master(ct, exp);
+}
+
+/* outbound packets == from PNS to PAC */
+static int
+pptp_outbound_pkt(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+
+{
+ struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
+ struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
+
+ u_int16_t msg, *cid = NULL, new_callid;
+
+ new_callid = htons(ct_pptp_info->pns_call_id);
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REQUEST:
+ cid = &pptpReq->ocreq.callID;
+ /* FIXME: ideally we would want to reserve a call ID
+ * here. current netfilter NAT core is not able to do
+ * this :( For now we use TCP source port. This breaks
+ * multiple calls within one control session */
+
+ /* save original call ID in nat_info */
+ nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+ /* don't use tcph->source since we are at a DSTmanip
+ * hook (e.g. PREROUTING) and pkt is not mangled yet */
+ new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ /* save new call ID in ct info */
+ ct_pptp_info->pns_call_id = ntohs(new_callid);
+ break;
+ case PPTP_IN_CALL_REPLY:
+ cid = &pptpReq->icreq.callID;
+ break;
+ case PPTP_CALL_CLEAR_REQUEST:
+ cid = &pptpReq->clrreq.callID;
+ break;
+ default:
+ DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
+ (msg <= PPTP_MSG_MAX)?
+ pptp_msg_name[msg]:pptp_msg_name[0]);
+ /* fall through */
+
+ case PPTP_SET_LINK_INFO:
+ /* only need to NAT in case PAC is behind NAT box */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
+ * down to here */
+
+ IP_NF_ASSERT(cid);
+
+ DEBUGP("altering call id from 0x%04x to 0x%04x\n",
+ ntohs(*cid), ntohs(new_callid));
+
+ /* mangle packet */
+ if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
+ sizeof(new_callid),
+ (char *)&new_callid,
+ sizeof(new_callid)) == 0)
+ return NF_DROP;
+
+ return NF_ACCEPT;
+}
+
+static int
+pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
+ struct ip_conntrack_expect *expect_reply)
+{
+ struct ip_ct_pptp_master *ct_pptp_info =
+ &expect_orig->master->help.ct_pptp_info;
+ struct ip_nat_pptp *nat_pptp_info =
+ &expect_orig->master->nat.help.nat_pptp_info;
+
+ struct ip_conntrack *ct = expect_orig->master;
+
+ struct ip_conntrack_tuple inv_t;
+ struct ip_conntrack_tuple *orig_t, *reply_t;
+
+ /* save original PAC call ID in nat_info */
+ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
+
+ /* alter expectation */
+ orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ /* alter expectation for PNS->PAC direction */
+ invert_tuplepr(&inv_t, &expect_orig->tuple);
+ expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
+ expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
+ expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+ inv_t.src.ip = reply_t->src.ip;
+ inv_t.dst.ip = reply_t->dst.ip;
+ inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
+ inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+
+ if (!ip_conntrack_expect_related(expect_orig)) {
+ DEBUGP("successfully registered expect\n");
+ } else {
+ DEBUGP("can't expect_related(expect_orig)\n");
+ return 1;
+ }
+
+ /* alter expectation for PAC->PNS direction */
+ invert_tuplepr(&inv_t, &expect_reply->tuple);
+ expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
+ expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
+ expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+ inv_t.src.ip = orig_t->src.ip;
+ inv_t.dst.ip = orig_t->dst.ip;
+ inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
+ inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+
+ if (!ip_conntrack_expect_related(expect_reply)) {
+ DEBUGP("successfully registered expect\n");
+ } else {
+ DEBUGP("can't expect_related(expect_reply)\n");
+ ip_conntrack_unexpect_related(expect_orig);
+ return 1;
+ }
+
+ if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
+ DEBUGP("can't register original keymap\n");
+ ip_conntrack_unexpect_related(expect_orig);
+ ip_conntrack_unexpect_related(expect_reply);
+ return 1;
+ }
+
+ if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
+ DEBUGP("can't register reply keymap\n");
+ ip_conntrack_unexpect_related(expect_orig);
+ ip_conntrack_unexpect_related(expect_reply);
+ ip_ct_gre_keymap_destroy(ct);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* inbound packets == from PAC to PNS */
+static int
+pptp_inbound_pkt(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+{
+ struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
+ u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
+
+ int ret = NF_ACCEPT, rv;
+
+ new_pcid = htons(nat_pptp_info->pns_call_id);
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REPLY:
+ pcid = &pptpReq->ocack.peersCallID;
+ cid = &pptpReq->ocack.callID;
+ break;
+ case PPTP_IN_CALL_CONNECT:
+ pcid = &pptpReq->iccon.peersCallID;
+ break;
+ case PPTP_IN_CALL_REQUEST:
+ /* only need to nat in case PAC is behind NAT box */
+ break;
+ case PPTP_WAN_ERROR_NOTIFY:
+ pcid = &pptpReq->wanerr.peersCallID;
+ break;
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ pcid = &pptpReq->disc.callID;
+ break;
+ case PPTP_SET_LINK_INFO:
+ pcid = &pptpReq->setlink.peersCallID;
+ break;
+
+ default:
+ DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
+ pptp_msg_name[msg]:pptp_msg_name[0]);
+ /* fall through */
+
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
+ * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
+
+ /* mangle packet */
+ IP_NF_ASSERT(pcid);
+ DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
+ ntohs(*pcid), ntohs(new_pcid));
+
+ rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
+ sizeof(new_pcid), (char *)&new_pcid,
+ sizeof(new_pcid));
+ if (rv != NF_ACCEPT)
+ return rv;
+
+ if (new_cid) {
+ IP_NF_ASSERT(cid);
+ DEBUGP("altering call id from 0x%04x to 0x%04x\n",
+ ntohs(*cid), ntohs(new_cid));
+ rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
+ sizeof(new_cid),
+ (char *)&new_cid,
+ sizeof(new_cid));
+ if (rv != NF_ACCEPT)
+ return rv;
+ }
+
+ /* check for earlier return value of 'switch' above */
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ /* great, at least we don't need to resize packets */
+ return NF_ACCEPT;
+}
+
+
+extern int __init ip_nat_proto_gre_init(void);
+extern void __exit ip_nat_proto_gre_fini(void);
+
+static int __init init(void)
+{
+ int ret;
+
+ DEBUGP("%s: registering NAT helper\n", __FILE__);
+
+ ret = ip_nat_proto_gre_init();
+ if (ret < 0)
+ return ret;
+
+ BUG_ON(ip_nat_pptp_hook_outbound);
+ ip_nat_pptp_hook_outbound = &pptp_outbound_pkt;
+
+ BUG_ON(ip_nat_pptp_hook_inbound);
+ ip_nat_pptp_hook_inbound = &pptp_inbound_pkt;
+
+ BUG_ON(ip_nat_pptp_hook_exp_gre);
+ ip_nat_pptp_hook_exp_gre = &pptp_exp_gre;
+
+ BUG_ON(ip_nat_pptp_hook_expectfn);
+ ip_nat_pptp_hook_expectfn = &pptp_nat_expected;
+
+ printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ DEBUGP("cleanup_module\n" );
+
+ ip_nat_pptp_hook_expectfn = NULL;
+ ip_nat_pptp_hook_exp_gre = NULL;
+ ip_nat_pptp_hook_inbound = NULL;
+ ip_nat_pptp_hook_outbound = NULL;
+
+ ip_nat_proto_gre_fini();
+ /* Make sure noone calls it, meanwhile */
+ synchronize_net();
+
+ printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
new file mode 100644
index 00000000000..7c128540167
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -0,0 +1,214 @@
+/*
+ * ip_nat_proto_gre.c - Version 2.0
+ *
+ * NAT protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
+ __FUNCTION__, ## args)
+#else
+#define DEBUGP(x, args...)
+#endif
+
+/* is key in given range between min and max */
+static int
+gre_in_range(const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype,
+ const union ip_conntrack_manip_proto *min,
+ const union ip_conntrack_manip_proto *max)
+{
+ u_int32_t key;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ key = tuple->src.u.gre.key;
+ else
+ key = tuple->dst.u.gre.key;
+
+ return ntohl(key) >= ntohl(min->gre.key)
+ && ntohl(key) <= ntohl(max->gre.key);
+}
+
+/* generate unique tuple ... */
+static int
+gre_unique_tuple(struct ip_conntrack_tuple *tuple,
+ const struct ip_nat_range *range,
+ enum ip_nat_manip_type maniptype,
+ const struct ip_conntrack *conntrack)
+{
+ static u_int16_t key;
+ u_int16_t *keyptr;
+ unsigned int min, i, range_size;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ keyptr = &tuple->src.u.gre.key;
+ else
+ keyptr = &tuple->dst.u.gre.key;
+
+ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ DEBUGP("%p: NATing GRE PPTP\n", conntrack);
+ min = 1;
+ range_size = 0xffff;
+ } else {
+ min = ntohl(range->min.gre.key);
+ range_size = ntohl(range->max.gre.key) - min + 1;
+ }
+
+ DEBUGP("min = %u, range_size = %u\n", min, range_size);
+
+ for (i = 0; i < range_size; i++, key++) {
+ *keyptr = htonl(min + key % range_size);
+ if (!ip_nat_used_tuple(tuple, conntrack))
+ return 1;
+ }
+
+ DEBUGP("%p: no NAT mapping\n", conntrack);
+
+ return 0;
+}
+
+/* manipulate a GRE packet according to maniptype */
+static int
+gre_manip_pkt(struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct ip_conntrack_tuple *tuple,
+ enum ip_nat_manip_type maniptype)
+{
+ struct gre_hdr *greh;
+ struct gre_hdr_pptp *pgreh;
+ struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ unsigned int hdroff = iphdroff + iph->ihl*4;
+
+ /* pgreh includes two optional 32bit fields which are not required
+ * to be there. That's where the magic '8' comes from */
+ if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
+ return 0;
+
+ greh = (void *)(*pskb)->data + hdroff;
+ pgreh = (struct gre_hdr_pptp *) greh;
+
+ /* we only have destination manip of a packet, since 'source key'
+ * is not present in the packet itself */
+ if (maniptype == IP_NAT_MANIP_DST) {
+ /* key manipulation is always dest */
+ switch (greh->version) {
+ case 0:
+ if (!greh->key) {
+ DEBUGP("can't nat GRE w/o key\n");
+ break;
+ }
+ if (greh->csum) {
+ /* FIXME: Never tested this code... */
+ *(gre_csum(greh)) =
+ ip_nat_cheat_check(~*(gre_key(greh)),
+ tuple->dst.u.gre.key,
+ *(gre_csum(greh)));
+ }
+ *(gre_key(greh)) = tuple->dst.u.gre.key;
+ break;
+ case GRE_VERSION_PPTP:
+ DEBUGP("call_id -> 0x%04x\n",
+ ntohl(tuple->dst.u.gre.key));
+ pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
+ break;
+ default:
+ DEBUGP("can't nat unknown GRE version\n");
+ return 0;
+ break;
+ }
+ }
+ return 1;
+}
+
+/* print out a nat tuple */
+static unsigned int
+gre_print(char *buffer,
+ const struct ip_conntrack_tuple *match,
+ const struct ip_conntrack_tuple *mask)
+{
+ unsigned int len = 0;
+
+ if (mask->src.u.gre.key)
+ len += sprintf(buffer + len, "srckey=0x%x ",
+ ntohl(match->src.u.gre.key));
+
+ if (mask->dst.u.gre.key)
+ len += sprintf(buffer + len, "dstkey=0x%x ",
+ ntohl(match->src.u.gre.key));
+
+ return len;
+}
+
+/* print a range of keys */
+static unsigned int
+gre_print_range(char *buffer, const struct ip_nat_range *range)
+{
+ if (range->min.gre.key != 0
+ || range->max.gre.key != 0xFFFF) {
+ if (range->min.gre.key == range->max.gre.key)
+ return sprintf(buffer, "key 0x%x ",
+ ntohl(range->min.gre.key));
+ else
+ return sprintf(buffer, "keys 0x%u-0x%u ",
+ ntohl(range->min.gre.key),
+ ntohl(range->max.gre.key));
+ } else
+ return 0;
+}
+
+/* nat helper struct */
+static struct ip_nat_protocol gre = {
+ .name = "GRE",
+ .protonum = IPPROTO_GRE,
+ .manip_pkt = gre_manip_pkt,
+ .in_range = gre_in_range,
+ .unique_tuple = gre_unique_tuple,
+ .print = gre_print,
+ .print_range = gre_print_range,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+ .range_to_nfattr = ip_nat_port_range_to_nfattr,
+ .nfattr_to_range = ip_nat_port_nfattr_to_range,
+#endif
+};
+
+int __init ip_nat_proto_gre_init(void)
+{
+ return ip_nat_protocol_register(&gre);
+}
+
+void __exit ip_nat_proto_gre_fini(void)
+{
+ ip_nat_protocol_unregister(&gre);
+}
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 60d70fa41a1..cb66b8bddeb 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -255,6 +255,27 @@ alloc_null_binding(struct ip_conntrack *conntrack,
return ip_nat_setup_info(conntrack, &range, hooknum);
}
+unsigned int
+alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
+ struct ip_nat_info *info,
+ unsigned int hooknum)
+{
+ u_int32_t ip
+ = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
+ ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
+ : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
+ u_int16_t all
+ = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
+ ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
+ : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
+ struct ip_nat_range range
+ = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
+
+ DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
+ conntrack, NIPQUAD(ip));
+ return ip_nat_setup_info(conntrack, &range, hooknum);
+}
+
int ip_nat_rule_find(struct sk_buff **pskb,
unsigned int hooknum,
const struct net_device *in,
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 89db052add8..0ff368b131f 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -123,8 +123,12 @@ ip_nat_fn(unsigned int hooknum,
if (!ip_nat_initialized(ct, maniptype)) {
unsigned int ret;
- /* LOCAL_IN hook doesn't have a chain! */
- if (hooknum == NF_IP_LOCAL_IN)
+ if (unlikely(is_confirmed(ct)))
+ /* NAT module was loaded late */
+ ret = alloc_null_binding_confirmed(ct, info,
+ hooknum);
+ else if (hooknum == NF_IP_LOCAL_IN)
+ /* LOCAL_IN hook doesn't have a chain! */
ret = alloc_null_binding(ct, info, hooknum);
else
ret = ip_nat_rule_find(pskb, hooknum,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2d05cafec22..9bcb398fbc1 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -13,6 +13,7 @@
#include <linux/config.h>
#include <linux/proc_fs.h>
#include <linux/jhash.h>
+#include <linux/bitops.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/tcp.h>
@@ -30,7 +31,7 @@
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
-#define CLUSTERIP_VERSION "0.7"
+#define CLUSTERIP_VERSION "0.8"
#define DEBUG_CLUSTERIP
@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
struct clusterip_config {
struct list_head list; /* list of all configs */
atomic_t refcount; /* reference count */
+ atomic_t entries; /* number of entries/rules
+ * referencing us */
u_int32_t clusterip; /* the IP address */
u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
struct net_device *dev; /* device */
u_int16_t num_total_nodes; /* total number of nodes */
- u_int16_t num_local_nodes; /* number of local nodes */
- u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */
+ unsigned long local_nodes; /* node number array */
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *pde; /* proc dir entry */
@@ -66,8 +68,7 @@ struct clusterip_config {
static LIST_HEAD(clusterip_configs);
-/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
- * data within all structurses (num_local_nodes, local_nodes[]) */
+/* clusterip_lock protects the clusterip_configs list */
static DEFINE_RWLOCK(clusterip_lock);
#ifdef CONFIG_PROC_FS
@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
#endif
static inline void
-clusterip_config_get(struct clusterip_config *c) {
+clusterip_config_get(struct clusterip_config *c)
+{
atomic_inc(&c->refcount);
}
static inline void
-clusterip_config_put(struct clusterip_config *c) {
- if (atomic_dec_and_test(&c->refcount)) {
+clusterip_config_put(struct clusterip_config *c)
+{
+ if (atomic_dec_and_test(&c->refcount))
+ kfree(c);
+}
+
+/* increase the count of entries(rules) using/referencing this config */
+static inline void
+clusterip_config_entry_get(struct clusterip_config *c)
+{
+ atomic_inc(&c->entries);
+}
+
+/* decrease the count of entries using/referencing this config. If last
+ * entry(rule) is removed, remove the config from lists, but don't free it
+ * yet, since proc-files could still be holding references */
+static inline void
+clusterip_config_entry_put(struct clusterip_config *c)
+{
+ if (atomic_dec_and_test(&c->entries)) {
write_lock_bh(&clusterip_lock);
list_del(&c->list);
write_unlock_bh(&clusterip_lock);
+
dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
dev_put(c->dev);
- kfree(c);
+
+ /* In case anyone still accesses the file, the open/close
+ * functions are also incrementing the refcount on their own,
+ * so it's safe to remove the entry even if it's in use. */
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry(c->pde->name, c->pde->parent);
+#endif
}
}
-
static struct clusterip_config *
__clusterip_config_find(u_int32_t clusterip)
{
@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
}
static inline struct clusterip_config *
-clusterip_config_find_get(u_int32_t clusterip)
+clusterip_config_find_get(u_int32_t clusterip, int entry)
{
struct clusterip_config *c;
@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
return NULL;
}
atomic_inc(&c->refcount);
+ if (entry)
+ atomic_inc(&c->entries);
read_unlock_bh(&clusterip_lock);
return c;
}
+static void
+clusterip_config_init_nodelist(struct clusterip_config *c,
+ const struct ipt_clusterip_tgt_info *i)
+{
+ int n;
+
+ for (n = 0; n < i->num_local_nodes; n++) {
+ set_bit(i->local_nodes[n] - 1, &c->local_nodes);
+ }
+}
+
static struct clusterip_config *
clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
struct net_device *dev)
@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
c->clusterip = ip;
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
c->num_total_nodes = i->num_total_nodes;
- c->num_local_nodes = i->num_local_nodes;
- memcpy(&c->local_nodes, &i->local_nodes, sizeof(&c->local_nodes));
+ clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
atomic_set(&c->refcount, 1);
+ atomic_set(&c->entries, 1);
#ifdef CONFIG_PROC_FS
/* create proc dir entry */
@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
static int
clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
{
- int i;
-
- write_lock_bh(&clusterip_lock);
- if (c->num_local_nodes >= CLUSTERIP_MAX_NODES
- || nodenum > CLUSTERIP_MAX_NODES) {
- write_unlock_bh(&clusterip_lock);
+ if (nodenum == 0 ||
+ nodenum > c->num_total_nodes)
return 1;
- }
-
- /* check if we alrady have this number in our array */
- for (i = 0; i < c->num_local_nodes; i++) {
- if (c->local_nodes[i] == nodenum) {
- write_unlock_bh(&clusterip_lock);
- return 1;
- }
- }
- c->local_nodes[c->num_local_nodes++] = nodenum;
+ /* check if we already have this number in our bitfield */
+ if (test_and_set_bit(nodenum - 1, &c->local_nodes))
+ return 1;
- write_unlock_bh(&clusterip_lock);
return 0;
}
static int
clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
{
- int i;
-
- write_lock_bh(&clusterip_lock);
-
- if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
- write_unlock_bh(&clusterip_lock);
+ if (nodenum == 0 ||
+ nodenum > c->num_total_nodes)
return 1;
- }
- for (i = 0; i < c->num_local_nodes; i++) {
- if (c->local_nodes[i] == nodenum) {
- int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
- memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
- c->num_local_nodes--;
- write_unlock_bh(&clusterip_lock);
- return 0;
- }
- }
+ if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
+ return 0;
- write_unlock_bh(&clusterip_lock);
return 1;
}
@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
static inline int
clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
{
- int i;
-
- read_lock_bh(&clusterip_lock);
-
- if (config->num_local_nodes == 0) {
- read_unlock_bh(&clusterip_lock);
- return 0;
- }
-
- for (i = 0; i < config->num_local_nodes; i++) {
- if (config->local_nodes[i] == hash) {
- read_unlock_bh(&clusterip_lock);
- return 1;
- }
- }
-
- read_unlock_bh(&clusterip_lock);
-
- return 0;
+ return test_bit(hash - 1, &config->local_nodes);
}
/***********************************************************************
@@ -415,8 +411,26 @@ checkentry(const char *tablename,
/* FIXME: further sanity checks */
- config = clusterip_config_find_get(e->ip.dst.s_addr);
- if (!config) {
+ config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
+ if (config) {
+ if (cipinfo->config != NULL) {
+ /* Case A: This is an entry that gets reloaded, since
+ * it still has a cipinfo->config pointer. Simply
+ * increase the entry refcount and return */
+ if (cipinfo->config != config) {
+ printk(KERN_ERR "CLUSTERIP: Reloaded entry "
+ "has invalid config pointer!\n");
+ return 0;
+ }
+ clusterip_config_entry_get(cipinfo->config);
+ } else {
+ /* Case B: This is a new rule referring to an existing
+ * clusterip config. */
+ cipinfo->config = config;
+ clusterip_config_entry_get(cipinfo->config);
+ }
+ } else {
+ /* Case C: This is a completely new clusterip config */
if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
return 0;
@@ -443,10 +457,9 @@ checkentry(const char *tablename,
}
dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
}
+ cipinfo->config = config;
}
- cipinfo->config = config;
-
return 1;
}
@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
{
struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
- /* we first remove the proc entry and then drop the reference
- * count. In case anyone still accesses the file, the open/close
- * functions are also incrementing the refcount on their own */
-#ifdef CONFIG_PROC_FS
- remove_proc_entry(cipinfo->config->pde->name,
- cipinfo->config->pde->parent);
-#endif
+ /* if no more entries are referencing the config, remove it
+ * from the list and destroy the proc entry */
+ clusterip_config_entry_put(cipinfo->config);
+
clusterip_config_put(cipinfo->config);
}
@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
/* if there is no clusterip configuration for the arp reply's
* source ip, we don't want to mangle it */
- c = clusterip_config_find_get(payload->src_ip);
+ c = clusterip_config_find_get(payload->src_ip, 0);
if (!c)
return NF_ACCEPT;
@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
#ifdef CONFIG_PROC_FS
+struct clusterip_seq_position {
+ unsigned int pos; /* position */
+ unsigned int weight; /* number of bits set == size */
+ unsigned int bit; /* current bit */
+ unsigned long val; /* current value */
+};
+
static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
{
struct proc_dir_entry *pde = s->private;
struct clusterip_config *c = pde->data;
- unsigned int *nodeidx;
-
- read_lock_bh(&clusterip_lock);
- if (*pos >= c->num_local_nodes)
+ unsigned int weight;
+ u_int32_t local_nodes;
+ struct clusterip_seq_position *idx;
+
+ /* FIXME: possible race */
+ local_nodes = c->local_nodes;
+ weight = hweight32(local_nodes);
+ if (*pos >= weight)
return NULL;
- nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL);
- if (!nodeidx)
+ idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
+ if (!idx)
return ERR_PTR(-ENOMEM);
- *nodeidx = *pos;
- return nodeidx;
+ idx->pos = *pos;
+ idx->weight = weight;
+ idx->bit = ffs(local_nodes);
+ idx->val = local_nodes;
+ clear_bit(idx->bit - 1, &idx->val);
+
+ return idx;
}
static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct proc_dir_entry *pde = s->private;
- struct clusterip_config *c = pde->data;
- unsigned int *nodeidx = (unsigned int *)v;
+ struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
- *pos = ++(*nodeidx);
- if (*pos >= c->num_local_nodes) {
+ *pos = ++idx->pos;
+ if (*pos >= idx->weight) {
kfree(v);
return NULL;
}
- return nodeidx;
+ idx->bit = ffs(idx->val);
+ clear_bit(idx->bit - 1, &idx->val);
+ return idx;
}
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
kfree(v);
-
- read_unlock_bh(&clusterip_lock);
}
static int clusterip_seq_show(struct seq_file *s, void *v)
{
- struct proc_dir_entry *pde = s->private;
- struct clusterip_config *c = pde->data;
- unsigned int *nodeidx = (unsigned int *)v;
+ struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
- if (*nodeidx != 0)
+ if (idx->pos != 0)
seq_putc(s, ',');
- seq_printf(s, "%u", c->local_nodes[*nodeidx]);
- if (*nodeidx == c->num_local_nodes-1)
+ seq_printf(s, "%u", idx->bit);
+
+ if (idx->pos == idx->weight - 1)
seq_putc(s, '\n');
return 0;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 2f3e181c8e9..275a174c6fe 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -90,6 +90,12 @@ masquerade_target(struct sk_buff **pskb,
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+ /* Source address is 0.0.0.0 - locally generated packet that is
+ * probably not supposed to be masqueraded.
+ */
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
+ return NF_ACCEPT;
+
mr = targinfo;
rt = (struct rtable *)(*pskb)->dst;
newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index d2e13447678..715cb613405 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -88,14 +88,18 @@ redirect_target(struct sk_buff **pskb,
newdst = htonl(0x7F000001);
else {
struct in_device *indev;
+ struct in_ifaddr *ifa;
- /* Device might not have an associated in_device. */
- indev = (struct in_device *)(*pskb)->dev->ip_ptr;
- if (indev == NULL || indev->ifa_list == NULL)
- return NF_DROP;
+ newdst = 0;
+
+ rcu_read_lock();
+ indev = __in_dev_get((*pskb)->dev);
+ if (indev && (ifa = indev->ifa_list))
+ newdst = ifa->ifa_local;
+ rcu_read_unlock();
- /* Grab first address on interface. */
- newdst = indev->ifa_list->ifa_local;
+ if (!newdst)
+ return NF_DROP;
}
/* Transfer from original range. */
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f115a84a4ac..f057025a719 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -92,10 +92,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb,
fl.fl_ip_sport = tcph->dest;
fl.fl_ip_dport = tcph->source;
- if (xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0)) {
- dst_release(&rt->u.dst);
- rt = NULL;
- }
+ xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
return rt;
}
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index c1889f88262..0cee2862ed8 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/file.h>
+#include <linux/rcupdate.h>
#include <net/sock.h>
#include <linux/netfilter_ipv4/ipt_owner.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 304bb0a1d4f..4b0d7e4d626 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
if (type && code) {
get_user(fl->fl_icmp_type, type);
- __get_user(fl->fl_icmp_code, code);
+ get_user(fl->fl_icmp_code, code);
probed = 1;
}
break;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8c0b14e3bee..8549f26e249 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1760,6 +1760,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
goto cleanup;
}
+ atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
if (res->fi->fib_nhs > 1)
@@ -1820,7 +1821,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
if (err)
return err;
- atomic_set(&rth->u.dst.__refcnt, 1);
/* put it into the cache */
hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
@@ -1834,8 +1834,8 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
u32 daddr, u32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- struct rtable* rth = NULL;
- unsigned char hop, hopcount, lasthop;
+ struct rtable* rth = NULL, *rtres;
+ unsigned char hop, hopcount;
int err = -EINVAL;
unsigned int hash;
@@ -1844,8 +1844,6 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
else
hopcount = 1;
- lasthop = hopcount - 1;
-
/* distinguish between multipath and singlepath */
if (hopcount < 2)
return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
@@ -1855,6 +1853,10 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
for (hop = 0; hop < hopcount; hop++) {
res->nh_sel = hop;
+ /* put reference to previous result */
+ if (hop)
+ ip_rt_put(rtres);
+
/* create a routing cache entry */
err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
&rth);
@@ -1863,7 +1865,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
/* put it into the cache */
hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
- err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
+ err = rt_intern_hash(hash, rth, &rtres);
if (err)
return err;
@@ -1873,13 +1875,8 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
FIB_RES_NETMASK(*res),
res->prefixlen,
&FIB_RES_NH(*res));
-
- /* only for the last hop the reference count is handled
- * outside
- */
- if (hop == lasthop)
- atomic_set(&(skb->dst->__refcnt), 1);
}
+ skb->dst = &rtres->u.dst;
return err;
#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
@@ -2208,6 +2205,7 @@ static inline int __mkroute_output(struct rtable **result,
goto cleanup;
}
+ atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
if (res->fi) {
@@ -2290,8 +2288,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp,
if (err == 0) {
u32 tos = RT_FL_TOS(oldflp);
- atomic_set(&rth->u.dst.__refcnt, 1);
-
hash = rt_hash_code(oldflp->fl4_dst,
oldflp->fl4_src ^ (oldflp->oif << 5), tos);
err = rt_intern_hash(hash, rth, rp);
@@ -2326,6 +2322,10 @@ static inline int ip_mkroute_output(struct rtable** rp,
dev2nexthop = FIB_RES_DEV(*res);
dev_hold(dev2nexthop);
+ /* put reference to previous result */
+ if (hop)
+ ip_rt_put(*rp);
+
err = __mkroute_output(&rth, res, fl, oldflp,
dev2nexthop, flags);
@@ -2350,7 +2350,6 @@ static inline int ip_mkroute_output(struct rtable** rp,
if (err != 0)
return err;
}
- atomic_set(&(*rp)->u.dst.__refcnt, 1);
return err;
} else {
return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 02fdda68718..f3f0013a958 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -552,8 +552,7 @@ new_segment:
tcp_mark_push(tp, skb);
goto new_segment;
}
- if (sk->sk_forward_alloc < copy &&
- !sk_stream_mem_schedule(sk, copy, 0))
+ if (!sk_stream_wmem_schedule(sk, copy))
goto wait_for_memory;
if (can_coalesce) {
@@ -770,19 +769,23 @@ new_segment:
if (off == PAGE_SIZE) {
put_page(page);
TCP_PAGE(sk) = page = NULL;
+ off = 0;
}
- }
+ } else
+ off = 0;
+
+ if (copy > PAGE_SIZE - off)
+ copy = PAGE_SIZE - off;
+
+ if (!sk_stream_wmem_schedule(sk, copy))
+ goto wait_for_memory;
if (!page) {
/* Allocate new cache page. */
if (!(page = sk_stream_alloc_page(sk)))
goto wait_for_memory;
- off = 0;
}
- if (copy > PAGE_SIZE - off)
- copy = PAGE_SIZE - off;
-
/* Time to copy data. We are close to
* the end! */
err = skb_copy_to_page(sk, from, skb, page,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1afb080bdf0..a7537c7bbd0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -923,14 +923,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
int flag = 0;
int i;
- /* So, SACKs for already sent large segments will be lost.
- * Not good, but alternative is to resegment the queue. */
- if (sk->sk_route_caps & NETIF_F_TSO) {
- sk->sk_route_caps &= ~NETIF_F_TSO;
- sock_set_flag(sk, SOCK_NO_LARGESEND);
- tp->mss_cache = tp->mss_cache;
- }
-
if (!tp->sacked_out)
tp->fackets_out = 0;
prior_fackets = tp->fackets_out;
@@ -978,20 +970,42 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
flag |= FLAG_DATA_LOST;
sk_stream_for_retrans_queue(skb, sk) {
- u8 sacked = TCP_SKB_CB(skb)->sacked;
- int in_sack;
+ int in_sack, pcount;
+ u8 sacked;
/* The retransmission queue is always in order, so
* we can short-circuit the walk early.
*/
- if(!before(TCP_SKB_CB(skb)->seq, end_seq))
+ if (!before(TCP_SKB_CB(skb)->seq, end_seq))
break;
- fack_count += tcp_skb_pcount(skb);
-
in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
!before(end_seq, TCP_SKB_CB(skb)->end_seq);
+ pcount = tcp_skb_pcount(skb);
+
+ if (pcount > 1 && !in_sack &&
+ after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
+ unsigned int pkt_len;
+
+ in_sack = !after(start_seq,
+ TCP_SKB_CB(skb)->seq);
+
+ if (!in_sack)
+ pkt_len = (start_seq -
+ TCP_SKB_CB(skb)->seq);
+ else
+ pkt_len = (end_seq -
+ TCP_SKB_CB(skb)->seq);
+ if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
+ break;
+ pcount = tcp_skb_pcount(skb);
+ }
+
+ fack_count += pcount;
+
+ sacked = TCP_SKB_CB(skb)->sacked;
+
/* Account D-SACK for retransmitted packet. */
if ((dup_sack && in_sack) &&
(sacked & TCPCB_RETRANS) &&
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a88db28b0af..b1a63b2c6b4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
- newicsk->icsk_ca_ops = &tcp_reno;
+ newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 75b68116682..d6e3d269e90 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -428,13 +428,15 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
* packet to the list. This won't be called frequently, I hope.
* Remember, these are still headerless SKBs at this point.
*/
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
- int nsize;
+ int nsize, old_factor;
u16 flags;
+ BUG_ON(len >= skb->len);
+
nsize = skb_headlen(skb) - len;
if (nsize < 0)
nsize = 0;
@@ -459,9 +461,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned
flags = TCP_SKB_CB(skb)->flags;
TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
TCP_SKB_CB(buff)->flags = flags;
- TCP_SKB_CB(buff)->sacked =
- (TCP_SKB_CB(skb)->sacked &
- (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
+ TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
@@ -485,23 +485,44 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned
TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
buff->tstamp = skb->tstamp;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
- tp->lost_out -= tcp_skb_pcount(skb);
- tp->left_out -= tcp_skb_pcount(skb);
- }
+ old_factor = tcp_skb_pcount(skb);
/* Fix up tso_factor for both original and new SKB. */
tcp_set_skb_tso_segs(sk, skb, mss_now);
tcp_set_skb_tso_segs(sk, buff, mss_now);
- if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
- tp->lost_out += tcp_skb_pcount(skb);
- tp->left_out += tcp_skb_pcount(skb);
- }
+ /* If this packet has been sent out already, we must
+ * adjust the various packet counters.
+ */
+ if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
+ int diff = old_factor - tcp_skb_pcount(skb) -
+ tcp_skb_pcount(buff);
- if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
- tp->lost_out += tcp_skb_pcount(buff);
- tp->left_out += tcp_skb_pcount(buff);
+ tp->packets_out -= diff;
+
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ tp->sacked_out -= diff;
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
+ tp->retrans_out -= diff;
+
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+ tp->lost_out -= diff;
+ tp->left_out -= diff;
+ }
+
+ if (diff > 0) {
+ /* Adjust Reno SACK estimate. */
+ if (!tp->rx_opt.sack_ok) {
+ tp->sacked_out -= diff;
+ if ((int)tp->sacked_out < 0)
+ tp->sacked_out = 0;
+ tcp_sync_left_out(tp);
+ }
+
+ tp->fackets_out -= diff;
+ if ((int)tp->fackets_out < 0)
+ tp->fackets_out = 0;
+ }
}
/* Link BUFF into the send queue. */
@@ -1350,12 +1371,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
BUG();
-
- if (sk->sk_route_caps & NETIF_F_TSO) {
- sk->sk_route_caps &= ~NETIF_F_TSO;
- sock_set_flag(sk, SOCK_NO_LARGESEND);
- }
-
if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
return -ENOMEM;
}
@@ -1370,22 +1385,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
return -EAGAIN;
if (skb->len > cur_mss) {
- int old_factor = tcp_skb_pcount(skb);
- int diff;
-
if (tcp_fragment(sk, skb, cur_mss, cur_mss))
return -ENOMEM; /* We'll try again later. */
-
- /* New SKB created, account for it. */
- diff = old_factor - tcp_skb_pcount(skb) -
- tcp_skb_pcount(skb->next);
- tp->packets_out -= diff;
-
- if (diff > 0) {
- tp->fackets_out -= diff;
- if ((int)tp->fackets_out < 0)
- tp->fackets_out = 0;
- }
}
/* Collapse two adjacent packets if worthwhile and we can. */
@@ -1993,12 +1994,6 @@ int tcp_write_wakeup(struct sock *sk)
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
if (tcp_fragment(sk, skb, seg_size, mss))
return -1;
- /* SWS override triggered forced fragmentation.
- * Disable TSO, the connection is too sick. */
- if (sk->sk_route_caps & NETIF_F_TSO) {
- sock_set_flag(sk, SOCK_NO_LARGESEND);
- sk->sk_route_caps &= ~NETIF_F_TSO;
- }
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(sk, skb, mss);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e5beca7de86..e0bd1013cb0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1141,7 +1141,7 @@ int udp_rcv(struct sk_buff *skb)
if (ulen > len || ulen < sizeof(*uh))
goto short_packet;
- if (pskb_trim(skb, ulen))
+ if (pskb_trim_rcsum(skb, ulen))
goto short_packet;
if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)