From 2c10b32bf57db7ec6d4cca4c4aa3d86bacb01c8a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 2 Sep 2008 17:30:27 -0700
Subject: netlink: Remove compat API for nested attributes

Removes all _nested_compat() functions from the API. The prio qdisc
no longer requires them and netem has its own format anyway. Their
existance is only confusing.

Resend: Also remove the wrapper macro.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 18 ++++++++++++++++--
 net/sched/sch_prio.c  |  6 +-----
 2 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3781e55046d..a11959908d9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -388,6 +388,20 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
 };
 
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+		      const struct nla_policy *policy, int len)
+{
+	int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+	if (nested_len < 0)
+		return -EINVAL;
+	if (nested_len >= nla_attr_size(0))
+		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+				 nested_len, policy);
+	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+	return 0;
+}
+
 /* Parse netlink message to set options */
 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 {
@@ -399,8 +413,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy,
-				      qopt, sizeof(*qopt));
+	qopt = nla_data(opt);
+	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
 	if (ret < 0)
 		return ret;
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index a6697c686c7..504a78cdb71 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -254,16 +254,12 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nest;
 	struct tc_prio_qopt opt;
 
 	opt.bands = q->bands;
 	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
 
-	nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
-	if (nest == NULL)
-		goto nla_put_failure;
-	nla_nest_compat_end(skb, nest);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
 	return skb->len;
 
-- 
cgit v1.2.3


From 5337407c673e2c7c66a84b9838d55a45a760ecff Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 8 Sep 2008 16:17:42 -0700
Subject: warn: Turn the netdev timeout WARN_ON() into a WARN()

this patch turns the netdev timeout WARN_ON_ONCE() into a WARN_ONCE(),
so that the device and driver names are inside the warning message.
This helps automated tools like kerneloops.org to collect the data
and do statistics, as well as making it more likely that humans
cut-n-paste the important message as part of a bugreport.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9634091ee2f..ec0a0839ce5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -215,10 +215,9 @@ static void dev_watchdog(unsigned long arg)
 			    time_after(jiffies, (dev->trans_start +
 						 dev->watchdog_timeo))) {
 				char drivername[64];
-				printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
+				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 				       dev->name, netdev_drivername(dev, drivername, 64));
 				dev->tx_timeout(dev);
-				WARN_ON_ONCE(1);
 			}
 			if (!mod_timer(&dev->watchdog_timer,
 				       round_jiffies(jiffies +
-- 
cgit v1.2.3


From 92651940ab00dbe64722e908f70d816713d677b7 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Sep 2008 16:29:34 -0700
Subject: pkt_sched: Add multiqueue scheduler support

This patch is intended to add a qdisc to support the new tx multiqueue
architecture by providing a band for each hardware queue.  By doing
this it is possible to support a different qdisc per physical hardware
queue.

This qdisc uses the skb->queue_mapping to select which band to place
the traffic onto.  It then uses a round robin w/ a check to see if the
subqueue is stopped to determine which band to dequeue the packet from.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig      |   9 +
 net/sched/Makefile     |   1 +
 net/sched/sch_multiq.c | 467 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 477 insertions(+)
 create mode 100644 net/sched/sch_multiq.c

(limited to 'net/sched')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9437b27ff84..efaa7a75e7f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -106,6 +106,15 @@ config NET_SCH_PRIO
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_prio.
 
+config NET_SCH_MULTIQ
+	tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
+	---help---
+	  Say Y here if you want to use an n-band queue packet scheduler
+	  to support devices that have multiple hardware transmit queues.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_multiq.
+
 config NET_SCH_RED
 	tristate "Random Early Detection (RED)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7df84..3d9b953f7f6 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
 obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
 obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
+obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
new file mode 100644
index 00000000000..49a8b67ed3b
--- /dev/null
+++ b/net/sched/sch_multiq.c
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+struct multiq_sched_data {
+	u16 bands;
+	u16 max_bands;
+	u16 curband;
+	struct tcf_proto *filter_list;
+	struct Qdisc **queues;
+};
+
+
+static struct Qdisc *
+multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	u32 band;
+	struct tcf_result res;
+	int err;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	err = tc_classify(skb, q->filter_list, &res);
+#ifdef CONFIG_NET_CLS_ACT
+	switch (err) {
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+	case TC_ACT_SHOT:
+		return NULL;
+	}
+#endif
+	band = skb_get_queue_mapping(skb);
+
+	if (band >= q->bands)
+		return q->queues[0];
+
+	return q->queues[band];
+}
+
+static int
+multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	ret = qdisc_enqueue(skb, qdisc);
+	if (ret == NET_XMIT_SUCCESS) {
+		sch->bstats.bytes += qdisc_pkt_len(skb);
+		sch->bstats.packets++;
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
+	return ret;
+}
+
+
+static int
+multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	ret = qdisc->ops->requeue(skb, qdisc);
+	if (ret == NET_XMIT_SUCCESS) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+		return NET_XMIT_SUCCESS;
+	}
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
+	return ret;
+}
+
+
+static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *qdisc;
+	struct sk_buff *skb;
+	int band;
+
+	for (band = 0; band < q->bands; band++) {
+		/* cycle through bands to ensure fairness */
+		q->curband++;
+		if (q->curband >= q->bands)
+			q->curband = 0;
+
+		/* Check that target subqueue is available before
+		 * pulling an skb to avoid excessive requeues
+		 */
+		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+			qdisc = q->queues[q->curband];
+			skb = qdisc->dequeue(qdisc);
+			if (skb) {
+				sch->q.qlen--;
+				return skb;
+			}
+		}
+	}
+	return NULL;
+
+}
+
+static unsigned int multiq_drop(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band;
+	unsigned int len;
+	struct Qdisc *qdisc;
+
+	for (band = q->bands-1; band >= 0; band--) {
+		qdisc = q->queues[band];
+		if (qdisc->ops->drop) {
+			len = qdisc->ops->drop(qdisc);
+			if (len != 0) {
+				sch->q.qlen--;
+				return len;
+			}
+		}
+	}
+	return 0;
+}
+
+
+static void
+multiq_reset(struct Qdisc *sch)
+{
+	u16 band;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	for (band = 0; band < q->bands; band++)
+		qdisc_reset(q->queues[band]);
+	sch->q.qlen = 0;
+	q->curband = 0;
+}
+
+static void
+multiq_destroy(struct Qdisc *sch)
+{
+	int band;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+	for (band = 0; band < q->bands; band++)
+		qdisc_destroy(q->queues[band]);
+
+	kfree(q->queues);
+}
+
+static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct tc_multiq_qopt *qopt;
+	int i;
+
+	if (!netif_is_multiqueue(qdisc_dev(sch)))
+		return -EINVAL;
+	if (nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+
+	qopt = nla_data(opt);
+
+	qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
+
+	sch_tree_lock(sch);
+	q->bands = qopt->bands;
+	for (i = q->bands; i < q->max_bands; i++) {
+		struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+		if (child != &noop_qdisc) {
+			qdisc_tree_decrease_qlen(child, child->q.qlen);
+			qdisc_destroy(child);
+		}
+	}
+
+	sch_tree_unlock(sch);
+
+	for (i = 0; i < q->bands; i++) {
+		if (q->queues[i] == &noop_qdisc) {
+			struct Qdisc *child;
+			child = qdisc_create_dflt(qdisc_dev(sch),
+						  sch->dev_queue,
+						  &pfifo_qdisc_ops,
+						  TC_H_MAKE(sch->handle,
+							    i + 1));
+			if (child) {
+				sch_tree_lock(sch);
+				child = xchg(&q->queues[i], child);
+
+				if (child != &noop_qdisc) {
+					qdisc_tree_decrease_qlen(child,
+								 child->q.qlen);
+					qdisc_destroy(child);
+				}
+				sch_tree_unlock(sch);
+			}
+		}
+	}
+	return 0;
+}
+
+static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	q->queues = NULL;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	q->max_bands = qdisc_dev(sch)->num_tx_queues;
+
+	q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
+	if (!q->queues)
+		return -ENOBUFS;
+	for (i = 0; i < q->max_bands; i++)
+		q->queues[i] = &noop_qdisc;
+
+	return multiq_tune(sch, opt);
+}
+
+static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_multiq_qopt opt;
+
+	opt.bands = q->bands;
+	opt.max_bands = q->max_bands;
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		      struct Qdisc **old)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return -EINVAL;
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = q->queues[band];
+	q->queues[band] = new;
+	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+	qdisc_reset(*old);
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *
+multiq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return NULL;
+
+	return q->queues[band];
+}
+
+static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = TC_H_MIN(classid);
+
+	if (band - 1 >= q->bands)
+		return 0;
+	return band;
+}
+
+static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
+				 u32 classid)
+{
+	return multiq_get(sch, classid);
+}
+
+
+static void multiq_put(struct Qdisc *q, unsigned long cl)
+{
+	return;
+}
+
+static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent,
+			 struct nlattr **tca, unsigned long *arg)
+{
+	unsigned long cl = *arg;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+static int multiq_delete(struct Qdisc *sch, unsigned long cl)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+
+static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	if (q->queues[cl-1])
+		tcm->tcm_info = q->queues[cl-1]->handle;
+	return 0;
+}
+
+static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				 struct gnet_dump *d)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *cl_q;
+
+	cl_q = q->queues[cl - 1];
+	if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
+	    gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
+		return -1;
+
+	return 0;
+}
+
+static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band;
+
+	if (arg->stop)
+		return;
+
+	for (band = 0; band < q->bands; band++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, band+1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static const struct Qdisc_class_ops multiq_class_ops = {
+	.graft		=	multiq_graft,
+	.leaf		=	multiq_leaf,
+	.get		=	multiq_get,
+	.put		=	multiq_put,
+	.change		=	multiq_change,
+	.delete		=	multiq_delete,
+	.walk		=	multiq_walk,
+	.tcf_chain	=	multiq_find_tcf,
+	.bind_tcf	=	multiq_bind,
+	.unbind_tcf	=	multiq_put,
+	.dump		=	multiq_dump_class,
+	.dump_stats	=	multiq_dump_class_stats,
+};
+
+static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
+	.next		=	NULL,
+	.cl_ops		=	&multiq_class_ops,
+	.id		=	"multiq",
+	.priv_size	=	sizeof(struct multiq_sched_data),
+	.enqueue	=	multiq_enqueue,
+	.dequeue	=	multiq_dequeue,
+	.requeue	=	multiq_requeue,
+	.drop		=	multiq_drop,
+	.init		=	multiq_init,
+	.reset		=	multiq_reset,
+	.destroy	=	multiq_destroy,
+	.change		=	multiq_tune,
+	.dump		=	multiq_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init multiq_module_init(void)
+{
+	return register_qdisc(&multiq_qdisc_ops);
+}
+
+static void __exit multiq_module_exit(void)
+{
+	unregister_qdisc(&multiq_qdisc_ops);
+}
+
+module_init(multiq_module_init)
+module_exit(multiq_module_exit)
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From ca9b0e27e072be4cef2f5f0cbc0b0fd94eae3520 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Sep 2008 16:30:20 -0700
Subject: pkt_action: add new action skbedit

This new action will have the ability to change the priority and/or
queue_mapping fields on an sk_buff.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig       |  11 +++
 net/sched/Makefile      |   1 +
 net/sched/act_skbedit.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 215 insertions(+)
 create mode 100644 net/sched/act_skbedit.c

(limited to 'net/sched')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index efaa7a75e7f..6767e54155d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -485,6 +485,17 @@ config NET_ACT_SIMP
 	  To compile this code as a module, choose M here: the
 	  module will be called simple.
 
+config NET_ACT_SKBEDIT
+        tristate "SKB Editing"
+        depends on NET_CLS_ACT
+        ---help---
+	  Say Y here to change skb priority or queue_mapping settings.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called skbedit.
+
 config NET_CLS_IND
 	bool "Incoming device classification"
 	depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 3d9b953f7f6..e60c9925b26 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT)	+= act_ipt.o
 obj-$(CONFIG_NET_ACT_NAT)	+= act_nat.o
 obj-$(CONFIG_NET_ACT_PEDIT)	+= act_pedit.o
 obj-$(CONFIG_NET_ACT_SIMP)	+= act_simple.o
+obj-$(CONFIG_NET_ACT_SKBEDIT)	+= act_skbedit.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
new file mode 100644
index 00000000000..fe9777e77f3
--- /dev/null
+++ b/net/sched/act_skbedit.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_skbedit.h>
+
+#define SKBEDIT_TAB_MASK     15
+static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
+static u32 skbedit_idx_gen;
+static DEFINE_RWLOCK(skbedit_lock);
+
+static struct tcf_hashinfo skbedit_hash_info = {
+	.htab	=	tcf_skbedit_ht,
+	.hmask	=	SKBEDIT_TAB_MASK,
+	.lock	=	&skbedit_lock,
+};
+
+static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
+		       struct tcf_result *res)
+{
+	struct tcf_skbedit *d = a->priv;
+
+	spin_lock(&d->tcf_lock);
+	d->tcf_tm.lastuse = jiffies;
+	d->tcf_bstats.bytes += qdisc_pkt_len(skb);
+	d->tcf_bstats.packets++;
+
+	if (d->flags & SKBEDIT_F_PRIORITY)
+		skb->priority = d->priority;
+	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
+	    skb->dev->real_num_tx_queues > d->queue_mapping)
+		skb_set_queue_mapping(skb, d->queue_mapping);
+
+	spin_unlock(&d->tcf_lock);
+	return d->tcf_action;
+}
+
+static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
+	[TCA_SKBEDIT_PARMS]		= { .len = sizeof(struct tc_skbedit) },
+	[TCA_SKBEDIT_PRIORITY]		= { .len = sizeof(u32) },
+	[TCA_SKBEDIT_QUEUE_MAPPING]	= { .len = sizeof(u16) },
+};
+
+static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
+			 struct tc_action *a, int ovr, int bind)
+{
+	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+	struct tc_skbedit *parm;
+	struct tcf_skbedit *d;
+	struct tcf_common *pc;
+	u32 flags = 0, *priority = NULL;
+	u16 *queue_mapping = NULL;
+	int ret = 0, err;
+
+	if (nla == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_SKBEDIT_PARMS] == NULL)
+		return -EINVAL;
+
+	if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
+		flags |= SKBEDIT_F_PRIORITY;
+		priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
+	}
+
+	if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
+		flags |= SKBEDIT_F_QUEUE_MAPPING;
+		queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
+	}
+	if (!flags)
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
+
+	pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+				     &skbedit_idx_gen, &skbedit_hash_info);
+		if (unlikely(!pc))
+			return -ENOMEM;
+
+		d = to_skbedit(pc);
+		ret = ACT_P_CREATED;
+	} else {
+		d = to_skbedit(pc);
+		if (!ovr) {
+			tcf_hash_release(pc, bind, &skbedit_hash_info);
+			return -EEXIST;
+		}
+	}
+
+	spin_lock_bh(&d->tcf_lock);
+
+	d->flags = flags;
+	if (flags & SKBEDIT_F_PRIORITY)
+		d->priority = *priority;
+	if (flags & SKBEDIT_F_QUEUE_MAPPING)
+		d->queue_mapping = *queue_mapping;
+	d->tcf_action = parm->action;
+
+	spin_unlock_bh(&d->tcf_lock);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(pc, &skbedit_hash_info);
+	return ret;
+}
+
+static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_skbedit *d = a->priv;
+
+	if (d)
+		return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
+	return 0;
+}
+
+static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+				int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbedit *d = a->priv;
+	struct tc_skbedit opt;
+	struct tcf_t t;
+
+	opt.index = d->tcf_index;
+	opt.refcnt = d->tcf_refcnt - ref;
+	opt.bindcnt = d->tcf_bindcnt - bind;
+	opt.action = d->tcf_action;
+	NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
+	if (d->flags & SKBEDIT_F_PRIORITY)
+		NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
+			&d->priority);
+	if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
+		NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
+			sizeof(d->queue_mapping), &d->queue_mapping);
+	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+	NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t);
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct tc_action_ops act_skbedit_ops = {
+	.kind		=	"skbedit",
+	.hinfo		=	&skbedit_hash_info,
+	.type		=	TCA_ACT_SKBEDIT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_skbedit,
+	.dump		=	tcf_skbedit_dump,
+	.cleanup	=	tcf_skbedit_cleanup,
+	.init		=	tcf_skbedit_init,
+	.walk		=	tcf_generic_walker,
+};
+
+MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
+MODULE_DESCRIPTION("SKB Editing");
+MODULE_LICENSE("GPL");
+
+static int __init skbedit_init_module(void)
+{
+	return tcf_register_action(&act_skbedit_ops);
+}
+
+static void __exit skbedit_cleanup_module(void)
+{
+	tcf_unregister_action(&act_skbedit_ops);
+}
+
+module_init(skbedit_init_module);
+module_exit(skbedit_cleanup_module);
-- 
cgit v1.2.3


From f07d1501292b3b0d3276ee0e537005526a45e242 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Sep 2008 17:57:23 -0700
Subject: multiq: Further multiqueue cleanup

This patch resolves a few issues found with multiq including wording
suggestions and a problem seen in the allocation of queues.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_multiq.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 49a8b67ed3b..5d9cd68e91d 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -214,8 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 	sch_tree_lock(sch);
 	q->bands = qopt->bands;
 	for (i = q->bands; i < q->max_bands; i++) {
-		struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
-		if (child != &noop_qdisc) {
+		if (q->queues[i] != &noop_qdisc) {
+			struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
 			qdisc_tree_decrease_qlen(child, child->q.qlen);
 			qdisc_destroy(child);
 		}
@@ -250,7 +250,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
-	int i;
+	int i, err;
 
 	q->queues = NULL;
 
@@ -265,7 +265,12 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	for (i = 0; i < q->max_bands; i++)
 		q->queues[i] = &noop_qdisc;
 
-	return multiq_tune(sch, opt);
+	err = multiq_tune(sch,opt);
+
+	if (err)
+		kfree(q->queues);
+
+	return err;
 }
 
 static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
-- 
cgit v1.2.3


From a574420ff46cff0245e6b0fce2e961aa2717743d Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Sat, 20 Sep 2008 22:07:34 -0700
Subject: multiq: requeue should rewind the current_band

Currently dequeueing a packet and requeueing the same packet will cause a
different packet to be pulled on the next dequeue.  This change forces
requeue to rewind the current_band.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_multiq.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net/sched')

diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 5d9cd68e91d..915f3149dde 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -97,6 +97,7 @@ static int
 multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct Qdisc *qdisc;
+	struct multiq_sched_data *q = qdisc_priv(sch);
 	int ret;
 
 	qdisc = multiq_classify(skb, sch, &ret);
@@ -113,6 +114,10 @@ multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
+		if (q->curband)
+			q->curband--;
+		else
+			q->curband = q->bands - 1;
 		return NET_XMIT_SUCCESS;
 	}
 	if (net_xmit_drop_count(ret))
-- 
cgit v1.2.3


From 6067804047b64dde89f4f133fc7eba48ee44107d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 20 Sep 2008 22:20:49 -0700
Subject: net: Use hton[sl]() instead of __constant_hton[sl]() where applicable

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flow.c   | 28 ++++++++++++++--------------
 net/sched/sch_dsmark.c |  8 ++++----
 net/sched/sch_sfq.c    |  4 ++--
 3 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 8f63a1a9401..0ebaff637e3 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -67,9 +67,9 @@ static inline u32 addr_fold(void *addr)
 static u32 flow_get_src(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ntohl(ip_hdr(skb)->saddr);
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
 	default:
 		return addr_fold(skb->sk);
@@ -79,9 +79,9 @@ static u32 flow_get_src(const struct sk_buff *skb)
 static u32 flow_get_dst(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ntohl(ip_hdr(skb)->daddr);
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
 	default:
 		return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
@@ -91,9 +91,9 @@ static u32 flow_get_dst(const struct sk_buff *skb)
 static u32 flow_get_proto(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ip_hdr(skb)->protocol;
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ipv6_hdr(skb)->nexthdr;
 	default:
 		return 0;
@@ -120,7 +120,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb)
 	u32 res = 0;
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP): {
+	case htons(ETH_P_IP): {
 		struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -128,7 +128,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb)
 			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
 		break;
 	}
-	case __constant_htons(ETH_P_IPV6): {
+	case htons(ETH_P_IPV6): {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		if (has_ports(iph->nexthdr))
@@ -147,7 +147,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
 	u32 res = 0;
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP): {
+	case htons(ETH_P_IP): {
 		struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -155,7 +155,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
 			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
 		break;
 	}
-	case __constant_htons(ETH_P_IPV6): {
+	case htons(ETH_P_IPV6): {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		if (has_ports(iph->nexthdr))
@@ -213,9 +213,9 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
 static u32 flow_get_nfct_src(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ntohl(CTTUPLE(skb, src.u3.ip));
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
 	}
 fallback:
@@ -225,9 +225,9 @@ fallback:
 static u32 flow_get_nfct_dst(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ntohl(CTTUPLE(skb, dst.u3.ip));
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
 	}
 fallback:
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index edd1298f85f..ba43aab3a85 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -202,7 +202,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	if (p->set_tc_index) {
 		switch (skb->protocol) {
-		case __constant_htons(ETH_P_IP):
+		case htons(ETH_P_IP):
 			if (skb_cow_head(skb, sizeof(struct iphdr)))
 				goto drop;
 
@@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				& ~INET_ECN_MASK;
 			break;
 
-		case __constant_htons(ETH_P_IPV6):
+		case htons(ETH_P_IPV6):
 			if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
 				goto drop;
 
@@ -289,11 +289,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	pr_debug("index %d->%d\n", skb->tc_index, index);
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
 				    p->value[index]);
 			break;
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
 				    p->value[index]);
 			break;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 6e041d10dbd..fe1508ef0d3 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -119,7 +119,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	u32 h, h2;
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 	{
 		const struct iphdr *iph = ip_hdr(skb);
 		h = iph->daddr;
@@ -134,7 +134,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 			h2 ^= *(((u32*)iph) + iph->ihl);
 		break;
 	}
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 	{
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 		h = iph->daddr.s6_addr32[3];
-- 
cgit v1.2.3


From d48abfecea8513cfd2fd7e341439c1b8a28e9ff4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 22 Sep 2008 19:20:51 -0700
Subject: net: em_cmp.c use unaligned access helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/em_cmp.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index cc49c932641..bc450397487 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/tc_ematch/tc_em_cmp.h>
+#include <asm/unaligned.h>
 #include <net/pkt_cls.h>
 
 static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
@@ -37,8 +38,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
 			break;
 
 		case TCF_EM_ALIGN_U16:
-			val = *ptr << 8;
-			val |= *(ptr+1);
+			val = get_unaligned_be16(ptr);
 
 			if (cmp_needs_transformation(cmp))
 				val = be16_to_cpu(val);
@@ -47,10 +47,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
 		case TCF_EM_ALIGN_U32:
 			/* Worth checking boundries? The branching seems
 			 * to get worse. Visit again. */
-			val = *ptr << 24;
-			val |= *(ptr+1) << 16;
-			val |= *(ptr+2) << 8;
-			val |= *(ptr+3);
+			val = get_unaligned_be32(ptr);
 
 			if (cmp_needs_transformation(cmp))
 				val = be32_to_cpu(val);
-- 
cgit v1.2.3


From 242f8bfefe4bed626df4e4727ac8f315d80b567a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 22 Sep 2008 22:15:30 -0700
Subject: pkt_sched: Make qdisc->gso_skb a list.

The idea is that we can use this to get rid of
->requeue().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ec0a0839ce5..5961536be60 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -45,7 +45,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	if (unlikely(skb->next))
-		q->gso_skb = skb;
+		__skb_queue_head(&q->requeue, skb);
 	else
 		q->ops->requeue(skb, q);
 
@@ -57,9 +57,8 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
 	struct sk_buff *skb;
 
-	if ((skb = q->gso_skb))
-		q->gso_skb = NULL;
-	else
+	skb = __skb_dequeue(&q->requeue);
+	if (!skb)
 		skb = q->dequeue(q);
 
 	return skb;
@@ -327,6 +326,7 @@ struct Qdisc noop_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
+	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
 };
@@ -352,6 +352,7 @@ static struct Qdisc noqueue_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noqueue_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
+	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.dev_queue	=	&noqueue_netdev_queue,
 };
@@ -472,6 +473,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	sch->padded = (char *) sch - (char *) p;
 
 	INIT_LIST_HEAD(&sch->list);
+	skb_queue_head_init(&sch->requeue);
 	skb_queue_head_init(&sch->q);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
@@ -539,7 +541,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	module_put(ops->owner);
 	dev_put(qdisc_dev(qdisc));
 
-	kfree_skb(qdisc->gso_skb);
+	__skb_queue_purge(&qdisc->requeue);
 
 	kfree((char *) qdisc - qdisc->padded);
 }
-- 
cgit v1.2.3


From f0876520b0b721bedafd9cec3b1b0624ae566eee Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 22 Sep 2008 22:15:58 -0700
Subject: pkt_sched: Always use q->requeue in dev_requeue_skb().

There is no reason to call into the complicated qdiscs
just to remember the last SKB where we found the device
blocked.

The SKB is outside of the qdiscs realm at this point.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5961536be60..1b508bd1c06 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,10 +44,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	if (unlikely(skb->next))
-		__skb_queue_head(&q->requeue, skb);
-	else
-		q->ops->requeue(skb, q);
+	__skb_queue_head(&q->requeue, skb);
 
 	__netif_schedule(q);
 	return 0;
-- 
cgit v1.2.3


From ebf059821ed8a36acd706484b719d14d212ada32 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 22 Sep 2008 22:16:23 -0700
Subject: pkt_sched: Check the state of tx_queue in dequeue_skb()

Check in dequeue_skb() the state of tx_queue for requeued skb to save
on locking and re-requeuing, and possibly remove the current check in
qdisc_run(). Based on the idea of Alexander Duyck.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1b508bd1c06..5e7e0bd38fe 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -52,11 +52,21 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
-	struct sk_buff *skb;
-
-	skb = __skb_dequeue(&q->requeue);
-	if (!skb)
+	struct sk_buff *skb = skb_peek(&q->requeue);
+
+	if (unlikely(skb)) {
+		struct net_device *dev = qdisc_dev(q);
+		struct netdev_queue *txq;
+
+		/* check the reason of requeuing without tx lock first */
+		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+			__skb_unlink(skb, &q->requeue);
+		else
+			skb = NULL;
+	} else {
 		skb = q->dequeue(q);
+	}
 
 	return skb;
 }
-- 
cgit v1.2.3


From 554794de7949d1a6279336404c066f974d4c2bde Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 6 Oct 2008 09:54:39 -0700
Subject: pkt_sched: Fix handling of gso skbs on requeuing

Jay Cliburn noticed and diagnosed a bug triggered in
dev_gso_skb_destructor() after last change from qdisc->gso_skb
to qdisc->requeue list. Since gso_segmented skbs can't be queued
to another list this patch brings back qdisc->gso_skb for them.

Reported-by: Jay Cliburn <jcliburn@gmail.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5e7e0bd38fe..3db4cf1bd26 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,7 +44,10 @@ static inline int qdisc_qlen(struct Qdisc *q)
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	__skb_queue_head(&q->requeue, skb);
+	if (unlikely(skb->next))
+		q->gso_skb = skb;
+	else
+		__skb_queue_head(&q->requeue, skb);
 
 	__netif_schedule(q);
 	return 0;
@@ -52,7 +55,10 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
-	struct sk_buff *skb = skb_peek(&q->requeue);
+	struct sk_buff *skb = q->gso_skb;
+
+	if (!skb)
+		skb = skb_peek(&q->requeue);
 
 	if (unlikely(skb)) {
 		struct net_device *dev = qdisc_dev(q);
@@ -60,10 +66,15 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
-			__skb_unlink(skb, &q->requeue);
-		else
+		if (!netif_tx_queue_stopped(txq) &&
+		    !netif_tx_queue_frozen(txq)) {
+			if (q->gso_skb)
+				q->gso_skb = NULL;
+			else
+				__skb_unlink(skb, &q->requeue);
+		} else {
 			skb = NULL;
+		}
 	} else {
 		skb = q->dequeue(q);
 	}
@@ -548,6 +559,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	module_put(ops->owner);
 	dev_put(qdisc_dev(qdisc));
 
+	kfree_skb(qdisc->gso_skb);
 	__skb_queue_purge(&qdisc->requeue);
 
 	kfree((char *) qdisc - qdisc->padded);
-- 
cgit v1.2.3


From 6252352d16f7b45a0fd42224f7e70e0288dc4480 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 6 Oct 2008 10:41:50 -0700
Subject: pkt_sched: Simplify dev_requeue_skb and dequeue_skb

qdisc->requeue was planned to universally replace all requeuing code,
but at the top level we never requeue more than one skb, so qdisc->
gso_skb is enough for this. qdisc->requeue would be used on the lower
levels only for one level deep requeuing (like in sch_hfsc) after
finishing all the changes.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3db4cf1bd26..31f6b614b59 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,12 +44,9 @@ static inline int qdisc_qlen(struct Qdisc *q)
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	if (unlikely(skb->next))
-		q->gso_skb = skb;
-	else
-		__skb_queue_head(&q->requeue, skb);
-
+	q->gso_skb = skb;
 	__netif_schedule(q);
+
 	return 0;
 }
 
@@ -57,24 +54,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
 	struct sk_buff *skb = q->gso_skb;
 
-	if (!skb)
-		skb = skb_peek(&q->requeue);
-
 	if (unlikely(skb)) {
 		struct net_device *dev = qdisc_dev(q);
 		struct netdev_queue *txq;
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) &&
-		    !netif_tx_queue_frozen(txq)) {
-			if (q->gso_skb)
-				q->gso_skb = NULL;
-			else
-				__skb_unlink(skb, &q->requeue);
-		} else {
+		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+			q->gso_skb = NULL;
+		else
 			skb = NULL;
-		}
 	} else {
 		skb = q->dequeue(q);
 	}
-- 
cgit v1.2.3


From 367c679007fa4f990eb7ee381326ec59d8148b0e Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:17 +0200
Subject: netfilter: xtables: do centralized checkentry call (1/2)

It used to be that {ip,ip6,etc}_tables called extension->checkentry
themselves, but this can be moved into the xtables core.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/sched/act_ipt.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d1263b3c96c..79ea19375ca 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -51,20 +51,12 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 	t->u.kernel.target = target;
 
 	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
-			      table, hook, 0, 0);
-	if (ret) {
+			      table, hook, 0, 0, NULL, t->data);
+	if (ret < 0) {
 		module_put(t->u.kernel.target->me);
 		return ret;
 	}
-	if (t->u.kernel.target->checkentry
-	    && !t->u.kernel.target->checkentry(table, NULL,
-					       t->u.kernel.target, t->data,
-					       hook)) {
-		module_put(t->u.kernel.target->me);
-		ret = -EINVAL;
-	}
-
-	return ret;
+	return 0;
 }
 
 static void ipt_destroy_target(struct ipt_entry_target *t)
-- 
cgit v1.2.3


From 7eb3558655aaa87a3e71a0c065dfaddda521fa6d Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:19 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (4/6)

This patch does this for target extensions' target functions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/sched/act_ipt.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 79ea19375ca..89791a56429 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -188,6 +188,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 {
 	int ret = 0, result = 0;
 	struct tcf_ipt *ipt = a->priv;
+	struct xt_target_param par;
 
 	if (skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -203,10 +204,13 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	/* yes, we have to worry about both in and out dev
 	 worry later - danger - this API seems to have changed
 	 from earlier kernels */
-	ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL,
-						   ipt->tcfi_hook,
-						   ipt->tcfi_t->u.kernel.target,
-						   ipt->tcfi_t->data);
+	par.in       = skb->dev;
+	par.out      = NULL;
+	par.hooknum  = ipt->tcfi_hook;
+	par.target   = ipt->tcfi_t->u.kernel.target;
+	par.targinfo = ipt->tcfi_t->data;
+	ret = par.target->target(skb, &par);
+
 	switch (ret) {
 	case NF_ACCEPT:
 		result = TC_ACT_OK;
-- 
cgit v1.2.3


From af5d6dc200eb0fcc6fbd3df1ab4d8969004cb37f Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:19 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (5/6)

This patch does this for target extensions' checkentry functions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/sched/act_ipt.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 89791a56429..a54dc3f8234 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -40,6 +40,7 @@ static struct tcf_hashinfo ipt_hash_info = {
 
 static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 {
+	struct xt_tgchk_param par;
 	struct xt_target *target;
 	int ret = 0;
 
@@ -49,9 +50,14 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 		return -ENOENT;
 
 	t->u.kernel.target = target;
-
-	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
-			      table, hook, 0, 0, NULL, t->data);
+	par.table     = table;
+	par.entryinfo = NULL;
+	par.target    = target;
+	par.targinfo  = t->data;
+	par.hook_mask = hook;
+
+	ret = xt_check_target(&par, NFPROTO_IPV4,
+	      t->u.target_size - sizeof(*t), 0, false);
 	if (ret < 0) {
 		module_put(t->u.kernel.target->me);
 		return ret;
-- 
cgit v1.2.3


From a2df1648ba615dd5908e9a1fa7b2f133fa302487 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:19 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (6/6)

This patch does this for target extensions' destroy functions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/sched/act_ipt.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index a54dc3f8234..b951d422db9 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -67,9 +67,13 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 
 static void ipt_destroy_target(struct ipt_entry_target *t)
 {
-	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
-	module_put(t->u.kernel.target->me);
+	struct xt_tgdtor_param par = {
+		.target   = t->u.kernel.target,
+		.targinfo = t->data,
+	};
+	if (par.target->destroy != NULL)
+		par.target->destroy(&par);
+	module_put(par.target->me);
 }
 
 static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
-- 
cgit v1.2.3


From 916a917dfec18535ff9e2afdafba82e6279eb4f4 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:20 +0200
Subject: netfilter: xtables: provide invoked family value to extensions

By passing in the family through which extensions were invoked, a bit
of data space can be reclaimed. The "family" member will be added to
the parameter structures and the check functions be adjusted.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/sched/act_ipt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index b951d422db9..0453d79ebf5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -55,9 +55,9 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 	par.target    = target;
 	par.targinfo  = t->data;
 	par.hook_mask = hook;
+	par.family    = NFPROTO_IPV4;
 
-	ret = xt_check_target(&par, NFPROTO_IPV4,
-	      t->u.target_size - sizeof(*t), 0, false);
+	ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
 	if (ret < 0) {
 		module_put(t->u.kernel.target->me);
 		return ret;
-- 
cgit v1.2.3


From 53e915034970935596703a6005cde27c2128b5c3 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 8 Oct 2008 11:36:22 -0700
Subject: pkt_sched: Update qdisc requeue stats in dev_requeue_skb()

After the last change of requeuing there is no info about such
incidents in tc stats. This patch updates the counter, but we should
consider this should differ from previous stats because of additional
checks preventing to repeat this. On the other hand, previous stats
didn't include requeuing of gso_segmented skbs.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 31f6b614b59..7b5572d6beb 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -45,6 +45,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	q->gso_skb = skb;
+	q->qstats.requeues++;
 	__netif_schedule(q);
 
 	return 0;
-- 
cgit v1.2.3