From 2c10b32bf57db7ec6d4cca4c4aa3d86bacb01c8a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 2 Sep 2008 17:30:27 -0700 Subject: netlink: Remove compat API for nested attributes Removes all _nested_compat() functions from the API. The prio qdisc no longer requires them and netem has its own format anyway. Their existance is only confusing. Resend: Also remove the wrapper macro. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 18 ++++++++++++++++-- net/sched/sch_prio.c | 6 +----- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3781e55046d..a11959908d9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -388,6 +388,20 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, }; +static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy, int len) +{ + int nested_len = nla_len(nla) - NLA_ALIGN(len); + + if (nested_len < 0) + return -EINVAL; + if (nested_len >= nla_attr_size(0)) + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), + nested_len, policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt) { @@ -399,8 +413,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy, - qopt, sizeof(*qopt)); + qopt = nla_data(opt); + ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); if (ret < 0) return ret; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index a6697c686c7..504a78cdb71 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -254,16 +254,12 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); - struct nlattr *nest; struct tc_prio_qopt opt; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); - nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt); - if (nest == NULL) - goto nla_put_failure; - nla_nest_compat_end(skb, nest); + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; -- cgit v1.2.3 From 5337407c673e2c7c66a84b9838d55a45a760ecff Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 8 Sep 2008 16:17:42 -0700 Subject: warn: Turn the netdev timeout WARN_ON() into a WARN() this patch turns the netdev timeout WARN_ON_ONCE() into a WARN_ONCE(), so that the device and driver names are inside the warning message. This helps automated tools like kerneloops.org to collect the data and do statistics, as well as making it more likely that humans cut-n-paste the important message as part of a bugreport. Signed-off-by: Arjan van de Ven Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 9634091ee2f..ec0a0839ce5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -215,10 +215,9 @@ static void dev_watchdog(unsigned long arg) time_after(jiffies, (dev->trans_start + dev->watchdog_timeo))) { char drivername[64]; - printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", + WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); dev->tx_timeout(dev); - WARN_ON_ONCE(1); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + -- cgit v1.2.3 From 92651940ab00dbe64722e908f70d816713d677b7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:29:34 -0700 Subject: pkt_sched: Add multiqueue scheduler support This patch is intended to add a qdisc to support the new tx multiqueue architecture by providing a band for each hardware queue. By doing this it is possible to support a different qdisc per physical hardware queue. This qdisc uses the skb->queue_mapping to select which band to place the traffic onto. It then uses a round robin w/ a check to see if the subqueue is stopped to determine which band to dequeue the packet from. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/sched/Kconfig | 9 + net/sched/Makefile | 1 + net/sched/sch_multiq.c | 467 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 477 insertions(+) create mode 100644 net/sched/sch_multiq.c (limited to 'net/sched') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9437b27ff84..efaa7a75e7f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -106,6 +106,15 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. +config NET_SCH_MULTIQ + tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)" + ---help--- + Say Y here if you want to use an n-band queue packet scheduler + to support devices that have multiple hardware transmit queues. + + To compile this code as a module, choose M here: the + module will be called sch_multiq. + config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 1d2b0f7df84..3d9b953f7f6 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c new file mode 100644 index 00000000000..49a8b67ed3b --- /dev/null +++ b/net/sched/sch_multiq.c @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +struct multiq_sched_data { + u16 bands; + u16 max_bands; + u16 curband; + struct tcf_proto *filter_list; + struct Qdisc **queues; +}; + + +static struct Qdisc * +multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + u32 band; + struct tcf_result res; + int err; + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + err = tc_classify(skb, q->filter_list, &res); +#ifdef CONFIG_NET_CLS_ACT + switch (err) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + band = skb_get_queue_mapping(skb); + + if (band >= q->bands) + return q->queues[0]; + + return q->queues[band]; +} + +static int +multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct Qdisc *qdisc; + int ret; + + qdisc = multiq_classify(skb, sch, &ret); +#ifdef CONFIG_NET_CLS_ACT + if (qdisc == NULL) { + + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } +#endif + + ret = qdisc_enqueue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->bstats.bytes += qdisc_pkt_len(skb); + sch->bstats.packets++; + sch->q.qlen++; + return NET_XMIT_SUCCESS; + } + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + return ret; +} + + +static int +multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct Qdisc *qdisc; + int ret; + + qdisc = multiq_classify(skb, sch, &ret); +#ifdef CONFIG_NET_CLS_ACT + if (qdisc == NULL) { + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } +#endif + + ret = qdisc->ops->requeue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->q.qlen++; + sch->qstats.requeues++; + return NET_XMIT_SUCCESS; + } + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + return ret; +} + + +static struct sk_buff *multiq_dequeue(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc; + struct sk_buff *skb; + int band; + + for (band = 0; band < q->bands; band++) { + /* cycle through bands to ensure fairness */ + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + + /* Check that target subqueue is available before + * pulling an skb to avoid excessive requeues + */ + if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { + qdisc = q->queues[q->curband]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; + } + } + } + return NULL; + +} + +static unsigned int multiq_drop(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int band; + unsigned int len; + struct Qdisc *qdisc; + + for (band = q->bands-1; band >= 0; band--) { + qdisc = q->queues[band]; + if (qdisc->ops->drop) { + len = qdisc->ops->drop(qdisc); + if (len != 0) { + sch->q.qlen--; + return len; + } + } + } + return 0; +} + + +static void +multiq_reset(struct Qdisc *sch) +{ + u16 band; + struct multiq_sched_data *q = qdisc_priv(sch); + + for (band = 0; band < q->bands; band++) + qdisc_reset(q->queues[band]); + sch->q.qlen = 0; + q->curband = 0; +} + +static void +multiq_destroy(struct Qdisc *sch) +{ + int band; + struct multiq_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + for (band = 0; band < q->bands; band++) + qdisc_destroy(q->queues[band]); + + kfree(q->queues); +} + +static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct tc_multiq_qopt *qopt; + int i; + + if (!netif_is_multiqueue(qdisc_dev(sch))) + return -EINVAL; + if (nla_len(opt) < sizeof(*qopt)) + return -EINVAL; + + qopt = nla_data(opt); + + qopt->bands = qdisc_dev(sch)->real_num_tx_queues; + + sch_tree_lock(sch); + q->bands = qopt->bands; + for (i = q->bands; i < q->max_bands; i++) { + struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_destroy(child); + } + } + + sch_tree_unlock(sch); + + for (i = 0; i < q->bands; i++) { + if (q->queues[i] == &noop_qdisc) { + struct Qdisc *child; + child = qdisc_create_dflt(qdisc_dev(sch), + sch->dev_queue, + &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, + i + 1)); + if (child) { + sch_tree_lock(sch); + child = xchg(&q->queues[i], child); + + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, + child->q.qlen); + qdisc_destroy(child); + } + sch_tree_unlock(sch); + } + } + } + return 0; +} + +static int multiq_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int i; + + q->queues = NULL; + + if (opt == NULL) + return -EINVAL; + + q->max_bands = qdisc_dev(sch)->num_tx_queues; + + q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL); + if (!q->queues) + return -ENOBUFS; + for (i = 0; i < q->max_bands; i++) + q->queues[i] = &noop_qdisc; + + return multiq_tune(sch, opt); +} + +static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned char *b = skb_tail_pointer(skb); + struct tc_multiq_qopt opt; + + opt.bands = q->bands; + opt.max_bands = q->max_bands; + + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = arg - 1; + + if (band >= q->bands) + return -EINVAL; + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); + *old = q->queues[band]; + q->queues[band] = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + + return 0; +} + +static struct Qdisc * +multiq_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = arg - 1; + + if (band >= q->bands) + return NULL; + + return q->queues[band]; +} + +static unsigned long multiq_get(struct Qdisc *sch, u32 classid) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = TC_H_MIN(classid); + + if (band - 1 >= q->bands) + return 0; + return band; +} + +static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return multiq_get(sch, classid); +} + + +static void multiq_put(struct Qdisc *q, unsigned long cl) +{ + return; +} + +static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent, + struct nlattr **tca, unsigned long *arg) +{ + unsigned long cl = *arg; + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} + +static int multiq_delete(struct Qdisc *sch, unsigned long cl) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} + + +static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl - 1 > q->bands) + return -ENOENT; + tcm->tcm_handle |= TC_H_MIN(cl); + if (q->queues[cl-1]) + tcm->tcm_info = q->queues[cl-1]->handle; + return 0; +} + +static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct Qdisc *cl_q; + + cl_q = q->queues[cl - 1]; + if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 || + gnet_stats_copy_queue(d, &cl_q->qstats) < 0) + return -1; + + return 0; +} + +static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int band; + + if (arg->stop) + return; + + for (band = 0; band < q->bands; band++) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, band+1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static const struct Qdisc_class_ops multiq_class_ops = { + .graft = multiq_graft, + .leaf = multiq_leaf, + .get = multiq_get, + .put = multiq_put, + .change = multiq_change, + .delete = multiq_delete, + .walk = multiq_walk, + .tcf_chain = multiq_find_tcf, + .bind_tcf = multiq_bind, + .unbind_tcf = multiq_put, + .dump = multiq_dump_class, + .dump_stats = multiq_dump_class_stats, +}; + +static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { + .next = NULL, + .cl_ops = &multiq_class_ops, + .id = "multiq", + .priv_size = sizeof(struct multiq_sched_data), + .enqueue = multiq_enqueue, + .dequeue = multiq_dequeue, + .requeue = multiq_requeue, + .drop = multiq_drop, + .init = multiq_init, + .reset = multiq_reset, + .destroy = multiq_destroy, + .change = multiq_tune, + .dump = multiq_dump, + .owner = THIS_MODULE, +}; + +static int __init multiq_module_init(void) +{ + return register_qdisc(&multiq_qdisc_ops); +} + +static void __exit multiq_module_exit(void) +{ + unregister_qdisc(&multiq_qdisc_ops); +} + +module_init(multiq_module_init) +module_exit(multiq_module_exit) + +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From ca9b0e27e072be4cef2f5f0cbc0b0fd94eae3520 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:30:20 -0700 Subject: pkt_action: add new action skbedit This new action will have the ability to change the priority and/or queue_mapping fields on an sk_buff. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/sched/Kconfig | 11 +++ net/sched/Makefile | 1 + net/sched/act_skbedit.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 net/sched/act_skbedit.c (limited to 'net/sched') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index efaa7a75e7f..6767e54155d 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -485,6 +485,17 @@ config NET_ACT_SIMP To compile this code as a module, choose M here: the module will be called simple. +config NET_ACT_SKBEDIT + tristate "SKB Editing" + depends on NET_CLS_ACT + ---help--- + Say Y here to change skb priority or queue_mapping settings. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called skbedit. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 3d9b953f7f6..e60c9925b26 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o +obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c new file mode 100644 index 00000000000..fe9777e77f3 --- /dev/null +++ b/net/sched/act_skbedit.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SKBEDIT_TAB_MASK 15 +static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1]; +static u32 skbedit_idx_gen; +static DEFINE_RWLOCK(skbedit_lock); + +static struct tcf_hashinfo skbedit_hash_info = { + .htab = tcf_skbedit_ht, + .hmask = SKBEDIT_TAB_MASK, + .lock = &skbedit_lock, +}; + +static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbedit *d = a->priv; + + spin_lock(&d->tcf_lock); + d->tcf_tm.lastuse = jiffies; + d->tcf_bstats.bytes += qdisc_pkt_len(skb); + d->tcf_bstats.packets++; + + if (d->flags & SKBEDIT_F_PRIORITY) + skb->priority = d->priority; + if (d->flags & SKBEDIT_F_QUEUE_MAPPING && + skb->dev->real_num_tx_queues > d->queue_mapping) + skb_set_queue_mapping(skb, d->queue_mapping); + + spin_unlock(&d->tcf_lock); + return d->tcf_action; +} + +static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { + [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) }, + [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, + [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, +}; + +static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; + struct tc_skbedit *parm; + struct tcf_skbedit *d; + struct tcf_common *pc; + u32 flags = 0, *priority = NULL; + u16 *queue_mapping = NULL; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy); + if (err < 0) + return err; + + if (tb[TCA_SKBEDIT_PARMS] == NULL) + return -EINVAL; + + if (tb[TCA_SKBEDIT_PRIORITY] != NULL) { + flags |= SKBEDIT_F_PRIORITY; + priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]); + } + + if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) { + flags |= SKBEDIT_F_QUEUE_MAPPING; + queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); + } + if (!flags) + return -EINVAL; + + parm = nla_data(tb[TCA_SKBEDIT_PARMS]); + + pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, + &skbedit_idx_gen, &skbedit_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + + d = to_skbedit(pc); + ret = ACT_P_CREATED; + } else { + d = to_skbedit(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &skbedit_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&d->tcf_lock); + + d->flags = flags; + if (flags & SKBEDIT_F_PRIORITY) + d->priority = *priority; + if (flags & SKBEDIT_F_QUEUE_MAPPING) + d->queue_mapping = *queue_mapping; + d->tcf_action = parm->action; + + spin_unlock_bh(&d->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &skbedit_hash_info); + return ret; +} + +static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbedit *d = a->priv; + + if (d) + return tcf_hash_release(&d->common, bind, &skbedit_hash_info); + return 0; +} + +static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbedit *d = a->priv; + struct tc_skbedit opt; + struct tcf_t t; + + opt.index = d->tcf_index; + opt.refcnt = d->tcf_refcnt - ref; + opt.bindcnt = d->tcf_bindcnt - bind; + opt.action = d->tcf_action; + NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); + if (d->flags & SKBEDIT_F_PRIORITY) + NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), + &d->priority); + if (d->flags & SKBEDIT_F_QUEUE_MAPPING) + NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING, + sizeof(d->queue_mapping), &d->queue_mapping); + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t); + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_skbedit_ops = { + .kind = "skbedit", + .hinfo = &skbedit_hash_info, + .type = TCA_ACT_SKBEDIT, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_skbedit, + .dump = tcf_skbedit_dump, + .cleanup = tcf_skbedit_cleanup, + .init = tcf_skbedit_init, + .walk = tcf_generic_walker, +}; + +MODULE_AUTHOR("Alexander Duyck, "); +MODULE_DESCRIPTION("SKB Editing"); +MODULE_LICENSE("GPL"); + +static int __init skbedit_init_module(void) +{ + return tcf_register_action(&act_skbedit_ops); +} + +static void __exit skbedit_cleanup_module(void) +{ + tcf_unregister_action(&act_skbedit_ops); +} + +module_init(skbedit_init_module); +module_exit(skbedit_cleanup_module); -- cgit v1.2.3 From f07d1501292b3b0d3276ee0e537005526a45e242 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 17:57:23 -0700 Subject: multiq: Further multiqueue cleanup This patch resolves a few issues found with multiq including wording suggestions and a problem seen in the allocation of queues. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/sched/sch_multiq.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 49a8b67ed3b..5d9cd68e91d 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -214,8 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); q->bands = qopt->bands; for (i = q->bands; i < q->max_bands; i++) { - struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); - if (child != &noop_qdisc) { + if (q->queues[i] != &noop_qdisc) { + struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } @@ -250,7 +250,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) static int multiq_init(struct Qdisc *sch, struct nlattr *opt) { struct multiq_sched_data *q = qdisc_priv(sch); - int i; + int i, err; q->queues = NULL; @@ -265,7 +265,12 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->max_bands; i++) q->queues[i] = &noop_qdisc; - return multiq_tune(sch, opt); + err = multiq_tune(sch,opt); + + if (err) + kfree(q->queues); + + return err; } static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) -- cgit v1.2.3 From a574420ff46cff0245e6b0fce2e961aa2717743d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 20 Sep 2008 22:07:34 -0700 Subject: multiq: requeue should rewind the current_band Currently dequeueing a packet and requeueing the same packet will cause a different packet to be pulled on the next dequeue. This change forces requeue to rewind the current_band. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/sched/sch_multiq.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/sched') diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 5d9cd68e91d..915f3149dde 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -97,6 +97,7 @@ static int multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) { struct Qdisc *qdisc; + struct multiq_sched_data *q = qdisc_priv(sch); int ret; qdisc = multiq_classify(skb, sch, &ret); @@ -113,6 +114,10 @@ multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->qstats.requeues++; + if (q->curband) + q->curband--; + else + q->curband = q->bands - 1; return NET_XMIT_SUCCESS; } if (net_xmit_drop_count(ret)) -- cgit v1.2.3 From 6067804047b64dde89f4f133fc7eba48ee44107d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 20 Sep 2008 22:20:49 -0700 Subject: net: Use hton[sl]() instead of __constant_hton[sl]() where applicable Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 28 ++++++++++++++-------------- net/sched/sch_dsmark.c | 8 ++++---- net/sched/sch_sfq.c | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 8f63a1a9401..0ebaff637e3 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -67,9 +67,9 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(ip_hdr(skb)->saddr); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]); default: return addr_fold(skb->sk); @@ -79,9 +79,9 @@ static u32 flow_get_src(const struct sk_buff *skb) static u32 flow_get_dst(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(ip_hdr(skb)->daddr); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); default: return addr_fold(skb->dst) ^ (__force u16)skb->protocol; @@ -91,9 +91,9 @@ static u32 flow_get_dst(const struct sk_buff *skb) static u32 flow_get_proto(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ip_hdr(skb)->protocol; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ipv6_hdr(skb)->nexthdr; default: return 0; @@ -120,7 +120,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb) u32 res = 0; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { struct iphdr *iph = ip_hdr(skb); if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -128,7 +128,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb) res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); if (has_ports(iph->nexthdr)) @@ -147,7 +147,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) u32 res = 0; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { struct iphdr *iph = ip_hdr(skb); if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -155,7 +155,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); if (has_ports(iph->nexthdr)) @@ -213,9 +213,9 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: @@ -225,9 +225,9 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index edd1298f85f..ba43aab3a85 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -202,7 +202,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (p->set_tc_index) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) & ~INET_ECN_MASK; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): if (skb_cow_head(skb, sizeof(struct ipv6hdr))) goto drop; @@ -289,11 +289,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) pr_debug("index %d->%d\n", skb->tc_index, index); switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index], p->value[index]); break; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 6e041d10dbd..fe1508ef0d3 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -119,7 +119,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) u32 h, h2; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): { const struct iphdr *iph = ip_hdr(skb); h = iph->daddr; @@ -134,7 +134,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) h2 ^= *(((u32*)iph) + iph->ihl); break; } - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); h = iph->daddr.s6_addr32[3]; -- cgit v1.2.3 From d48abfecea8513cfd2fd7e341439c1b8a28e9ff4 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 22 Sep 2008 19:20:51 -0700 Subject: net: em_cmp.c use unaligned access helpers Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/sched/em_cmp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net/sched') diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index cc49c932641..bc450397487 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp) @@ -37,8 +38,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, break; case TCF_EM_ALIGN_U16: - val = *ptr << 8; - val |= *(ptr+1); + val = get_unaligned_be16(ptr); if (cmp_needs_transformation(cmp)) val = be16_to_cpu(val); @@ -47,10 +47,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, case TCF_EM_ALIGN_U32: /* Worth checking boundries? The branching seems * to get worse. Visit again. */ - val = *ptr << 24; - val |= *(ptr+1) << 16; - val |= *(ptr+2) << 8; - val |= *(ptr+3); + val = get_unaligned_be32(ptr); if (cmp_needs_transformation(cmp)) val = be32_to_cpu(val); -- cgit v1.2.3 From 242f8bfefe4bed626df4e4727ac8f315d80b567a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 22:15:30 -0700 Subject: pkt_sched: Make qdisc->gso_skb a list. The idea is that we can use this to get rid of ->requeue(). Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ec0a0839ce5..5961536be60 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -45,7 +45,7 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { if (unlikely(skb->next)) - q->gso_skb = skb; + __skb_queue_head(&q->requeue, skb); else q->ops->requeue(skb, q); @@ -57,9 +57,8 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb; - if ((skb = q->gso_skb)) - q->gso_skb = NULL; - else + skb = __skb_dequeue(&q->requeue); + if (!skb) skb = q->dequeue(q); return skb; @@ -327,6 +326,7 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -352,6 +352,7 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -472,6 +473,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); + skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -539,7 +541,7 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); - kfree_skb(qdisc->gso_skb); + __skb_queue_purge(&qdisc->requeue); kfree((char *) qdisc - qdisc->padded); } -- cgit v1.2.3 From f0876520b0b721bedafd9cec3b1b0624ae566eee Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 22:15:58 -0700 Subject: pkt_sched: Always use q->requeue in dev_requeue_skb(). There is no reason to call into the complicated qdiscs just to remember the last SKB where we found the device blocked. The SKB is outside of the qdiscs realm at this point. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5961536be60..1b508bd1c06 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -44,10 +44,7 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - if (unlikely(skb->next)) - __skb_queue_head(&q->requeue, skb); - else - q->ops->requeue(skb, q); + __skb_queue_head(&q->requeue, skb); __netif_schedule(q); return 0; -- cgit v1.2.3 From ebf059821ed8a36acd706484b719d14d212ada32 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 22 Sep 2008 22:16:23 -0700 Subject: pkt_sched: Check the state of tx_queue in dequeue_skb() Check in dequeue_skb() the state of tx_queue for requeued skb to save on locking and re-requeuing, and possibly remove the current check in qdisc_run(). Based on the idea of Alexander Duyck. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1b508bd1c06..5e7e0bd38fe 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -52,11 +52,21 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { - struct sk_buff *skb; - - skb = __skb_dequeue(&q->requeue); - if (!skb) + struct sk_buff *skb = skb_peek(&q->requeue); + + if (unlikely(skb)) { + struct net_device *dev = qdisc_dev(q); + struct netdev_queue *txq; + + /* check the reason of requeuing without tx lock first */ + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + __skb_unlink(skb, &q->requeue); + else + skb = NULL; + } else { skb = q->dequeue(q); + } return skb; } -- cgit v1.2.3 From 554794de7949d1a6279336404c066f974d4c2bde Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 6 Oct 2008 09:54:39 -0700 Subject: pkt_sched: Fix handling of gso skbs on requeuing Jay Cliburn noticed and diagnosed a bug triggered in dev_gso_skb_destructor() after last change from qdisc->gso_skb to qdisc->requeue list. Since gso_segmented skbs can't be queued to another list this patch brings back qdisc->gso_skb for them. Reported-by: Jay Cliburn Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5e7e0bd38fe..3db4cf1bd26 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -44,7 +44,10 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - __skb_queue_head(&q->requeue, skb); + if (unlikely(skb->next)) + q->gso_skb = skb; + else + __skb_queue_head(&q->requeue, skb); __netif_schedule(q); return 0; @@ -52,7 +55,10 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { - struct sk_buff *skb = skb_peek(&q->requeue); + struct sk_buff *skb = q->gso_skb; + + if (!skb) + skb = skb_peek(&q->requeue); if (unlikely(skb)) { struct net_device *dev = qdisc_dev(q); @@ -60,10 +66,15 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) - __skb_unlink(skb, &q->requeue); - else + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { + if (q->gso_skb) + q->gso_skb = NULL; + else + __skb_unlink(skb, &q->requeue); + } else { skb = NULL; + } } else { skb = q->dequeue(q); } @@ -548,6 +559,7 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); + kfree_skb(qdisc->gso_skb); __skb_queue_purge(&qdisc->requeue); kfree((char *) qdisc - qdisc->padded); -- cgit v1.2.3 From 6252352d16f7b45a0fd42224f7e70e0288dc4480 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 6 Oct 2008 10:41:50 -0700 Subject: pkt_sched: Simplify dev_requeue_skb and dequeue_skb qdisc->requeue was planned to universally replace all requeuing code, but at the top level we never requeue more than one skb, so qdisc-> gso_skb is enough for this. qdisc->requeue would be used on the lower levels only for one level deep requeuing (like in sch_hfsc) after finishing all the changes. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3db4cf1bd26..31f6b614b59 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -44,12 +44,9 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - if (unlikely(skb->next)) - q->gso_skb = skb; - else - __skb_queue_head(&q->requeue, skb); - + q->gso_skb = skb; __netif_schedule(q); + return 0; } @@ -57,24 +54,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb = q->gso_skb; - if (!skb) - skb = skb_peek(&q->requeue); - if (unlikely(skb)) { struct net_device *dev = qdisc_dev(q); struct netdev_queue *txq; /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) { - if (q->gso_skb) - q->gso_skb = NULL; - else - __skb_unlink(skb, &q->requeue); - } else { + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + q->gso_skb = NULL; + else skb = NULL; - } } else { skb = q->dequeue(q); } -- cgit v1.2.3 From 367c679007fa4f990eb7ee381326ec59d8148b0e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:17 +0200 Subject: netfilter: xtables: do centralized checkentry call (1/2) It used to be that {ip,ip6,etc}_tables called extension->checkentry themselves, but this can be moved into the xtables core. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/sched/act_ipt.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d1263b3c96c..79ea19375ca 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -51,20 +51,12 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int t->u.kernel.target = target; ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - table, hook, 0, 0); - if (ret) { + table, hook, 0, 0, NULL, t->data); + if (ret < 0) { module_put(t->u.kernel.target->me); return ret; } - if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(table, NULL, - t->u.kernel.target, t->data, - hook)) { - module_put(t->u.kernel.target->me); - ret = -EINVAL; - } - - return ret; + return 0; } static void ipt_destroy_target(struct ipt_entry_target *t) -- cgit v1.2.3 From 7eb3558655aaa87a3e71a0c065dfaddda521fa6d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (4/6) This patch does this for target extensions' target functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/sched/act_ipt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 79ea19375ca..89791a56429 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -188,6 +188,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, { int ret = 0, result = 0; struct tcf_ipt *ipt = a->priv; + struct xt_target_param par; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) @@ -203,10 +204,13 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed from earlier kernels */ - ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL, - ipt->tcfi_hook, - ipt->tcfi_t->u.kernel.target, - ipt->tcfi_t->data); + par.in = skb->dev; + par.out = NULL; + par.hooknum = ipt->tcfi_hook; + par.target = ipt->tcfi_t->u.kernel.target; + par.targinfo = ipt->tcfi_t->data; + ret = par.target->target(skb, &par); + switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; -- cgit v1.2.3 From af5d6dc200eb0fcc6fbd3df1ab4d8969004cb37f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (5/6) This patch does this for target extensions' checkentry functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/sched/act_ipt.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 89791a56429..a54dc3f8234 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -40,6 +40,7 @@ static struct tcf_hashinfo ipt_hash_info = { static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { + struct xt_tgchk_param par; struct xt_target *target; int ret = 0; @@ -49,9 +50,14 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int return -ENOENT; t->u.kernel.target = target; - - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - table, hook, 0, 0, NULL, t->data); + par.table = table; + par.entryinfo = NULL; + par.target = target; + par.targinfo = t->data; + par.hook_mask = hook; + + ret = xt_check_target(&par, NFPROTO_IPV4, + t->u.target_size - sizeof(*t), 0, false); if (ret < 0) { module_put(t->u.kernel.target->me); return ret; -- cgit v1.2.3 From a2df1648ba615dd5908e9a1fa7b2f133fa302487 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (6/6) This patch does this for target extensions' destroy functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/sched/act_ipt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index a54dc3f8234..b951d422db9 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -67,9 +67,13 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int static void ipt_destroy_target(struct ipt_entry_target *t) { - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + struct xt_tgdtor_param par = { + .target = t->u.kernel.target, + .targinfo = t->data, + }; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); } static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) -- cgit v1.2.3 From 916a917dfec18535ff9e2afdafba82e6279eb4f4 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:20 +0200 Subject: netfilter: xtables: provide invoked family value to extensions By passing in the family through which extensions were invoked, a bit of data space can be reclaimed. The "family" member will be added to the parameter structures and the check functions be adjusted. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/sched/act_ipt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index b951d422db9..0453d79ebf5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -55,9 +55,9 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int par.target = target; par.targinfo = t->data; par.hook_mask = hook; + par.family = NFPROTO_IPV4; - ret = xt_check_target(&par, NFPROTO_IPV4, - t->u.target_size - sizeof(*t), 0, false); + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); if (ret < 0) { module_put(t->u.kernel.target->me); return ret; -- cgit v1.2.3 From 53e915034970935596703a6005cde27c2128b5c3 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 8 Oct 2008 11:36:22 -0700 Subject: pkt_sched: Update qdisc requeue stats in dev_requeue_skb() After the last change of requeuing there is no info about such incidents in tc stats. This patch updates the counter, but we should consider this should differ from previous stats because of additional checks preventing to repeat this. On the other hand, previous stats didn't include requeuing of gso_segmented skbs. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 31f6b614b59..7b5572d6beb 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -45,6 +45,7 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { q->gso_skb = skb; + q->qstats.requeues++; __netif_schedule(q); return 0; -- cgit v1.2.3