From aa6f5ffbdba45aa8e19e5048648fc6c7b25376d3 Mon Sep 17 00:00:00 2001 From: merge Date: Thu, 22 Jan 2009 13:55:32 +0000 Subject: MERGE-via-pending-tracking-hist-MERGE-via-stable-tracking-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040-1232632141 pending-tracking-hist top was MERGE-via-stable-tracking-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040-1232632141 / fdf777a63bcb59e0dfd78bfe2c6242e01f6d4eb9 ... parent commitmessage: From: merge MERGE-via-stable-tracking-hist-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040 stable-tracking-hist top was MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040 / 90463bfd2d5a3c8b52f6e6d71024a00e052b0ced ... parent commitmessage: From: merge MERGE-via-mokopatches-tracking-hist-fix-stray-endmenu-patch mokopatches-tracking-hist top was fix-stray-endmenu-patch / 3630e0be570de8057e7f8d2fe501ed353cdf34e6 ... parent commitmessage: From: Andy Green fix-stray-endmenu.patch Signed-off-by: Andy Green --- net/sched/Kconfig | 19 ++ net/sched/Makefile | 2 + net/sched/act_api.c | 18 +- net/sched/act_gact.c | 4 +- net/sched/act_ipt.c | 4 +- net/sched/act_mirred.c | 4 +- net/sched/act_nat.c | 4 +- net/sched/act_pedit.c | 4 +- net/sched/act_police.c | 33 ++- net/sched/act_simple.c | 4 +- net/sched/act_skbedit.c | 4 +- net/sched/cls_api.c | 3 +- net/sched/cls_basic.c | 2 +- net/sched/cls_cgroup.c | 293 ++++++++++++++++++++++++++ net/sched/cls_flow.c | 4 +- net/sched/cls_fw.c | 2 +- net/sched/cls_route.c | 2 +- net/sched/cls_tcindex.c | 6 - net/sched/cls_u32.c | 14 +- net/sched/ematch.c | 18 +- net/sched/sch_api.c | 52 ++--- net/sched/sch_atm.c | 36 ++-- net/sched/sch_blackhole.c | 1 + net/sched/sch_cbq.c | 76 +++---- net/sched/sch_drr.c | 519 ++++++++++++++++++++++++++++++++++++++++++++++ net/sched/sch_dsmark.c | 22 +- net/sched/sch_fifo.c | 4 +- net/sched/sch_generic.c | 47 +++-- net/sched/sch_gred.c | 22 +- net/sched/sch_hfsc.c | 64 +++--- net/sched/sch_htb.c | 186 +++++++---------- net/sched/sch_multiq.c | 82 ++++---- net/sched/sch_netem.c | 163 ++------------- net/sched/sch_prio.c | 50 ++--- net/sched/sch_red.c | 33 ++- net/sched/sch_sfq.c | 73 +------ net/sched/sch_tbf.c | 44 ++-- net/sched/sch_teql.c | 39 ++-- 38 files changed, 1252 insertions(+), 705 deletions(-) create mode 100644 net/sched/cls_cgroup.c create mode 100644 net/sched/sch_drr.c (limited to 'net/sched') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 6767e54155d..929218a4762 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -194,6 +194,17 @@ config NET_SCH_NETEM If unsure, say N. +config NET_SCH_DRR + tristate "Deficit Round Robin scheduler (DRR)" + help + Say Y here if you want to use the Deficit Round Robin (DRR) packet + scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_drr. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT @@ -316,6 +327,14 @@ config NET_CLS_FLOW To compile this code as a module, choose M here: the module will be called cls_flow. +config NET_CLS_CGROUP + bool "Control Group Classifier" + select NET_CLS + depends on CGROUPS + ---help--- + Say Y here if you want to classify packets based on the control + cgroup of their process. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index e60c9925b26..54d950cd4b8 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o +obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o @@ -38,6 +39,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o +obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8f457f1e0ac..9d03cc33b6c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -214,12 +214,14 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, } EXPORT_SYMBOL(tcf_hash_check); -struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) +struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, + struct tc_action *a, int size, int bind, + u32 *idx_gen, struct tcf_hashinfo *hinfo) { struct tcf_common *p = kzalloc(size, GFP_KERNEL); if (unlikely(!p)) - return p; + return ERR_PTR(-ENOMEM); p->tcfc_refcnt = 1; if (bind) p->tcfc_bindcnt = 1; @@ -228,9 +230,15 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_acti p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; - if (est) - gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - &p->tcfc_lock, est); + if (est) { + int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, + &p->tcfc_lock, est); + if (err) { + kfree(p); + return ERR_PTR(err); + } + } + a->priv = (void *) p; return p; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ac04289da5d..e7f796aec65 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -88,8 +88,8 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind, &gact_idx_gen, &gact_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0453d79ebf5..082c520b0de 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -136,8 +136,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, &ipt_idx_gen, &ipt_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 70341c020b6..b9aaab4e035 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -105,8 +105,8 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, &mirred_idx_gen, &mirred_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7b39ed485bc..d885ba31156 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -68,8 +68,8 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &nat_idx_gen, &nat_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index d5f4e340486..96c0ed115e2 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -68,8 +68,8 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &pedit_idx_gen, &pedit_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 38015b49394..5c72a116b1a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -182,17 +182,32 @@ override: R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); if (R_tab == NULL) goto failure; + + if (!est && (ret == ACT_P_CREATED || + !gen_estimator_active(&police->tcf_bstats, + &police->tcf_rate_est))) { + err = -EINVAL; + goto failure; + } + if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE]); - if (P_tab == NULL) { - qdisc_put_rtab(R_tab); + if (P_tab == NULL) goto failure; - } } } - /* No failure allowed after this point */ + spin_lock_bh(&police->tcf_lock); + if (est) { + err = gen_replace_estimator(&police->tcf_bstats, + &police->tcf_rate_est, + &police->tcf_lock, est); + if (err) + goto failure_unlock; + } + + /* No failure allowed after this point */ if (R_tab != NULL) { qdisc_put_rtab(police->tcfp_R_tab); police->tcfp_R_tab = R_tab; @@ -217,10 +232,6 @@ override: if (tb[TCA_POLICE_AVRATE]) police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); - if (est) - gen_replace_estimator(&police->tcf_bstats, - &police->tcf_rate_est, - &police->tcf_lock, est); spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -238,7 +249,13 @@ override: a->priv = police; return ret; +failure_unlock: + spin_unlock_bh(&police->tcf_lock); failure: + if (P_tab) + qdisc_put_rtab(P_tab); + if (R_tab) + qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) kfree(police); return err; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e7851ce92cf..8daa1ebc741 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -124,8 +124,8 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &simp_idx_gen, &simp_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); d = to_defact(pc); ret = alloc_defdata(d, defdata); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fe9777e77f3..4ab916b8074 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -104,8 +104,8 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &skbedit_idx_gen, &skbedit_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); d = to_skbedit(pc); ret = ACT_P_CREATED; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 16e7ac9774e..173fcc4b050 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -531,7 +531,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, if (src->action) { struct tc_action *act; tcf_tree_lock(tp); - act = xchg(&dst->action, src->action); + act = dst->action; + dst->action = src->action; tcf_tree_unlock(tp); if (act) tcf_action_destroy(act, TCA_ACT_UNBIND); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 956915c217d..4e2bda85411 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -102,7 +102,7 @@ static inline void basic_delete_filter(struct tcf_proto *tp, static void basic_destroy(struct tcf_proto *tp) { - struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL); + struct basic_head *head = tp->root; struct basic_filter *f, *n; list_for_each_entry_safe(f, n, &head->flist, link) { diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c new file mode 100644 index 00000000000..91a3db4a76f --- /dev/null +++ b/net/sched/cls_cgroup.c @@ -0,0 +1,293 @@ +/* + * net/sched/cls_cgroup.c Control Group Classifier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct cgroup_cls_state +{ + struct cgroup_subsys_state css; + u32 classid; +}; + +static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) +{ + return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), + struct cgroup_cls_state, css); +} + +static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) +{ + return container_of(task_subsys_state(p, net_cls_subsys_id), + struct cgroup_cls_state, css); +} + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp) +{ + struct cgroup_cls_state *cs; + + if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) + return ERR_PTR(-ENOMEM); + + if (cgrp->parent) + cs->classid = cgrp_cls_state(cgrp->parent)->classid; + + return &cs->css; +} + +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + kfree(cgrp_cls_state(cgrp)); +} + +static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) +{ + return cgrp_cls_state(cgrp)->classid; +} + +static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value) +{ + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + + cgrp_cls_state(cgrp)->classid = (u32) value; + + cgroup_unlock(); + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + .write_u64 = write_classid, + }, +}; + +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); +} + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, + .subsys_id = net_cls_subsys_id, +}; + +struct cls_cgroup_head +{ + u32 handle; + struct tcf_exts exts; + struct tcf_ematch_tree ematches; +}; + +static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_cgroup_head *head = tp->root; + struct cgroup_cls_state *cs; + int ret = 0; + + /* + * Due to the nature of the classifier it is required to ignore all + * packets originating from softirq context as accessing `current' + * would lead to false results. + * + * This test assumes that all callers of dev_queue_xmit() explicitely + * disable bh. Knowing this, it is possible to detect softirq based + * calls by looking at the number of nested bh disable calls because + * softirqs always disables bh. + */ + if (softirq_count() != SOFTIRQ_OFFSET) + return -1; + + rcu_read_lock(); + cs = task_cls_state(current); + if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { + res->classid = cs->classid; + res->class = 0; + ret = tcf_exts_exec(skb, &head->exts, res); + } else + ret = -1; + + rcu_read_unlock(); + + return ret; +} + +static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) +{ + return 0UL; +} + +static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int cls_cgroup_init(struct tcf_proto *tp) +{ + return 0; +} + +static const struct tcf_ext_map cgroup_ext_map = { + .action = TCA_CGROUP_ACT, + .police = TCA_CGROUP_POLICE, +}; + +static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { + [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, +}; + +static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg) +{ + struct nlattr *tb[TCA_CGROUP_MAX+1]; + struct cls_cgroup_head *head = tp->root; + struct tcf_ematch_tree t; + struct tcf_exts e; + int err; + + if (head == NULL) { + if (!handle) + return -EINVAL; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + head->handle = handle; + + tcf_tree_lock(tp); + tp->root = head; + tcf_tree_unlock(tp); + } + + if (handle != head->handle) + return -ENOENT; + + err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], + cgroup_policy); + if (err < 0) + return err; + + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map); + if (err < 0) + return err; + + err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); + if (err < 0) + return err; + + tcf_exts_change(tp, &head->exts, &e); + tcf_em_tree_change(tp, &head->ematches, &t); + + return 0; +} + +static void cls_cgroup_destroy(struct tcf_proto *tp) +{ + struct cls_cgroup_head *head = tp->root; + + if (head) { + tcf_exts_destroy(tp, &head->exts); + tcf_em_tree_destroy(tp, &head->ematches); + kfree(head); + } +} + +static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) +{ + return -EOPNOTSUPP; +} + +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_cgroup_head *head = tp->root; + + if (arg->count < arg->skip) + goto skip; + + if (arg->fn(tp, (unsigned long) head, arg) < 0) { + arg->stop = 1; + return; + } +skip: + arg->count++; +} + +static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_cgroup_head *head = tp->root; + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + t->tcm_handle = head->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 || + tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { + .kind = "cgroup", + .init = cls_cgroup_init, + .change = cls_cgroup_change, + .classify = cls_cgroup_classify, + .destroy = cls_cgroup_destroy, + .get = cls_cgroup_get, + .put = cls_cgroup_put, + .delete = cls_cgroup_delete, + .walk = cls_cgroup_walk, + .dump = cls_cgroup_dump, + .owner = THIS_MODULE, +}; + +static int __init init_cgroup_cls(void) +{ + return register_tcf_proto_ops(&cls_cgroup_ops); +} + +static void __exit exit_cgroup_cls(void) +{ + unregister_tcf_proto_ops(&cls_cgroup_ops); +} + +module_init(init_cgroup_cls); +module_exit(exit_cgroup_cls); +MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 0ebaff637e3..0ef4e3065bc 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -260,14 +260,14 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb) static u32 flow_get_skuid(const struct sk_buff *skb) { if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_uid; + return skb->sk->sk_socket->file->f_cred->fsuid; return 0; } static u32 flow_get_skgid(const struct sk_buff *skb) { if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_gid; + return skb->sk->sk_socket->file->f_cred->fsgid; return 0; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index b0f90e593af..6d6e87585fb 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -148,7 +148,7 @@ fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) static void fw_destroy(struct tcf_proto *tp) { - struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL); + struct fw_head *head = tp->root; struct fw_filter *f; int h; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index e3d8455eebc..bdf1f4172ee 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -260,7 +260,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) static void route4_destroy(struct tcf_proto *tp) { - struct route4_head *head = xchg(&tp->root, NULL); + struct route4_head *head = tp->root; int h1, h2; if (head == NULL) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 7a7bff5ded2..e806f2314b5 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -13,12 +13,6 @@ #include #include - -/* - * Not quite sure if we need all the xchgs Alexey uses when accessing things. - * Can always add them later ... :) - */ - /* * Passing parameters to the root seems to be done more awkwardly than really * necessary. At least, u32 doesn't seem to use such dirty hacks. To be diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 246f9065ce3..07372f60bee 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -387,7 +387,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) static void u32_destroy(struct tcf_proto *tp) { struct tc_u_common *tp_c = tp->data; - struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); + struct tc_u_hnode *root_ht = tp->root; WARN_ON(root_ht == NULL); @@ -479,7 +479,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, err = -EINVAL; if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); - struct tc_u_hnode *ht_down = NULL; + struct tc_u_hnode *ht_down = NULL, *ht_old; if (TC_U32_KEY(handle)) goto errout; @@ -493,11 +493,12 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, } tcf_tree_lock(tp); - ht_down = xchg(&n->ht_down, ht_down); + ht_old = n->ht_down; + n->ht_down = ht_down; tcf_tree_unlock(tp); - if (ht_down) - ht_down->refcnt--; + if (ht_old) + ht_old->refcnt--; } if (tb[TCA_U32_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); @@ -637,8 +638,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, break; n->next = *ins; - wmb(); + tcf_tree_lock(tp); *ins = n; + tcf_tree_unlock(tp); *arg = (unsigned long)n; return 0; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index e82519e548d..aab59409728 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -71,7 +71,7 @@ * * static void __exit exit_my_ematch(void) * { - * return tcf_em_unregister(&my_ops); + * tcf_em_unregister(&my_ops); * } * * module_init(init_my_ematch); @@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register); * * Returns -ENOENT if no matching ematch was found. */ -int tcf_em_unregister(struct tcf_ematch_ops *ops) +void tcf_em_unregister(struct tcf_ematch_ops *ops) { - int err = 0; - struct tcf_ematch_ops *e; - write_lock(&ematch_mod_lock); - list_for_each_entry(e, &ematch_ops, link) { - if (e == ops) { - list_del(&e->link); - goto out; - } - } - - err = -ENOENT; -out: + list_del(&ops->link); write_unlock(&ematch_mod_lock); - return err; } EXPORT_SYMBOL(tcf_em_unregister); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b16ad2972c6..0fc4a18fd96 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -97,10 +97,9 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, Auxiliary routines: - ---requeue + ---peek - requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if netif_queue_stopped()=0. + like dequeue but without removing a packet from the queue ---reset @@ -147,8 +146,14 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; - if (qops->requeue == NULL) - qops->requeue = noop_qdisc_ops.requeue; + if (qops->peek == NULL) { + if (qops->dequeue == NULL) { + qops->peek = noop_qdisc_ops.peek; + } else { + rc = -EINVAL; + goto out; + } + } if (qops->dequeue == NULL) qops->dequeue = noop_qdisc_ops.dequeue; @@ -184,7 +189,7 @@ EXPORT_SYMBOL(unregister_qdisc); (root qdisc, all its children, children of children etc.) */ -struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) +static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; @@ -199,28 +204,16 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -/* - * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() - * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() - */ -static DEFINE_SPINLOCK(qdisc_list_lock); - static void qdisc_list_add(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - spin_lock_bh(&qdisc_list_lock); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); - spin_unlock_bh(&qdisc_list_lock); - } } void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - spin_lock_bh(&qdisc_list_lock); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) list_del(&q->list); - spin_unlock_bh(&qdisc_list_lock); - } } EXPORT_SYMBOL(qdisc_list_del); @@ -229,22 +222,17 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) unsigned int i; struct Qdisc *q; - spin_lock_bh(&qdisc_list_lock); - for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); struct Qdisc *txq_root = txq->qdisc_sleeping; q = qdisc_match_from_root(txq_root, handle); if (q) - goto unlock; + goto out; } q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); - -unlock: - spin_unlock_bh(&qdisc_list_lock); - +out: return q; } @@ -417,6 +405,8 @@ static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) struct nlattr *nest; nest = nla_nest_start(skb, TCA_STAB); + if (nest == NULL) + goto nla_put_failure; NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); nla_nest_end(skb, nest); @@ -460,7 +450,6 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; - smp_wmb(); __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; @@ -890,9 +879,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) sch->stab = stab; if (tca[TCA_RATE]) + /* NB: ignores errors from replace_estimator + because change can't be undone. */ gen_replace_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 43d37256c15..2a8b83af7c4 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -62,7 +62,7 @@ struct atm_qdisc_data { struct atm_flow_data link; /* unclassified skbs go here */ struct atm_flow_data *flows; /* NB: "link" is also on this list */ - struct tasklet_struct task; /* requeue tasklet */ + struct tasklet_struct task; /* dequeue tasklet */ }; /* ------------------------- Class/flow operations ------------------------- */ @@ -102,7 +102,8 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, return -EINVAL; if (!new) new = &noop_qdisc; - *old = xchg(&flow->q, new); + *old = flow->q; + flow->q = new; if (*old) qdisc_reset(*old); return 0; @@ -480,11 +481,14 @@ static void sch_atm_dequeue(unsigned long data) * If traffic is properly shaped, this won't generate nasty * little bursts. Otherwise, it may ... (but that's okay) */ - while ((skb = flow->q->dequeue(flow->q))) { - if (!atm_may_send(flow->vcc, skb->truesize)) { - (void)flow->q->ops->requeue(skb, flow->q); + while ((skb = flow->q->ops->peek(flow->q))) { + if (!atm_may_send(flow->vcc, skb->truesize)) break; - } + + skb = qdisc_dequeue_peeked(flow->q); + if (unlikely(!skb)) + break; + pr_debug("atm_tc_dequeue: sending on class %p\n", flow); /* remove any LL header somebody else has attached */ skb_pull(skb, skb_network_offset(skb)); @@ -516,27 +520,19 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); tasklet_schedule(&p->task); - skb = p->link.q->dequeue(p->link.q); + skb = qdisc_dequeue_peeked(p->link.q); if (skb) sch->q.qlen--; return skb; } -static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff *atm_tc_peek(struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); - int ret; - pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - ret = p->link.q->ops->requeue(skb, p->link.q); - if (!ret) { - sch->q.qlen++; - sch->qstats.requeues++; - } else if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - p->link.qstats.drops++; - } - return ret; + pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p); + + return p->link.q->ops->peek(p->link.q); } static unsigned int atm_tc_drop(struct Qdisc *sch) @@ -694,7 +690,7 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = { .priv_size = sizeof(struct atm_qdisc_data), .enqueue = atm_tc_enqueue, .dequeue = atm_tc_dequeue, - .requeue = atm_tc_requeue, + .peek = atm_tc_peek, .drop = atm_tc_drop, .init = atm_tc_init, .reset = atm_tc_reset, diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 507fb488bc9..094a874b48b 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -33,6 +33,7 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = { .priv_size = 0, .enqueue = blackhole_enqueue, .dequeue = blackhole_dequeue, + .peek = blackhole_dequeue, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 03e389e8d94..9e43ed94916 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -static int -cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int ret; - - if ((cl = q->tx_class) == NULL) { - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_CN; - } - q->tx_class = NULL; - - cbq_mark_toplevel(q, cl); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; - cl->q->__parent = sch; -#endif - if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - if (!cl->next_alive) - cbq_activate_class(cl); - return 0; - } - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; -} - /* Overlimit actions */ /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */ @@ -1669,7 +1635,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, #endif } sch_tree_lock(sch); - *old = xchg(&cl->q, new); + *old = cl->q; + cl->q = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -1798,11 +1765,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t } if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]); + rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), + tb[TCA_CBQ_RTAB]); if (rtab == NULL) return -EINVAL; } + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + if (rtab) + qdisc_put_rtab(rtab); + return err; + } + } + /* Change class parameters */ sch_tree_lock(sch); @@ -1810,8 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cbq_deactivate_class(cl); if (rtab) { - rtab = xchg(&cl->R_tab, rtab); - qdisc_put_rtab(rtab); + qdisc_put_rtab(cl->R_tab); + cl->R_tab = rtab; } if (tb[TCA_CBQ_LSSOPT]) @@ -1838,10 +1817,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t sch_tree_unlock(sch); - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); return 0; } @@ -1888,6 +1863,17 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl = kzalloc(sizeof(*cl), GFP_KERNEL); if (cl == NULL) goto failure; + + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + kfree(cl); + goto failure; + } + } + cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; @@ -1929,10 +1915,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t qdisc_class_hash_grow(sch, &q->clhash); - if (tca[TCA_RATE]) - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); - *arg = (unsigned long)cl; return 0; @@ -2066,7 +2048,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct cbq_sched_data), .enqueue = cbq_enqueue, .dequeue = cbq_dequeue, - .requeue = cbq_requeue, + .peek = qdisc_peek_dequeued, .drop = cbq_drop, .init = cbq_init, .reset = cbq_reset, diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c new file mode 100644 index 00000000000..f6b4fa97df7 --- /dev/null +++ b/net/sched/sch_drr.c @@ -0,0 +1,519 @@ +/* + * net/sched/sch_drr.c Deficit Round Robin scheduler + * + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct drr_class { + struct Qdisc_class_common common; + unsigned int refcnt; + unsigned int filter_cnt; + + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct list_head alist; + struct Qdisc *qdisc; + + u32 quantum; + u32 deficit; +}; + +struct drr_sched { + struct list_head active; + struct tcf_proto *filter_list; + struct Qdisc_class_hash clhash; +}; + +static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) +{ + struct drr_sched *q = qdisc_priv(sch); + struct Qdisc_class_common *clc; + + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct drr_class, common); +} + +static void drr_purge_queue(struct drr_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + qdisc_tree_decrease_qlen(cl->qdisc, len); +} + +static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { + [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, +}; + +static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)*arg; + struct nlattr *tb[TCA_DRR_MAX + 1]; + u32 quantum; + int err; + + err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy); + if (err < 0) + return err; + + if (tb[TCA_DRR_QUANTUM]) { + quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]); + if (quantum == 0) + return -EINVAL; + } else + quantum = psched_mtu(qdisc_dev(sch)); + + if (cl != NULL) { + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + + sch_tree_lock(sch); + if (tb[TCA_DRR_QUANTUM]) + cl->quantum = quantum; + sch_tree_unlock(sch); + + return 0; + } + + cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + + cl->refcnt = 1; + cl->common.classid = classid; + cl->quantum = quantum; + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + qdisc_destroy(cl->qdisc); + kfree(cl); + return err; + } + } + + sch_tree_lock(sch); + qdisc_class_hash_insert(&q->clhash, &cl->common); + sch_tree_unlock(sch); + + qdisc_class_hash_grow(sch, &q->clhash); + + *arg = (unsigned long)cl; + return 0; +} + +static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) +{ + gen_kill_estimator(&cl->bstats, &cl->rate_est); + qdisc_destroy(cl->qdisc); + kfree(cl); +} + +static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->filter_cnt > 0) + return -EBUSY; + + sch_tree_lock(sch); + + drr_purge_queue(cl); + qdisc_class_hash_remove(&q->clhash, &cl->common); + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); + + sch_tree_unlock(sch); + return 0; +} + +static unsigned long drr_get_class(struct Qdisc *sch, u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void drr_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); +} + +static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl) +{ + struct drr_sched *q = qdisc_priv(sch); + + if (cl) + return NULL; + + return &q->filter_list; +} + +static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->filter_cnt++; + + return (unsigned long)cl; +} + +static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + cl->filter_cnt--; +} + +static int drr_graft_class(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (new == NULL) { + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + drr_purge_queue(cl); + *old = cl->qdisc; + cl->qdisc = new; + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + return cl->qdisc; +} + +static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); +} + +static int drr_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum); + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct tc_drr_stats xstats; + + memset(&xstats, 0, sizeof(xstats)); + if (cl->qdisc->q.qlen) + xstats.deficit = cl->deficit; + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + return -1; + + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { + cl = drr_find_class(sch, skb->priority); + if (cl != NULL) + return cl; + } + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + cl = (struct drr_class *)res.class; + if (cl == NULL) + cl = drr_find_class(sch, res.classid); + return cl; + } + return NULL; +} + +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + int err; + + cl = drr_classify(skb, sch, &err); + if (cl == NULL) { + if (err & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return err; + } + + len = qdisc_pkt_len(skb); + err = qdisc_enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } + return err; + } + + if (cl->qdisc->q.qlen == 1) { + list_add_tail(&cl->alist, &q->active); + cl->deficit = cl->quantum; + } + + cl->bstats.packets++; + cl->bstats.bytes += len; + sch->bstats.packets++; + sch->bstats.bytes += len; + + sch->q.qlen++; + return err; +} + +static struct sk_buff *drr_dequeue(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct sk_buff *skb; + unsigned int len; + + if (list_empty(&q->active)) + goto out; + while (1) { + cl = list_first_entry(&q->active, struct drr_class, alist); + skb = cl->qdisc->ops->peek(cl->qdisc); + if (skb == NULL) + goto out; + + len = qdisc_pkt_len(skb); + if (len <= cl->deficit) { + cl->deficit -= len; + skb = qdisc_dequeue_peeked(cl->qdisc); + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + sch->q.qlen--; + return skb; + } + + cl->deficit += cl->quantum; + list_move_tail(&cl->alist, &q->active); + } +out: + return NULL; +} + +static unsigned int drr_drop(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + + list_for_each_entry(cl, &q->active, alist) { + if (cl->qdisc->ops->drop) { + len = cl->qdisc->ops->drop(cl->qdisc); + if (len > 0) { + sch->q.qlen--; + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + return len; + } + } + } + return 0; +} + +static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +{ + struct drr_sched *q = qdisc_priv(sch); + int err; + + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; + INIT_LIST_HEAD(&q->active); + return 0; +} + +static void drr_reset_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (cl->qdisc->q.qlen) + list_del(&cl->alist); + qdisc_reset(cl->qdisc); + } + } + sch->q.qlen = 0; +} + +static void drr_destroy_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n, *next; + unsigned int i; + + tcf_destroy_chain(&q->filter_list); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) + drr_destroy_class(sch, cl); + } + qdisc_class_hash_destroy(&q->clhash); +} + +static const struct Qdisc_class_ops drr_class_ops = { + .change = drr_change_class, + .delete = drr_delete_class, + .get = drr_get_class, + .put = drr_put_class, + .tcf_chain = drr_tcf_chain, + .bind_tcf = drr_bind_tcf, + .unbind_tcf = drr_unbind_tcf, + .graft = drr_graft_class, + .leaf = drr_class_leaf, + .qlen_notify = drr_qlen_notify, + .dump = drr_dump_class, + .dump_stats = drr_dump_class_stats, + .walk = drr_walk, +}; + +static struct Qdisc_ops drr_qdisc_ops __read_mostly = { + .cl_ops = &drr_class_ops, + .id = "drr", + .priv_size = sizeof(struct drr_sched), + .enqueue = drr_enqueue, + .dequeue = drr_dequeue, + .peek = qdisc_peek_dequeued, + .drop = drr_drop, + .init = drr_init_qdisc, + .reset = drr_reset_qdisc, + .destroy = drr_destroy_qdisc, + .owner = THIS_MODULE, +}; + +static int __init drr_init(void) +{ + return register_qdisc(&drr_qdisc_ops); +} + +static void __exit drr_exit(void) +{ + unregister_qdisc(&drr_qdisc_ops); +} + +module_init(drr_init); +module_exit(drr_exit); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index ba43aab3a85..d303daa45d4 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -68,7 +68,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, } sch_tree_lock(sch); - *old = xchg(&p->q, new); + *old = p->q; + p->q = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -313,24 +314,13 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) return skb; } -static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff *dsmark_peek(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - - err = p->q->ops->requeue(skb, p->q); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - sch->qstats.drops++; - return err; - } - sch->q.qlen++; - sch->qstats.requeues++; + pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p); - return NET_XMIT_SUCCESS; + return p->q->ops->peek(p->q); } static unsigned int dsmark_drop(struct Qdisc *sch) @@ -496,7 +486,7 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { .priv_size = sizeof(struct dsmark_qdisc_data), .enqueue = dsmark_enqueue, .dequeue = dsmark_dequeue, - .requeue = dsmark_requeue, + .peek = dsmark_peek, .drop = dsmark_drop, .init = dsmark_init, .reset = dsmark_reset, diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 23d258bfe8a..92cfc9d7e3b 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -83,7 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, @@ -98,7 +98,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 93cd30ce650..5f5efe4e607 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg) char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); - dev->tx_timeout(dev); + dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + @@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg) void __netdev_watchdog_up(struct net_device *dev) { - if (dev->tx_timeout) { + if (dev->netdev_ops->ndo_tx_timeout) { if (dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, @@ -270,6 +270,8 @@ static void dev_watchdog_down(struct net_device *dev) void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -285,8 +287,11 @@ EXPORT_SYMBOL(netif_carrier_on); */ void netif_carrier_off(struct net_device *dev) { - if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) + if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); + } } EXPORT_SYMBOL(netif_carrier_off); @@ -306,21 +311,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) return NULL; } -static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - if (net_ratelimit()) - printk(KERN_DEBUG "%s deferred output. It is buggy.\n", - skb->dev->name); - kfree_skb(skb); - return NET_XMIT_CN; -} - struct Qdisc_ops noop_qdisc_ops __read_mostly = { .id = "noop", .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, - .requeue = noop_requeue, + .peek = noop_dequeue, .owner = THIS_MODULE, }; @@ -335,7 +331,6 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -346,7 +341,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, - .requeue = noop_requeue, + .peek = noop_dequeue, .owner = THIS_MODULE, }; @@ -362,7 +357,6 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -411,10 +405,17 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) return NULL; } -static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) { - qdisc->q.qlen++; - return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { + if (!skb_queue_empty(list + prio)) + return skb_peek(list + prio); + } + + return NULL; } static void pfifo_fast_reset(struct Qdisc* qdisc) @@ -457,7 +458,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), .enqueue = pfifo_fast_enqueue, .dequeue = pfifo_fast_dequeue, - .requeue = pfifo_fast_requeue, + .peek = pfifo_fast_peek, .init = pfifo_fast_init, .reset = pfifo_fast_reset, .dump = pfifo_fast_dump, @@ -483,7 +484,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); - skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -526,6 +526,9 @@ void qdisc_reset(struct Qdisc *qdisc) if (ops->reset) ops->reset(qdisc); + + kfree_skb(qdisc->gso_skb); + qdisc->gso_skb = NULL; } EXPORT_SYMBOL(qdisc_reset); @@ -552,8 +555,6 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); - __skb_queue_purge(&qdisc->requeue); - kfree((char *) qdisc - qdisc->padded); } EXPORT_SYMBOL(qdisc_destroy); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index c1ad6b8de10..40408d595c0 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -240,26 +240,6 @@ congestion_drop: return NET_XMIT_CN; } -static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct gred_sched *t = qdisc_priv(sch); - struct gred_sched_data *q; - u16 dp = tc_index_to_dp(skb); - - if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { - if (net_ratelimit()) - printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " - "for requeue, screwing up backlog.\n", - tc_index_to_dp(skb)); - } else { - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - q->backlog += qdisc_pkt_len(skb); - } - - return qdisc_requeue(skb, sch); -} - static struct sk_buff *gred_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -602,7 +582,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .priv_size = sizeof(struct gred_sched), .enqueue = gred_enqueue, .dequeue = gred_dequeue, - .requeue = gred_requeue, + .peek = qdisc_peek_head, .drop = gred_drop, .init = gred_init, .reset = gred_reset, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c1e77da8cd0..45c31b1a4e1 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -184,7 +184,6 @@ struct hfsc_sched struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ - struct sk_buff_head requeue; /* requeued packet */ struct qdisc_watchdog watchdog; /* watchdog timer */ }; @@ -880,28 +879,20 @@ set_passive(struct hfsc_class *cl) */ } -/* - * hack to get length of first packet in queue. - */ static unsigned int qdisc_peek_len(struct Qdisc *sch) { struct sk_buff *skb; unsigned int len; - skb = sch->dequeue(sch); + skb = sch->ops->peek(sch); if (skb == NULL) { if (net_ratelimit()) printk("qdisc_peek_len: non work-conserving qdisc ?\n"); return 0; } len = qdisc_pkt_len(skb); - if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { - if (net_ratelimit()) - printk("qdisc_peek_len: failed to requeue\n"); - qdisc_tree_decrease_qlen(sch, 1); - return 0; - } + return len; } @@ -1027,6 +1018,14 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } cur_time = psched_get_time(); + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + sch_tree_lock(sch); if (rsc != NULL) hfsc_change_rsc(cl, rsc, cur_time); @@ -1043,10 +1042,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_unlock(sch); - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); return 0; } @@ -1072,6 +1067,16 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + kfree(cl); + return err; + } + } + if (rsc != NULL) hfsc_change_rsc(cl, rsc, 0); if (fsc != NULL) @@ -1102,9 +1107,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_grow(sch, &q->clhash); - if (tca[TCA_RATE]) - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } @@ -1211,7 +1213,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); hfsc_purge_queue(sch, cl); - *old = xchg(&cl->qdisc, new); + *old = cl->qdisc; + cl->qdisc = new; sch_tree_unlock(sch); return 0; } @@ -1440,7 +1443,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); - skb_queue_head_init(&q->requeue); q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; @@ -1525,7 +1527,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } - __skb_queue_purge(&q->requeue); q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); @@ -1550,7 +1551,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch) hfsc_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); - __skb_queue_purge(&q->requeue); qdisc_watchdog_cancel(&q->watchdog); } @@ -1574,7 +1574,7 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hfsc_class *cl; - int err; + int uninitialized_var(err); cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { @@ -1617,8 +1617,6 @@ hfsc_dequeue(struct Qdisc *sch) if (sch->q.qlen == 0) return NULL; - if ((skb = __skb_dequeue(&q->requeue))) - goto out; cur_time = psched_get_time(); @@ -1642,7 +1640,7 @@ hfsc_dequeue(struct Qdisc *sch) } } - skb = cl->qdisc->dequeue(cl->qdisc); + skb = qdisc_dequeue_peeked(cl->qdisc); if (skb == NULL) { if (net_ratelimit()) printk("HFSC: Non-work-conserving qdisc ?\n"); @@ -1667,24 +1665,12 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - out: sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; } -static int -hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct hfsc_sched *q = qdisc_priv(sch); - - __skb_queue_head(&q->requeue, skb); - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; -} - static unsigned int hfsc_drop(struct Qdisc *sch) { @@ -1735,7 +1721,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .dump = hfsc_dump_qdisc, .enqueue = hfsc_enqueue, .dequeue = hfsc_dequeue, - .requeue = hfsc_requeue, + .peek = qdisc_peek_dequeued, .drop = hfsc_drop, .cl_ops = &hfsc_class_ops, .priv_size = sizeof(struct hfsc_sched), diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d14f02056ae..2f0f0b04d3f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -84,12 +84,12 @@ struct htb_class { unsigned int children; struct htb_class *parent; /* parent class */ + int prio; /* these two are used only by leaves... */ + int quantum; /* but stored for parent-to-leaf return */ + union { struct htb_class_leaf { struct Qdisc *q; - int prio; - int aprio; - int quantum; int deficit[TC_HTB_MAXDEPTH]; struct list_head drop_list; } leaf; @@ -123,19 +123,8 @@ struct htb_class { psched_tdiff_t mbuffer; /* max wait time */ long tokens, ctokens; /* current number of tokens */ psched_time_t t_c; /* checkpoint time */ - - int prio; /* For parent to leaf return possible here */ - int quantum; /* we do backup. Finally full replacement */ - /* of un.leaf originals should be done. */ }; -static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, - int size) -{ - long result = qdisc_l2t(rate, size); - return result; -} - struct htb_sched { struct Qdisc_class_hash clhash; struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ @@ -152,9 +141,6 @@ struct htb_sched { /* time of nearest event per level (row) */ psched_time_t near_ev_cache[TC_HTB_MAXDEPTH]; - /* whether we hit non-work conserving class during this dequeue; we use */ - int nwc_hit; /* this to disable mindelay complaint in dequeue */ - int defcls; /* class where unclassified flows go to */ /* filters for qdisc itself */ @@ -527,10 +513,10 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen); if (!cl->prio_activity) { - cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); + cl->prio_activity = 1 << cl->prio; htb_activate_prios(q, cl); list_add_tail(&cl->un.leaf.drop_list, - q->drops + cl->un.leaf.aprio); + q->drops + cl->prio); } } @@ -551,7 +537,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret; + int uninitialized_var(ret); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); @@ -591,45 +577,30 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -/* TODO: requeuing packet charges it to policers again !! */ -static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) +static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff) { - int ret; - struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_classify(skb, sch, &ret); - struct sk_buff *tskb; + long toks = diff + cl->tokens; - if (cl == HTB_DIRECT) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_head(&q->direct_queue, skb); - } else { - __skb_queue_head(&q->direct_queue, skb); - tskb = __skb_dequeue_tail(&q->direct_queue); - kfree_skb(tskb); - sch->qstats.drops++; - return NET_XMIT_CN; - } -#ifdef CONFIG_NET_CLS_ACT - } else if (!cl) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; -#endif - } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) != - NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; - } else - htb_activate(q, cl); + if (toks > cl->buffer) + toks = cl->buffer; + toks -= (long) qdisc_l2t(cl->rate, bytes); + if (toks <= -cl->mbuffer) + toks = 1 - cl->mbuffer; - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; + cl->tokens = toks; +} + +static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff) +{ + long toks = diff + cl->ctokens; + + if (toks > cl->cbuffer) + toks = cl->cbuffer; + toks -= (long) qdisc_l2t(cl->ceil, bytes); + if (toks <= -cl->mbuffer) + toks = 1 - cl->mbuffer; + + cl->ctokens = toks; } /** @@ -647,26 +618,20 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, int level, struct sk_buff *skb) { int bytes = qdisc_pkt_len(skb); - long toks, diff; enum htb_cmode old_mode; - -#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ - if (toks > cl->B) toks = cl->B; \ - toks -= L2T(cl, cl->R, bytes); \ - if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \ - cl->T = toks + long diff; while (cl) { diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); if (cl->level >= level) { if (cl->level == level) cl->xstats.lends++; - HTB_ACCNT(tokens, buffer, rate); + htb_accnt_tokens(cl, bytes, diff); } else { cl->xstats.borrows++; cl->tokens += diff; /* we moved t_c; update tokens */ } - HTB_ACCNT(ctokens, cbuffer, ceil); + htb_accnt_ctokens(cl, bytes, diff); cl->t_c = q->now; old_mode = cl->cmode; @@ -696,12 +661,13 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static psched_time_t htb_do_events(struct htb_sched *q, int level) +static psched_time_t htb_do_events(struct htb_sched *q, int level, + unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of 1 to simplify things when jiffy is going to be incremented too soon */ - unsigned long stop_at = jiffies + 2; + unsigned long stop_at = start + 2; while (time_before(jiffies, stop_at)) { struct htb_class *cl; long diff; @@ -720,8 +686,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level) if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } - /* too much load - let's continue on next jiffie */ - return q->now + PSCHED_TICKS_PER_SEC / HZ; + /* too much load - let's continue on next jiffie (including above) */ + return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL @@ -733,14 +699,14 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, while (n) { struct htb_class *cl = rb_entry(n, struct htb_class, node[prio]); - if (id == cl->common.classid) - return n; if (id > cl->common.classid) { n = n->rb_right; - } else { + } else if (id < cl->common.classid) { r = n; n = n->rb_left; + } else { + return n; } } return r; @@ -761,7 +727,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - WARN_ON(!tree->rb_node); + BUG_ON(!tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; sp->pid = pid; @@ -781,9 +747,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, *sp->pptr = (*sp->pptr)->rb_left; if (sp > stk) { sp--; - WARN_ON(!*sp->pptr); - if (!*sp->pptr) + if (!*sp->pptr) { + WARN_ON(1); return NULL; + } htb_next_rb_node(sp->pptr); } } else { @@ -814,8 +781,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, do { next: - WARN_ON(!cl); - if (!cl) + if (unlikely(!cl)) return NULL; /* class can be empty - it is unlikely but can be true if leaf @@ -849,7 +815,7 @@ next: cl->common.classid); cl->warned = 1; } - q->nwc_hit++; + htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> ptr[0]) + prio); cl = htb_lookup_leaf(q->row[level] + prio, prio, @@ -861,7 +827,7 @@ next: if (likely(skb != NULL)) { cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); if (cl->un.leaf.deficit[level] < 0) { - cl->un.leaf.deficit[level] += cl->un.leaf.quantum; + cl->un.leaf.deficit[level] += cl->quantum; htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> ptr[0]) + prio); } @@ -880,6 +846,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); int level; psched_time_t next_event; + unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); @@ -892,23 +859,24 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (!sch->q.qlen) goto fin; q->now = psched_get_time(); + start_at = jiffies; next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; - q->nwc_hit = 0; + for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; psched_time_t event; if (q->now >= q->near_ev_cache[level]) { - event = htb_do_events(q, level); + event = htb_do_events(q, level, start_at); if (!event) event = q->now + PSCHED_TICKS_PER_SEC; q->near_ev_cache[level] = event; } else event = q->near_ev_cache[level]; - if (event && next_event > event) + if (next_event > event) next_event = event; m = ~q->row_mask[level]; @@ -1095,8 +1063,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.buffer = cl->buffer; opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer; - opt.quantum = cl->un.leaf.quantum; - opt.prio = cl->un.leaf.prio; + opt.quantum = cl->quantum; + opt.prio = cl->prio; opt.level = cl->level; NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); @@ -1141,7 +1109,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, == NULL) return -ENOBUFS; sch_tree_lock(sch); - if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { + *old = cl->un.leaf.q; + cl->un.leaf.q = new; + if (*old != NULL) { qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); } @@ -1198,8 +1168,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, memset(&parent->un.inner, 0, sizeof(parent->un.inner)); INIT_LIST_HEAD(&parent->un.leaf.drop_list); parent->un.leaf.q = new_q ? new_q : &noop_qdisc; - parent->un.leaf.quantum = parent->quantum; - parent->un.leaf.prio = parent->prio; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; parent->t_c = psched_get_time(); @@ -1371,9 +1339,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE] ? : &est.nla); + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE] ? : &est.nla); + if (err) { + kfree(cl); + goto failure; + } + cl->refcnt = 1; cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); @@ -1425,37 +1398,36 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (parent) parent->children++; } else { - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } sch_tree_lock(sch); } /* it used to be a nasty bug here, we have to check that node is really leaf before changing cl->un.leaf ! */ if (!cl->level) { - cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; - if (!hopt->quantum && cl->un.leaf.quantum < 1000) { + cl->quantum = rtab->rate.rate / q->rate2quantum; + if (!hopt->quantum && cl->quantum < 1000) { printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->common.classid); - cl->un.leaf.quantum = 1000; + cl->quantum = 1000; } - if (!hopt->quantum && cl->un.leaf.quantum > 200000) { + if (!hopt->quantum && cl->quantum > 200000) { printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->common.classid); - cl->un.leaf.quantum = 200000; + cl->quantum = 200000; } if (hopt->quantum) - cl->un.leaf.quantum = hopt->quantum; - if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO) - cl->un.leaf.prio = TC_HTB_NUMPRIO - 1; - - /* backup for htb_parent_to_leaf */ - cl->quantum = cl->un.leaf.quantum; - cl->prio = cl->un.leaf.prio; + cl->quantum = hopt->quantum; + if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO) + cl->prio = TC_HTB_NUMPRIO - 1; } cl->buffer = hopt->buffer; @@ -1565,7 +1537,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .priv_size = sizeof(struct htb_sched), .enqueue = htb_enqueue, .dequeue = htb_dequeue, - .requeue = htb_requeue, + .peek = qdisc_peek_dequeued, .drop = htb_drop, .init = htb_init, .reset = htb_reset, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 915f3149dde..7e151861794 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct Qdisc *qdisc; - struct multiq_sched_data *q = qdisc_priv(sch); - int ret; - - qdisc = multiq_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif - - ret = qdisc->ops->requeue(skb, qdisc); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - if (q->curband) - q->curband--; - else - q->curband = q->bands - 1; - return NET_XMIT_SUCCESS; - } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; -} - - static struct sk_buff *multiq_dequeue(struct Qdisc *sch) { struct multiq_sched_data *q = qdisc_priv(sch); @@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) q->curband = 0; /* Check that target subqueue is available before - * pulling an skb to avoid excessive requeues + * pulling an skb to avoid head-of-line blocking. */ if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { qdisc = q->queues[q->curband]; @@ -155,6 +121,34 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) } +static struct sk_buff *multiq_peek(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned int curband = q->curband; + struct Qdisc *qdisc; + struct sk_buff *skb; + int band; + + for (band = 0; band < q->bands; band++) { + /* cycle through bands to ensure fairness */ + curband++; + if (curband >= q->bands) + curband = 0; + + /* Check that target subqueue is available before + * pulling an skb to avoid head-of-line blocking. + */ + if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) { + qdisc = q->queues[curband]; + skb = qdisc->ops->peek(qdisc); + if (skb) + return skb; + } + } + return NULL; + +} + static unsigned int multiq_drop(struct Qdisc *sch) { struct multiq_sched_data *q = qdisc_priv(sch); @@ -220,7 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) q->bands = qopt->bands; for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { - struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } @@ -230,7 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child; + struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, @@ -238,12 +233,13 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) i + 1)); if (child) { sch_tree_lock(sch); - child = xchg(&q->queues[i], child); + old = q->queues[i]; + q->queues[i] = child; - if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, - child->q.qlen); - qdisc_destroy(child); + if (old != &noop_qdisc) { + qdisc_tree_decrease_qlen(old, + old->q.qlen); + qdisc_destroy(old); } sch_tree_unlock(sch); } @@ -451,7 +447,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct multiq_sched_data), .enqueue = multiq_enqueue, .dequeue = multiq_dequeue, - .requeue = multiq_requeue, + .peek = multiq_peek, .drop = multiq_drop, .init = multiq_init, .reset = multiq_reset, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a11959908d9..d876b873484 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -46,9 +46,6 @@ layering other disciplines. It does not need to do bandwidth control either since that can be handled by using token bucket or other rate control. - - The simulator is limited by the Linux timer resolution - and will create packet bursts on the HZ boundary (1ms). */ struct netem_sched_data { @@ -233,7 +230,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ cb->time_to_send = psched_get_time(); q->counter = 0; - ret = q->qdisc->ops->requeue(skb, q->qdisc); + + __skb_queue_head(&q->qdisc->q, skb); + q->qdisc->qstats.backlog += qdisc_pkt_len(skb); + q->qdisc->qstats.requeues++; + ret = NET_XMIT_SUCCESS; } if (likely(ret == NET_XMIT_SUCCESS)) { @@ -248,20 +249,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -/* Requeue packets but don't change time stamp */ -static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct netem_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int netem_drop(struct Qdisc* sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -279,29 +266,25 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - smp_mb(); if (sch->flags & TCQ_F_THROTTLED) return NULL; - skb = q->qdisc->dequeue(q->qdisc); + skb = q->qdisc->ops->peek(q->qdisc); if (skb) { const struct netem_skb_cb *cb = netem_skb_cb(skb); psched_time_t now = psched_get_time(); /* if more time remaining? */ if (cb->time_to_send <= now) { + skb = qdisc_dequeue_peeked(q->qdisc); + if (unlikely(!skb)) + return NULL; + pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; return skb; } - if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) { - qdisc_tree_decrease_qlen(q->qdisc, 1); - sch->qstats.drops++; - printk(KERN_ERR "netem: %s could not requeue\n", - q->qdisc->ops->id); - } - qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); } @@ -344,14 +327,13 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); - d = xchg(&q->delay_dist, d); + kfree(q->delay_dist); + q->delay_dist = d; spin_unlock_bh(root_lock); - - kfree(d); return 0; } -static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) +static void get_correlation(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_corr *c = nla_data(attr); @@ -359,27 +341,24 @@ static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) init_crandom(&q->delay_cor, c->delay_corr); init_crandom(&q->loss_cor, c->loss_corr); init_crandom(&q->dup_cor, c->dup_corr); - return 0; } -static int get_reorder(struct Qdisc *sch, const struct nlattr *attr) +static void get_reorder(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_reorder *r = nla_data(attr); q->reorder = r->probability; init_crandom(&q->reorder_cor, r->correlation); - return 0; } -static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr) +static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_corrupt *r = nla_data(attr); q->corrupt = r->probability; init_crandom(&q->corrupt_cor, r->correlation); - return 0; } static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { @@ -438,11 +417,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (q->gap) q->reorder = ~0; - if (tb[TCA_NETEM_CORR]) { - ret = get_correlation(sch, tb[TCA_NETEM_CORR]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_CORR]) + get_correlation(sch, tb[TCA_NETEM_CORR]); if (tb[TCA_NETEM_DELAY_DIST]) { ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); @@ -450,17 +426,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) return ret; } - if (tb[TCA_NETEM_REORDER]) { - ret = get_reorder(sch, tb[TCA_NETEM_REORDER]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_REORDER]) + get_reorder(sch, tb[TCA_NETEM_REORDER]); - if (tb[TCA_NETEM_CORRUPT]) { - ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_CORRUPT]) + get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); return 0; } @@ -541,7 +511,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = tfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = tfifo_init, .reset = qdisc_reset_queue, @@ -624,99 +594,12 @@ nla_put_failure: return -1; } -static int netem_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct netem_sched_data *q = qdisc_priv(sch); - - if (cl != 1) /* only one class */ - return -ENOENT; - - tcm->tcm_handle |= TC_H_MIN(1); - tcm->tcm_info = q->qdisc->handle; - - return 0; -} - -static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) -{ - struct netem_sched_data *q = qdisc_priv(sch); - - if (new == NULL) - new = &noop_qdisc; - - sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - - return 0; -} - -static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct netem_sched_data *q = qdisc_priv(sch); - return q->qdisc; -} - -static unsigned long netem_get(struct Qdisc *sch, u32 classid) -{ - return 1; -} - -static void netem_put(struct Qdisc *sch, unsigned long arg) -{ -} - -static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -ENOSYS; -} - -static int netem_delete(struct Qdisc *sch, unsigned long arg) -{ - return -ENOSYS; -} - -static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; - } -} - -static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl) -{ - return NULL; -} - -static const struct Qdisc_class_ops netem_class_ops = { - .graft = netem_graft, - .leaf = netem_leaf, - .get = netem_get, - .put = netem_put, - .change = netem_change_class, - .delete = netem_delete, - .walk = netem_walk, - .tcf_chain = netem_find_tcf, - .dump = netem_dump_class, -}; - static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .id = "netem", - .cl_ops = &netem_class_ops, .priv_size = sizeof(struct netem_sched_data), .enqueue = netem_enqueue, .dequeue = netem_dequeue, - .requeue = netem_requeue, + .peek = qdisc_peek_dequeued, .drop = netem_drop, .init = netem_init, .reset = netem_reset, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 504a78cdb71..94cecef7014 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -93,34 +93,20 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -prio_requeue(struct sk_buff *skb, struct Qdisc* sch) +static struct sk_buff *prio_peek(struct Qdisc *sch) { - struct Qdisc *qdisc; - int ret; - - qdisc = prio_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif + struct prio_sched_data *q = qdisc_priv(sch); + int prio; - if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; + for (prio = 0; prio < q->bands; prio++) { + struct Qdisc *qdisc = q->queues[prio]; + struct sk_buff *skb = qdisc->ops->peek(qdisc); + if (skb) + return skb; } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; + return NULL; } - static struct sk_buff *prio_dequeue(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -201,7 +187,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; iqueues[i], &noop_qdisc); + struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); @@ -211,18 +198,19 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; ibands; i++) { if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child; + struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); - child = xchg(&q->queues[i], child); + old = q->queues[i]; + q->queues[i] = child; - if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, - child->q.qlen); - qdisc_destroy(child); + if (old != &noop_qdisc) { + qdisc_tree_decrease_qlen(old, + old->q.qlen); + qdisc_destroy(old); } sch_tree_unlock(sch); } @@ -421,7 +409,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .priv_size = sizeof(struct prio_sched_data), .enqueue = prio_enqueue, .dequeue = prio_dequeue, - .requeue = prio_requeue, + .peek = prio_peek, .drop = prio_drop, .init = prio_init, .reset = prio_reset, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 5da05839e22..2bdf241f631 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -108,23 +108,6 @@ congestion_drop: return NET_XMIT_CN; } -static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct red_sched_data *q = qdisc_priv(sch); - struct Qdisc *child = q->qdisc; - int ret; - - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - - ret = child->ops->requeue(skb, child); - if (likely(ret == NET_XMIT_SUCCESS)) { - sch->qstats.requeues++; - sch->q.qlen++; - } - return ret; -} - static struct sk_buff * red_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -140,6 +123,14 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch) return skb; } +static struct sk_buff * red_peek(struct Qdisc* sch) +{ + struct red_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + + return child->ops->peek(child); +} + static unsigned int red_drop(struct Qdisc* sch) { struct red_sched_data *q = qdisc_priv(sch); @@ -211,7 +202,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); - qdisc_destroy(xchg(&q->qdisc, child)); + qdisc_destroy(q->qdisc); + q->qdisc = child; } red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, @@ -292,7 +284,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); + *old = q->qdisc; + q->qdisc = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -361,7 +354,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = { .cl_ops = &red_class_ops, .enqueue = red_enqueue, .dequeue = red_dequeue, - .requeue = red_requeue, + .peek = red_peek, .drop = red_drop, .init = red_init, .reset = red_reset, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index fe1508ef0d3..33133d27b53 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -281,7 +281,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash; sfq_index x; - int ret; + int uninitialized_var(ret); hash = sfq_classify(skb, sch, &ret); if (hash == 0) { @@ -329,71 +329,20 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; } -static int -sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff * +sfq_peek(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned int hash; - sfq_index x; - int ret; - - hash = sfq_classify(skb, sch, &ret); - if (hash == 0) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } - hash--; + sfq_index a; - x = q->ht[hash]; - if (x == SFQ_DEPTH) { - q->ht[hash] = x = q->dep[SFQ_DEPTH].next; - q->hash[x] = hash; - } - - sch->qstats.backlog += qdisc_pkt_len(skb); - __skb_queue_head(&q->qs[x], skb); - /* If selected queue has length q->limit+1, this means that - * all another queues are empty and we do simple tail drop. - * This packet is still requeued at head of queue, tail packet - * is dropped. - */ - if (q->qs[x].qlen > q->limit) { - skb = q->qs[x].prev; - __skb_unlink(skb, &q->qs[x]); - sch->qstats.drops++; - sch->qstats.backlog -= qdisc_pkt_len(skb); - kfree_skb(skb); - return NET_XMIT_CN; - } - - sfq_inc(q, x); - if (q->qs[x].qlen == 1) { /* The flow is new */ - if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->tail = x; - q->next[x] = x; - q->allot[x] = q->quantum; - } else { - q->next[x] = q->next[q->tail]; - q->next[q->tail] = x; - q->tail = x; - } - } - - if (++sch->q.qlen <= q->limit) { - sch->qstats.requeues++; - return 0; - } + /* No active slots */ + if (q->tail == SFQ_DEPTH) + return NULL; - sch->qstats.drops++; - sfq_drop(sch); - return NET_XMIT_CN; + a = q->next[q->tail]; + return skb_peek(&q->qs[a]); } - - - static struct sk_buff * sfq_dequeue(struct Qdisc *sch) { @@ -486,7 +435,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; q->perturb_timer.function = sfq_perturbation; - q->perturb_timer.data = (unsigned long)sch;; + q->perturb_timer.data = (unsigned long)sch; init_timer_deferrable(&q->perturb_timer); for (i = 0; i < SFQ_HASH_DIVISOR; i++) @@ -624,7 +573,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct sfq_sched_data), .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, - .requeue = sfq_requeue, + .peek = sfq_peek, .drop = sfq_drop, .init = sfq_init, .reset = sfq_reset, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 94c61598b86..a2f93c09f3c 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return 0; } -static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct tbf_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int tbf_drop(struct Qdisc* sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -169,7 +156,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - skb = q->qdisc->dequeue(q->qdisc); + skb = q->qdisc->ops->peek(q->qdisc); if (skb) { psched_time_t now; @@ -192,6 +179,10 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) toks -= L2T(q, len); if ((toks|ptoks) >= 0) { + skb = qdisc_dequeue_peeked(q->qdisc); + if (unlikely(!skb)) + return NULL; + q->t_c = now; q->tokens = toks; q->ptokens = ptoks; @@ -214,12 +205,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) (cf. CSZ, HPFQ, HFSC) */ - if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { - /* When requeue fails skb is dropped */ - qdisc_tree_decrease_qlen(q->qdisc, 1); - sch->qstats.drops++; - } - sch->qstats.overlimits++; } return NULL; @@ -251,6 +236,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) struct tc_tbf_qopt *qopt; struct qdisc_rate_table *rtab = NULL; struct qdisc_rate_table *ptab = NULL; + struct qdisc_rate_table *tmp; struct Qdisc *child = NULL; int max_size,n; @@ -299,7 +285,8 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); - qdisc_destroy(xchg(&q->qdisc, child)); + qdisc_destroy(q->qdisc); + q->qdisc = child; } q->limit = qopt->limit; q->mtu = qopt->mtu; @@ -307,8 +294,14 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) q->buffer = qopt->buffer; q->tokens = q->buffer; q->ptokens = q->mtu; - rtab = xchg(&q->R_tab, rtab); - ptab = xchg(&q->P_tab, ptab); + + tmp = q->R_tab; + q->R_tab = rtab; + rtab = tmp; + + tmp = q->P_tab; + q->P_tab = ptab; + ptab = tmp; sch_tree_unlock(sch); err = 0; done: @@ -398,7 +391,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); + *old = q->qdisc; + q->qdisc = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -469,7 +463,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .priv_size = sizeof(struct tbf_sched_data), .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, - .requeue = tbf_requeue, + .peek = qdisc_peek_dequeued, .drop = tbf_drop, .init = tbf_init, .reset = tbf_reset, diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index d35ef059abb..ec697cebb63 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_DROP; } -static int -teql_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct teql_sched_data *q = qdisc_priv(sch); - - __skb_queue_head(&q->q, skb); - sch->qstats.requeues++; - return 0; -} - static struct sk_buff * teql_dequeue(struct Qdisc* sch) { @@ -123,6 +113,13 @@ teql_dequeue(struct Qdisc* sch) return skb; } +static struct sk_buff * +teql_peek(struct Qdisc* sch) +{ + /* teql is meant to be used as root qdisc */ + return NULL; +} + static __inline__ void teql_neigh_release(struct neighbour *n) { @@ -292,9 +289,9 @@ restart: do { struct net_device *slave = qdisc_dev(q); - struct netdev_queue *slave_txq; + struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); + const struct net_device_ops *slave_ops = slave->netdev_ops; - slave_txq = netdev_get_tx_queue(slave, 0); if (slave_txq->qdisc_sleeping != q) continue; if (__netif_subqueue_stopped(slave, subq) || @@ -308,7 +305,7 @@ restart: if (__netif_tx_trylock(slave_txq)) { if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && - slave->hard_start_xmit(skb, slave) == 0) { + slave_ops->ndo_start_xmit(skb, slave) == 0) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); @@ -423,6 +420,14 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) return 0; } +static const struct net_device_ops teql_netdev_ops = { + .ndo_open = teql_master_open, + .ndo_stop = teql_master_close, + .ndo_start_xmit = teql_master_xmit, + .ndo_get_stats = teql_master_stats, + .ndo_change_mtu = teql_master_mtu, +}; + static __init void teql_master_setup(struct net_device *dev) { struct teql_master *master = netdev_priv(dev); @@ -433,17 +438,13 @@ static __init void teql_master_setup(struct net_device *dev) ops->enqueue = teql_enqueue; ops->dequeue = teql_dequeue; - ops->requeue = teql_requeue; + ops->peek = teql_peek; ops->init = teql_qdisc_init; ops->reset = teql_reset; ops->destroy = teql_destroy; ops->owner = THIS_MODULE; - dev->open = teql_master_open; - dev->hard_start_xmit = teql_master_xmit; - dev->stop = teql_master_close; - dev->get_stats = teql_master_stats; - dev->change_mtu = teql_master_mtu; + dev->netdev_ops = &teql_netdev_ops; dev->type = ARPHRD_VOID; dev->mtu = 1500; dev->tx_queue_len = 100; -- cgit v1.2.3