about summary refs log tree commit diff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 125
-rw-r--r-- net/core/neighbour.c 3
-rw-r--r-- net/core/skbuff.c 157
-rw-r--r-- net/core/sysctl_net_core.c 61
4 files changed, 176 insertions, 170 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index ab935778ce8..7016e0c36b3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -115,18 +115,6 @@
#endif /* CONFIG_NET_RADIO */
#include <asm/current.h>
-/* This define, if set, will randomly drop a packet when congestion
- * is more than moderate. It helps fairness in the multi-interface
- * case when one of them is a hog, but it kills performance for the
- * single interface case so it is off now by default.
- */
-#undef RAND_LIE
-
-/* Setting this will sample the queue lengths and thus congestion
- * via a timer instead of as each packet is received.
- */
-#undef OFFLINE_SAMPLE
-
/*
* The list of packet types we will receive (as opposed to discard)
* and the routines to invoke.
@@ -159,11 +147,6 @@ static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all; /* Taps */
-#ifdef OFFLINE_SAMPLE
-static void sample_queue(unsigned long dummy);
-static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
-#endif
-
/*
* The @dev_base list is protected by @dev_base_lock and the rtln
* semaphore.
@@ -215,7 +198,7 @@ static struct notifier_block *netdev_chain;
* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
+DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
@@ -1363,71 +1346,13 @@ out:
Receiver routines
=======================================================================*/
-int netdev_max_backlog = 300;
+int netdev_max_backlog = 1000;
+int netdev_budget = 300;
int weight_p = 64; /* old backlog weight */
-/* These numbers are selected based on intuition and some
- * experimentatiom, if you have more scientific way of doing this
- * please go ahead and fix things.
- */
-int no_cong_thresh = 10;
-int no_cong = 20;
-int lo_cong = 100;
-int mod_cong = 290;
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
-static void get_sample_stats(int cpu)
-{
-#ifdef RAND_LIE
- unsigned long rd;
- int rq;
-#endif
- struct softnet_data *sd = &per_cpu(softnet_data, cpu);
- int blog = sd->input_pkt_queue.qlen;
- int avg_blog = sd->avg_blog;
-
- avg_blog = (avg_blog >> 1) + (blog >> 1);
-
- if (avg_blog > mod_cong) {
- /* Above moderate congestion levels. */
- sd->cng_level = NET_RX_CN_HIGH;
-#ifdef RAND_LIE
- rd = net_random();
- rq = rd % netdev_max_backlog;
- if (rq < avg_blog) /* unlucky bastard */
- sd->cng_level = NET_RX_DROP;
-#endif
- } else if (avg_blog > lo_cong) {
- sd->cng_level = NET_RX_CN_MOD;
-#ifdef RAND_LIE
- rd = net_random();
- rq = rd % netdev_max_backlog;
- if (rq < avg_blog) /* unlucky bastard */
- sd->cng_level = NET_RX_CN_HIGH;
-#endif
- } else if (avg_blog > no_cong)
- sd->cng_level = NET_RX_CN_LOW;
- else /* no congestion */
- sd->cng_level = NET_RX_SUCCESS;
-
- sd->avg_blog = avg_blog;
-}
-
-#ifdef OFFLINE_SAMPLE
-static void sample_queue(unsigned long dummy)
-{
-/* 10 ms 0r 1ms -- i don't care -- JHS */
- int next_tick = 1;
- int cpu = smp_processor_id();
-
- get_sample_stats(cpu);
- next_tick += jiffies;
- mod_timer(&samp_timer, next_tick);
-}
-#endif
-
-
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
@@ -1448,7 +1373,6 @@ static void sample_queue(unsigned long dummy)
int netif_rx(struct sk_buff *skb)
{
- int this_cpu;
struct softnet_data *queue;
unsigned long flags;
@@ -1464,38 +1388,22 @@ int netif_rx(struct sk_buff *skb)
* short when CPU is congested, but is still operating.
*/
local_irq_save(flags);
- this_cpu = smp_processor_id();
queue = &__get_cpu_var(softnet_data);
__get_cpu_var(netdev_rx_stat).total++;
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
if (queue->input_pkt_queue.qlen) {
- if (queue->throttle)
- goto drop;
-
enqueue:
dev_hold(skb->dev);
__skb_queue_tail(&queue->input_pkt_queue, skb);
-#ifndef OFFLINE_SAMPLE
- get_sample_stats(this_cpu);
-#endif
local_irq_restore(flags);
- return queue->cng_level;
+ return NET_RX_SUCCESS;
}
- if (queue->throttle)
- queue->throttle = 0;
-
netif_rx_schedule(&queue->backlog_dev);
goto enqueue;
}
- if (!queue->throttle) {
- queue->throttle = 1;
- __get_cpu_var(netdev_rx_stat).throttled++;
- }
-
-drop:
__get_cpu_var(netdev_rx_stat).dropped++;
local_irq_restore(flags);
@@ -1780,8 +1688,6 @@ job_done:
smp_mb__before_clear_bit();
netif_poll_enable(backlog_dev);
- if (queue->throttle)
- queue->throttle = 0;
local_irq_enable();
return 0;
}
@@ -1790,8 +1696,7 @@ static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
- int budget = netdev_max_backlog;
-
+ int budget = netdev_budget;
local_irq_disable();
@@ -2055,15 +1960,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
struct netif_rx_stats *s = v;
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
- s->total, s->dropped, s->time_squeeze, s->throttled,
- s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
- s->fastroute_deferred_out,
-#if 0
- s->fastroute_latency_reduction
-#else
- s->cpu_collision
-#endif
- );
+ s->total, s->dropped, s->time_squeeze, 0,
+ 0, 0, 0, 0, /* was fastroute */
+ s->cpu_collision );
return 0;
}
@@ -3305,9 +3204,6 @@ static int __init net_dev_init(void)
queue = &per_cpu(softnet_data, i);
skb_queue_head_init(&queue->input_pkt_queue);
- queue->throttle = 0;
- queue->cng_level = 0;
- queue->avg_blog = 10; /* arbitrary non-zero */
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
@@ -3316,11 +3212,6 @@ static int __init net_dev_init(void)
atomic_set(&queue->backlog_dev.refcnt, 1);
}
-#ifdef OFFLINE_SAMPLE
- samp_timer.expires = jiffies + (10 * HZ);
- add_timer(&samp_timer);
-#endif
-
dev_boot_phase = 0;
open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f6bdcad47da..851eb927ed9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -32,6 +32,7 @@
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
+#include <linux/string.h>
#define NEIGH_DEBUG 1
@@ -2592,7 +2593,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[17].extra1 = dev;
}
- dev_name = net_sysctl_strdup(dev_name_source);
+ dev_name = kstrdup(dev_name_source, GFP_KERNEL);
if (!dev_name) {
err = -ENOBUFS;
goto free;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6d68c03bc05..bb73b2190ec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1500,6 +1500,159 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
skb_split_no_header(skb, skb1, len, pos);
}
+/**
+ * skb_prepare_seq_read - Set up the state for a sequential read of skb data
+ * @skb: the buffer to read
+ * @from: lower offset of data to be read
+ * @to: upper offset of data to be read
+ * @st: state variable to initialize
+ *
+ * Records the read window in @st, points both the root and the current
+ * buffer at @skb and clears the fragment bookkeeping. Must be called
+ * before invoking skb_seq_read() for the first time.
+ */
+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+			  unsigned int to, struct skb_seq_state *st)
+{
+	/* Window of the read. */
+	st->lower_offset = from;
+	st->upper_offset = to;
+
+	/* The walk starts at the buffer itself. */
+	st->root_skb = skb;
+	st->cur_skb = skb;
+
+	/* Nothing stepped over yet, no page fragment selected or mapped. */
+	st->stepped_offset = 0;
+	st->frag_idx = 0;
+	st->frag_data = NULL;
+}
+
+/**
+ * skb_seq_read - Sequentially read skb data
+ * @consumed: number of bytes consumed by the caller so far
+ * @data: destination pointer for data to be returned
+ * @st: state variable
+ *
+ * Reads a block of skb data at @consumed relative to the
+ * lower offset specified to skb_prepare_seq_read(). Assigns
+ * the head of the data block to @data and returns the length
+ * of the block or 0 if the end of the skb data or the upper
+ * offset has been reached.
+ *
+ * The caller is not required to consume all of the data
+ * returned, i.e. @consumed is typically set to the number
+ * of bytes already consumed and the next call to
+ * skb_seq_read() will return the remaining part of the block.
+ *
+ * Note: The size of each block of data returned can be arbitrary,
+ *       this limitation is the cost for zerocopy sequential
+ *       reads of potentially non linear data.
+ *
+ * Note: The upper offset is only tested on entry. A returned block
+ *       is not truncated at the upper offset, so it may extend
+ *       past it.
+ *
+ * Note: A page fragment can stay mapped when this function returns
+ *       a non-zero length. Call skb_abort_seq_read() if reading
+ *       stops before 0 has been returned.
+ *
+ * Note: Fragment lists within fragments are not implemented
+ *       at the moment, state->root_skb could be replaced with
+ *       a stack for this purpose.
+ */
+unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
+			  struct skb_seq_state *st)
+{
+	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
+	skb_frag_t *frag;
+
+	if (unlikely(abs_offset >= st->upper_offset))
+		return 0;
+
+next_skb:
+	/*
+	 * abs_offset counts from the start of the root skb, so the linear
+	 * area of the current skb ends at its own length plus everything
+	 * that has already been stepped over.
+	 */
+	block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
+
+	/*
+	 * Once a page fragment is mapped the linear area has already been
+	 * added to stepped_offset, so block_limit above no longer describes
+	 * it; skip straight to the fragment walk in that case.
+	 */
+	if (abs_offset < block_limit && !st->frag_data) {
+		*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
+		return block_limit - abs_offset;
+	}
+
+	/* Step over the linear area exactly once per skb. */
+	if (st->frag_idx == 0 && !st->frag_data)
+		st->stepped_offset += skb_headlen(st->cur_skb);
+
+	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
+		frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
+		block_limit = frag->size + st->stepped_offset;
+
+		if (abs_offset < block_limit) {
+			/* Keep the mapping across calls until the frag is done. */
+			if (!st->frag_data)
+				st->frag_data = kmap_skb_frag(frag);
+
+			*data = (u8 *) st->frag_data + frag->page_offset +
+				(abs_offset - st->stepped_offset);
+
+			return block_limit - abs_offset;
+		}
+
+		if (st->frag_data) {
+			kunmap_skb_frag(st->frag_data);
+			st->frag_data = NULL;
+		}
+
+		st->frag_idx++;
+		st->stepped_offset += frag->size;
+	}
+
+	/*
+	 * Descend into the root's frag_list before looking at ->next, and
+	 * restart the fragment index for every skb that is entered so its
+	 * own linear area and page fragments are walked from the beginning.
+	 *
+	 * NOTE(review): when the root skb has no frag_list its ->next is
+	 * still followed, as before; presumably callers pass an skb that
+	 * is not linked on a queue - confirm.
+	 */
+	if (st->root_skb == st->cur_skb &&
+	    skb_shinfo(st->root_skb)->frag_list) {
+		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
+		st->frag_idx = 0;
+		goto next_skb;
+	} else if (st->cur_skb->next) {
+		st->cur_skb = st->cur_skb->next;
+		st->frag_idx = 0;
+		goto next_skb;
+	}
+
+	return 0;
+}
+
+/**
+ * skb_abort_seq_read - Abort a sequential read of skb data
+ * @st: state variable
+ *
+ * Unmaps the page fragment recorded in @st, if there is one. Must be
+ * called if skb_seq_read() was not called until it returned 0.
+ */
+void skb_abort_seq_read(struct skb_seq_state *st)
+{
+	/* No fragment mapping recorded: nothing to undo. */
+	if (!st->frag_data)
+		return;
+
+	kunmap_skb_frag(st->frag_data);
+}
+
+/* Reinterpret the cb member of a textsearch state as a struct skb_seq_state. */
+#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
+
+/*
+ * get_next_block callback installed by skb_find_text(): forwards to
+ * skb_seq_read() using the read state kept in @state. @conf is unused.
+ */
+static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
+					  struct ts_config *conf,
+					  struct ts_state *state)
+{
+	struct skb_seq_state *seq = TS_SKB_CB(state);
+
+	return skb_seq_read(offset, text, seq);
+}
+
+/*
+ * finish callback installed by skb_find_text(): aborts the sequential
+ * read whose state is kept in @state. @conf is unused.
+ */
+static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
+{
+	struct skb_seq_state *seq = TS_SKB_CB(state);
+
+	skb_abort_seq_read(seq);
+}
+
+/**
+ * skb_find_text - Find a text pattern in skb data
+ * @skb: the buffer to look in
+ * @from: search offset
+ * @to: search limit
+ * @config: textsearch configuration
+ * @state: uninitialized textsearch state variable
+ *
+ * Finds a pattern in the skb data according to the specified
+ * textsearch configuration. Use textsearch_next() to retrieve
+ * subsequent occurrences of the pattern. Returns the offset
+ * to the first occurrence, relative to @from, or UINT_MAX if no
+ * match was found or the match offset is greater than @to - @from.
+ *
+ * The get_next_block and finish callbacks of @config are overwritten
+ * on every call.
+ */
+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
+			   unsigned int to, struct ts_config *config,
+			   struct ts_state *state)
+{
+	unsigned int ret;
+
+	config->get_next_block = skb_ts_get_next_block;
+	config->finish = skb_ts_finish;
+
+	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
+
+	ret = textsearch_find(config, state);
+
+	/*
+	 * skb_seq_read() checks the upper offset only on entry and hands
+	 * out untruncated blocks, so the search can run past @to. Reject
+	 * any match that lies beyond the requested window.
+	 */
+	return (ret <= to - from) ? ret : UINT_MAX;
+}
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1538,3 +1691,7 @@ EXPORT_SYMBOL(skb_queue_tail);
EXPORT_SYMBOL(skb_unlink);
EXPORT_SYMBOL(skb_append);
EXPORT_SYMBOL(skb_split);
+EXPORT_SYMBOL(skb_prepare_seq_read);
+EXPORT_SYMBOL(skb_seq_read);
+EXPORT_SYMBOL(skb_abort_seq_read);
+EXPORT_SYMBOL(skb_find_text);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8be646cb19..8f817ad9f54 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -13,12 +13,8 @@
#ifdef CONFIG_SYSCTL
extern int netdev_max_backlog;
+extern int netdev_budget;
extern int weight_p;
-extern int no_cong_thresh;
-extern int no_cong;
-extern int lo_cong;
-extern int mod_cong;
-extern int netdev_fastroute;
extern int net_msg_cost;
extern int net_msg_burst;
@@ -35,19 +31,6 @@ extern int sysctl_somaxconn;
extern char sysctl_divert_version[];
#endif /* CONFIG_NET_DIVERT */
-/*
- * This strdup() is used for creating copies of network
- * device names to be handed over to sysctl.
- */
-
-char *net_sysctl_strdup(const char *s)
-{
- char *rv = kmalloc(strlen(s)+1, GFP_KERNEL);
- if (rv)
- strcpy(rv, s);
- return rv;
-}
-
ctl_table core_table[] = {
#ifdef CONFIG_NET
{
@@ -99,38 +82,6 @@ ctl_table core_table[] = {
.proc_handler = &proc_dointvec
},
{
- .ctl_name = NET_CORE_NO_CONG_THRESH,
- .procname = "no_cong_thresh",
- .data = &no_cong_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_CORE_NO_CONG,
- .procname = "no_cong",
- .data = &no_cong,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_CORE_LO_CONG,
- .procname = "lo_cong",
- .data = &lo_cong,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_CORE_MOD_CONG,
- .procname = "mod_cong",
- .data = &mod_cong,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
.ctl_name = NET_CORE_MSG_COST,
.procname = "message_cost",
.data = &net_msg_cost,
@@ -174,9 +125,15 @@ ctl_table core_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+ {
+ .ctl_name = NET_CORE_BUDGET,
+ .procname = "netdev_budget",
+ .data = &netdev_budget,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
{ .ctl_name = 0 }
};
-EXPORT_SYMBOL(net_sysctl_strdup);
-
#endif