From 72a3effaf633bcae9034b7e176bdbd78d64a71db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Nov 2006 02:30:37 -0800 Subject: [NET]: Size listen hash tables using backlog hint We currently allocate a fixed size (TCP_SYNQ_HSIZE=512) slots hash table for each LISTEN socket, regardless of various parameters (listen backlog for example) On x86_64, this means order-1 allocations (might fail), even for 'small' sockets, expecting few connections. On the contrary, a huge server wanting a backlog of 50000 is slowed down a bit because of this fixed limit. This patch makes the sizing of listen hash table a dynamic parameter, depending of : - net.core.somaxconn tunable (default is 128) - net.ipv4.tcp_max_syn_backlog tunable (default : 256, 1024 or 128) - backlog value given by user application (2nd parameter of listen()) For large allocations (bigger than PAGE_SIZE), we use vmalloc() instead of kmalloc(). We still limit memory allocation with the two existing tunables (somaxconn & tcp_max_syn_backlog). So for standard setups, this patch actually reduce RAM usage. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/tcp_ipv4.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index edcf0932ac6..4a81d54a756 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -204,7 +204,7 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); + err = inet_csk_listen_start(sk, backlog); if (err) goto out; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 96bbe2a0aa1..9d68837888d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -343,7 +343,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_route_req); static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, - const u32 rnd, const u16 synq_hsize) + const u32 rnd, const u32 synq_hsize) { return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 22ef8bd2662..5fbf96552ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -715,7 +715,7 @@ static struct ip_options *tcp_v4_save_options(struct sock *sk, return dopt; } -struct request_sock_ops tcp_request_sock_ops = { +struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), .rtx_syn_ack = tcp_v4_send_synack, @@ -1385,7 +1385,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; - icsk = inet_csk(st->syn_wait_sk); + icsk = inet_csk(st->syn_wait_sk); req = req->dl_next; while (1) { while (req) { @@ -1395,7 +1395,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } - if (++st->sbucket >= TCP_SYNQ_HSIZE) + if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; -- cgit v1.2.3