diff options
-rw-r--r-- | net/ipv4/Kconfig | 2 | ||||
-rw-r--r-- | net/ipv4/Makefile | 1 | ||||
-rw-r--r-- | net/netfilter/Kconfig | 2 | ||||
-rw-r--r-- | net/netfilter/Makefile | 3 | ||||
-rw-r--r-- | net/netfilter/ipvs/Kconfig (renamed from net/ipv4/ipvs/Kconfig) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/Makefile (renamed from net/ipv4/ipvs/Makefile) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_app.c (renamed from net/ipv4/ipvs/ip_vs_app.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_conn.c (renamed from net/ipv4/ipvs/ip_vs_conn.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c (renamed from net/ipv4/ipvs/ip_vs_core.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c (renamed from net/ipv4/ipvs/ip_vs_ctl.c) | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_dh.c | 261 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_est.c (renamed from net/ipv4/ipvs/ip_vs_est.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ftp.c (renamed from net/ipv4/ipvs/ip_vs_ftp.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_lblc.c (renamed from net/ipv4/ipvs/ip_vs_lblc.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_lblcr.c (renamed from net/ipv4/ipvs/ip_vs_lblcr.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_lc.c (renamed from net/ipv4/ipvs/ip_vs_lc.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_nq.c (renamed from net/ipv4/ipvs/ip_vs_nq.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto.c (renamed from net/ipv4/ipvs/ip_vs_proto.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_ah_esp.c (renamed from net/ipv4/ipvs/ip_vs_proto_ah_esp.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_tcp.c (renamed from net/ipv4/ipvs/ip_vs_proto_tcp.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_udp.c (renamed from net/ipv4/ipvs/ip_vs_proto_udp.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_rr.c (renamed from net/ipv4/ipvs/ip_vs_rr.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sched.c (renamed from net/ipv4/ipvs/ip_vs_sched.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sed.c (renamed from net/ipv4/ipvs/ip_vs_sed.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sh.c (renamed from net/ipv4/ipvs/ip_vs_sh.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sync.c (renamed from net/ipv4/ipvs/ip_vs_sync.c) | 6 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_wlc.c (renamed from net/ipv4/ipvs/ip_vs_wlc.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_wrr.c (renamed from net/ipv4/ipvs/ip_vs_wrr.c) | 0 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c (renamed from net/ipv4/ipvs/ip_vs_xmit.c) | 0 |
29 files changed, 272 insertions, 5 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 591ea23639c..691268f3a35 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -630,5 +630,3 @@ config TCP_MD5SIG If unsure, say N. -source "net/ipv4/ipvs/Kconfig" - diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ad40ef3f9eb..80ff87ce43a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ -obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f70b4145ffc..78892cf2b02 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -858,3 +858,5 @@ config NETFILTER_XT_MATCH_U32 endif # NETFILTER_XTABLES endmenu + +source "net/netfilter/ipvs/Kconfig" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 8ce67665882..da3d909e053 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -89,3 +89,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o + +# IPVS +obj-$(CONFIG_IP_VS) += ipvs/ diff --git a/net/ipv4/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index de6004de80b..de6004de80b 100644 --- a/net/ipv4/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig diff --git a/net/ipv4/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 73a46fe1fe4..73a46fe1fe4 100644 --- a/net/ipv4/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 201b8ea3020..201b8ea3020 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9a24332fbed..9a24332fbed 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 958abf3e5f8..958abf3e5f8 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 771551d8fba..0302cf3e503 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1330,7 +1330,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) out_unlock: write_unlock_bh(&__ip_vs_svc_lock); +#ifdef CONFIG_IP_VS_IPV6 out: +#endif if (old_sched) ip_vs_scheduler_put(old_sched); diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c new file mode 100644 index 00000000000..a16943fd72f --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -0,0 +1,261 @@ +/* + * IPVS: Destination Hashing scheduling module + * + * Authors: Wensong Zhang <wensong@gnuchina.org> + * + * Inspired by the consistent hashing scheduler patch from + * Thomas Proell <proellt@gmx.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Changes: + * + */ + +/* + * The dh algorithm is to select server by the hash key of destination IP + * address. The pseudo code is as follows: + * + * n <- servernode[dest_ip]; + * if (n is dead) OR + * (n is overloaded) OR (n.weight <= 0) then + * return NULL; + * + * return n; + * + * Notes that servernode is a 256-bucket hash table that maps the hash + * index derived from packet destination IP address to the current server + * array. If the dh scheduler is used in cache cluster, it is good to + * combine it with cache_bypass feature. When the statically assigned + * server is dead or overloaded, the load balancer can bypass the cache + * server and send requests to the original server directly. + * + */ + +#include <linux/ip.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> + +#include <net/ip_vs.h> + + +/* + * IPVS DH bucket + */ +struct ip_vs_dh_bucket { + struct ip_vs_dest *dest; /* real server (cache) */ +}; + +/* + * for IPVS DH entry hash table + */ +#ifndef CONFIG_IP_VS_DH_TAB_BITS +#define CONFIG_IP_VS_DH_TAB_BITS 8 +#endif +#define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS +#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) +#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) + + +/* + * Returns hash value for IPVS DH entry + */ +static inline unsigned ip_vs_dh_hashkey(__be32 addr) +{ + return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK; +} + + +/* + * Get ip_vs_dest associated with supplied parameters. + */ +static inline struct ip_vs_dest * +ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr) +{ + return (tbl[ip_vs_dh_hashkey(addr)]).dest; +} + + +/* + * Assign all the hash buckets of the specified table with the service. + */ +static int +ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) +{ + int i; + struct ip_vs_dh_bucket *b; + struct list_head *p; + struct ip_vs_dest *dest; + + b = tbl; + p = &svc->destinations; + for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { + if (list_empty(p)) { + b->dest = NULL; + } else { + if (p == &svc->destinations) + p = p->next; + + dest = list_entry(p, struct ip_vs_dest, n_list); + atomic_inc(&dest->refcnt); + b->dest = dest; + + p = p->next; + } + b++; + } + return 0; +} + + +/* + * Flush all the hash buckets of the specified table. + */ +static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) +{ + int i; + struct ip_vs_dh_bucket *b; + + b = tbl; + for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { + if (b->dest) { + atomic_dec(&b->dest->refcnt); + b->dest = NULL; + } + b++; + } +} + + +static int ip_vs_dh_init_svc(struct ip_vs_service *svc) +{ + struct ip_vs_dh_bucket *tbl; + + /* allocate the DH table for this service */ + tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, + GFP_ATOMIC); + if (tbl == NULL) { + IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n"); + return -ENOMEM; + } + svc->sched_data = tbl; + IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " + "current service\n", + sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); + + /* assign the hash buckets with the updated service */ + ip_vs_dh_assign(tbl, svc); + + return 0; +} + + +static int ip_vs_dh_done_svc(struct ip_vs_service *svc) +{ + struct ip_vs_dh_bucket *tbl = svc->sched_data; + + /* got to clean up hash buckets here */ + ip_vs_dh_flush(tbl); + + /* release the table itself */ + kfree(svc->sched_data); + IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", + sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); + + return 0; +} + + +static int ip_vs_dh_update_svc(struct ip_vs_service *svc) +{ + struct ip_vs_dh_bucket *tbl = svc->sched_data; + + /* got to clean up hash buckets here */ + ip_vs_dh_flush(tbl); + + /* assign the hash buckets with the updated service */ + ip_vs_dh_assign(tbl, svc); + + return 0; +} + + +/* + * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, + * consider that the server is overloaded here. + */ +static inline int is_overloaded(struct ip_vs_dest *dest) +{ + return dest->flags & IP_VS_DEST_F_OVERLOAD; +} + + +/* + * Destination hashing scheduling + */ +static struct ip_vs_dest * +ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +{ + struct ip_vs_dest *dest; + struct ip_vs_dh_bucket *tbl; + struct iphdr *iph = ip_hdr(skb); + + IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); + + tbl = (struct ip_vs_dh_bucket *)svc->sched_data; + dest = ip_vs_dh_get(tbl, iph->daddr); + if (!dest + || !(dest->flags & IP_VS_DEST_F_AVAILABLE) + || atomic_read(&dest->weight) <= 0 + || is_overloaded(dest)) { + return NULL; + } + + IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " + "--> server %u.%u.%u.%u:%d\n", + NIPQUAD(iph->daddr), + NIPQUAD(dest->addr.ip), + ntohs(dest->port)); + + return dest; +} + + +/* + * IPVS DH Scheduler structure + */ +static struct ip_vs_scheduler ip_vs_dh_scheduler = +{ + .name = "dh", + .refcnt = ATOMIC_INIT(0), + .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif + .init_service = ip_vs_dh_init_svc, + .done_service = ip_vs_dh_done_svc, + .update_service = ip_vs_dh_update_svc, + .schedule = ip_vs_dh_schedule, +}; + + +static int __init ip_vs_dh_init(void) +{ + return register_ip_vs_scheduler(&ip_vs_dh_scheduler); +} + + +static void __exit ip_vs_dh_cleanup(void) +{ + unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); +} + + +module_init(ip_vs_dh_init); +module_exit(ip_vs_dh_cleanup); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 2eb2860dabb..2eb2860dabb 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 2e7dbd8b73a..2e7dbd8b73a 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 6ecef3518ca..6ecef3518ca 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 1f75ea83bcf..1f75ea83bcf 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index b69f808ac46..b69f808ac46 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 9a2d8033f08..9a2d8033f08 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 0791f9e08fe..0791f9e08fe 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 80ab0c8e5b4..80ab0c8e5b4 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index dd4566ea2bf..dd4566ea2bf 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 6eb6039d634..6eb6039d634 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index a22195f68ac..a22195f68ac 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index a46ad9e3501..a46ad9e3501 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 7d2f22f04b8..7d2f22f04b8 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 1d96de27fef..1d96de27fef 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 28237a5f62e..de5e7e118ee 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -30,6 +30,7 @@ #include <linux/err.h> #include <linux/kthread.h> #include <linux/wait.h> +#include <linux/kernel.h> #include <net/ip.h> #include <net/sock.h> @@ -99,6 +100,7 @@ struct ip_vs_sync_thread_data { */ #define SYNC_MESG_HEADER_LEN 4 +#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ struct ip_vs_sync_mesg { __u8 nr_conns; @@ -516,8 +518,8 @@ static int set_sync_mesg_maxlen(int sync_state) num = (dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr) - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - sync_send_mesg_maxlen = - SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num; + sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + + SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 8c596e71259..8c596e71259 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 7ea92fed50b..7ea92fed50b 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 02ddc2b3ce2..02ddc2b3ce2 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c |