/*
 *              Device round robin policy for multipath.
 *
 *
 * Version:	$Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
 *
 * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/ip_mp_alg.h>

/*
 * One slot of the per-device bookkeeping table.
 *
 * A slot is in use while 'allocated' is non-zero; 'usecount' is bumped
 * every time a route going out through the device is selected.
 */
struct multipath_device {
	int		ifi;		/* interface index of device */
	atomic_t	usecount;	/* number of selections so far */
	int		allocated;	/* non-zero while the slot is in use */
};

/* Upper bound on the number of devices tracked at the same time. */
#define MULTIPATH_MAX_DEVICECANDIDATES 10

static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];

/* Serializes slot allocation/release and the bulk counter reset. */
static DEFINE_SPINLOCK(state_lock);

/*
 * Return the index of the first unallocated slot in state[], or -1 if
 * every slot is taken.  Does no locking of its own.
 */
static inline int __multipath_findslot(void)
{
	int i;

	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
		if (state[i].allocated == 0)
			return i;
	}
	return -1;
}

/*
 * Return the index of the allocated slot whose interface index equals
 * 'ifindex', or -1 if there is none.  Does no locking of its own.
 */
static inline int __multipath_finddev(int ifindex)
{
	int i;

	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
		if (state[i].allocated != 0 &&
		    state[i].ifi == ifindex)
			return i;
	}
	return -1;
}

/*
 * Netdevice notifier callback.
 *
 * On NETDEV_UNREGISTER and NETDEV_DOWN the slot belonging to the device
 * (if any) is released and its counter cleared.  All other events are
 * ignored.  Always returns NOTIFY_DONE.
 */
static int drr_dev_event(struct notifier_block *this,
			 unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	int devidx;

	switch (event) {
	case NETDEV_UNREGISTER:
	case NETDEV_DOWN:
		spin_lock_bh(&state_lock);

		devidx = __multipath_finddev(dev->ifindex);
		if (devidx != -1) {
			state[devidx].allocated = 0;
			state[devidx].ifi = 0;
			atomic_set(&state[devidx].usecount, 0);
		}

		spin_unlock_bh(&state_lock);
		break;
	default:
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block drr_dev_notifier = {
	.notifier_call	= drr_dev_event,
};

/*
 * Increment 'usecount'.  If the value read back afterwards is not
 * positive (i.e. the counter wrapped), reset the counters of all slots
 * to zero under state_lock so that they stay comparable.
 */
static void drr_safe_inc(atomic_t *usecount)
{
	int n;

	atomic_inc(usecount);

	n = atomic_read(usecount);
	if (n <= 0) {
		int i;

		spin_lock_bh(&state_lock);

		for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
			atomic_set(&state[i].usecount, 0);

		spin_unlock_bh(&state_lock);
	}
}

/*
 * Pick one route out of the chain starting at 'first' and store it in
 * '*rp'.
 *
 * Every chain entry that is flagged DST_BALANCED and accepted by
 * multipath_comparekeys() has its lastuse/__use fields refreshed.  Among
 * those entries a device that was just added to state[] is chosen
 * immediately; otherwise the entry whose device has the smallest
 * usecount wins.  The counter of the chosen device is incremented.  If
 * no entry qualifies, 'first' is returned.
 */
static void drr_select_route(const struct flowi *flp,
			     struct rtable *first, struct rtable **rp)
{
	struct rtable *nh, *result, *cur_min;
	int min_usecount = -1;
	int devidx = -1;
	int cur_min_devidx = -1;

	/* 1. make sure all alt. nexthops have the same GC related data */
	/* 2. determine the new candidate to be returned */
	result = NULL;
	cur_min = NULL;
	for (nh = rcu_dereference(first); nh;
	     nh = rcu_dereference(nh->u.rt_next)) {
		if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
		    multipath_comparekeys(&nh->fl, flp)) {
			int nh_ifidx = nh->u.dst.dev->ifindex;

			nh->u.dst.lastuse = jiffies;
			nh->u.dst.__use++;
			/* a result exists already: only refresh GC data */
			if (result != NULL)
				continue;

			/* search for the output interface */

			/* this is not SMP safe, only add/remove are
			 * SMP safe as wrong usecount updates have no big
			 * impact
			 */
			devidx = __multipath_finddev(nh_ifidx);
			if (devidx == -1) {
				/* add the interface to the array
				 * SMP safe
				 */
				spin_lock_bh(&state_lock);

				/* due to SMP: search again */
				devidx = __multipath_finddev(nh_ifidx);
				if (devidx == -1) {
					/* add entry for device */
					devidx = __multipath_findslot();
					if (devidx == -1) {
						/* unlikely but possible;
						 * release the lock before
						 * skipping this nexthop,
						 * otherwise it is leaked
						 * and the next taker
						 * deadlocks
						 */
						spin_unlock_bh(&state_lock);
						continue;
					}

					state[devidx].allocated = 1;
					state[devidx].ifi = nh_ifidx;
					atomic_set(&state[devidx].usecount, 0);
					min_usecount = 0;
				}

				spin_unlock_bh(&state_lock);
			}

			/* NOTE(review): min_usecount can also become 0 via
			 * the else branch below when a tracked device has a
			 * zero counter; the next matching nexthop then takes
			 * this branch regardless of its own counter.
			 * Confirm whether that is intended.
			 */
			if (min_usecount == 0) {
				/* if the device has not been used it is
				 * the primary target
				 */
				drr_safe_inc(&state[devidx].usecount);
				result = nh;
			} else {
				int count =
					atomic_read(&state[devidx].usecount);

				if (min_usecount == -1 ||
				    count < min_usecount) {
					cur_min = nh;
					cur_min_devidx = devidx;
					min_usecount = count;
				}
			}
		}
	}

	if (!result) {
		if (cur_min) {
			drr_safe_inc(&state[cur_min_devidx].usecount);
			result = cur_min;
		} else {
			/* nothing qualified: fall back to the chain head */
			result = first;
		}
	}

	*rp = result;
}

static struct ip_mp_alg_ops drr_ops = {
	.mp_alg_select_route	=	drr_select_route,
};

/*
 * Module init: register the netdevice notifier, then the multipath
 * algorithm.  If the latter fails the notifier is unregistered again and
 * the error is returned.
 */
static int __init drr_init(void)
{
	int err = register_netdevice_notifier(&drr_dev_notifier);

	if (err)
		return err;

	err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
	if (err)
		goto fail;

	return 0;

fail:
	unregister_netdevice_notifier(&drr_dev_notifier);
	return err;
}

/* Module exit: undo both registrations made by drr_init(). */
static void __exit drr_exit(void)
{
	unregister_netdevice_notifier(&drr_dev_notifier);
	multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
}

module_init(drr_init);
module_exit(drr_exit);
MODULE_LICENSE("GPL");