aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-10-19 19:04:47 +0200
committerIngo Molnar <mingo@elte.hu>2008-10-19 19:04:47 +0200
commit3e10e879a8c334a5927d800a3663a24d562cfa31 (patch)
tree5d18bc7e38c986a044e99aa0d0a4aff4931ec7d0 /net
parent98d9c66ab07471006fd7910cb16453581c41a3e7 (diff)
parent0cfd81031a26717fe14380d18275f8e217571615 (diff)
Merge branch 'linus' into tracing-v28-for-linus-v3
Conflicts: init/main.c kernel/module.c scripts/bootgraph.pl
Diffstat (limited to 'net')
-rw-r--r--net/802/fc.c2
-rw-r--r--net/802/psnap.c2
-rw-r--r--net/9p/client.c2
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/appletalk/ddp.c4
-rw-r--r--net/bluetooth/af_bluetooth.c8
-rw-r--r--net/bluetooth/hidp/core.c214
-rw-r--r--net/bluetooth/hidp/hidp.h2
-rw-r--r--net/bridge/netfilter/Kconfig1
-rw-r--r--net/bridge/netfilter/ebtables.c15
-rw-r--r--net/can/af_can.c4
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/dev_mcast.c2
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/pktgen.c8
-rw-r--r--net/core/rtnetlink.c4
-rw-r--r--net/core/skb_dma_map.c2
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/stream.c2
-rw-r--r--net/dccp/ccid.c2
-rw-r--r--net/decnet/dn_dev.c8
-rw-r--r--net/decnet/sysctl_net_decnet.c4
-rw-r--r--net/dsa/Kconfig2
-rw-r--r--net/ipv4/cipso_ipv4.c656
-rw-r--r--net/ipv4/devinet.c9
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c97
-rw-r--r--net/ipv4/route.c14
-rw-r--r--net/ipv4/sysctl_net_ipv4.c18
-rw-r--r--net/ipv4/tcp_cong.c4
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/addrconf.c1
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/ndisc.c13
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/mac80211/debugfs_netdev.c6
-rw-r--r--net/mac80211/debugfs_sta.c11
-rw-r--r--net/mac80211/ieee80211_i.h6
-rw-r--r--net/mac80211/mlme.c3
-rw-r--r--net/mac80211/rx.c5
-rw-r--r--net/mac80211/scan.c3
-rw-r--r--net/mac80211/sta_info.c7
-rw-r--r--net/mac80211/sta_info.h1
-rw-r--r--net/mac80211/util.c8
-rw-r--r--net/mac80211/wext.c2
-rw-r--r--net/netfilter/nf_conntrack_acct.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c151
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_tproxy_core.c1
-rw-r--r--net/netfilter/nfnetlink.c14
-rw-r--r--net/netlabel/Makefile3
-rw-r--r--net/netlabel/netlabel_addrlist.c388
-rw-r--r--net/netlabel/netlabel_addrlist.h189
-rw-r--r--net/netlabel/netlabel_cipso_v4.c136
-rw-r--r--net/netlabel/netlabel_cipso_v4.h10
-rw-r--r--net/netlabel/netlabel_domainhash.c393
-rw-r--r--net/netlabel/netlabel_domainhash.h40
-rw-r--r--net/netlabel/netlabel_kapi.c272
-rw-r--r--net/netlabel/netlabel_mgmt.c410
-rw-r--r--net/netlabel/netlabel_mgmt.h59
-rw-r--r--net/netlabel/netlabel_unlabeled.c456
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/phonet/af_phonet.c3
-rw-r--r--net/rfkill/rfkill-input.c1
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/sunrpc/clnt.c6
-rw-r--r--net/sunrpc/rpcb_clnt.c121
-rw-r--r--net/sunrpc/svc.c251
-rw-r--r--net/sunrpc/svc_xprt.c39
-rw-r--r--net/sunrpc/svcsock.c17
-rw-r--r--net/sunrpc/xprt.c12
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c29
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c187
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c255
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c364
-rw-r--r--net/sunrpc/xprtrdma/transport.c41
-rw-r--r--net/sunrpc/xprtrdma/verbs.c741
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h17
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/wireless/core.c6
96 files changed, 4225 insertions, 1611 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index cb3475ea6fd..34cf1ee014b 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -82,13 +82,13 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
static int fc_rebuild_header(struct sk_buff *skb)
{
+#ifdef CONFIG_INET
struct fch_hdr *fch=(struct fch_hdr *)skb->data;
struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr));
if(fcllc->ethertype != htons(ETH_P_IP)) {
printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype));
return 0;
}
-#ifdef CONFIG_INET
return arp_find(fch->daddr, skb);
#else
return 0;
diff --git a/net/802/psnap.c b/net/802/psnap.c
index b3cfe5a14fc..70980baeb68 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -1,7 +1,7 @@
/*
* SNAP data link layer. Derived from 802.2
*
- * Alan Cox <Alan.Cox@linux.org>,
+ * Alan Cox <alan@lxorguk.ukuu.org.uk>,
* from the 802.2 layer by Greg Page.
* Merged in additions from Greg Page's psnap.c.
*
diff --git a/net/9p/client.c b/net/9p/client.c
index 10e320307ec..e053e06028a 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -52,7 +52,7 @@ enum {
Opt_err,
};
-static match_table_t tokens = {
+static const match_table_t tokens = {
{Opt_msize, "msize=%u"},
{Opt_legacy, "noextend"},
{Opt_trans, "trans=%s"},
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index d652baf5ff9..6dabbdb6665 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -86,7 +86,7 @@ enum {
Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
};
-static match_table_t tokens = {
+static const match_table_t tokens = {
{Opt_port, "port=%u"},
{Opt_rfdno, "rfdno=%u"},
{Opt_wfdno, "wfdno=%u"},
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 0c850427a85..d3134e7e6ee 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -2,7 +2,7 @@
* DDP: An implementation of the AppleTalk DDP protocol for
* Ethernet 'ELAP'.
*
- * Alan Cox <Alan.Cox@linux.org>
+ * Alan Cox <alan@lxorguk.ukuu.org.uk>
*
* With more than a little assistance from
*
@@ -1934,6 +1934,6 @@ static void __exit atalk_exit(void)
module_exit(atalk_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Alan Cox <Alan.Cox@linux.org>");
+MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>");
MODULE_DESCRIPTION("AppleTalk 0.20\n");
MODULE_ALIAS_NETPROTO(PF_APPLETALK);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f6348e078aa..8f9431a12c6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -37,10 +37,7 @@
#include <linux/poll.h>
#include <net/sock.h>
#include <asm/ioctls.h>
-
-#if defined(CONFIG_KMOD)
#include <linux/kmod.h>
-#endif
#include <net/bluetooth/bluetooth.h>
@@ -145,11 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto)
if (proto < 0 || proto >= BT_MAX_PROTO)
return -EINVAL;
-#if defined(CONFIG_KMOD)
- if (!bt_proto[proto]) {
+ if (!bt_proto[proto])
request_module("bt-proto-%d", proto);
- }
-#endif
err = -EPROTONOSUPPORT;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 96434d774c8..acdeab3d980 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -578,7 +578,7 @@ static int hidp_session(void *arg)
if (session->hid) {
if (session->hid->claimed & HID_CLAIMED_INPUT)
hidinput_disconnect(session->hid);
- hid_free_device(session->hid);
+ hid_destroy_device(session->hid);
}
/* Wakeup user-space polling for socket errors */
@@ -623,9 +623,15 @@ static struct device *hidp_get_device(struct hidp_session *session)
static int hidp_setup_input(struct hidp_session *session,
struct hidp_connadd_req *req)
{
- struct input_dev *input = session->input;
+ struct input_dev *input;
int i;
+ input = input_allocate_device();
+ if (!input)
+ return -ENOMEM;
+
+ session->input = input;
+
input_set_drvdata(input, session);
input->name = "Bluetooth HID Boot Protocol Device";
@@ -677,67 +683,114 @@ static void hidp_close(struct hid_device *hid)
{
}
-static const struct {
- __u16 idVendor;
- __u16 idProduct;
- unsigned quirks;
-} hidp_blacklist[] = {
- /* Apple wireless Mighty Mouse */
- { 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL },
+static int hidp_parse(struct hid_device *hid)
+{
+ struct hidp_session *session = hid->driver_data;
+ struct hidp_connadd_req *req = session->req;
+ unsigned char *buf;
+ int ret;
- { } /* Terminating entry */
-};
+ buf = kmalloc(req->rd_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, req->rd_data, req->rd_size)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+
+ ret = hid_parse_report(session->hid, buf, req->rd_size);
+
+ kfree(buf);
+
+ if (ret)
+ return ret;
+
+ session->req = NULL;
+
+ return 0;
+}
+
+static int hidp_start(struct hid_device *hid)
+{
+ struct hidp_session *session = hid->driver_data;
+ struct hid_report *report;
-static void hidp_setup_quirks(struct hid_device *hid)
+ list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].
+ report_list, list)
+ hidp_send_report(session, report);
+
+ list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].
+ report_list, list)
+ hidp_send_report(session, report);
+
+ return 0;
+}
+
+static void hidp_stop(struct hid_device *hid)
{
- unsigned int n;
+ struct hidp_session *session = hid->driver_data;
+
+ skb_queue_purge(&session->ctrl_transmit);
+ skb_queue_purge(&session->intr_transmit);
- for (n = 0; hidp_blacklist[n].idVendor; n++)
- if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) &&
- hidp_blacklist[n].idProduct == le16_to_cpu(hid->product))
- hid->quirks = hidp_blacklist[n].quirks;
+ if (hid->claimed & HID_CLAIMED_INPUT)
+ hidinput_disconnect(hid);
+ hid->claimed = 0;
}
-static void hidp_setup_hid(struct hidp_session *session,
+static struct hid_ll_driver hidp_hid_driver = {
+ .parse = hidp_parse,
+ .start = hidp_start,
+ .stop = hidp_stop,
+ .open = hidp_open,
+ .close = hidp_close,
+ .hidinput_input_event = hidp_hidinput_event,
+};
+
+static int hidp_setup_hid(struct hidp_session *session,
struct hidp_connadd_req *req)
{
- struct hid_device *hid = session->hid;
- struct hid_report *report;
+ struct hid_device *hid;
bdaddr_t src, dst;
+ int ret;
- baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
- baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
+ hid = hid_allocate_device();
+ if (IS_ERR(hid)) {
+ ret = PTR_ERR(session->hid);
+ goto err;
+ }
+ session->hid = hid;
+ session->req = req;
hid->driver_data = session;
- hid->country = req->country;
+ baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
+ baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
hid->bus = BUS_BLUETOOTH;
hid->vendor = req->vendor;
hid->product = req->product;
hid->version = req->version;
+ hid->country = req->country;
strncpy(hid->name, req->name, 128);
strncpy(hid->phys, batostr(&src), 64);
strncpy(hid->uniq, batostr(&dst), 64);
- hid->dev = hidp_get_device(session);
-
- hid->hid_open = hidp_open;
- hid->hid_close = hidp_close;
-
- hid->hidinput_input_event = hidp_hidinput_event;
+ hid->dev.parent = hidp_get_device(session);
+ hid->ll_driver = &hidp_hid_driver;
- hidp_setup_quirks(hid);
+ ret = hid_add_device(hid);
+ if (ret)
+ goto err_hid;
- list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list)
- hidp_send_report(session, report);
-
- list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list)
- hidp_send_report(session, report);
-
- if (hidinput_connect(hid) == 0)
- hid->claimed |= HID_CLAIMED_INPUT;
+ return 0;
+err_hid:
+ hid_destroy_device(hid);
+ session->hid = NULL;
+err:
+ return ret;
}
int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
@@ -757,38 +810,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size);
- if (req->rd_size > 0) {
- unsigned char *buf = kmalloc(req->rd_size, GFP_KERNEL);
-
- if (!buf) {
- kfree(session);
- return -ENOMEM;
- }
-
- if (copy_from_user(buf, req->rd_data, req->rd_size)) {
- kfree(buf);
- kfree(session);
- return -EFAULT;
- }
-
- session->hid = hid_parse_report(buf, req->rd_size);
-
- kfree(buf);
-
- if (!session->hid) {
- kfree(session);
- return -EINVAL;
- }
- }
-
- if (!session->hid) {
- session->input = input_allocate_device();
- if (!session->input) {
- kfree(session);
- return -ENOMEM;
- }
- }
-
down_write(&hidp_session_sem);
s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst);
@@ -816,15 +837,18 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
session->idle_to = req->idle_to;
- if (session->input) {
+ if (req->rd_size > 0) {
+ err = hidp_setup_hid(session, req);
+ if (err && err != -ENODEV)
+ goto err_skb;
+ }
+
+ if (!session->hid) {
err = hidp_setup_input(session, req);
if (err < 0)
- goto failed;
+ goto err_skb;
}
- if (session->hid)
- hidp_setup_hid(session, req);
-
__hidp_link_session(session);
hidp_set_timer(session);
@@ -850,17 +874,16 @@ unlink:
__hidp_unlink_session(session);
- if (session->input) {
+ if (session->input)
input_unregister_device(session->input);
- session->input = NULL; /* don't try to free it here */
- }
-
+ if (session->hid)
+ hid_destroy_device(session->hid);
+err_skb:
+ skb_queue_purge(&session->ctrl_transmit);
+ skb_queue_purge(&session->intr_transmit);
failed:
up_write(&hidp_session_sem);
- if (session->hid)
- hid_free_device(session->hid);
-
input_free_device(session->input);
kfree(session);
return err;
@@ -950,18 +973,43 @@ int hidp_get_conninfo(struct hidp_conninfo *ci)
return err;
}
+static const struct hid_device_id hidp_table[] = {
+ { HID_BLUETOOTH_DEVICE(HID_ANY_ID, HID_ANY_ID) },
+ { }
+};
+
+static struct hid_driver hidp_driver = {
+ .name = "generic-bluetooth",
+ .id_table = hidp_table,
+};
+
static int __init hidp_init(void)
{
+ int ret;
+
l2cap_load();
BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION);
- return hidp_init_sockets();
+ ret = hid_register_driver(&hidp_driver);
+ if (ret)
+ goto err;
+
+ ret = hidp_init_sockets();
+ if (ret)
+ goto err_drv;
+
+ return 0;
+err_drv:
+ hid_unregister_driver(&hidp_driver);
+err:
+ return ret;
}
static void __exit hidp_exit(void)
{
hidp_cleanup_sockets();
+ hid_unregister_driver(&hidp_driver);
}
module_init(hidp_init);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 343fb0566b3..e503c89057a 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -151,6 +151,8 @@ struct hidp_session {
struct sk_buff_head ctrl_transmit;
struct sk_buff_head intr_transmit;
+
+ struct hidp_connadd_req *req;
};
static inline void hidp_schedule(struct hidp_session *session)
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 366d3e9d51f..ba6f73eb06c 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -4,6 +4,7 @@
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
+ depends on BRIDGE && BRIDGE_NETFILTER
select NETFILTER_XTABLES
help
ebtables is a general, extensible frame/packet identification
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5bb88eb0aad..0fa208e8640 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -305,23 +305,14 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
return NULL;
}
-#ifndef CONFIG_KMOD
-#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
-#else
static void *
find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
int *error, struct mutex *mutex)
{
- void *ret;
-
- ret = find_inlist_lock_noload(head, name, error, mutex);
- if (!ret) {
- request_module("%s%s", prefix, name);
- ret = find_inlist_lock_noload(head, name, error, mutex);
- }
- return ret;
+ return try_then_request_module(
+ find_inlist_lock_noload(head, name, error, mutex),
+ "%s%s", prefix, name);
}
-#endif
static inline struct ebt_table *
find_table_lock(const char *name, int *error, struct mutex *mutex)
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 8035fbf526a..7d4d2b3c137 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -128,8 +128,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol)
if (net != &init_net)
return -EAFNOSUPPORT;
-#ifdef CONFIG_KMOD
- /* try to load protocol module, when CONFIG_KMOD is defined */
+#ifdef CONFIG_MODULES
+ /* try to load protocol module kernel is modular */
if (!proto_tab[protocol]) {
err = request_module("can-proto-%d", protocol);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 52f577a0f54..ee631843c2f 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -9,7 +9,7 @@
* identical recvmsg() code. So we share it here. The poll was
* shared before but buried in udp.c so I moved it.
*
- * Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
* udp.c code)
*
* Fixes:
diff --git a/net/core/dev.c b/net/core/dev.c
index 1408a083fe4..868ec0ba8b7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4956,8 +4956,6 @@ EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif
-#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
-#endif
EXPORT_PER_CPU_SYMBOL(softnet_data);
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 5402b3b38e0..9e2fa39f22a 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -6,7 +6,7 @@
* Richard Underwood <richard@wuzz.demon.co.uk>
*
* Stir fried together from the IP multicast and CAP patches above
- * Alan Cox <Alan.Cox@linux.org>
+ * Alan Cox <alan@lxorguk.ukuu.org.uk>
*
* Fixes:
* Alan Cox : Update the device on a real delete
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b0dc818a91d..f1d07b5c1e1 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -96,7 +96,7 @@ static void net_free(struct net *net)
return;
}
#endif
-
+ kfree(net->gen);
kmem_cache_free(net_cachep, net);
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a756847e381..99f656d35b4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2474,7 +2474,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
if (ret < 0) {
printk(KERN_ERR "Error expanding "
"ipsec packet %d\n",ret);
- return 0;
+ goto err;
}
}
@@ -2484,8 +2484,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
if (ret) {
printk(KERN_ERR "Error creating ipsec "
"packet %d\n",ret);
- kfree_skb(skb);
- return 0;
+ goto err;
}
/* restore ll */
eth = (__u8 *) skb_push(skb, ETH_HLEN);
@@ -2494,6 +2493,9 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
}
}
return 1;
+err:
+ kfree_skb(skb);
+ return 0;
}
#endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3630131fa1f..31f29d2989f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1040,7 +1040,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct nlattr *linkinfo[IFLA_INFO_MAX+1];
int err;
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
replay:
#endif
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
@@ -1129,7 +1129,7 @@ replay:
return -EOPNOTSUPP;
if (!ops) {
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
if (kind[0]) {
__rtnl_unlock();
request_module("rtnl-link-%s", kind);
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
index 1f49afcd8e8..86234923a3b 100644
--- a/net/core/skb_dma_map.c
+++ b/net/core/skb_dma_map.c
@@ -35,7 +35,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
return 0;
unwind:
- while (i-- >= 0) {
+ while (--i >= 0) {
skb_frag_t *fp = &sp->frags[i];
dma_unmap_page(dev, sp->dma_maps[i + 1],
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7f7bb1a636d..4e22e3a3535 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1,7 +1,7 @@
/*
* Routines having to do with the 'struct sk_buff' memory handlers.
*
- * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
+ * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
* Florian La Roche <rzsfl@rz.uni-sb.de>
*
* Fixes:
diff --git a/net/core/stream.c b/net/core/stream.c
index a6b3437ff08..8727cead64a 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -9,7 +9,7 @@
*
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* (from old tcp.c code)
- * Alan Cox <alan@redhat.com> (Borrowed comments 8-))
+ * Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-))
*/
#include <linux/module.h>
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 4809753d12a..8fe931a3d7a 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -154,7 +154,7 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
struct ccid *ccid = NULL;
ccids_read_lock();
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
if (ccids[id] == NULL) {
/* We only try to load if in process context */
ccids_read_unlock();
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 2f0ac3c3eb7..28e26bd08e2 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -152,7 +152,7 @@ static struct dn_dev_parms dn_dev_list[] = {
#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list)
-#define DN_DEV_PARMS_OFFSET(x) ((int) ((char *) &((struct dn_dev_parms *)0)->x))
+#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x)
#ifdef CONFIG_SYSCTL
@@ -166,7 +166,7 @@ static int max_priority[] = { 127 }; /* From DECnet spec */
static int dn_forwarding_proc(ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
-static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+static int dn_forwarding_sysctl(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen);
@@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
#endif
}
-static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+static int dn_forwarding_sysctl(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
@@ -490,9 +490,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
return -EFAULT;
ifr->ifr_name[IFNAMSIZ-1] = 0;
-#ifdef CONFIG_KMOD
dev_load(&init_net, ifr->ifr_name);
-#endif
switch(cmd) {
case SIOCGIFADDR:
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 228067c571b..36400b26689 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str)
}
-static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen,
+static int dn_node_address_strategy(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
@@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
}
-static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen,
+static int dn_def_dev_strategy(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cdce4c6c672..49211b35725 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,7 +1,7 @@
menuconfig NET_DSA
bool "Distributed Switch Architecture support"
default n
- depends on EXPERIMENTAL
+ depends on EXPERIMENTAL && !S390
select PHYLIB
---help---
This allows you to use hardware switch chips that use
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2c0e4572cc9..490e035c6d9 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -13,7 +13,7 @@
*/
/*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -47,17 +47,7 @@
#include <asm/bug.h>
#include <asm/unaligned.h>
-struct cipso_v4_domhsh_entry {
- char *domain;
- u32 valid;
- struct list_head list;
- struct rcu_head rcu;
-};
-
/* List of available DOI definitions */
-/* XXX - Updates should be minimal so having a single lock for the
- * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
- * okay. */
/* XXX - This currently assumes a minimal number of different DOIs in use,
* if in practice there are a lot of different DOIs this list should
* probably be turned into a hash table or something similar so we
@@ -119,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1;
* be omitted. */
#define CIPSO_V4_TAG_RNG_CAT_MAX 8
+/* Base length of the local tag (non-standard tag).
+ * Tag definition (may change between kernel versions)
+ *
+ * 0 8 16 24 32
+ * +----------+----------+----------+----------+
+ * | 10000000 | 00000110 | 32-bit secid value |
+ * +----------+----------+----------+----------+
+ * | in (host byte order)|
+ * +----------+----------+
+ *
+ */
+#define CIPSO_V4_TAG_LOC_BLEN 6
+
/*
* Helper Functions
*/
@@ -194,25 +197,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
}
/**
- * cipso_v4_doi_domhsh_free - Frees a domain list entry
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to a domain list entry can be released
- * safely.
- *
- */
-static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
-{
- struct cipso_v4_domhsh_entry *ptr;
-
- ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
- kfree(ptr->domain);
- kfree(ptr);
-}
-
-/**
* cipso_v4_cache_entry_free - Frees a cache entry
* @entry: the entry to free
*
@@ -457,7 +441,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
struct cipso_v4_doi *iter;
list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
- if (iter->doi == doi && iter->valid)
+ if (iter->doi == doi && atomic_read(&iter->refcount))
return iter;
return NULL;
}
@@ -496,14 +480,17 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
if (doi_def->type != CIPSO_V4_MAP_PASS)
return -EINVAL;
break;
+ case CIPSO_V4_TAG_LOCAL:
+ if (doi_def->type != CIPSO_V4_MAP_LOCAL)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
}
- doi_def->valid = 1;
+ atomic_set(&doi_def->refcount, 1);
INIT_RCU_HEAD(&doi_def->rcu);
- INIT_LIST_HEAD(&doi_def->dom_list);
spin_lock(&cipso_v4_doi_list_lock);
if (cipso_v4_doi_search(doi_def->doi) != NULL)
@@ -519,59 +506,129 @@ doi_add_failure:
}
/**
+ * cipso_v4_doi_free - Frees a DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function frees all of the memory associated with a DOI definition.
+ *
+ */
+void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+{
+ if (doi_def == NULL)
+ return;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_TRANS:
+ kfree(doi_def->map.std->lvl.cipso);
+ kfree(doi_def->map.std->lvl.local);
+ kfree(doi_def->map.std->cat.cipso);
+ kfree(doi_def->map.std->cat.local);
+ break;
+ }
+ kfree(doi_def);
+}
+
+/**
+ * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void cipso_v4_doi_free_rcu(struct rcu_head *entry)
+{
+ struct cipso_v4_doi *doi_def;
+
+ doi_def = container_of(entry, struct cipso_v4_doi, rcu);
+ cipso_v4_doi_free(doi_def);
+}
+
+/**
* cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
* @doi: the DOI value
* @audit_secid: the LSM secid to use in the audit message
- * @callback: the DOI cleanup/free callback
*
* Description:
- * Removes a DOI definition from the CIPSO engine, @callback is called to
- * free any memory. The NetLabel routines will be called to release their own
- * LSM domain mappings as well as our own domain list. Returns zero on
- * success and negative values on failure.
+ * Removes a DOI definition from the CIPSO engine. The NetLabel routines will
+ * be called to release their own LSM domain mappings as well as our own
+ * domain list. Returns zero on success and negative values on failure.
*
*/
-int cipso_v4_doi_remove(u32 doi,
- struct netlbl_audit *audit_info,
- void (*callback) (struct rcu_head * head))
+int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
{
struct cipso_v4_doi *doi_def;
- struct cipso_v4_domhsh_entry *dom_iter;
spin_lock(&cipso_v4_doi_list_lock);
doi_def = cipso_v4_doi_search(doi);
- if (doi_def != NULL) {
- doi_def->valid = 0;
- list_del_rcu(&doi_def->list);
+ if (doi_def == NULL) {
spin_unlock(&cipso_v4_doi_list_lock);
- rcu_read_lock();
- list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
- if (dom_iter->valid)
- netlbl_cfg_map_del(dom_iter->domain,
- audit_info);
- rcu_read_unlock();
- cipso_v4_cache_invalidate();
- call_rcu(&doi_def->rcu, callback);
- return 0;
+ return -ENOENT;
+ }
+ if (!atomic_dec_and_test(&doi_def->refcount)) {
+ spin_unlock(&cipso_v4_doi_list_lock);
+ return -EBUSY;
}
+ list_del_rcu(&doi_def->list);
spin_unlock(&cipso_v4_doi_list_lock);
- return -ENOENT;
+ cipso_v4_cache_invalidate();
+ call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+
+ return 0;
}
/**
- * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition
* @doi: the DOI value
*
* Description:
* Searches for a valid DOI definition and if one is found it is returned to
* the caller. Otherwise NULL is returned. The caller must ensure that
- * rcu_read_lock() is held while accessing the returned definition.
+ * rcu_read_lock() is held while accessing the returned definition and the DOI
+ * definition reference count is decremented when the caller is done.
*
*/
struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
{
- return cipso_v4_doi_search(doi);
+ struct cipso_v4_doi *doi_def;
+
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_search(doi);
+ if (doi_def == NULL)
+ goto doi_getdef_return;
+ if (!atomic_inc_not_zero(&doi_def->refcount))
+ doi_def = NULL;
+
+doi_getdef_return:
+ rcu_read_unlock();
+ return doi_def;
+}
+
+/**
+ * cipso_v4_doi_putdef - Releases a reference for the given DOI definition
+ * @doi_def: the DOI definition
+ *
+ * Description:
+ * Releases a DOI definition reference obtained from cipso_v4_doi_getdef().
+ *
+ */
+void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
+{
+ if (doi_def == NULL)
+ return;
+
+ if (!atomic_dec_and_test(&doi_def->refcount))
+ return;
+ spin_lock(&cipso_v4_doi_list_lock);
+ list_del_rcu(&doi_def->list);
+ spin_unlock(&cipso_v4_doi_list_lock);
+
+ cipso_v4_cache_invalidate();
+ call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
}
/**
@@ -597,7 +654,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt,
rcu_read_lock();
list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
- if (iter_doi->valid) {
+ if (atomic_read(&iter_doi->refcount) > 0) {
if (doi_cnt++ < *skip_cnt)
continue;
ret_val = callback(iter_doi, cb_arg);
@@ -613,85 +670,6 @@ doi_walk_return:
return ret_val;
}
-/**
- * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to add
- *
- * Description:
- * Adds the @domain to the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel). This function
- * does allocate memory. Returns zero on success, negative values on failure.
- *
- */
-int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
-{
- struct cipso_v4_domhsh_entry *iter;
- struct cipso_v4_domhsh_entry *new_dom;
-
- new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
- if (new_dom == NULL)
- return -ENOMEM;
- if (domain) {
- new_dom->domain = kstrdup(domain, GFP_KERNEL);
- if (new_dom->domain == NULL) {
- kfree(new_dom);
- return -ENOMEM;
- }
- }
- new_dom->valid = 1;
- INIT_RCU_HEAD(&new_dom->rcu);
-
- spin_lock(&cipso_v4_doi_list_lock);
- list_for_each_entry(iter, &doi_def->dom_list, list)
- if (iter->valid &&
- ((domain != NULL && iter->domain != NULL &&
- strcmp(iter->domain, domain) == 0) ||
- (domain == NULL && iter->domain == NULL))) {
- spin_unlock(&cipso_v4_doi_list_lock);
- kfree(new_dom->domain);
- kfree(new_dom);
- return -EEXIST;
- }
- list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
- spin_unlock(&cipso_v4_doi_list_lock);
-
- return 0;
-}
-
-/**
- * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to remove
- *
- * Description:
- * Removes the @domain from the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel). Returns zero
- * on success and negative values on error.
- *
- */
-int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
- const char *domain)
-{
- struct cipso_v4_domhsh_entry *iter;
-
- spin_lock(&cipso_v4_doi_list_lock);
- list_for_each_entry(iter, &doi_def->dom_list, list)
- if (iter->valid &&
- ((domain != NULL && iter->domain != NULL &&
- strcmp(iter->domain, domain) == 0) ||
- (domain == NULL && iter->domain == NULL))) {
- iter->valid = 0;
- list_del_rcu(&iter->list);
- spin_unlock(&cipso_v4_doi_list_lock);
- call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
- return 0;
- }
- spin_unlock(&cipso_v4_doi_list_lock);
-
- return -ENOENT;
-}
-
/*
* Label Mapping Functions
*/
@@ -712,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
switch (doi_def->type) {
case CIPSO_V4_MAP_PASS:
return 0;
- case CIPSO_V4_MAP_STD:
+ case CIPSO_V4_MAP_TRANS:
if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
return 0;
break;
@@ -741,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
case CIPSO_V4_MAP_PASS:
*net_lvl = host_lvl;
return 0;
- case CIPSO_V4_MAP_STD:
+ case CIPSO_V4_MAP_TRANS:
if (host_lvl < doi_def->map.std->lvl.local_size &&
doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) {
*net_lvl = doi_def->map.std->lvl.local[host_lvl];
@@ -775,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
case CIPSO_V4_MAP_PASS:
*host_lvl = net_lvl;
return 0;
- case CIPSO_V4_MAP_STD:
+ case CIPSO_V4_MAP_TRANS:
map_tbl = doi_def->map.std;
if (net_lvl < map_tbl->lvl.cipso_size &&
map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
@@ -812,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
switch (doi_def->type) {
case CIPSO_V4_MAP_PASS:
return 0;
- case CIPSO_V4_MAP_STD:
+ case CIPSO_V4_MAP_TRANS:
cipso_cat_size = doi_def->map.std->cat.cipso_size;
cipso_array = doi_def->map.std->cat.cipso;
for (;;) {
@@ -860,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
u32 host_cat_size = 0;
u32 *host_cat_array = NULL;
- if (doi_def->type == CIPSO_V4_MAP_STD) {
+ if (doi_def->type == CIPSO_V4_MAP_TRANS) {
host_cat_size = doi_def->map.std->cat.local_size;
host_cat_array = doi_def->map.std->cat.local;
}
@@ -875,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
case CIPSO_V4_MAP_PASS:
net_spot = host_spot;
break;
- case CIPSO_V4_MAP_STD:
+ case CIPSO_V4_MAP_TRANS:
if (host_spot >= host_cat_size)
return -EPERM;
net_spot = host_cat_array[host_spot];
@@ -921,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
u32 net_cat_size = 0;
u32 *net_cat_array = NULL;
- if (doi_def->type == CIPSO_V4_MAP_STD) {
+ if (doi_def->type == CIPSO_V4_MAP_TRANS) {
net_cat_size = doi_def->map.std->cat.cipso_size;
net_cat_array = doi_def->map.std->cat.cipso;
}
@@ -941,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
case CIPSO_V4_MAP_PASS:
host_spot = net_spot;
break;
- case CIPSO_V4_MAP_STD:
+ case CIPSO_V4_MAP_TRANS:
if (net_spot >= net_cat_size)
return -EPERM;
host_spot = net_cat_array[net_spot];
@@ -1277,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
} else
tag_len = 4;
- buffer[0] = 0x01;
+ buffer[0] = CIPSO_V4_TAG_RBITMAP;
buffer[1] = tag_len;
buffer[3] = level;
@@ -1373,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def,
} else
tag_len = 4;
- buffer[0] = 0x02;
+ buffer[0] = CIPSO_V4_TAG_ENUM;
buffer[1] = tag_len;
buffer[3] = level;
@@ -1469,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def,
} else
tag_len = 4;
- buffer[0] = 0x05;
+ buffer[0] = CIPSO_V4_TAG_RANGE;
buffer[1] = tag_len;
buffer[3] = level;
@@ -1523,6 +1501,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
}
/**
+ * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the local tag. Returns the size of the tag
+ * on success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr,
+ unsigned char *buffer,
+ u32 buffer_len)
+{
+ if (!(secattr->flags & NETLBL_SECATTR_SECID))
+ return -EPERM;
+
+ buffer[0] = CIPSO_V4_TAG_LOCAL;
+ buffer[1] = CIPSO_V4_TAG_LOC_BLEN;
+ *(u32 *)&buffer[2] = secattr->attr.secid;
+
+ return CIPSO_V4_TAG_LOC_BLEN;
+}
+
+/**
+ * cipso_v4_parsetag_loc - Parse a CIPSO local tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO local tag and return the security attributes in @secattr.
+ * Return zero on success, negatives values on failure.
+ *
+ */
+static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def,
+ const unsigned char *tag,
+ struct netlbl_lsm_secattr *secattr)
+{
+ secattr->attr.secid = *(u32 *)&tag[2];
+ secattr->flags |= NETLBL_SECATTR_SECID;
+
+ return 0;
+}
+
+/**
* cipso_v4_validate - Validate a CIPSO option
* @option: the start of the option, on error it is set to point to the error
*
@@ -1541,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
* that is unrecognized."
*
*/
-int cipso_v4_validate(unsigned char **option)
+int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
{
unsigned char *opt = *option;
unsigned char *tag;
@@ -1566,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option)
goto validate_return_locked;
}
- opt_iter = 6;
+ opt_iter = CIPSO_V4_HDR_LEN;
tag = opt + opt_iter;
while (opt_iter < opt_len) {
for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
@@ -1584,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option)
switch (tag[0]) {
case CIPSO_V4_TAG_RBITMAP:
- if (tag_len < 4) {
+ if (tag_len < CIPSO_V4_TAG_RBM_BLEN) {
err_offset = opt_iter + 1;
goto validate_return_locked;
}
@@ -1602,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option)
err_offset = opt_iter + 3;
goto validate_return_locked;
}
- if (tag_len > 4 &&
+ if (tag_len > CIPSO_V4_TAG_RBM_BLEN &&
cipso_v4_map_cat_rbm_valid(doi_def,
&tag[4],
tag_len - 4) < 0) {
@@ -1612,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option)
}
break;
case CIPSO_V4_TAG_ENUM:
- if (tag_len < 4) {
+ if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) {
err_offset = opt_iter + 1;
goto validate_return_locked;
}
@@ -1622,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option)
err_offset = opt_iter + 3;
goto validate_return_locked;
}
- if (tag_len > 4 &&
+ if (tag_len > CIPSO_V4_TAG_ENUM_BLEN &&
cipso_v4_map_cat_enum_valid(doi_def,
&tag[4],
tag_len - 4) < 0) {
@@ -1631,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option)
}
break;
case CIPSO_V4_TAG_RANGE:
- if (tag_len < 4) {
+ if (tag_len < CIPSO_V4_TAG_RNG_BLEN) {
err_offset = opt_iter + 1;
goto validate_return_locked;
}
@@ -1641,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option)
err_offset = opt_iter + 3;
goto validate_return_locked;
}
- if (tag_len > 4 &&
+ if (tag_len > CIPSO_V4_TAG_RNG_BLEN &&
cipso_v4_map_cat_rng_valid(doi_def,
&tag[4],
tag_len - 4) < 0) {
@@ -1649,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option)
goto validate_return_locked;
}
break;
+ case CIPSO_V4_TAG_LOCAL:
+ /* This is a non-standard tag that we only allow for
+ * local connections, so if the incoming interface is
+ * not the loopback device drop the packet. */
+ if (!(skb->dev->flags & IFF_LOOPBACK)) {
+ err_offset = opt_iter;
+ goto validate_return_locked;
+ }
+ if (tag_len != CIPSO_V4_TAG_LOC_BLEN) {
+ err_offset = opt_iter + 1;
+ goto validate_return_locked;
+ }
+ break;
default:
err_offset = opt_iter;
goto validate_return_locked;
@@ -1704,48 +1743,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
}
/**
- * cipso_v4_sock_setattr - Add a CIPSO option to a socket
- * @sk: the socket
+ * cipso_v4_genopt - Generate a CIPSO option
+ * @buf: the option buffer
+ * @buf_len: the size of opt_buf
* @doi_def: the CIPSO DOI to use
- * @secattr: the specific security attributes of the socket
+ * @secattr: the security attributes
*
* Description:
- * Set the CIPSO option on the given socket using the DOI definition and
- * security attributes passed to the function. This function requires
- * exclusive access to @sk, which means it either needs to be in the
- * process of being created or locked. Returns zero on success and negative
- * values on failure.
+ * Generate a CIPSO option using the DOI definition and security attributes
+ * passed to the function. Returns the length of the option on success and
+ * negative values on failure.
*
*/
-int cipso_v4_sock_setattr(struct sock *sk,
- const struct cipso_v4_doi *doi_def,
- const struct netlbl_lsm_secattr *secattr)
+static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
+ const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
{
- int ret_val = -EPERM;
+ int ret_val;
u32 iter;
- unsigned char *buf;
- u32 buf_len = 0;
- u32 opt_len;
- struct ip_options *opt = NULL;
- struct inet_sock *sk_inet;
- struct inet_connection_sock *sk_conn;
- /* In the case of sock_create_lite(), the sock->sk field is not
- * defined yet but it is not a problem as the only users of these
- * "lite" PF_INET sockets are functions which do an accept() call
- * afterwards so we will label the socket as part of the accept(). */
- if (sk == NULL)
- return 0;
-
- /* We allocate the maximum CIPSO option size here so we are probably
- * being a little wasteful, but it makes our life _much_ easier later
- * on and after all we are only talking about 40 bytes. */
- buf_len = CIPSO_V4_OPT_LEN_MAX;
- buf = kmalloc(buf_len, GFP_ATOMIC);
- if (buf == NULL) {
- ret_val = -ENOMEM;
- goto socket_setattr_failure;
- }
+ if (buf_len <= CIPSO_V4_HDR_LEN)
+ return -ENOSPC;
/* XXX - This code assumes only one tag per CIPSO option which isn't
* really a good assumption to make but since we only support the MAC
@@ -1772,9 +1790,14 @@ int cipso_v4_sock_setattr(struct sock *sk,
&buf[CIPSO_V4_HDR_LEN],
buf_len - CIPSO_V4_HDR_LEN);
break;
+ case CIPSO_V4_TAG_LOCAL:
+ ret_val = cipso_v4_gentag_loc(doi_def,
+ secattr,
+ &buf[CIPSO_V4_HDR_LEN],
+ buf_len - CIPSO_V4_HDR_LEN);
+ break;
default:
- ret_val = -EPERM;
- goto socket_setattr_failure;
+ return -EPERM;
}
iter++;
@@ -1782,9 +1805,58 @@ int cipso_v4_sock_setattr(struct sock *sk,
iter < CIPSO_V4_TAG_MAXCNT &&
doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
if (ret_val < 0)
- goto socket_setattr_failure;
+ return ret_val;
cipso_v4_gentag_hdr(doi_def, buf, ret_val);
- buf_len = CIPSO_V4_HDR_LEN + ret_val;
+ return CIPSO_V4_HDR_LEN + ret_val;
+}
+
+/**
+ * cipso_v4_sock_setattr - Add a CIPSO option to a socket
+ * @sk: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function. This function requires
+ * exclusive access to @sk, which means it either needs to be in the
+ * process of being created or locked. Returns zero on success and negative
+ * values on failure.
+ *
+ */
+int cipso_v4_sock_setattr(struct sock *sk,
+ const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -EPERM;
+ unsigned char *buf = NULL;
+ u32 buf_len;
+ u32 opt_len;
+ struct ip_options *opt = NULL;
+ struct inet_sock *sk_inet;
+ struct inet_connection_sock *sk_conn;
+
+ /* In the case of sock_create_lite(), the sock->sk field is not
+ * defined yet but it is not a problem as the only users of these
+ * "lite" PF_INET sockets are functions which do an accept() call
+ * afterwards so we will label the socket as part of the accept(). */
+ if (sk == NULL)
+ return 0;
+
+ /* We allocate the maximum CIPSO option size here so we are probably
+ * being a little wasteful, but it makes our life _much_ easier later
+ * on and after all we are only talking about 40 bytes. */
+ buf_len = CIPSO_V4_OPT_LEN_MAX;
+ buf = kmalloc(buf_len, GFP_ATOMIC);
+ if (buf == NULL) {
+ ret_val = -ENOMEM;
+ goto socket_setattr_failure;
+ }
+
+ ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+ if (ret_val < 0)
+ goto socket_setattr_failure;
+ buf_len = ret_val;
/* We can't use ip_options_get() directly because it makes a call to
* ip_options_get_alloc() which allocates memory with GFP_KERNEL and
@@ -1822,6 +1894,80 @@ socket_setattr_failure:
}
/**
+ * cipso_v4_sock_delattr - Delete the CIPSO option from a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Removes the CIPSO option from a socket, if present.
+ *
+ */
+void cipso_v4_sock_delattr(struct sock *sk)
+{
+ u8 hdr_delta;
+ struct ip_options *opt;
+ struct inet_sock *sk_inet;
+
+ sk_inet = inet_sk(sk);
+ opt = sk_inet->opt;
+ if (opt == NULL || opt->cipso == 0)
+ return;
+
+ if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
+ u8 cipso_len;
+ u8 cipso_off;
+ unsigned char *cipso_ptr;
+ int iter;
+ int optlen_new;
+
+ cipso_off = opt->cipso - sizeof(struct iphdr);
+ cipso_ptr = &opt->__data[cipso_off];
+ cipso_len = cipso_ptr[1];
+
+ if (opt->srr > opt->cipso)
+ opt->srr -= cipso_len;
+ if (opt->rr > opt->cipso)
+ opt->rr -= cipso_len;
+ if (opt->ts > opt->cipso)
+ opt->ts -= cipso_len;
+ if (opt->router_alert > opt->cipso)
+ opt->router_alert -= cipso_len;
+ opt->cipso = 0;
+
+ memmove(cipso_ptr, cipso_ptr + cipso_len,
+ opt->optlen - cipso_off - cipso_len);
+
+ /* determining the new total option length is tricky because of
+ * the padding necessary, the only thing i can think to do at
+ * this point is walk the options one-by-one, skipping the
+ * padding at the end to determine the actual option size and
+ * from there we can determine the new total option length */
+ iter = 0;
+ optlen_new = 0;
+ while (iter < opt->optlen)
+ if (opt->__data[iter] != IPOPT_NOP) {
+ iter += opt->__data[iter + 1];
+ optlen_new = iter;
+ } else
+ iter++;
+ hdr_delta = opt->optlen;
+ opt->optlen = (optlen_new + 3) & ~3;
+ hdr_delta -= opt->optlen;
+ } else {
+ /* only the cipso option was present on the socket so we can
+ * remove the entire option struct */
+ sk_inet->opt = NULL;
+ hdr_delta = opt->optlen;
+ kfree(opt);
+ }
+
+ if (sk_inet->is_icsk && hdr_delta > 0) {
+ struct inet_connection_sock *sk_conn = inet_csk(sk);
+ sk_conn->icsk_ext_hdr_len -= hdr_delta;
+ sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+ }
+}
+
+/**
* cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions
* @cipso: the CIPSO v4 option
* @secattr: the security attributes
@@ -1859,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso,
case CIPSO_V4_TAG_RANGE:
ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr);
break;
+ case CIPSO_V4_TAG_LOCAL:
+ ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr);
+ break;
}
if (ret_val == 0)
secattr->type = NETLBL_NLTYPE_CIPSOV4;
@@ -1893,6 +2042,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
}
/**
+ * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Set the CIPSO option on the given packet based on the security attributes.
+ * Returns a pointer to the IP header on success and NULL on failure.
+ *
+ */
+int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+ const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ struct iphdr *iph;
+ struct ip_options *opt = &IPCB(skb)->opt;
+ unsigned char buf[CIPSO_V4_OPT_LEN_MAX];
+ u32 buf_len = CIPSO_V4_OPT_LEN_MAX;
+ u32 opt_len;
+ int len_delta;
+
+ buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+ if (buf_len < 0)
+ return buf_len;
+ opt_len = (buf_len + 3) & ~3;
+
+ /* we overwrite any existing options to ensure that we have enough
+ * room for the CIPSO option, the reason is that we _need_ to guarantee
+ * that the security label is applied to the packet - we do the same
+ * thing when using the socket options and it hasn't caused a problem,
+ * if we need to we can always revisit this choice later */
+
+ len_delta = opt_len - opt->optlen;
+ /* if we don't ensure enough headroom we could panic on the skb_push()
+ * call below so make sure we have enough, we are also "mangling" the
+ * packet so we should probably do a copy-on-write call anyway */
+ ret_val = skb_cow(skb, skb_headroom(skb) + len_delta);
+ if (ret_val < 0)
+ return ret_val;
+
+ if (len_delta > 0) {
+ /* we assume that the header + opt->optlen have already been
+ * "pushed" in ip_options_build() or similar */
+ iph = ip_hdr(skb);
+ skb_push(skb, len_delta);
+ memmove((char *)iph - len_delta, iph, iph->ihl << 2);
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ } else if (len_delta < 0) {
+ iph = ip_hdr(skb);
+ memset(iph + 1, IPOPT_NOP, opt->optlen);
+ } else
+ iph = ip_hdr(skb);
+
+ if (opt->optlen > 0)
+ memset(opt, 0, sizeof(*opt));
+ opt->optlen = opt_len;
+ opt->cipso = sizeof(struct iphdr);
+ opt->is_changed = 1;
+
+ /* we have to do the following because we are being called from a
+ * netfilter hook which means the packet already has had the header
+ * fields populated and the checksum calculated - yes this means we
+ * are doing more work than needed but we do it to keep the core
+ * stack clean and tidy */
+ memcpy(iph + 1, buf, buf_len);
+ if (opt_len > buf_len)
+ memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len);
+ if (len_delta != 0) {
+ iph->ihl = 5 + (opt_len >> 2);
+ iph->tot_len = htons(skb->len);
+ }
+ ip_send_check(iph);
+
+ return 0;
+}
+
+/**
+ * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet
+ * @skb: the packet
+ *
+ * Description:
+ * Removes any and all CIPSO options from the given packet. Returns zero on
+ * success, negative values on failure.
+ *
+ */
+int cipso_v4_skbuff_delattr(struct sk_buff *skb)
+{
+ int ret_val;
+ struct iphdr *iph;
+ struct ip_options *opt = &IPCB(skb)->opt;
+ unsigned char *cipso_ptr;
+
+ if (opt->cipso == 0)
+ return 0;
+
+ /* since we are changing the packet we should make a copy */
+ ret_val = skb_cow(skb, skb_headroom(skb));
+ if (ret_val < 0)
+ return ret_val;
+
+ /* the easiest thing to do is just replace the cipso option with noop
+ * options since we don't change the size of the packet, although we
+ * still need to recalculate the checksum */
+
+ iph = ip_hdr(skb);
+ cipso_ptr = (unsigned char *)iph + opt->cipso;
+ memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]);
+ opt->cipso = 0;
+ opt->is_changed = 1;
+
+ ip_send_check(iph);
+
+ return 0;
+}
+
+/**
* cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
* @skb: the packet
* @secattr: the security attributes
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b12dae2b0b2..56fce3ab6c5 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -613,9 +613,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (colon)
*colon = 0;
-#ifdef CONFIG_KMOD
dev_load(net, ifr.ifr_name);
-#endif
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
@@ -1283,7 +1281,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
return ret;
}
-static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
+static int devinet_conf_sysctl(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
@@ -1379,12 +1377,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
return ret;
}
-int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
+int ipv4_doint_and_flush_strategy(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
- int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
- newval, newlen);
+ int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
struct net *net = table->extra2;
if (ret == 1)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 55c355e6323..72b2de76f1c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1,7 +1,7 @@
/*
* NET3: Implementation of the ICMP protocol layer.
*
- * Alan Cox, <alan@redhat.com>
+ * Alan Cox, <alan@lxorguk.ukuu.org.uk>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7f9e337e390..a0d86455c53 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -9,7 +9,7 @@
* seems to fall out with gcc 2.6.2.
*
* Authors:
- * Alan Cox <Alan.Cox@linux.org>
+ * Alan Cox <alan@lxorguk.ukuu.org.uk>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 89cb047ab31..564230dabcb 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -53,11 +53,9 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
static const struct inet_diag_handler *inet_diag_lock_handler(int type)
{
-#ifdef CONFIG_KMOD
if (!inet_diag_table[type])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
NETLINK_INET_DIAG, type);
-#endif
mutex_lock(&inet_diag_table_mutex);
if (!inet_diag_table[type])
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2152d222b95..e4f81f54bef 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -6,7 +6,7 @@
* The IP fragmentation functionality.
*
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
- * Alan Cox <Alan.Cox@linux.org>
+ * Alan Cox <alan@lxorguk.ukuu.org.uk>
*
* Fixes:
* Alan Cox : Split from ip.c , see ip_input.c for history.
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e0bed56c51f..861978a4f1a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -8,7 +8,7 @@
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Donald Becker, <becker@super.org>
- * Alan Cox, <Alan.Cox@linux.org>
+ * Alan Cox, <alan@lxorguk.ukuu.org.uk>
* Richard Underwood
* Stefan Becker, <stefanb@yello.ping.de>
* Jorge Cwik, <jorge@laser.satlink.net>
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index be3f18a7a40..2c88da6e786 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -438,7 +438,7 @@ int ip_options_compile(struct net *net,
goto error;
}
opt->cipso = optptr - iph;
- if (cipso_v4_validate(&optptr)) {
+ if (cipso_v4_validate(skb, &optptr)) {
pp_ptr = optptr;
goto error;
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4c6d2caf920..29609d29df7 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -41,7 +41,7 @@
Made the tunnels use dev->name not tunnel: when error reporting.
Added tx_dropped stat
- -Alan Cox (Alan.Cox@linux.org) 21 March 95
+ -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
Reworked:
Changed to tunnel to destination gateway in addition to the
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c519b8d30ee..b42e082cc17 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1,7 +1,7 @@
/*
* IP multicast routing support for mrouted 3.6/3.8
*
- * (c) 1995 Alan Cox, <alan@redhat.com>
+ * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
* Linux Consultancy and Custom Driver Development
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index aa2c50a180f..fa2d6b6fc3e 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -41,12 +41,13 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
int (*okfn)(struct sk_buff *))
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if (skb->nfct)
return NF_ACCEPT;
#endif
-
+#endif
/* Gather fragments. */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
if (nf_ct_ipv4_gather_frags(skb,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 2ac9eaf1a8c..a65cf692359 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -584,6 +584,98 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
.flags = NF_CT_EXT_F_PREALLOC,
};
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
+ [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
+ [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
+};
+
+static int nfnetlink_parse_nat_proto(struct nlattr *attr,
+ const struct nf_conn *ct,
+ struct nf_nat_range *range)
+{
+ struct nlattr *tb[CTA_PROTONAT_MAX+1];
+ const struct nf_nat_protocol *npt;
+ int err;
+
+ err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
+ if (err < 0)
+ return err;
+
+ npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
+ if (npt->nlattr_to_range)
+ err = npt->nlattr_to_range(tb, range);
+ nf_nat_proto_put(npt);
+ return err;
+}
+
+static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
+ [CTA_NAT_MINIP] = { .type = NLA_U32 },
+ [CTA_NAT_MAXIP] = { .type = NLA_U32 },
+};
+
+static int
+nfnetlink_parse_nat(struct nlattr *nat,
+ const struct nf_conn *ct, struct nf_nat_range *range)
+{
+ struct nlattr *tb[CTA_NAT_MAX+1];
+ int err;
+
+ memset(range, 0, sizeof(*range));
+
+ err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[CTA_NAT_MINIP])
+ range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
+
+ if (!tb[CTA_NAT_MAXIP])
+ range->max_ip = range->min_ip;
+ else
+ range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
+
+ if (range->min_ip)
+ range->flags |= IP_NAT_RANGE_MAP_IPS;
+
+ if (!tb[CTA_NAT_PROTO])
+ return 0;
+
+ err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr)
+{
+ struct nf_nat_range range;
+
+ if (nfnetlink_parse_nat(attr, ct, &range) < 0)
+ return -EINVAL;
+ if (nf_nat_initialized(ct, manip))
+ return -EEXIST;
+
+ return nf_nat_setup_info(ct, &range, manip);
+}
+#else
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
static int __net_init nf_nat_net_init(struct net *net)
{
net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
@@ -654,6 +746,9 @@ static int __init nf_nat_init(void)
BUG_ON(nf_nat_seq_adjust_hook != NULL);
rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
+ BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
+ rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
+ nfnetlink_parse_nat_setup);
return 0;
cleanup_extend:
@@ -667,10 +762,12 @@ static void __exit nf_nat_cleanup(void)
nf_ct_l3proto_put(l3proto);
nf_ct_extend_unregister(&nat_extend);
rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
+ rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
synchronize_net();
}
MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-ipv4");
module_init(nf_nat_init);
module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a6d7c584f53..2ea6dcc3e2c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1109,7 +1109,12 @@ restart:
printk("\n");
}
#endif
- rt_hash_table[hash].chain = rt;
+ /*
+ * Since lookup is lockfree, we must make sure
+ * previous writes to rt are comitted to memory
+ * before making rt visible to other CPUS.
+ */
+ rcu_assign_pointer(rt_hash_table[hash].chain, rt);
spin_unlock_bh(rt_hash_lock_addr(hash));
*rp = rt;
return 0;
@@ -2908,8 +2913,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
}
static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
- int __user *name,
- int nlen,
void __user *oldval,
size_t __user *oldlenp,
void __user *newval,
@@ -2972,16 +2975,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
}
static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table,
- int __user *name,
- int nlen,
void __user *oldval,
size_t __user *oldlenp,
void __user *newval,
size_t newlen)
{
int old = ip_rt_secret_interval;
- int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval,
- newlen);
+ int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen);
rt_secret_reschedule(old);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 276d047fb85..1bb10df8ce7 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -64,8 +64,8 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
}
/* Validate changes from sysctl interface. */
-static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
- int nlen, void __user *oldval,
+static int ipv4_sysctl_local_port_range(ctl_table *table,
+ void __user *oldval,
size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
@@ -80,7 +80,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
};
inet_get_local_port_range(range, range + 1);
- ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
+ ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen);
if (ret == 0 && newval && newlen) {
if (range[1] < range[0])
ret = -EINVAL;
@@ -109,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
return ret;
}
-static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
- int nlen, void __user *oldval,
+static int sysctl_tcp_congestion_control(ctl_table *table,
+ void __user *oldval,
size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
@@ -122,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
int ret;
tcp_get_default_congestion_control(val);
- ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
+ ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen);
if (ret == 1 && newval && newlen)
ret = tcp_set_default_congestion_control(val);
return ret;
@@ -165,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
return ret;
}
-static int strategy_allowed_congestion_control(ctl_table *table, int __user *name,
- int nlen, void __user *oldval,
+static int strategy_allowed_congestion_control(ctl_table *table,
+ void __user *oldval,
size_t __user *oldlenp,
void __user *newval,
size_t newlen)
@@ -179,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
return -ENOMEM;
tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
- ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
+ ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen);
if (ret == 1 && newval && newlen)
ret = tcp_set_allowed_congestion_control(tbl.data);
kfree(tbl.data);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 6a250828b76..4ec5b4e97c4 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -115,7 +115,7 @@ int tcp_set_default_congestion_control(const char *name)
spin_lock(&tcp_cong_list_lock);
ca = tcp_ca_find(name);
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
if (!ca && capable(CAP_SYS_MODULE)) {
spin_unlock(&tcp_cong_list_lock);
@@ -244,7 +244,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
if (ca == icsk->icsk_ca_ops)
goto out;
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
/* not found attempt to autoload module */
if (!ca && capable(CAP_SYS_MODULE)) {
rcu_read_unlock();
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eacf4cfef14..2095abc3cab 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -8,7 +8,7 @@
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- * Alan Cox, <Alan.Cox@linux.org>
+ * Alan Cox, <alan@lxorguk.ukuu.org.uk>
* Hirokazu Takahashi, <taka@valinux.co.jp>
*
* Fixes:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7b6a584b62d..eea9542728c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3982,7 +3982,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
}
static int addrconf_sysctl_forward_strategy(ctl_table *table,
- int __user *name, int nlen,
void __user *oldval,
size_t __user *oldlenp,
void __user *newval, size_t newlen)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 050e14b7f70..01edac88851 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -834,7 +834,7 @@ static void __net_exit ipv6_cleanup_mibs(struct net *net)
snmp_mib_free((void **)net->mib.icmpv6msg_statistics);
}
-static int inet6_net_init(struct net *net)
+static int __net_init inet6_net_init(struct net *net)
{
int err = 0;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 840b15780a3..172438320ee 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1199,7 +1199,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
}
neigh->flags |= NTF_ROUTER;
} else if (rt) {
- rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+ rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
}
if (rt)
@@ -1730,9 +1730,8 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
return ret;
}
-int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
- int nlen, void __user *oldval,
- size_t __user *oldlenp,
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl,
+ void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
struct net_device *dev = ctl->extra1;
@@ -1745,13 +1744,11 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
switch (ctl->ctl_name) {
case NET_NEIGH_REACHABLE_TIME:
- ret = sysctl_jiffies(ctl, name, nlen,
- oldval, oldlenp, newval, newlen);
+ ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen);
break;
case NET_NEIGH_RETRANS_TIME_MS:
case NET_NEIGH_REACHABLE_TIME_MS:
- ret = sysctl_ms_jiffies(ctl, name, nlen,
- oldval, oldlenp, newval, newlen);
+ ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen);
break;
default:
ret = 0;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 6b29b03925f..fd5b3a4e332 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -12,6 +12,7 @@
int ip6_route_me_harder(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dst->dev);
struct ipv6hdr *iph = ipv6_hdr(skb);
struct dst_entry *dst;
struct flowi fl = {
@@ -23,7 +24,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
.saddr = iph->saddr, } },
};
- dst = ip6_route_output(dev_net(skb->dst->dev), skb->sk, &fl);
+ dst = ip6_route_output(net, skb->sk, &fl);
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -33,8 +34,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
#endif
if (dst->error) {
- IP6_INC_STATS(&init_net, ip6_dst_idev(dst),
- IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
return -EINVAL;
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 2a451562377..2ad504fc341 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -545,8 +545,12 @@ static int netdev_notify(struct notifier_block *nb,
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- sprintf(buf, "netdev:%s", dev->name);
dir = sdata->debugfsdir;
+
+ if (!dir)
+ return 0;
+
+ sprintf(buf, "netdev:%s", dev->name);
if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf))
printk(KERN_ERR "mac80211: debugfs: failed to rename debugfs "
"dir to %s\n", buf);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b9902e425f0..189d0bafa91 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -249,11 +249,22 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DECLARE_MAC_BUF(mbuf);
u8 *mac;
+ sta->debugfs.add_has_run = true;
+
if (!stations_dir)
return;
mac = print_mac(mbuf, sta->sta.addr);
+ /*
+ * This might fail due to a race condition:
+ * When mac80211 unlinks a station, the debugfs entries
+ * remain, but it is already possible to link a new
+ * station with the same address which triggers adding
+ * it to debugfs; therefore, if the old station isn't
+ * destroyed quickly enough the old station's debugfs
+ * dir might still be around.
+ */
sta->debugfs.dir = debugfs_create_dir(mac, stations_dir);
if (!sta->debugfs.dir)
return;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8025b294588..156e42a003a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -816,8 +816,8 @@ struct ieee802_11_elems {
u8 *ext_supp_rates;
u8 *wmm_info;
u8 *wmm_param;
- u8 *ht_cap_elem;
- u8 *ht_info_elem;
+ struct ieee80211_ht_cap *ht_cap_elem;
+ struct ieee80211_ht_addt_info *ht_info_elem;
u8 *mesh_config;
u8 *mesh_id;
u8 *peer_link;
@@ -844,8 +844,6 @@ struct ieee802_11_elems {
u8 ext_supp_rates_len;
u8 wmm_info_len;
u8 wmm_param_len;
- u8 ht_cap_elem_len;
- u8 ht_info_elem_len;
u8 mesh_config_len;
u8 mesh_id_len;
u8 peer_link_len;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 49f86fa56bf..87665d7bb4f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1348,10 +1348,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
struct ieee80211_ht_bss_info bss_info;
ieee80211_ht_cap_ie_to_ht_info(
- (struct ieee80211_ht_cap *)
elems.ht_cap_elem, &sta->sta.ht_info);
ieee80211_ht_addt_info_ie_to_ht_bss_info(
- (struct ieee80211_ht_addt_info *)
elems.ht_info_elem, &bss_info);
ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info);
}
@@ -1709,7 +1707,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
struct ieee80211_ht_bss_info bss_info;
ieee80211_ht_addt_info_ie_to_ht_bss_info(
- (struct ieee80211_ht_addt_info *)
elems.ht_info_elem, &bss_info);
changed |= ieee80211_handle_ht(local, 1, &conf->ht_conf,
&bss_info);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 77e7b014872..cf6b121e1bb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1379,6 +1379,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
return RX_QUEUED;
}
+#ifdef CONFIG_MAC80211_MESH
static ieee80211_rx_result
ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
{
@@ -1453,7 +1454,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
else
return RX_DROP_MONITOR;
}
-
+#endif
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
@@ -1780,8 +1781,10 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
/* must be after MMIC verify so header is counted in MPDU mic */
CALL_RXH(ieee80211_rx_h_remove_qos_control)
CALL_RXH(ieee80211_rx_h_amsdu)
+#ifdef CONFIG_MAC80211_MESH
if (ieee80211_vif_is_mesh(&sdata->vif))
CALL_RXH(ieee80211_rx_h_mesh_fwding);
+#endif
CALL_RXH(ieee80211_rx_h_data)
CALL_RXH(ieee80211_rx_h_ctrl)
CALL_RXH(ieee80211_rx_h_action)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 8e6685e7ae8..416bb41099f 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -388,7 +388,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
bss = ieee80211_bss_info_update(sdata->local, rx_status,
mgmt, skb->len, &elems,
freq, beacon);
- ieee80211_rx_bss_put(sdata->local, bss);
+ if (bss)
+ ieee80211_rx_bss_put(sdata->local, bss);
dev_kfree_skb(skb);
return RX_QUEUED;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 9b72d15bc8d..7fef8ea1f5e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -635,7 +635,12 @@ static void sta_info_debugfs_add_work(struct work_struct *work)
spin_lock_irqsave(&local->sta_lock, flags);
list_for_each_entry(tmp, &local->sta_list, list) {
- if (!tmp->debugfs.dir) {
+ /*
+ * debugfs.add_has_run will be set by
+ * ieee80211_sta_debugfs_add regardless
+ * of what else it does.
+ */
+ if (!tmp->debugfs.add_has_run) {
sta = tmp;
__sta_info_pin(sta);
break;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index a6b51862a89..168a39a298b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -300,6 +300,7 @@ struct sta_info {
struct dentry *inactive_ms;
struct dentry *last_seq_ctrl;
struct dentry *agg_status;
+ bool add_has_run;
} debugfs;
#endif
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index f32561ec224..cee4884b9d0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -529,12 +529,12 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
elems->ext_supp_rates_len = elen;
break;
case WLAN_EID_HT_CAPABILITY:
- elems->ht_cap_elem = pos;
- elems->ht_cap_elem_len = elen;
+ if (elen >= sizeof(struct ieee80211_ht_cap))
+ elems->ht_cap_elem = (void *)pos;
break;
case WLAN_EID_HT_EXTRA_INFO:
- elems->ht_info_elem = pos;
- elems->ht_info_elem_len = elen;
+ if (elen >= sizeof(struct ieee80211_ht_addt_info))
+ elems->ht_info_elem = (void *)pos;
break;
case WLAN_EID_MESH_ID:
elems->mesh_id = pos;
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 7e0d53abde2..742f811ca41 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -775,7 +775,7 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev,
* configure it here */
if (local->ops->set_frag_threshold)
- local->ops->set_frag_threshold(
+ return local->ops->set_frag_threshold(
local_to_hw(local),
local->fragmentation_threshold);
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 03591d37b9c..b92df5c1dfc 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -115,7 +115,7 @@ int nf_conntrack_acct_init(struct net *net)
if (net_eq(net, &init_net)) {
#ifdef CONFIG_NF_CT_ACCT
- printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
+ printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
#endif
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 27de3c7b006..622d7c671cb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -38,9 +38,16 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_nat.h>
#define NF_CONNTRACK_VERSION "0.5.0"
+unsigned int
+(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr) __read_mostly;
+EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
+
DEFINE_SPINLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index cadfd15b44f..2e4ad9671e1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -689,71 +689,6 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple,
return 0;
}
-#ifdef CONFIG_NF_NAT_NEEDED
-static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
- [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
- [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
-};
-
-static int nfnetlink_parse_nat_proto(struct nlattr *attr,
- const struct nf_conn *ct,
- struct nf_nat_range *range)
-{
- struct nlattr *tb[CTA_PROTONAT_MAX+1];
- const struct nf_nat_protocol *npt;
- int err;
-
- err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
- if (err < 0)
- return err;
-
- npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
- if (npt->nlattr_to_range)
- err = npt->nlattr_to_range(tb, range);
- nf_nat_proto_put(npt);
- return err;
-}
-
-static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
- [CTA_NAT_MINIP] = { .type = NLA_U32 },
- [CTA_NAT_MAXIP] = { .type = NLA_U32 },
-};
-
-static inline int
-nfnetlink_parse_nat(struct nlattr *nat,
- const struct nf_conn *ct, struct nf_nat_range *range)
-{
- struct nlattr *tb[CTA_NAT_MAX+1];
- int err;
-
- memset(range, 0, sizeof(*range));
-
- err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
- if (err < 0)
- return err;
-
- if (tb[CTA_NAT_MINIP])
- range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
-
- if (!tb[CTA_NAT_MAXIP])
- range->max_ip = range->min_ip;
- else
- range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
-
- if (range->min_ip)
- range->flags |= IP_NAT_RANGE_MAP_IPS;
-
- if (!tb[CTA_NAT_PROTO])
- return 0;
-
- err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
- if (err < 0)
- return err;
-
- return 0;
-}
-#endif
-
static inline int
ctnetlink_parse_help(struct nlattr *attr, char **helper_name)
{
@@ -879,6 +814,34 @@ out:
}
static int
+ctnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr)
+{
+ typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup;
+
+ parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook);
+ if (!parse_nat_setup) {
+#ifdef CONFIG_MODULES
+ rcu_read_unlock();
+ nfnl_unlock();
+ if (request_module("nf-nat-ipv4") < 0) {
+ nfnl_lock();
+ rcu_read_lock();
+ return -EOPNOTSUPP;
+ }
+ nfnl_lock();
+ rcu_read_lock();
+ if (nfnetlink_parse_nat_setup_hook)
+ return -EAGAIN;
+#endif
+ return -EOPNOTSUPP;
+ }
+
+ return parse_nat_setup(ct, manip, attr);
+}
+
+static int
ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
{
unsigned long d;
@@ -897,31 +860,6 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
/* ASSURED bit can only be set */
return -EBUSY;
- if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
-#ifndef CONFIG_NF_NAT_NEEDED
- return -EOPNOTSUPP;
-#else
- struct nf_nat_range range;
-
- if (cda[CTA_NAT_DST]) {
- if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct,
- &range) < 0)
- return -EINVAL;
- if (nf_nat_initialized(ct, IP_NAT_MANIP_DST))
- return -EEXIST;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
- }
- if (cda[CTA_NAT_SRC]) {
- if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct,
- &range) < 0)
- return -EINVAL;
- if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC))
- return -EEXIST;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
- }
-#endif
- }
-
/* Be careful here, modifying NAT bits can screw up things,
* so don't let users modify them directly if they don't pass
* nf_nat_range. */
@@ -929,6 +867,31 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
return 0;
}
+static int
+ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[])
+{
+#ifdef CONFIG_NF_NAT_NEEDED
+ int ret;
+
+ if (cda[CTA_NAT_DST]) {
+ ret = ctnetlink_parse_nat_setup(ct,
+ IP_NAT_MANIP_DST,
+ cda[CTA_NAT_DST]);
+ if (ret < 0)
+ return ret;
+ }
+ if (cda[CTA_NAT_SRC]) {
+ ret = ctnetlink_parse_nat_setup(ct,
+ IP_NAT_MANIP_SRC,
+ cda[CTA_NAT_SRC]);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
static inline int
ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
@@ -1157,6 +1120,14 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
}
}
+ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
+ err = ctnetlink_change_nat(ct, cda);
+ if (err < 0) {
+ rcu_read_unlock();
+ goto err;
+ }
+ }
+
if (cda[CTA_PROTOINFO]) {
err = ctnetlink_change_protoinfo(ct, cda);
if (err < 0) {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 373e51e91ce..1bc3001d182 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -65,7 +65,7 @@ void
struct nf_conntrack_expect *exp) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
-#ifdef DEBUG
+#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
/* PptpControlMessageType names */
const char *const pptp_msg_name[] = {
"UNKNOWN_MESSAGE",
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index fe34f4bf74c..cdc97f3105a 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -10,7 +10,6 @@
*
*/
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/net.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b75c9c4a995..9c0ba17a1dd 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -44,15 +44,17 @@ static struct sock *nfnl = NULL;
static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
static DEFINE_MUTEX(nfnl_mutex);
-static inline void nfnl_lock(void)
+void nfnl_lock(void)
{
mutex_lock(&nfnl_mutex);
}
+EXPORT_SYMBOL_GPL(nfnl_lock);
-static inline void nfnl_unlock(void)
+void nfnl_unlock(void)
{
mutex_unlock(&nfnl_mutex);
}
+EXPORT_SYMBOL_GPL(nfnl_unlock);
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
{
@@ -132,9 +134,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return 0;
type = nlh->nlmsg_type;
+replay:
ss = nfnetlink_get_subsys(type);
if (!ss) {
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
nfnl_unlock();
request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
nfnl_lock();
@@ -165,7 +168,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
} else
return -EINVAL;
- return nc->call(nfnl, skb, nlh, cda);
+ err = nc->call(nfnl, skb, nlh, cda);
+ if (err == -EAGAIN)
+ goto replay;
+ return err;
}
}
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index 8af18c0a47d..ea750e9df65 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -5,7 +5,8 @@
#
# base objects
-obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o
+obj-y := netlabel_user.o netlabel_kapi.o
+obj-y += netlabel_domainhash.o netlabel_addrlist.o
# management objects
obj-y += netlabel_mgmt.o
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
new file mode 100644
index 00000000000..b0925a30335
--- /dev/null
+++ b/net/netlabel/netlabel_addrlist.c
@@ -0,0 +1,388 @@
+/*
+ * NetLabel Network Address Lists
+ *
+ * This file contains network address list functions used to manage ordered
+ * lists of network addresses for use by the NetLabel subsystem. The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <linux/audit.h>
+
+#include "netlabel_addrlist.h"
+
+/*
+ * Address List Functions
+ */
+
+/**
+ * netlbl_af4list_search - Search for a matching IPv4 address entry
+ * @addr: IPv4 address
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv4 address list given by @head. If a matching address entry
+ * is found it is returned, otherwise NULL is returned. The caller is
+ * responsible for calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
+ struct list_head *head)
+{
+ struct netlbl_af4list *iter;
+
+ list_for_each_entry_rcu(iter, head, list)
+ if (iter->valid && (addr & iter->mask) == iter->addr)
+ return iter;
+
+ return NULL;
+}
+
+/**
+ * netlbl_af4list_search_exact - Search for an exact IPv4 address entry
+ * @addr: IPv4 address
+ * @mask: IPv4 address mask
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv4 address list given by @head. If an exact match if found
+ * it is returned, otherwise NULL is returned. The caller is responsible for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
+ __be32 mask,
+ struct list_head *head)
+{
+ struct netlbl_af4list *iter;
+
+ list_for_each_entry_rcu(iter, head, list)
+ if (iter->valid && iter->addr == addr && iter->mask == mask)
+ return iter;
+
+ return NULL;
+}
+
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_search - Search for a matching IPv6 address entry
+ * @addr: IPv6 address
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv6 address list given by @head. If a matching address entry
+ * is found it is returned, otherwise NULL is returned. The caller is
+ * responsible for calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
+ struct list_head *head)
+{
+ struct netlbl_af6list *iter;
+
+ list_for_each_entry_rcu(iter, head, list)
+ if (iter->valid &&
+ ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
+ return iter;
+
+ return NULL;
+}
+
+/**
+ * netlbl_af6list_search_exact - Search for an exact IPv6 address entry
+ * @addr: IPv6 address
+ * @mask: IPv6 address mask
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv6 address list given by @head. If an exact match if found
+ * it is returned, otherwise NULL is returned. The caller is responsible for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
+ const struct in6_addr *mask,
+ struct list_head *head)
+{
+ struct netlbl_af6list *iter;
+
+ list_for_each_entry_rcu(iter, head, list)
+ if (iter->valid &&
+ ipv6_addr_equal(&iter->addr, addr) &&
+ ipv6_addr_equal(&iter->mask, mask))
+ return iter;
+
+ return NULL;
+}
+#endif /* IPv6 */
+
+/**
+ * netlbl_af4list_add - Add a new IPv4 address entry to a list
+ * @entry: address entry
+ * @head: the list head
+ *
+ * Description:
+ * Add a new address entry to the list pointed to by @head. On success zero is
+ * returned, otherwise a negative value is returned. The caller is responsible
+ * for calling the necessary locking functions.
+ *
+ */
+int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head)
+{
+ struct netlbl_af4list *iter;
+
+ iter = netlbl_af4list_search(entry->addr, head);
+ if (iter != NULL &&
+ iter->addr == entry->addr && iter->mask == entry->mask)
+ return -EEXIST;
+
+ /* in order to speed up address searches through the list (the common
+ * case) we need to keep the list in order based on the size of the
+ * address mask such that the entry with the widest mask (smallest
+ * numerical value) appears first in the list */
+ list_for_each_entry_rcu(iter, head, list)
+ if (iter->valid &&
+ ntohl(entry->mask) > ntohl(iter->mask)) {
+ __list_add_rcu(&entry->list,
+ iter->list.prev,
+ &iter->list);
+ return 0;
+ }
+ list_add_tail_rcu(&entry->list, head);
+ return 0;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_add - Add a new IPv6 address entry to a list
+ * @entry: address entry
+ * @head: the list head
+ *
+ * Description:
+ * Add a new address entry to the list pointed to by @head. On success zero is
+ * returned, otherwise a negative value is returned. The caller is responsible
+ * for calling the necessary locking functions.
+ *
+ */
+int netlbl_af6list_add(struct netlbl_af6list *entry, struct list_head *head)
+{
+ struct netlbl_af6list *iter;
+
+ iter = netlbl_af6list_search(&entry->addr, head);
+ if (iter != NULL &&
+ ipv6_addr_equal(&iter->addr, &entry->addr) &&
+ ipv6_addr_equal(&iter->mask, &entry->mask))
+ return -EEXIST;
+
+ /* in order to speed up address searches through the list (the common
+ * case) we need to keep the list in order based on the size of the
+ * address mask such that the entry with the widest mask (smallest
+ * numerical value) appears first in the list */
+ list_for_each_entry_rcu(iter, head, list)
+ if (iter->valid &&
+ ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
+ __list_add_rcu(&entry->list,
+ iter->list.prev,
+ &iter->list);
+ return 0;
+ }
+ list_add_tail_rcu(&entry->list, head);
+ return 0;
+}
+#endif /* IPv6 */
+
+/**
+ * netlbl_af4list_remove_entry - Remove an IPv4 address entry
+ * @entry: address entry
+ *
+ * Description:
+ * Remove the specified IP address entry. The caller is responsible for
+ * calling the necessary locking functions.
+ *
+ */
+void netlbl_af4list_remove_entry(struct netlbl_af4list *entry)
+{
+ entry->valid = 0;
+ list_del_rcu(&entry->list);
+}
+
+/**
+ * netlbl_af4list_remove - Remove an IPv4 address entry
+ * @addr: IP address
+ * @mask: IP address mask
+ * @head: the list head
+ *
+ * Description:
+ * Remove an IP address entry from the list pointed to by @head. Returns the
+ * entry on success, NULL on failure. The caller is responsible for calling
+ * the necessary locking functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
+ struct list_head *head)
+{
+ struct netlbl_af4list *entry;
+
+ entry = netlbl_af4list_search(addr, head);
+ if (entry != NULL && entry->addr == addr && entry->mask == mask) {
+ netlbl_af4list_remove_entry(entry);
+ return entry;
+ }
+
+ return NULL;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_remove_entry - Remove an IPv6 address entry
+ * @entry: address entry
+ *
+ * Description:
+ * Remove the specified IP address entry. The caller is responsible for
+ * calling the necessary locking functions.
+ *
+ */
+void netlbl_af6list_remove_entry(struct netlbl_af6list *entry)
+{
+ entry->valid = 0;
+ list_del_rcu(&entry->list);
+}
+
+/**
+ * netlbl_af6list_remove - Remove an IPv6 address entry
+ * @addr: IP address
+ * @mask: IP address mask
+ * @head: the list head
+ *
+ * Description:
+ * Remove an IP address entry from the list pointed to by @head. Returns the
+ * entry on success, NULL on failure. The caller is responsible for calling
+ * the necessary locking functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
+ const struct in6_addr *mask,
+ struct list_head *head)
+{
+ struct netlbl_af6list *entry;
+
+ entry = netlbl_af6list_search(addr, head);
+ if (entry != NULL &&
+ ipv6_addr_equal(&entry->addr, addr) &&
+ ipv6_addr_equal(&entry->mask, mask)) {
+ netlbl_af6list_remove_entry(entry);
+ return entry;
+ }
+
+ return NULL;
+}
+#endif /* IPv6 */
+
+/*
+ * Audit Helper Functions
+ */
+
+/**
+ * netlbl_af4list_audit_addr - Audit an IPv4 address
+ * @audit_buf: audit buffer
+ * @src: true if source address, false if destination
+ * @dev: network interface
+ * @addr: IP address
+ * @mask: IP address mask
+ *
+ * Description:
+ * Write the IPv4 address and address mask, if necessary, to @audit_buf.
+ *
+ */
+void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
+ int src, const char *dev,
+ __be32 addr, __be32 mask)
+{
+ u32 mask_val = ntohl(mask);
+ char *dir = (src ? "src" : "dst");
+
+ if (dev != NULL)
+ audit_log_format(audit_buf, " netif=%s", dev);
+ audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr));
+ if (mask_val != 0xffffffff) {
+ u32 mask_len = 0;
+ while (mask_val > 0) {
+ mask_val <<= 1;
+ mask_len++;
+ }
+ audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len);
+ }
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_audit_addr - Audit an IPv6 address
+ * @audit_buf: audit buffer
+ * @src: true if source address, false if destination
+ * @dev: network interface
+ * @addr: IP address
+ * @mask: IP address mask
+ *
+ * Description:
+ * Write the IPv6 address and address mask, if necessary, to @audit_buf.
+ *
+ */
+void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
+ int src,
+ const char *dev,
+ const struct in6_addr *addr,
+ const struct in6_addr *mask)
+{
+ char *dir = (src ? "src" : "dst");
+
+ if (dev != NULL)
+ audit_log_format(audit_buf, " netif=%s", dev);
+ audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr));
+ if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
+ u32 mask_len = 0;
+ u32 mask_val;
+ int iter = -1;
+ while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
+ mask_len += 32;
+ mask_val = ntohl(mask->s6_addr32[iter]);
+ while (mask_val > 0) {
+ mask_val <<= 1;
+ mask_len++;
+ }
+ audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len);
+ }
+}
+#endif /* IPv6 */
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
new file mode 100644
index 00000000000..0242bead405
--- /dev/null
+++ b/net/netlabel/netlabel_addrlist.h
@@ -0,0 +1,189 @@
+/*
+ * NetLabel Network Address Lists
+ *
+ * This file contains network address list functions used to manage ordered
+ * lists of network addresses for use by the NetLabel subsystem. The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_ADDRLIST_H
+#define _NETLABEL_ADDRLIST_H
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/in6.h>
+#include <linux/audit.h>
+
+/**
+ * struct netlbl_af4list - NetLabel IPv4 address list
+ * @addr: IPv4 address
+ * @mask: IPv4 address mask
+ * @valid: valid flag
+ * @list: list structure, used internally
+ */
+struct netlbl_af4list {
+ __be32 addr;
+ __be32 mask;
+
+ u32 valid;
+ struct list_head list;
+};
+
+/**
+ * struct netlbl_af6list - NetLabel IPv6 address list
+ * @addr: IPv6 address
+ * @mask: IPv6 address mask
+ * @valid: valid flag
+ * @list: list structure, used internally
+ */
+struct netlbl_af6list {
+ struct in6_addr addr;
+ struct in6_addr mask;
+
+ u32 valid;
+ struct list_head list;
+};
+
+#define __af4list_entry(ptr) container_of(ptr, struct netlbl_af4list, list)
+
+static inline struct netlbl_af4list *__af4list_valid(struct list_head *s,
+ struct list_head *h)
+{
+ struct list_head *i = s;
+ struct netlbl_af4list *n = __af4list_entry(s);
+ while (i != h && !n->valid) {
+ i = i->next;
+ n = __af4list_entry(i);
+ }
+ return n;
+}
+
+static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
+ struct list_head *h)
+{
+ struct list_head *i = s;
+ struct netlbl_af4list *n = __af4list_entry(s);
+ while (i != h && !n->valid) {
+ i = rcu_dereference(i->next);
+ n = __af4list_entry(i);
+ }
+ return n;
+}
+
+#define netlbl_af4list_foreach(iter, head) \
+ for (iter = __af4list_valid((head)->next, head); \
+ prefetch(iter->list.next), &iter->list != (head); \
+ iter = __af4list_valid(iter->list.next, head))
+
+#define netlbl_af4list_foreach_rcu(iter, head) \
+ for (iter = __af4list_valid_rcu((head)->next, head); \
+ prefetch(iter->list.next), &iter->list != (head); \
+ iter = __af4list_valid_rcu(iter->list.next, head))
+
+#define netlbl_af4list_foreach_safe(iter, tmp, head) \
+ for (iter = __af4list_valid((head)->next, head), \
+ tmp = __af4list_valid(iter->list.next, head); \
+ &iter->list != (head); \
+ iter = tmp, tmp = __af4list_valid(iter->list.next, head))
+
+int netlbl_af4list_add(struct netlbl_af4list *entry,
+ struct list_head *head);
+struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
+ struct list_head *head);
+void netlbl_af4list_remove_entry(struct netlbl_af4list *entry);
+struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
+ struct list_head *head);
+struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
+ __be32 mask,
+ struct list_head *head);
+void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
+ int src, const char *dev,
+ __be32 addr, __be32 mask);
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
+#define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list)
+
+static inline struct netlbl_af6list *__af6list_valid(struct list_head *s,
+ struct list_head *h)
+{
+ struct list_head *i = s;
+ struct netlbl_af6list *n = __af6list_entry(s);
+ while (i != h && !n->valid) {
+ i = i->next;
+ n = __af6list_entry(i);
+ }
+ return n;
+}
+
+static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
+ struct list_head *h)
+{
+ struct list_head *i = s;
+ struct netlbl_af6list *n = __af6list_entry(s);
+ while (i != h && !n->valid) {
+ i = rcu_dereference(i->next);
+ n = __af6list_entry(i);
+ }
+ return n;
+}
+
+#define netlbl_af6list_foreach(iter, head) \
+ for (iter = __af6list_valid((head)->next, head); \
+ prefetch(iter->list.next), &iter->list != (head); \
+ iter = __af6list_valid(iter->list.next, head))
+
+#define netlbl_af6list_foreach_rcu(iter, head) \
+ for (iter = __af6list_valid_rcu((head)->next, head); \
+ prefetch(iter->list.next), &iter->list != (head); \
+ iter = __af6list_valid_rcu(iter->list.next, head))
+
+#define netlbl_af6list_foreach_safe(iter, tmp, head) \
+ for (iter = __af6list_valid((head)->next, head), \
+ tmp = __af6list_valid(iter->list.next, head); \
+ &iter->list != (head); \
+ iter = tmp, tmp = __af6list_valid(iter->list.next, head))
+
+int netlbl_af6list_add(struct netlbl_af6list *entry,
+ struct list_head *head);
+struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
+ const struct in6_addr *mask,
+ struct list_head *head);
+void netlbl_af6list_remove_entry(struct netlbl_af6list *entry);
+struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
+ struct list_head *head);
+struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
+ const struct in6_addr *mask,
+ struct list_head *head);
+void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
+ int src,
+ const char *dev,
+ const struct in6_addr *addr,
+ const struct in6_addr *mask);
+#endif /* IPV6 */
+
+#endif
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 0aec318bf0e..fff32b70efa 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -43,6 +43,7 @@
#include "netlabel_user.h"
#include "netlabel_cipso_v4.h"
#include "netlabel_mgmt.h"
+#include "netlabel_domainhash.h"
/* Argument struct for cipso_v4_doi_walk() */
struct netlbl_cipsov4_doiwalk_arg {
@@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg {
u32 seq;
};
+/* Argument struct for netlbl_domhsh_walk() */
+struct netlbl_domhsh_walk_arg {
+ struct netlbl_audit *audit_info;
+ u32 doi;
+};
+
/* NetLabel Generic NETLINK CIPSOv4 family */
static struct genl_family netlbl_cipsov4_gnl_family = {
.id = GENL_ID_GENERATE,
@@ -81,32 +88,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1
*/
/**
- * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to the DOI definition can be released
- * safely.
- *
- */
-void netlbl_cipsov4_doi_free(struct rcu_head *entry)
-{
- struct cipso_v4_doi *ptr;
-
- ptr = container_of(entry, struct cipso_v4_doi, rcu);
- switch (ptr->type) {
- case CIPSO_V4_MAP_STD:
- kfree(ptr->map.std->lvl.cipso);
- kfree(ptr->map.std->lvl.local);
- kfree(ptr->map.std->cat.cipso);
- kfree(ptr->map.std->cat.local);
- break;
- }
- kfree(ptr);
-}
-
-/**
* netlbl_cipsov4_add_common - Parse the common sections of a ADD message
* @info: the Generic NETLINK info block
* @doi_def: the CIPSO V4 DOI definition
@@ -151,9 +132,9 @@ static int netlbl_cipsov4_add_common(struct genl_info *info,
* @info: the Generic NETLINK info block
*
* Description:
- * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
- * and add it to the CIPSO V4 engine. Return zero on success and non-zero on
- * error.
+ * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD
+ * message and add it to the CIPSO V4 engine. Return zero on success and
+ * non-zero on error.
*
*/
static int netlbl_cipsov4_add_std(struct genl_info *info)
@@ -183,7 +164,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
ret_val = -ENOMEM;
goto add_std_failure;
}
- doi_def->type = CIPSO_V4_MAP_STD;
+ doi_def->type = CIPSO_V4_MAP_TRANS;
ret_val = netlbl_cipsov4_add_common(info, doi_def);
if (ret_val != 0)
@@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
add_std_failure:
if (doi_def)
- netlbl_cipsov4_doi_free(&doi_def->rcu);
+ cipso_v4_doi_free(doi_def);
return ret_val;
}
@@ -379,7 +360,44 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info)
return 0;
add_pass_failure:
- netlbl_cipsov4_doi_free(&doi_def->rcu);
+ cipso_v4_doi_free(doi_def);
+ return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD
+ * message and add it to the CIPSO V4 engine. Return zero on success and
+ * non-zero on error.
+ *
+ */
+static int netlbl_cipsov4_add_local(struct genl_info *info)
+{
+ int ret_val;
+ struct cipso_v4_doi *doi_def = NULL;
+
+ if (!info->attrs[NLBL_CIPSOV4_A_TAGLST])
+ return -EINVAL;
+
+ doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+ if (doi_def == NULL)
+ return -ENOMEM;
+ doi_def->type = CIPSO_V4_MAP_LOCAL;
+
+ ret_val = netlbl_cipsov4_add_common(info, doi_def);
+ if (ret_val != 0)
+ goto add_local_failure;
+
+ ret_val = cipso_v4_doi_add(doi_def);
+ if (ret_val != 0)
+ goto add_local_failure;
+ return 0;
+
+add_local_failure:
+ cipso_v4_doi_free(doi_def);
return ret_val;
}
@@ -412,14 +430,18 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
switch (type) {
- case CIPSO_V4_MAP_STD:
- type_str = "std";
+ case CIPSO_V4_MAP_TRANS:
+ type_str = "trans";
ret_val = netlbl_cipsov4_add_std(info);
break;
case CIPSO_V4_MAP_PASS:
type_str = "pass";
ret_val = netlbl_cipsov4_add_pass(info);
break;
+ case CIPSO_V4_MAP_LOCAL:
+ type_str = "local";
+ ret_val = netlbl_cipsov4_add_local(info);
+ break;
}
if (ret_val == 0)
atomic_inc(&netlabel_mgmt_protocount);
@@ -491,7 +513,7 @@ list_start:
doi_def = cipso_v4_doi_getdef(doi);
if (doi_def == NULL) {
ret_val = -EINVAL;
- goto list_failure;
+ goto list_failure_lock;
}
ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type);
@@ -516,7 +538,7 @@ list_start:
nla_nest_end(ans_skb, nla_a);
switch (doi_def->type) {
- case CIPSO_V4_MAP_STD:
+ case CIPSO_V4_MAP_TRANS:
nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST);
if (nla_a == NULL) {
ret_val = -ENOMEM;
@@ -655,7 +677,7 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct netlbl_cipsov4_doiwalk_arg cb_arg;
- int doi_skip = cb->args[0];
+ u32 doi_skip = cb->args[0];
cb_arg.nl_cb = cb;
cb_arg.skb = skb;
@@ -668,6 +690,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
}
/**
+ * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE
+ * @entry: LSM domain mapping entry
+ * @arg: the netlbl_domhsh_walk_arg structure
+ *
+ * Description:
+ * This function is intended for use by netlbl_cipsov4_remove() as the callback
+ * for the netlbl_domhsh_walk() function; it removes LSM domain map entries
+ * which are associated with the CIPSO DOI specified in @arg. Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg)
+{
+ struct netlbl_domhsh_walk_arg *cb_arg = arg;
+
+ if (entry->type == NETLBL_NLTYPE_CIPSOV4 &&
+ entry->type_def.cipsov4->doi == cb_arg->doi)
+ return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info);
+
+ return 0;
+}
+
+/**
* netlbl_cipsov4_remove - Handle a REMOVE message
* @skb: the NETLINK buffer
* @info: the Generic NETLINK info block
@@ -681,8 +726,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
{
int ret_val = -EINVAL;
u32 doi = 0;
+ struct netlbl_domhsh_walk_arg cb_arg;
struct audit_buffer *audit_buf;
struct netlbl_audit audit_info;
+ u32 skip_bkt = 0;
+ u32 skip_chain = 0;
if (!info->attrs[NLBL_CIPSOV4_A_DOI])
return -EINVAL;
@@ -690,11 +738,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
netlbl_netlink_auditinfo(skb, &audit_info);
- ret_val = cipso_v4_doi_remove(doi,
- &audit_info,
- netlbl_cipsov4_doi_free);
- if (ret_val == 0)
- atomic_dec(&netlabel_mgmt_protocount);
+ cb_arg.doi = doi;
+ cb_arg.audit_info = &audit_info;
+ ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
+ netlbl_cipsov4_remove_cb, &cb_arg);
+ if (ret_val == 0 || ret_val == -ENOENT) {
+ ret_val = cipso_v4_doi_remove(doi, &audit_info);
+ if (ret_val == 0)
+ atomic_dec(&netlabel_mgmt_protocount);
+ }
audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
&audit_info);
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 220cb9d06b4..c8a4079261f 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -45,12 +45,13 @@
* NLBL_CIPSOV4_A_MTYPE
* NLBL_CIPSOV4_A_TAGLST
*
- * If using CIPSO_V4_MAP_STD the following attributes are required:
+ * If using CIPSO_V4_MAP_TRANS the following attributes are required:
*
* NLBL_CIPSOV4_A_MLSLVLLST
* NLBL_CIPSOV4_A_MLSCATLST
*
- * If using CIPSO_V4_MAP_PASS no additional attributes are required.
+ * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes
+ * are required.
*
* o REMOVE:
* Sent by an application to remove a specific DOI mapping table from the
@@ -76,12 +77,13 @@
* NLBL_CIPSOV4_A_MTYPE
* NLBL_CIPSOV4_A_TAGLST
*
- * If using CIPSO_V4_MAP_STD the following attributes are required:
+ * If using CIPSO_V4_MAP_TRANS the following attributes are required:
*
* NLBL_CIPSOV4_A_MLSLVLLST
* NLBL_CIPSOV4_A_MLSCATLST
*
- * If using CIPSO_V4_MAP_PASS no additional attributes are required.
+ * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes
+ * are required.
*
* o LISTALL:
* This message is sent by an application to list the valid DOIs on the
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 643c032a3a5..5fadf10e5dd 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -11,7 +11,7 @@
*/
/*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -40,6 +40,7 @@
#include <asm/bug.h>
#include "netlabel_mgmt.h"
+#include "netlabel_addrlist.h"
#include "netlabel_domainhash.h"
#include "netlabel_user.h"
@@ -72,8 +73,28 @@ static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
static void netlbl_domhsh_free_entry(struct rcu_head *entry)
{
struct netlbl_dom_map *ptr;
+ struct netlbl_af4list *iter4;
+ struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct netlbl_af6list *iter6;
+ struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
ptr = container_of(entry, struct netlbl_dom_map, rcu);
+ if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) {
+ netlbl_af4list_foreach_safe(iter4, tmp4,
+ &ptr->type_def.addrsel->list4) {
+ netlbl_af4list_remove_entry(iter4);
+ kfree(netlbl_domhsh_addr4_entry(iter4));
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_safe(iter6, tmp6,
+ &ptr->type_def.addrsel->list6) {
+ netlbl_af6list_remove_entry(iter6);
+ kfree(netlbl_domhsh_addr6_entry(iter6));
+ }
+#endif /* IPv6 */
+ }
kfree(ptr->domain);
kfree(ptr);
}
@@ -115,13 +136,13 @@ static u32 netlbl_domhsh_hash(const char *key)
static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain)
{
u32 bkt;
+ struct list_head *bkt_list;
struct netlbl_dom_map *iter;
if (domain != NULL) {
bkt = netlbl_domhsh_hash(domain);
- list_for_each_entry_rcu(iter,
- &rcu_dereference(netlbl_domhsh)->tbl[bkt],
- list)
+ bkt_list = &rcu_dereference(netlbl_domhsh)->tbl[bkt];
+ list_for_each_entry_rcu(iter, bkt_list, list)
if (iter->valid && strcmp(iter->domain, domain) == 0)
return iter;
}
@@ -156,6 +177,69 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain)
return entry;
}
+/**
+ * netlbl_domhsh_audit_add - Generate an audit entry for an add event
+ * @entry: the entry being added
+ * @addr4: the IPv4 address information
+ * @addr6: the IPv6 address information
+ * @result: the result code
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Generate an audit record for adding a new NetLabel/LSM mapping entry with
+ * the given information. Caller is responsibile for holding the necessary
+ * locks.
+ *
+ */
+static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
+ struct netlbl_af4list *addr4,
+ struct netlbl_af6list *addr6,
+ int result,
+ struct netlbl_audit *audit_info)
+{
+ struct audit_buffer *audit_buf;
+ struct cipso_v4_doi *cipsov4 = NULL;
+ u32 type;
+
+ audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
+ if (audit_buf != NULL) {
+ audit_log_format(audit_buf, " nlbl_domain=%s",
+ entry->domain ? entry->domain : "(default)");
+ if (addr4 != NULL) {
+ struct netlbl_domaddr4_map *map4;
+ map4 = netlbl_domhsh_addr4_entry(addr4);
+ type = map4->type;
+ cipsov4 = map4->type_def.cipsov4;
+ netlbl_af4list_audit_addr(audit_buf, 0, NULL,
+ addr4->addr, addr4->mask);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ } else if (addr6 != NULL) {
+ struct netlbl_domaddr6_map *map6;
+ map6 = netlbl_domhsh_addr6_entry(addr6);
+ type = map6->type;
+ netlbl_af6list_audit_addr(audit_buf, 0, NULL,
+ &addr6->addr, &addr6->mask);
+#endif /* IPv6 */
+ } else {
+ type = entry->type;
+ cipsov4 = entry->type_def.cipsov4;
+ }
+ switch (type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ audit_log_format(audit_buf, " nlbl_protocol=unlbl");
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ BUG_ON(cipsov4 == NULL);
+ audit_log_format(audit_buf,
+ " nlbl_protocol=cipsov4 cipso_doi=%u",
+ cipsov4->doi);
+ break;
+ }
+ audit_log_format(audit_buf, " res=%u", result == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+ }
+}
+
/*
* Domain Hash Table Functions
*/
@@ -213,74 +297,106 @@ int __init netlbl_domhsh_init(u32 size)
int netlbl_domhsh_add(struct netlbl_dom_map *entry,
struct netlbl_audit *audit_info)
{
- int ret_val;
- u32 bkt;
- struct audit_buffer *audit_buf;
-
- switch (entry->type) {
- case NETLBL_NLTYPE_UNLABELED:
- ret_val = 0;
- break;
- case NETLBL_NLTYPE_CIPSOV4:
- ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
- entry->domain);
- break;
- default:
- return -EINVAL;
- }
- if (ret_val != 0)
- return ret_val;
-
- entry->valid = 1;
- INIT_RCU_HEAD(&entry->rcu);
+ int ret_val = 0;
+ struct netlbl_dom_map *entry_old;
+ struct netlbl_af4list *iter4;
+ struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct netlbl_af6list *iter6;
+ struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
rcu_read_lock();
+
spin_lock(&netlbl_domhsh_lock);
- if (entry->domain != NULL) {
- bkt = netlbl_domhsh_hash(entry->domain);
- if (netlbl_domhsh_search(entry->domain) == NULL)
+ if (entry->domain != NULL)
+ entry_old = netlbl_domhsh_search(entry->domain);
+ else
+ entry_old = netlbl_domhsh_search_def(entry->domain);
+ if (entry_old == NULL) {
+ entry->valid = 1;
+ INIT_RCU_HEAD(&entry->rcu);
+
+ if (entry->domain != NULL) {
+ u32 bkt = netlbl_domhsh_hash(entry->domain);
list_add_tail_rcu(&entry->list,
&rcu_dereference(netlbl_domhsh)->tbl[bkt]);
- else
- ret_val = -EEXIST;
- } else {
- INIT_LIST_HEAD(&entry->list);
- if (rcu_dereference(netlbl_domhsh_def) == NULL)
+ } else {
+ INIT_LIST_HEAD(&entry->list);
rcu_assign_pointer(netlbl_domhsh_def, entry);
- else
- ret_val = -EEXIST;
- }
- spin_unlock(&netlbl_domhsh_lock);
- audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
- if (audit_buf != NULL) {
- audit_log_format(audit_buf,
- " nlbl_domain=%s",
- entry->domain ? entry->domain : "(default)");
- switch (entry->type) {
- case NETLBL_NLTYPE_UNLABELED:
- audit_log_format(audit_buf, " nlbl_protocol=unlbl");
- break;
- case NETLBL_NLTYPE_CIPSOV4:
- audit_log_format(audit_buf,
- " nlbl_protocol=cipsov4 cipso_doi=%u",
- entry->type_def.cipsov4->doi);
- break;
}
- audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
- audit_log_end(audit_buf);
- }
- rcu_read_unlock();
- if (ret_val != 0) {
- switch (entry->type) {
- case NETLBL_NLTYPE_CIPSOV4:
- if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
- entry->domain) != 0)
- BUG();
- break;
+ if (entry->type == NETLBL_NLTYPE_ADDRSELECT) {
+ netlbl_af4list_foreach_rcu(iter4,
+ &entry->type_def.addrsel->list4)
+ netlbl_domhsh_audit_add(entry, iter4, NULL,
+ ret_val, audit_info);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_rcu(iter6,
+ &entry->type_def.addrsel->list6)
+ netlbl_domhsh_audit_add(entry, NULL, iter6,
+ ret_val, audit_info);
+#endif /* IPv6 */
+ } else
+ netlbl_domhsh_audit_add(entry, NULL, NULL,
+ ret_val, audit_info);
+ } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT &&
+ entry->type == NETLBL_NLTYPE_ADDRSELECT) {
+ struct list_head *old_list4;
+ struct list_head *old_list6;
+
+ old_list4 = &entry_old->type_def.addrsel->list4;
+ old_list6 = &entry_old->type_def.addrsel->list6;
+
+ /* we only allow the addition of address selectors if all of
+ * the selectors do not exist in the existing domain map */
+ netlbl_af4list_foreach_rcu(iter4,
+ &entry->type_def.addrsel->list4)
+ if (netlbl_af4list_search_exact(iter4->addr,
+ iter4->mask,
+ old_list4)) {
+ ret_val = -EEXIST;
+ goto add_return;
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_rcu(iter6,
+ &entry->type_def.addrsel->list6)
+ if (netlbl_af6list_search_exact(&iter6->addr,
+ &iter6->mask,
+ old_list6)) {
+ ret_val = -EEXIST;
+ goto add_return;
+ }
+#endif /* IPv6 */
+
+ netlbl_af4list_foreach_safe(iter4, tmp4,
+ &entry->type_def.addrsel->list4) {
+ netlbl_af4list_remove_entry(iter4);
+ iter4->valid = 1;
+ ret_val = netlbl_af4list_add(iter4, old_list4);
+ netlbl_domhsh_audit_add(entry_old, iter4, NULL,
+ ret_val, audit_info);
+ if (ret_val != 0)
+ goto add_return;
}
- }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_safe(iter6, tmp6,
+ &entry->type_def.addrsel->list6) {
+ netlbl_af6list_remove_entry(iter6);
+ iter6->valid = 1;
+ ret_val = netlbl_af6list_add(iter6, old_list6);
+ netlbl_domhsh_audit_add(entry_old, NULL, iter6,
+ ret_val, audit_info);
+ if (ret_val != 0)
+ goto add_return;
+ }
+#endif /* IPv6 */
+ } else
+ ret_val = -EINVAL;
+add_return:
+ spin_unlock(&netlbl_domhsh_lock);
+ rcu_read_unlock();
return ret_val;
}
@@ -302,35 +418,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
}
/**
- * netlbl_domhsh_remove - Removes an entry from the domain hash table
- * @domain: the domain to remove
+ * netlbl_domhsh_remove_entry - Removes a given entry from the domain table
+ * @entry: the entry to remove
* @audit_info: NetLabel audit information
*
* Description:
* Removes an entry from the domain hash table and handles any updates to the
- * lower level protocol handler (i.e. CIPSO). Returns zero on success,
- * negative on failure.
+ * lower level protocol handler (i.e. CIPSO). Caller is responsible for
+ * ensuring that the RCU read lock is held. Returns zero on success, negative
+ * on failure.
*
*/
-int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
+ struct netlbl_audit *audit_info)
{
- int ret_val = -ENOENT;
- struct netlbl_dom_map *entry;
+ int ret_val = 0;
struct audit_buffer *audit_buf;
- rcu_read_lock();
- if (domain)
- entry = netlbl_domhsh_search(domain);
- else
- entry = netlbl_domhsh_search_def(domain);
if (entry == NULL)
- goto remove_return;
- switch (entry->type) {
- case NETLBL_NLTYPE_CIPSOV4:
- cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
- entry->domain);
- break;
- }
+ return -ENOENT;
+
spin_lock(&netlbl_domhsh_lock);
if (entry->valid) {
entry->valid = 0;
@@ -338,8 +445,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
list_del_rcu(&entry->list);
else
rcu_assign_pointer(netlbl_domhsh_def, NULL);
- ret_val = 0;
- }
+ } else
+ ret_val = -ENOENT;
spin_unlock(&netlbl_domhsh_lock);
audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
@@ -351,10 +458,54 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
audit_log_end(audit_buf);
}
-remove_return:
- rcu_read_unlock();
- if (ret_val == 0)
+ if (ret_val == 0) {
+ struct netlbl_af4list *iter4;
+ struct netlbl_domaddr4_map *map4;
+
+ switch (entry->type) {
+ case NETLBL_NLTYPE_ADDRSELECT:
+ netlbl_af4list_foreach_rcu(iter4,
+ &entry->type_def.addrsel->list4) {
+ map4 = netlbl_domhsh_addr4_entry(iter4);
+ cipso_v4_doi_putdef(map4->type_def.cipsov4);
+ }
+ /* no need to check the IPv6 list since we currently
+ * support only unlabeled protocols for IPv6 */
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ cipso_v4_doi_putdef(entry->type_def.cipsov4);
+ break;
+ }
call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+ }
+
+ return ret_val;
+}
+
+/**
+ * netlbl_domhsh_remove - Removes an entry from the domain hash table
+ * @domain: the domain to remove
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes an entry from the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO). Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+{
+ int ret_val;
+ struct netlbl_dom_map *entry;
+
+ rcu_read_lock();
+ if (domain)
+ entry = netlbl_domhsh_search(domain);
+ else
+ entry = netlbl_domhsh_search_def(domain);
+ ret_val = netlbl_domhsh_remove_entry(entry, audit_info);
+ rcu_read_unlock();
+
return ret_val;
}
@@ -389,6 +540,70 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
}
/**
+ * netlbl_domhsh_getentry_af4 - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ * @addr: the IP address to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain
+ * and @addr, return a pointer to a copy of the entry or NULL. The caller is
+ * responsible for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
+ __be32 addr)
+{
+ struct netlbl_dom_map *dom_iter;
+ struct netlbl_af4list *addr_iter;
+
+ dom_iter = netlbl_domhsh_search_def(domain);
+ if (dom_iter == NULL)
+ return NULL;
+ if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
+ return NULL;
+
+ addr_iter = netlbl_af4list_search(addr,
+ &dom_iter->type_def.addrsel->list4);
+ if (addr_iter == NULL)
+ return NULL;
+
+ return netlbl_domhsh_addr4_entry(addr_iter);
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ * @addr: the IP address to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain
+ * and @addr, return a pointer to a copy of the entry or NULL. The caller is
+ * responsible for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
+ const struct in6_addr *addr)
+{
+ struct netlbl_dom_map *dom_iter;
+ struct netlbl_af6list *addr_iter;
+
+ dom_iter = netlbl_domhsh_search_def(domain);
+ if (dom_iter == NULL)
+ return NULL;
+ if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
+ return NULL;
+
+ addr_iter = netlbl_af6list_search(addr,
+ &dom_iter->type_def.addrsel->list6);
+ if (addr_iter == NULL)
+ return NULL;
+
+ return netlbl_domhsh_addr6_entry(addr_iter);
+}
+#endif /* IPv6 */
+
+/**
* netlbl_domhsh_walk - Iterate through the domain mapping hash table
* @skip_bkt: the number of buckets to skip at the start
* @skip_chain: the number of entries to skip in the first iterated bucket
@@ -410,6 +625,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
{
int ret_val = -ENOENT;
u32 iter_bkt;
+ struct list_head *iter_list;
struct netlbl_dom_map *iter_entry;
u32 chain_cnt = 0;
@@ -417,9 +633,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
for (iter_bkt = *skip_bkt;
iter_bkt < rcu_dereference(netlbl_domhsh)->size;
iter_bkt++, chain_cnt = 0) {
- list_for_each_entry_rcu(iter_entry,
- &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt],
- list)
+ iter_list = &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt];
+ list_for_each_entry_rcu(iter_entry, iter_list, list)
if (iter_entry->valid) {
if (chain_cnt++ < *skip_chain)
continue;
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 8220990ceb9..bfcb6763a1a 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -11,7 +11,7 @@
*/
/*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,16 +36,43 @@
#include <linux/rcupdate.h>
#include <linux/list.h>
+#include "netlabel_addrlist.h"
+
/* Domain hash table size */
/* XXX - currently this number is an uneducated guess */
#define NETLBL_DOMHSH_BITSIZE 7
-/* Domain mapping definition struct */
+/* Domain mapping definition structures */
+#define netlbl_domhsh_addr4_entry(iter) \
+ container_of(iter, struct netlbl_domaddr4_map, list)
+struct netlbl_domaddr4_map {
+ u32 type;
+ union {
+ struct cipso_v4_doi *cipsov4;
+ } type_def;
+
+ struct netlbl_af4list list;
+};
+#define netlbl_domhsh_addr6_entry(iter) \
+ container_of(iter, struct netlbl_domaddr6_map, list)
+struct netlbl_domaddr6_map {
+ u32 type;
+
+ /* NOTE: no 'type_def' union needed at present since we don't currently
+ * support any IPv6 labeling protocols */
+
+ struct netlbl_af6list list;
+};
+struct netlbl_domaddr_map {
+ struct list_head list4;
+ struct list_head list6;
+};
struct netlbl_dom_map {
char *domain;
u32 type;
union {
struct cipso_v4_doi *cipsov4;
+ struct netlbl_domaddr_map *addrsel;
} type_def;
u32 valid;
@@ -61,12 +88,21 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
struct netlbl_audit *audit_info);
int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
struct netlbl_audit *audit_info);
+int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
+ struct netlbl_audit *audit_info);
int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
+struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
+ __be32 addr);
int netlbl_domhsh_walk(u32 *skip_bkt,
u32 *skip_chain,
int (*callback) (struct netlbl_dom_map *entry, void *arg),
void *cb_arg);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
+ const struct in6_addr *addr);
+#endif /* IPv6 */
+
#endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 39793a1a93a..b32eceb3ab0 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -10,7 +10,7 @@
*/
/*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -82,7 +82,7 @@ int netlbl_cfg_unlbl_add_map(const char *domain,
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL)
- goto cfg_unlbl_add_map_failure;
+ return -ENOMEM;
if (domain != NULL) {
entry->domain = kstrdup(domain, GFP_ATOMIC);
if (entry->domain == NULL)
@@ -104,49 +104,6 @@ cfg_unlbl_add_map_failure:
}
/**
- * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition
- * @doi_def: the DOI definition
- * @audit_info: NetLabel audit information
- *
- * Description:
- * Add a new CIPSOv4 DOI definition to the NetLabel subsystem. Returns zero on
- * success, negative values on failure.
- *
- */
-int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
- struct netlbl_audit *audit_info)
-{
- int ret_val;
- const char *type_str;
- struct audit_buffer *audit_buf;
-
- ret_val = cipso_v4_doi_add(doi_def);
-
- audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
- audit_info);
- if (audit_buf != NULL) {
- switch (doi_def->type) {
- case CIPSO_V4_MAP_STD:
- type_str = "std";
- break;
- case CIPSO_V4_MAP_PASS:
- type_str = "pass";
- break;
- default:
- type_str = "(unknown)";
- }
- audit_log_format(audit_buf,
- " cipso_doi=%u cipso_type=%s res=%u",
- doi_def->doi,
- type_str,
- ret_val == 0 ? 1 : 0);
- audit_log_end(audit_buf);
- }
-
- return ret_val;
-}
-
-/**
* netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping
* @doi_def: the DOI definition
* @domain: the domain mapping to add
@@ -164,58 +121,71 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
struct netlbl_audit *audit_info)
{
int ret_val = -ENOMEM;
+ u32 doi;
+ u32 doi_type;
struct netlbl_dom_map *entry;
+ const char *type_str;
+ struct audit_buffer *audit_buf;
+
+ doi = doi_def->doi;
+ doi_type = doi_def->type;
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL)
- goto cfg_cipsov4_add_map_failure;
+ return -ENOMEM;
if (domain != NULL) {
entry->domain = kstrdup(domain, GFP_ATOMIC);
if (entry->domain == NULL)
goto cfg_cipsov4_add_map_failure;
}
- entry->type = NETLBL_NLTYPE_CIPSOV4;
- entry->type_def.cipsov4 = doi_def;
-
- /* Grab a RCU read lock here so nothing happens to the doi_def variable
- * between adding it to the CIPSOv4 protocol engine and adding a
- * domain mapping for it. */
- rcu_read_lock();
- ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info);
+ ret_val = cipso_v4_doi_add(doi_def);
if (ret_val != 0)
- goto cfg_cipsov4_add_map_failure_unlock;
+ goto cfg_cipsov4_add_map_failure_remove_doi;
+ entry->type = NETLBL_NLTYPE_CIPSOV4;
+ entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi);
+ if (entry->type_def.cipsov4 == NULL) {
+ ret_val = -ENOENT;
+ goto cfg_cipsov4_add_map_failure_remove_doi;
+ }
ret_val = netlbl_domhsh_add(entry, audit_info);
if (ret_val != 0)
- goto cfg_cipsov4_add_map_failure_remove_doi;
- rcu_read_unlock();
+ goto cfg_cipsov4_add_map_failure_release_doi;
- return 0;
+cfg_cipsov4_add_map_return:
+ audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
+ audit_info);
+ if (audit_buf != NULL) {
+ switch (doi_type) {
+ case CIPSO_V4_MAP_TRANS:
+ type_str = "trans";
+ break;
+ case CIPSO_V4_MAP_PASS:
+ type_str = "pass";
+ break;
+ case CIPSO_V4_MAP_LOCAL:
+ type_str = "local";
+ break;
+ default:
+ type_str = "(unknown)";
+ }
+ audit_log_format(audit_buf,
+ " cipso_doi=%u cipso_type=%s res=%u",
+ doi, type_str, ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+ }
+ return ret_val;
+
+cfg_cipsov4_add_map_failure_release_doi:
+ cipso_v4_doi_putdef(doi_def);
cfg_cipsov4_add_map_failure_remove_doi:
- cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free);
-cfg_cipsov4_add_map_failure_unlock:
- rcu_read_unlock();
+ cipso_v4_doi_remove(doi, audit_info);
cfg_cipsov4_add_map_failure:
if (entry != NULL)
kfree(entry->domain);
kfree(entry);
- return ret_val;
-}
-
-/**
- * netlbl_cfg_cipsov4_del - Removean existing CIPSOv4 DOI definition
- * @doi: the CIPSO DOI value
- * @audit_info: NetLabel audit information
- *
- * Description:
- * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem.
- * Returns zero on success, negative values on failure.
- *
- */
-int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info)
-{
- return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free);
+ goto cfg_cipsov4_add_map_return;
}
/*
@@ -452,7 +422,9 @@ int netlbl_enabled(void)
* Attach the correct label to the given socket using the security attributes
* specified in @secattr. This function requires exclusive access to @sk,
* which means it either needs to be in the process of being created or locked.
- * Returns zero on success, negative values on failure.
+ * Returns zero on success, -EDESTADDRREQ if the domain is configured to use
+ * network address selectors (can't blindly label the socket), and negative
+ * values on all other failures.
*
*/
int netlbl_sock_setattr(struct sock *sk,
@@ -466,6 +438,9 @@ int netlbl_sock_setattr(struct sock *sk,
if (dom_entry == NULL)
goto socket_setattr_return;
switch (dom_entry->type) {
+ case NETLBL_NLTYPE_ADDRSELECT:
+ ret_val = -EDESTADDRREQ;
+ break;
case NETLBL_NLTYPE_CIPSOV4:
ret_val = cipso_v4_sock_setattr(sk,
dom_entry->type_def.cipsov4,
@@ -484,6 +459,20 @@ socket_setattr_return:
}
/**
+ * netlbl_sock_delattr - Delete all the NetLabel labels on a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Remove all the NetLabel labeling from @sk. The caller is responsible for
+ * ensuring that @sk is locked.
+ *
+ */
+void netlbl_sock_delattr(struct sock *sk)
+{
+ cipso_v4_sock_delattr(sk);
+}
+
+/**
* netlbl_sock_getattr - Determine the security attributes of a sock
* @sk: the sock
* @secattr: the security attributes
@@ -501,6 +490,128 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
}
/**
+ * netlbl_conn_setattr - Label a connected socket using the correct protocol
+ * @sk: the socket to label
+ * @addr: the destination address
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given connected socket using the security
+ * attributes specified in @secattr. The caller is responsible for ensuring
+ * that @sk is locked. Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_conn_setattr(struct sock *sk,
+ struct sockaddr *addr,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ struct sockaddr_in *addr4;
+ struct netlbl_domaddr4_map *af4_entry;
+
+ rcu_read_lock();
+ switch (addr->sa_family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
+ addr4->sin_addr.s_addr);
+ if (af4_entry == NULL) {
+ ret_val = -ENOENT;
+ goto conn_setattr_return;
+ }
+ switch (af4_entry->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = cipso_v4_sock_setattr(sk,
+ af4_entry->type_def.cipsov4,
+ secattr);
+ break;
+ case NETLBL_NLTYPE_UNLABELED:
+ /* just delete the protocols we support for right now
+ * but we could remove other protocols if needed */
+ cipso_v4_sock_delattr(sk);
+ ret_val = 0;
+ break;
+ default:
+ ret_val = -ENOENT;
+ }
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6:
+ /* since we don't support any IPv6 labeling protocols right
+ * now we can optimize everything away until we do */
+ ret_val = 0;
+ break;
+#endif /* IPv6 */
+ default:
+ ret_val = 0;
+ }
+
+conn_setattr_return:
+ rcu_read_unlock();
+ return ret_val;
+}
+
+/**
+ * netlbl_skbuff_setattr - Label a packet using the correct protocol
+ * @skb: the packet
+ * @family: protocol family
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given packet using the security attributes
+ * specified in @secattr. Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_skbuff_setattr(struct sk_buff *skb,
+ u16 family,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ struct iphdr *hdr4;
+ struct netlbl_domaddr4_map *af4_entry;
+
+ rcu_read_lock();
+ switch (family) {
+ case AF_INET:
+ hdr4 = ip_hdr(skb);
+ af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
+ hdr4->daddr);
+ if (af4_entry == NULL) {
+ ret_val = -ENOENT;
+ goto skbuff_setattr_return;
+ }
+ switch (af4_entry->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = cipso_v4_skbuff_setattr(skb,
+ af4_entry->type_def.cipsov4,
+ secattr);
+ break;
+ case NETLBL_NLTYPE_UNLABELED:
+ /* just delete the protocols we support for right now
+ * but we could remove other protocols if needed */
+ ret_val = cipso_v4_skbuff_delattr(skb);
+ break;
+ default:
+ ret_val = -ENOENT;
+ }
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6:
+ /* since we don't support any IPv6 labeling protocols right
+ * now we can optimize everything away until we do */
+ ret_val = 0;
+ break;
+#endif /* IPv6 */
+ default:
+ ret_val = 0;
+ }
+
+skbuff_setattr_return:
+ rcu_read_unlock();
+ return ret_val;
+}
+
+/**
* netlbl_skbuff_getattr - Determine the security attributes of a packet
* @skb: the packet
* @family: protocol family
@@ -528,6 +639,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
* netlbl_skbuff_err - Handle a LSM error on a sk_buff
* @skb: the packet
* @error: the error code
+ * @gateway: true if host is acting as a gateway, false otherwise
*
* Description:
* Deal with a LSM problem when handling the packet in @skb, typically this is
@@ -535,10 +647,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
* according to the packet's labeling protocol.
*
*/
-void netlbl_skbuff_err(struct sk_buff *skb, int error)
+void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway)
{
if (CIPSO_V4_OPTEXIST(skb))
- cipso_v4_error(skb, error, 0);
+ cipso_v4_error(skb, error, gateway);
}
/**
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 44be5d5261f..ee769ecaa13 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -10,7 +10,7 @@
*/
/*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -32,9 +32,13 @@
#include <linux/socket.h>
#include <linux/string.h>
#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/in6.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/genetlink.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
#include <net/netlabel.h>
#include <net/cipso_ipv4.h>
#include <asm/atomic.h>
@@ -71,86 +75,337 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
};
/*
- * NetLabel Command Handlers
+ * Helper Functions
*/
/**
* netlbl_mgmt_add - Handle an ADD message
- * @skb: the NETLINK buffer
* @info: the Generic NETLINK info block
+ * @audit_info: NetLabel audit information
*
* Description:
- * Process a user generated ADD message and add the domains from the message
- * to the hash table. See netlabel.h for a description of the message format.
- * Returns zero on success, negative values on failure.
+ * Helper function for the ADD and ADDDEF messages to add the domain mappings
+ * from the message to the hash table. See netlabel.h for a description of the
+ * message format. Returns zero on success, negative values on failure.
*
*/
-static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+static int netlbl_mgmt_add_common(struct genl_info *info,
+ struct netlbl_audit *audit_info)
{
int ret_val = -EINVAL;
struct netlbl_dom_map *entry = NULL;
- size_t tmp_size;
+ struct netlbl_domaddr_map *addrmap = NULL;
+ struct cipso_v4_doi *cipsov4 = NULL;
u32 tmp_val;
- struct netlbl_audit audit_info;
-
- if (!info->attrs[NLBL_MGMT_A_DOMAIN] ||
- !info->attrs[NLBL_MGMT_A_PROTOCOL])
- goto add_failure;
-
- netlbl_netlink_auditinfo(skb, &audit_info);
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (entry == NULL) {
ret_val = -ENOMEM;
goto add_failure;
}
- tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
- entry->domain = kmalloc(tmp_size, GFP_KERNEL);
- if (entry->domain == NULL) {
- ret_val = -ENOMEM;
- goto add_failure;
- }
entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
- nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
+ if (info->attrs[NLBL_MGMT_A_DOMAIN]) {
+ size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
+ entry->domain = kmalloc(tmp_size, GFP_KERNEL);
+ if (entry->domain == NULL) {
+ ret_val = -ENOMEM;
+ goto add_failure;
+ }
+ nla_strlcpy(entry->domain,
+ info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
+ }
+
+ /* NOTE: internally we allow/use a entry->type value of
+ * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users
+ * to pass that as a protocol value because we need to know the
+ * "real" protocol */
switch (entry->type) {
case NETLBL_NLTYPE_UNLABELED:
- ret_val = netlbl_domhsh_add(entry, &audit_info);
break;
case NETLBL_NLTYPE_CIPSOV4:
if (!info->attrs[NLBL_MGMT_A_CV4DOI])
goto add_failure;
tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
- /* We should be holding a rcu_read_lock() here while we hold
- * the result but since the entry will always be deleted when
- * the CIPSO DOI is deleted we aren't going to keep the
- * lock. */
- rcu_read_lock();
- entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
- if (entry->type_def.cipsov4 == NULL) {
- rcu_read_unlock();
+ cipsov4 = cipso_v4_doi_getdef(tmp_val);
+ if (cipsov4 == NULL)
goto add_failure;
- }
- ret_val = netlbl_domhsh_add(entry, &audit_info);
- rcu_read_unlock();
+ entry->type_def.cipsov4 = cipsov4;
break;
default:
goto add_failure;
}
+
+ if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) {
+ struct in_addr *addr;
+ struct in_addr *mask;
+ struct netlbl_domaddr4_map *map;
+
+ addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL);
+ if (addrmap == NULL) {
+ ret_val = -ENOMEM;
+ goto add_failure;
+ }
+ INIT_LIST_HEAD(&addrmap->list4);
+ INIT_LIST_HEAD(&addrmap->list6);
+
+ if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) !=
+ sizeof(struct in_addr)) {
+ ret_val = -EINVAL;
+ goto add_failure;
+ }
+ if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) !=
+ sizeof(struct in_addr)) {
+ ret_val = -EINVAL;
+ goto add_failure;
+ }
+ addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]);
+ mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]);
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (map == NULL) {
+ ret_val = -ENOMEM;
+ goto add_failure;
+ }
+ map->list.addr = addr->s_addr & mask->s_addr;
+ map->list.mask = mask->s_addr;
+ map->list.valid = 1;
+ map->type = entry->type;
+ if (cipsov4)
+ map->type_def.cipsov4 = cipsov4;
+
+ ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
+ if (ret_val != 0) {
+ kfree(map);
+ goto add_failure;
+ }
+
+ entry->type = NETLBL_NLTYPE_ADDRSELECT;
+ entry->type_def.addrsel = addrmap;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
+ struct in6_addr *addr;
+ struct in6_addr *mask;
+ struct netlbl_domaddr6_map *map;
+
+ addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL);
+ if (addrmap == NULL) {
+ ret_val = -ENOMEM;
+ goto add_failure;
+ }
+ INIT_LIST_HEAD(&addrmap->list4);
+ INIT_LIST_HEAD(&addrmap->list6);
+
+ if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) !=
+ sizeof(struct in6_addr)) {
+ ret_val = -EINVAL;
+ goto add_failure;
+ }
+ if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) !=
+ sizeof(struct in6_addr)) {
+ ret_val = -EINVAL;
+ goto add_failure;
+ }
+ addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]);
+ mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]);
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (map == NULL) {
+ ret_val = -ENOMEM;
+ goto add_failure;
+ }
+ ipv6_addr_copy(&map->list.addr, addr);
+ map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
+ map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
+ map->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
+ map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
+ ipv6_addr_copy(&map->list.mask, mask);
+ map->list.valid = 1;
+ map->type = entry->type;
+
+ ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
+ if (ret_val != 0) {
+ kfree(map);
+ goto add_failure;
+ }
+
+ entry->type = NETLBL_NLTYPE_ADDRSELECT;
+ entry->type_def.addrsel = addrmap;
+#endif /* IPv6 */
+ }
+
+ ret_val = netlbl_domhsh_add(entry, audit_info);
if (ret_val != 0)
goto add_failure;
return 0;
add_failure:
+ if (cipsov4)
+ cipso_v4_doi_putdef(cipsov4);
if (entry)
kfree(entry->domain);
+ kfree(addrmap);
kfree(entry);
return ret_val;
}
/**
+ * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry
+ * @skb: the NETLINK buffer
+ * @entry: the map entry
+ *
+ * Description:
+ * This function is a helper function used by the LISTALL and LISTDEF command
+ * handlers. The caller is responsibile for ensuring that the RCU read lock
+ * is held. Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_listentry(struct sk_buff *skb,
+ struct netlbl_dom_map *entry)
+{
+ int ret_val;
+ struct nlattr *nla_a;
+ struct nlattr *nla_b;
+ struct netlbl_af4list *iter4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct netlbl_af6list *iter6;
+#endif
+
+ if (entry->domain != NULL) {
+ ret_val = nla_put_string(skb,
+ NLBL_MGMT_A_DOMAIN, entry->domain);
+ if (ret_val != 0)
+ return ret_val;
+ }
+
+ switch (entry->type) {
+ case NETLBL_NLTYPE_ADDRSELECT:
+ nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST);
+ if (nla_a == NULL)
+ return -ENOMEM;
+
+ netlbl_af4list_foreach_rcu(iter4,
+ &entry->type_def.addrsel->list4) {
+ struct netlbl_domaddr4_map *map4;
+ struct in_addr addr_struct;
+
+ nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
+ if (nla_b == NULL)
+ return -ENOMEM;
+
+ addr_struct.s_addr = iter4->addr;
+ ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR,
+ sizeof(struct in_addr),
+ &addr_struct);
+ if (ret_val != 0)
+ return ret_val;
+ addr_struct.s_addr = iter4->mask;
+ ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK,
+ sizeof(struct in_addr),
+ &addr_struct);
+ if (ret_val != 0)
+ return ret_val;
+ map4 = netlbl_domhsh_addr4_entry(iter4);
+ ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
+ map4->type);
+ if (ret_val != 0)
+ return ret_val;
+ switch (map4->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
+ map4->type_def.cipsov4->doi);
+ if (ret_val != 0)
+ return ret_val;
+ break;
+ }
+
+ nla_nest_end(skb, nla_b);
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_rcu(iter6,
+ &entry->type_def.addrsel->list6) {
+ struct netlbl_domaddr6_map *map6;
+
+ nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
+ if (nla_b == NULL)
+ return -ENOMEM;
+
+ ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR,
+ sizeof(struct in6_addr),
+ &iter6->addr);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK,
+ sizeof(struct in6_addr),
+ &iter6->mask);
+ if (ret_val != 0)
+ return ret_val;
+ map6 = netlbl_domhsh_addr6_entry(iter6);
+ ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
+ map6->type);
+ if (ret_val != 0)
+ return ret_val;
+
+ nla_nest_end(skb, nla_b);
+ }
+#endif /* IPv6 */
+
+ nla_nest_end(skb, nla_a);
+ break;
+ case NETLBL_NLTYPE_UNLABELED:
+ ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+ if (ret_val != 0)
+ return ret_val;
+ ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
+ entry->type_def.cipsov4->doi);
+ break;
+ }
+
+ return ret_val;
+}
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_mgmt_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADD message and add the domains from the message
+ * to the hash table. See netlabel.h for a description of the message format.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlbl_audit audit_info;
+
+ if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) ||
+ (!info->attrs[NLBL_MGMT_A_PROTOCOL]) ||
+ (info->attrs[NLBL_MGMT_A_IPV4ADDR] &&
+ info->attrs[NLBL_MGMT_A_IPV6ADDR]) ||
+ (info->attrs[NLBL_MGMT_A_IPV4MASK] &&
+ info->attrs[NLBL_MGMT_A_IPV6MASK]) ||
+ ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^
+ (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) ||
+ ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^
+ (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
+ return -EINVAL;
+
+ netlbl_netlink_auditinfo(skb, &audit_info);
+
+ return netlbl_mgmt_add_common(info, &audit_info);
+}
+
+/**
* netlbl_mgmt_remove - Handle a REMOVE message
* @skb: the NETLINK buffer
* @info: the Generic NETLINK info block
@@ -198,23 +453,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
if (data == NULL)
goto listall_cb_failure;
- ret_val = nla_put_string(cb_arg->skb,
- NLBL_MGMT_A_DOMAIN,
- entry->domain);
+ ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry);
if (ret_val != 0)
goto listall_cb_failure;
- ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type);
- if (ret_val != 0)
- goto listall_cb_failure;
- switch (entry->type) {
- case NETLBL_NLTYPE_CIPSOV4:
- ret_val = nla_put_u32(cb_arg->skb,
- NLBL_MGMT_A_CV4DOI,
- entry->type_def.cipsov4->doi);
- if (ret_val != 0)
- goto listall_cb_failure;
- break;
- }
cb_arg->seq++;
return genlmsg_end(cb_arg->skb, data);
@@ -268,56 +509,22 @@ static int netlbl_mgmt_listall(struct sk_buff *skb,
*/
static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
{
- int ret_val = -EINVAL;
- struct netlbl_dom_map *entry = NULL;
- u32 tmp_val;
struct netlbl_audit audit_info;
- if (!info->attrs[NLBL_MGMT_A_PROTOCOL])
- goto adddef_failure;
+ if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) ||
+ (info->attrs[NLBL_MGMT_A_IPV4ADDR] &&
+ info->attrs[NLBL_MGMT_A_IPV6ADDR]) ||
+ (info->attrs[NLBL_MGMT_A_IPV4MASK] &&
+ info->attrs[NLBL_MGMT_A_IPV6MASK]) ||
+ ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^
+ (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) ||
+ ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^
+ (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
+ return -EINVAL;
netlbl_netlink_auditinfo(skb, &audit_info);
- entry = kzalloc(sizeof(*entry), GFP_KERNEL);
- if (entry == NULL) {
- ret_val = -ENOMEM;
- goto adddef_failure;
- }
- entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
-
- switch (entry->type) {
- case NETLBL_NLTYPE_UNLABELED:
- ret_val = netlbl_domhsh_add_default(entry, &audit_info);
- break;
- case NETLBL_NLTYPE_CIPSOV4:
- if (!info->attrs[NLBL_MGMT_A_CV4DOI])
- goto adddef_failure;
-
- tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
- /* We should be holding a rcu_read_lock() here while we hold
- * the result but since the entry will always be deleted when
- * the CIPSO DOI is deleted we aren't going to keep the
- * lock. */
- rcu_read_lock();
- entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
- if (entry->type_def.cipsov4 == NULL) {
- rcu_read_unlock();
- goto adddef_failure;
- }
- ret_val = netlbl_domhsh_add_default(entry, &audit_info);
- rcu_read_unlock();
- break;
- default:
- goto adddef_failure;
- }
- if (ret_val != 0)
- goto adddef_failure;
-
- return 0;
-
-adddef_failure:
- kfree(entry);
- return ret_val;
+ return netlbl_mgmt_add_common(info, &audit_info);
}
/**
@@ -371,19 +578,10 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
ret_val = -ENOENT;
goto listdef_failure_lock;
}
- ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type);
- if (ret_val != 0)
- goto listdef_failure_lock;
- switch (entry->type) {
- case NETLBL_NLTYPE_CIPSOV4:
- ret_val = nla_put_u32(ans_skb,
- NLBL_MGMT_A_CV4DOI,
- entry->type_def.cipsov4->doi);
- if (ret_val != 0)
- goto listdef_failure_lock;
- break;
- }
+ ret_val = netlbl_mgmt_listentry(ans_skb, entry);
rcu_read_unlock();
+ if (ret_val != 0)
+ goto listdef_failure;
genlmsg_end(ans_skb, data);
return genlmsg_reply(ans_skb, info);
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index a43bff169d6..05d96431f81 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -45,6 +45,16 @@
* NLBL_MGMT_A_DOMAIN
* NLBL_MGMT_A_PROTOCOL
*
+ * If IPv4 is specified the following attributes are required:
+ *
+ * NLBL_MGMT_A_IPV4ADDR
+ * NLBL_MGMT_A_IPV4MASK
+ *
+ * If IPv6 is specified the following attributes are required:
+ *
+ * NLBL_MGMT_A_IPV6ADDR
+ * NLBL_MGMT_A_IPV6MASK
+ *
* If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
*
* NLBL_MGMT_A_CV4DOI
@@ -68,13 +78,24 @@
* Required attributes:
*
* NLBL_MGMT_A_DOMAIN
+ *
+ * If the IP address selectors are not used the following attribute is
+ * required:
+ *
* NLBL_MGMT_A_PROTOCOL
*
- * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ * If the IP address selectors are used then the following attritbute is
+ * required:
+ *
+ * NLBL_MGMT_A_SELECTORLIST
+ *
+ * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following
+ * attributes are required:
*
* NLBL_MGMT_A_CV4DOI
*
- * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other
+ * attributes are required.
*
* o ADDDEF:
* Sent by an application to set the default domain mapping for the NetLabel
@@ -100,15 +121,23 @@
* application there is no payload. On success the kernel should send a
* response using the following format.
*
- * Required attributes:
+ * If the IP address selectors are not used the following attribute is
+ * required:
*
* NLBL_MGMT_A_PROTOCOL
*
- * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ * If the IP address selectors are used then the following attritbute is
+ * required:
+ *
+ * NLBL_MGMT_A_SELECTORLIST
+ *
+ * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following
+ * attributes are required:
*
* NLBL_MGMT_A_CV4DOI
*
- * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other
+ * attributes are required.
*
* o PROTOCOLS:
* Sent by an application to request a list of configured NetLabel protocols
@@ -162,6 +191,26 @@ enum {
NLBL_MGMT_A_CV4DOI,
/* (NLA_U32)
* the CIPSOv4 DOI value */
+ NLBL_MGMT_A_IPV6ADDR,
+ /* (NLA_BINARY, struct in6_addr)
+ * an IPv6 address */
+ NLBL_MGMT_A_IPV6MASK,
+ /* (NLA_BINARY, struct in6_addr)
+ * an IPv6 address mask */
+ NLBL_MGMT_A_IPV4ADDR,
+ /* (NLA_BINARY, struct in_addr)
+ * an IPv4 address */
+ NLBL_MGMT_A_IPV4MASK,
+ /* (NLA_BINARY, struct in_addr)
+ * and IPv4 address mask */
+ NLBL_MGMT_A_ADDRSELECTOR,
+ /* (NLA_NESTED)
+ * an IP address selector, must contain an address, mask, and protocol
+ * attribute plus any protocol specific attributes */
+ NLBL_MGMT_A_SELECTORLIST,
+ /* (NLA_NESTED)
+ * the selector list, there must be at least one
+ * NLBL_MGMT_A_ADDRSELECTOR attribute */
__NLBL_MGMT_A_MAX,
};
#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 921c118ead8..e8a5c32b0f1 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -10,7 +10,7 @@
*/
/*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -54,6 +54,7 @@
#include <asm/atomic.h>
#include "netlabel_user.h"
+#include "netlabel_addrlist.h"
#include "netlabel_domainhash.h"
#include "netlabel_unlabeled.h"
#include "netlabel_mgmt.h"
@@ -76,22 +77,20 @@ struct netlbl_unlhsh_tbl {
struct list_head *tbl;
u32 size;
};
+#define netlbl_unlhsh_addr4_entry(iter) \
+ container_of(iter, struct netlbl_unlhsh_addr4, list)
struct netlbl_unlhsh_addr4 {
- __be32 addr;
- __be32 mask;
u32 secid;
- u32 valid;
- struct list_head list;
+ struct netlbl_af4list list;
struct rcu_head rcu;
};
+#define netlbl_unlhsh_addr6_entry(iter) \
+ container_of(iter, struct netlbl_unlhsh_addr6, list)
struct netlbl_unlhsh_addr6 {
- struct in6_addr addr;
- struct in6_addr mask;
u32 secid;
- u32 valid;
- struct list_head list;
+ struct netlbl_af6list list;
struct rcu_head rcu;
};
struct netlbl_unlhsh_iface {
@@ -147,76 +146,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1
};
/*
- * Audit Helper Functions
- */
-
-/**
- * netlbl_unlabel_audit_addr4 - Audit an IPv4 address
- * @audit_buf: audit buffer
- * @dev: network interface
- * @addr: IP address
- * @mask: IP address mask
- *
- * Description:
- * Write the IPv4 address and address mask, if necessary, to @audit_buf.
- *
- */
-static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf,
- const char *dev,
- __be32 addr, __be32 mask)
-{
- u32 mask_val = ntohl(mask);
-
- if (dev != NULL)
- audit_log_format(audit_buf, " netif=%s", dev);
- audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr));
- if (mask_val != 0xffffffff) {
- u32 mask_len = 0;
- while (mask_val > 0) {
- mask_val <<= 1;
- mask_len++;
- }
- audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
- }
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-/**
- * netlbl_unlabel_audit_addr6 - Audit an IPv6 address
- * @audit_buf: audit buffer
- * @dev: network interface
- * @addr: IP address
- * @mask: IP address mask
- *
- * Description:
- * Write the IPv6 address and address mask, if necessary, to @audit_buf.
- *
- */
-static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf,
- const char *dev,
- const struct in6_addr *addr,
- const struct in6_addr *mask)
-{
- if (dev != NULL)
- audit_log_format(audit_buf, " netif=%s", dev);
- audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr));
- if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
- u32 mask_len = 0;
- u32 mask_val;
- int iter = -1;
- while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
- mask_len += 32;
- mask_val = ntohl(mask->s6_addr32[iter]);
- while (mask_val > 0) {
- mask_val <<= 1;
- mask_len++;
- }
- audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
- }
-}
-#endif /* IPv6 */
-
-/*
* Unlabeled Connection Hash Table Functions
*/
@@ -274,26 +203,28 @@ static void netlbl_unlhsh_free_addr6(struct rcu_head *entry)
static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
{
struct netlbl_unlhsh_iface *iface;
- struct netlbl_unlhsh_addr4 *iter4;
- struct netlbl_unlhsh_addr4 *tmp4;
- struct netlbl_unlhsh_addr6 *iter6;
- struct netlbl_unlhsh_addr6 *tmp6;
+ struct netlbl_af4list *iter4;
+ struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct netlbl_af6list *iter6;
+ struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
iface = container_of(entry, struct netlbl_unlhsh_iface, rcu);
/* no need for locks here since we are the only one with access to this
* structure */
- list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list)
- if (iter4->valid) {
- list_del_rcu(&iter4->list);
- kfree(iter4);
- }
- list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list)
- if (iter6->valid) {
- list_del_rcu(&iter6->list);
- kfree(iter6);
- }
+ netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) {
+ netlbl_af4list_remove_entry(iter4);
+ kfree(netlbl_unlhsh_addr4_entry(iter4));
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) {
+ netlbl_af6list_remove_entry(iter6);
+ kfree(netlbl_unlhsh_addr6_entry(iter6));
+ }
+#endif /* IPv6 */
kfree(iface);
}
@@ -316,59 +247,6 @@ static u32 netlbl_unlhsh_hash(int ifindex)
}
/**
- * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry
- * @addr: IPv4 address
- * @iface: the network interface entry
- *
- * Description:
- * Searches the IPv4 address list of the network interface specified by @iface.
- * If a matching address entry is found it is returned, otherwise NULL is
- * returned. The caller is responsible for calling the rcu_read_[un]lock()
- * functions.
- *
- */
-static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4(
- __be32 addr,
- const struct netlbl_unlhsh_iface *iface)
-{
- struct netlbl_unlhsh_addr4 *iter;
-
- list_for_each_entry_rcu(iter, &iface->addr4_list, list)
- if (iter->valid && (addr & iter->mask) == iter->addr)
- return iter;
-
- return NULL;
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-/**
- * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry
- * @addr: IPv6 address
- * @iface: the network interface entry
- *
- * Description:
- * Searches the IPv6 address list of the network interface specified by @iface.
- * If a matching address entry is found it is returned, otherwise NULL is
- * returned. The caller is responsible for calling the rcu_read_[un]lock()
- * functions.
- *
- */
-static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
- const struct in6_addr *addr,
- const struct netlbl_unlhsh_iface *iface)
-{
- struct netlbl_unlhsh_addr6 *iter;
-
- list_for_each_entry_rcu(iter, &iface->addr6_list, list)
- if (iter->valid &&
- ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
- return iter;
-
- return NULL;
-}
-#endif /* IPv6 */
-
-/**
* netlbl_unlhsh_search_iface - Search for a matching interface entry
* @ifindex: the network interface
*
@@ -381,12 +259,12 @@ static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
{
u32 bkt;
+ struct list_head *bkt_list;
struct netlbl_unlhsh_iface *iter;
bkt = netlbl_unlhsh_hash(ifindex);
- list_for_each_entry_rcu(iter,
- &rcu_dereference(netlbl_unlhsh)->tbl[bkt],
- list)
+ bkt_list = &rcu_dereference(netlbl_unlhsh)->tbl[bkt];
+ list_for_each_entry_rcu(iter, bkt_list, list)
if (iter->valid && iter->ifindex == ifindex)
return iter;
@@ -439,43 +317,26 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface,
const struct in_addr *mask,
u32 secid)
{
+ int ret_val;
struct netlbl_unlhsh_addr4 *entry;
- struct netlbl_unlhsh_addr4 *iter;
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL)
return -ENOMEM;
- entry->addr = addr->s_addr & mask->s_addr;
- entry->mask = mask->s_addr;
- entry->secid = secid;
- entry->valid = 1;
+ entry->list.addr = addr->s_addr & mask->s_addr;
+ entry->list.mask = mask->s_addr;
+ entry->list.valid = 1;
INIT_RCU_HEAD(&entry->rcu);
+ entry->secid = secid;
spin_lock(&netlbl_unlhsh_lock);
- iter = netlbl_unlhsh_search_addr4(entry->addr, iface);
- if (iter != NULL &&
- iter->addr == addr->s_addr && iter->mask == mask->s_addr) {
- spin_unlock(&netlbl_unlhsh_lock);
- kfree(entry);
- return -EEXIST;
- }
- /* in order to speed up address searches through the list (the common
- * case) we need to keep the list in order based on the size of the
- * address mask such that the entry with the widest mask (smallest
- * numerical value) appears first in the list */
- list_for_each_entry_rcu(iter, &iface->addr4_list, list)
- if (iter->valid &&
- ntohl(entry->mask) > ntohl(iter->mask)) {
- __list_add_rcu(&entry->list,
- iter->list.prev,
- &iter->list);
- spin_unlock(&netlbl_unlhsh_lock);
- return 0;
- }
- list_add_tail_rcu(&entry->list, &iface->addr4_list);
+ ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list);
spin_unlock(&netlbl_unlhsh_lock);
- return 0;
+
+ if (ret_val != 0)
+ kfree(entry);
+ return ret_val;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -498,47 +359,29 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface,
const struct in6_addr *mask,
u32 secid)
{
+ int ret_val;
struct netlbl_unlhsh_addr6 *entry;
- struct netlbl_unlhsh_addr6 *iter;
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL)
return -ENOMEM;
- ipv6_addr_copy(&entry->addr, addr);
- entry->addr.s6_addr32[0] &= mask->s6_addr32[0];
- entry->addr.s6_addr32[1] &= mask->s6_addr32[1];
- entry->addr.s6_addr32[2] &= mask->s6_addr32[2];
- entry->addr.s6_addr32[3] &= mask->s6_addr32[3];
- ipv6_addr_copy(&entry->mask, mask);
- entry->secid = secid;
- entry->valid = 1;
+ ipv6_addr_copy(&entry->list.addr, addr);
+ entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
+ entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
+ entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
+ entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
+ ipv6_addr_copy(&entry->list.mask, mask);
+ entry->list.valid = 1;
INIT_RCU_HEAD(&entry->rcu);
+ entry->secid = secid;
spin_lock(&netlbl_unlhsh_lock);
- iter = netlbl_unlhsh_search_addr6(&entry->addr, iface);
- if (iter != NULL &&
- (ipv6_addr_equal(&iter->addr, addr) &&
- ipv6_addr_equal(&iter->mask, mask))) {
- spin_unlock(&netlbl_unlhsh_lock);
- kfree(entry);
- return -EEXIST;
- }
- /* in order to speed up address searches through the list (the common
- * case) we need to keep the list in order based on the size of the
- * address mask such that the entry with the widest mask (smallest
- * numerical value) appears first in the list */
- list_for_each_entry_rcu(iter, &iface->addr6_list, list)
- if (iter->valid &&
- ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
- __list_add_rcu(&entry->list,
- iter->list.prev,
- &iter->list);
- spin_unlock(&netlbl_unlhsh_lock);
- return 0;
- }
- list_add_tail_rcu(&entry->list, &iface->addr6_list);
+ ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list);
spin_unlock(&netlbl_unlhsh_lock);
+
+ if (ret_val != 0)
+ kfree(entry);
return 0;
}
#endif /* IPv6 */
@@ -658,10 +501,10 @@ static int netlbl_unlhsh_add(struct net *net,
mask4 = (struct in_addr *)mask;
ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid);
if (audit_buf != NULL)
- netlbl_unlabel_audit_addr4(audit_buf,
- dev_name,
- addr4->s_addr,
- mask4->s_addr);
+ netlbl_af4list_audit_addr(audit_buf, 1,
+ dev_name,
+ addr4->s_addr,
+ mask4->s_addr);
break;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -672,9 +515,9 @@ static int netlbl_unlhsh_add(struct net *net,
mask6 = (struct in6_addr *)mask;
ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid);
if (audit_buf != NULL)
- netlbl_unlabel_audit_addr6(audit_buf,
- dev_name,
- addr6, mask6);
+ netlbl_af6list_audit_addr(audit_buf, 1,
+ dev_name,
+ addr6, mask6);
break;
}
#endif /* IPv6 */
@@ -719,35 +562,34 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
const struct in_addr *mask,
struct netlbl_audit *audit_info)
{
- int ret_val = -ENOENT;
+ int ret_val = 0;
+ struct netlbl_af4list *list_entry;
struct netlbl_unlhsh_addr4 *entry;
- struct audit_buffer *audit_buf = NULL;
+ struct audit_buffer *audit_buf;
struct net_device *dev;
- char *secctx = NULL;
+ char *secctx;
u32 secctx_len;
spin_lock(&netlbl_unlhsh_lock);
- entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface);
- if (entry != NULL &&
- entry->addr == addr->s_addr && entry->mask == mask->s_addr) {
- entry->valid = 0;
- list_del_rcu(&entry->list);
- ret_val = 0;
- }
+ list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr,
+ &iface->addr4_list);
spin_unlock(&netlbl_unlhsh_lock);
+ if (list_entry == NULL)
+ ret_val = -ENOENT;
+ entry = netlbl_unlhsh_addr4_entry(list_entry);
audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
audit_info);
if (audit_buf != NULL) {
dev = dev_get_by_index(net, iface->ifindex);
- netlbl_unlabel_audit_addr4(audit_buf,
- (dev != NULL ? dev->name : NULL),
- entry->addr, entry->mask);
+ netlbl_af4list_audit_addr(audit_buf, 1,
+ (dev != NULL ? dev->name : NULL),
+ addr->s_addr, mask->s_addr);
if (dev != NULL)
dev_put(dev);
- if (security_secid_to_secctx(entry->secid,
- &secctx,
- &secctx_len) == 0) {
+ if (entry && security_secid_to_secctx(entry->secid,
+ &secctx,
+ &secctx_len) == 0) {
audit_log_format(audit_buf, " sec_obj=%s", secctx);
security_release_secctx(secctx, secctx_len);
}
@@ -781,36 +623,33 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
const struct in6_addr *mask,
struct netlbl_audit *audit_info)
{
- int ret_val = -ENOENT;
+ int ret_val = 0;
+ struct netlbl_af6list *list_entry;
struct netlbl_unlhsh_addr6 *entry;
- struct audit_buffer *audit_buf = NULL;
+ struct audit_buffer *audit_buf;
struct net_device *dev;
- char *secctx = NULL;
+ char *secctx;
u32 secctx_len;
spin_lock(&netlbl_unlhsh_lock);
- entry = netlbl_unlhsh_search_addr6(addr, iface);
- if (entry != NULL &&
- (ipv6_addr_equal(&entry->addr, addr) &&
- ipv6_addr_equal(&entry->mask, mask))) {
- entry->valid = 0;
- list_del_rcu(&entry->list);
- ret_val = 0;
- }
+ list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list);
spin_unlock(&netlbl_unlhsh_lock);
+ if (list_entry == NULL)
+ ret_val = -ENOENT;
+ entry = netlbl_unlhsh_addr6_entry(list_entry);
audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
audit_info);
if (audit_buf != NULL) {
dev = dev_get_by_index(net, iface->ifindex);
- netlbl_unlabel_audit_addr6(audit_buf,
- (dev != NULL ? dev->name : NULL),
- addr, mask);
+ netlbl_af6list_audit_addr(audit_buf, 1,
+ (dev != NULL ? dev->name : NULL),
+ addr, mask);
if (dev != NULL)
dev_put(dev);
- if (security_secid_to_secctx(entry->secid,
- &secctx,
- &secctx_len) == 0) {
+ if (entry && security_secid_to_secctx(entry->secid,
+ &secctx,
+ &secctx_len) == 0) {
audit_log_format(audit_buf, " sec_obj=%s", secctx);
security_release_secctx(secctx, secctx_len);
}
@@ -836,16 +675,18 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
*/
static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
{
- struct netlbl_unlhsh_addr4 *iter4;
- struct netlbl_unlhsh_addr6 *iter6;
+ struct netlbl_af4list *iter4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct netlbl_af6list *iter6;
+#endif /* IPv6 */
spin_lock(&netlbl_unlhsh_lock);
- list_for_each_entry_rcu(iter4, &iface->addr4_list, list)
- if (iter4->valid)
- goto unlhsh_condremove_failure;
- list_for_each_entry_rcu(iter6, &iface->addr6_list, list)
- if (iter6->valid)
- goto unlhsh_condremove_failure;
+ netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list)
+ goto unlhsh_condremove_failure;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list)
+ goto unlhsh_condremove_failure;
+#endif /* IPv6 */
iface->valid = 0;
if (iface->ifindex > 0)
list_del_rcu(&iface->list);
@@ -1349,7 +1190,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
if (addr4) {
struct in_addr addr_struct;
- addr_struct.s_addr = addr4->addr;
+ addr_struct.s_addr = addr4->list.addr;
ret_val = nla_put(cb_arg->skb,
NLBL_UNLABEL_A_IPV4ADDR,
sizeof(struct in_addr),
@@ -1357,7 +1198,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
if (ret_val != 0)
goto list_cb_failure;
- addr_struct.s_addr = addr4->mask;
+ addr_struct.s_addr = addr4->list.mask;
ret_val = nla_put(cb_arg->skb,
NLBL_UNLABEL_A_IPV4MASK,
sizeof(struct in_addr),
@@ -1370,14 +1211,14 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
ret_val = nla_put(cb_arg->skb,
NLBL_UNLABEL_A_IPV6ADDR,
sizeof(struct in6_addr),
- &addr6->addr);
+ &addr6->list.addr);
if (ret_val != 0)
goto list_cb_failure;
ret_val = nla_put(cb_arg->skb,
NLBL_UNLABEL_A_IPV6MASK,
sizeof(struct in6_addr),
- &addr6->mask);
+ &addr6->list.mask);
if (ret_val != 0)
goto list_cb_failure;
@@ -1425,8 +1266,11 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
u32 iter_bkt;
u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
struct netlbl_unlhsh_iface *iface;
- struct netlbl_unlhsh_addr4 *addr4;
- struct netlbl_unlhsh_addr6 *addr6;
+ struct list_head *iter_list;
+ struct netlbl_af4list *addr4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct netlbl_af6list *addr6;
+#endif
cb_arg.nl_cb = cb;
cb_arg.skb = skb;
@@ -1436,44 +1280,43 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
for (iter_bkt = skip_bkt;
iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
- list_for_each_entry_rcu(iface,
- &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt],
- list) {
+ iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt];
+ list_for_each_entry_rcu(iface, iter_list, list) {
if (!iface->valid ||
iter_chain++ < skip_chain)
continue;
- list_for_each_entry_rcu(addr4,
- &iface->addr4_list,
- list) {
- if (!addr4->valid || iter_addr4++ < skip_addr4)
+ netlbl_af4list_foreach_rcu(addr4,
+ &iface->addr4_list) {
+ if (iter_addr4++ < skip_addr4)
continue;
if (netlbl_unlabel_staticlist_gen(
- NLBL_UNLABEL_C_STATICLIST,
- iface,
- addr4,
- NULL,
- &cb_arg) < 0) {
+ NLBL_UNLABEL_C_STATICLIST,
+ iface,
+ netlbl_unlhsh_addr4_entry(addr4),
+ NULL,
+ &cb_arg) < 0) {
iter_addr4--;
iter_chain--;
goto unlabel_staticlist_return;
}
}
- list_for_each_entry_rcu(addr6,
- &iface->addr6_list,
- list) {
- if (!addr6->valid || iter_addr6++ < skip_addr6)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_rcu(addr6,
+ &iface->addr6_list) {
+ if (iter_addr6++ < skip_addr6)
continue;
if (netlbl_unlabel_staticlist_gen(
- NLBL_UNLABEL_C_STATICLIST,
- iface,
- NULL,
- addr6,
- &cb_arg) < 0) {
+ NLBL_UNLABEL_C_STATICLIST,
+ iface,
+ NULL,
+ netlbl_unlhsh_addr6_entry(addr6),
+ &cb_arg) < 0) {
iter_addr6--;
iter_chain--;
goto unlabel_staticlist_return;
}
}
+#endif /* IPv6 */
}
}
@@ -1504,9 +1347,12 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
struct netlbl_unlhsh_iface *iface;
u32 skip_addr4 = cb->args[0];
u32 skip_addr6 = cb->args[1];
- u32 iter_addr4 = 0, iter_addr6 = 0;
- struct netlbl_unlhsh_addr4 *addr4;
- struct netlbl_unlhsh_addr6 *addr6;
+ u32 iter_addr4 = 0;
+ struct netlbl_af4list *addr4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ u32 iter_addr6 = 0;
+ struct netlbl_af6list *addr6;
+#endif
cb_arg.nl_cb = cb;
cb_arg.skb = skb;
@@ -1517,30 +1363,32 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
if (iface == NULL || !iface->valid)
goto unlabel_staticlistdef_return;
- list_for_each_entry_rcu(addr4, &iface->addr4_list, list) {
- if (!addr4->valid || iter_addr4++ < skip_addr4)
+ netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) {
+ if (iter_addr4++ < skip_addr4)
continue;
if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
- iface,
- addr4,
- NULL,
- &cb_arg) < 0) {
+ iface,
+ netlbl_unlhsh_addr4_entry(addr4),
+ NULL,
+ &cb_arg) < 0) {
iter_addr4--;
goto unlabel_staticlistdef_return;
}
}
- list_for_each_entry_rcu(addr6, &iface->addr6_list, list) {
- if (!addr6->valid || iter_addr6++ < skip_addr6)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) {
+ if (iter_addr6++ < skip_addr6)
continue;
if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
- iface,
- NULL,
- addr6,
- &cb_arg) < 0) {
+ iface,
+ NULL,
+ netlbl_unlhsh_addr6_entry(addr6),
+ &cb_arg) < 0) {
iter_addr6--;
goto unlabel_staticlistdef_return;
}
}
+#endif /* IPv6 */
unlabel_staticlistdef_return:
rcu_read_unlock();
@@ -1718,25 +1566,27 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
switch (family) {
case PF_INET: {
struct iphdr *hdr4;
- struct netlbl_unlhsh_addr4 *addr4;
+ struct netlbl_af4list *addr4;
hdr4 = ip_hdr(skb);
- addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface);
+ addr4 = netlbl_af4list_search(hdr4->saddr,
+ &iface->addr4_list);
if (addr4 == NULL)
goto unlabel_getattr_nolabel;
- secattr->attr.secid = addr4->secid;
+ secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid;
break;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case PF_INET6: {
struct ipv6hdr *hdr6;
- struct netlbl_unlhsh_addr6 *addr6;
+ struct netlbl_af6list *addr6;
hdr6 = ipv6_hdr(skb);
- addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface);
+ addr6 = netlbl_af6list_search(&hdr6->saddr,
+ &iface->addr6_list);
if (addr6 == NULL)
goto unlabel_getattr_nolabel;
- secattr->attr.secid = addr6->secid;
+ secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid;
break;
}
#endif /* IPv6 */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b0eacc0007c..480184a857d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1,7 +1,7 @@
/*
* NETLINK Kernel-user communication protocol.
*
- * Authors: Alan Cox <alan@redhat.com>
+ * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
@@ -435,7 +435,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
return -EPROTONOSUPPORT;
netlink_lock_table();
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
if (!nl_table[protocol].registered) {
netlink_unlock_table();
request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 9e9c6fce11a..b9d97effebe 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -67,11 +67,10 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
}
pnp = phonet_proto_get(protocol);
-#ifdef CONFIG_KMOD
if (pnp == NULL &&
request_module("net-pf-%d-proto-%d", PF_PHONET, protocol) == 0)
pnp = phonet_proto_get(protocol);
-#endif
+
if (pnp == NULL)
return -EPROTONOSUPPORT;
if (sock->type != pnp->sock_type) {
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index e5b69556bb5..21124ec0a73 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -16,6 +16,7 @@
#include <linux/workqueue.h>
#include <linux/init.h>
#include <linux/rfkill.h>
+#include <linux/sched.h>
#include "rfkill-input.h"
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9974b3f04f0..8f457f1e0ac 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -494,7 +494,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
a_o = tc_lookup_action_n(act_name);
if (a_o == NULL) {
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
rtnl_unlock();
request_module("act_%s", act_name);
rtnl_lock();
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8eb79e92e94..16e7ac9774e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -227,7 +227,7 @@ replay:
err = -ENOENT;
tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
if (tp_ops == NULL) {
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
struct nlattr *kind = tca[TCA_KIND];
char name[IFNAMSIZ];
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e6f82e0e6f..e82519e548d 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -224,7 +224,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
if (em->ops == NULL) {
err = -ENOENT;
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
__rtnl_unlock();
request_module("ematch-kind-%u", em_hdr->kind);
rtnl_lock();
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1122c952aa9..b16ad2972c6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -764,7 +764,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
struct qdisc_size_table *stab;
ops = qdisc_lookup_ops(kind);
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
if (ops == NULL && kind != NULL) {
char name[IFNAMSIZ];
if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
diff --git a/net/socket.c b/net/socket.c
index 3e8d4e35c08..2b7a4b5c9b7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1142,7 +1142,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol,
sock->type = type;
-#if defined(CONFIG_KMOD)
+#ifdef CONFIG_MODULES
/* Attempt to load a protocol module if the find failed.
*
* 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 6bfea9ed686..436bf1b4b76 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -83,10 +83,8 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
if (flavor >= RPC_AUTH_MAXFLAVOR)
goto out;
-#ifdef CONFIG_KMOD
if ((ops = auth_flavors[flavor]) == NULL)
request_module("rpc-auth-%u", flavor);
-#endif
spin_lock(&rpc_authflavor_lock);
ops = auth_flavors[flavor];
if (ops == NULL || !try_module_get(ops->owner)) {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 76739e928d0..4895c341e46 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
clnt->cl_protname = program->name;
- clnt->cl_prog = program->number;
+ clnt->cl_prog = args->prognumber ? : program->number;
clnt->cl_vers = version->number;
clnt->cl_stats = program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
@@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
}
/* save the nodename */
- clnt->cl_nodelen = strlen(utsname()->nodename);
+ clnt->cl_nodelen = strlen(init_utsname()->nodename);
if (clnt->cl_nodelen > UNX_MAXNODENAME)
clnt->cl_nodelen = UNX_MAXNODENAME;
- memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
+ memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen);
rpc_register_client(clnt);
return clnt;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 24db2b4d12d..41013dd66ac 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -20,6 +20,7 @@
#include <linux/in6.h>
#include <linux/kernel.h>
#include <linux/errno.h>
+#include <net/ipv6.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
@@ -176,13 +177,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
}
static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
- u32 version, struct rpc_message *msg,
- int *result)
+ u32 version, struct rpc_message *msg)
{
struct rpc_clnt *rpcb_clnt;
- int error = 0;
+ int result, error = 0;
- *result = 0;
+ msg->rpc_resp = &result;
rpcb_clnt = rpcb_create_local(addr, addrlen, version);
if (!IS_ERR(rpcb_clnt)) {
@@ -191,12 +191,15 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
} else
error = PTR_ERR(rpcb_clnt);
- if (error < 0)
+ if (error < 0) {
printk(KERN_WARNING "RPC: failed to contact local rpcbind "
"server (errno %d).\n", -error);
- dprintk("RPC: registration status %d/%d\n", error, *result);
+ return error;
+ }
- return error;
+ if (!result)
+ return -EACCES;
+ return 0;
}
/**
@@ -205,7 +208,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
* @vers: RPC version number to bind
* @prot: transport protocol to register
* @port: port value to register
- * @okay: OUT: result code
+ *
+ * Returns zero if the registration request was dispatched successfully
+ * and the rpcbind daemon returned success. Otherwise, returns an errno
+ * value that reflects the nature of the error (request could not be
+ * dispatched, timed out, or rpcbind returned an error).
*
* RPC services invoke this function to advertise their contact
* information via the system's rpcbind daemon. RPC services
@@ -217,15 +224,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
* all registered transports for [program, version] from the local
* rpcbind database.
*
- * Returns zero if the registration request was dispatched
- * successfully and a reply was received. The rpcbind daemon's
- * boolean result code is stored in *okay.
- *
- * Returns an errno value and sets *result to zero if there was
- * some problem that prevented the rpcbind request from being
- * dispatched, or if the rpcbind daemon did not respond within
- * the timeout.
- *
* This function uses rpcbind protocol version 2 to contact the
* local rpcbind daemon.
*
@@ -236,7 +234,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
* IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
* addresses).
*/
-int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
{
struct rpcbind_args map = {
.r_prog = prog,
@@ -246,7 +244,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
};
struct rpc_message msg = {
.rpc_argp = &map,
- .rpc_resp = okay,
};
dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
@@ -259,7 +256,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
sizeof(rpcb_inaddr_loopback),
- RPCBVERS_2, &msg, okay);
+ RPCBVERS_2, &msg);
}
/*
@@ -290,7 +287,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
sizeof(rpcb_inaddr_loopback),
- RPCBVERS_4, msg, msg->rpc_resp);
+ RPCBVERS_4, msg);
}
/*
@@ -304,10 +301,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
char buf[64];
/* Construct AF_INET6 universal address */
- snprintf(buf, sizeof(buf),
- NIP6_FMT".%u.%u",
- NIP6(address_to_register->sin6_addr),
- port >> 8, port & 0xff);
+ if (ipv6_addr_any(&address_to_register->sin6_addr))
+ snprintf(buf, sizeof(buf), "::.%u.%u",
+ port >> 8, port & 0xff);
+ else
+ snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u",
+ NIP6(address_to_register->sin6_addr),
+ port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
sizeof(rpcb_in6addr_loopback),
- RPCBVERS_4, msg, msg->rpc_resp);
+ RPCBVERS_4, msg);
}
/**
@@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
* @version: RPC version number of service to (un)register
* @address: address family, IP address, and port to (un)register
* @netid: netid of transport protocol to (un)register
- * @result: result code from rpcbind RPC call
+ *
+ * Returns zero if the registration request was dispatched successfully
+ * and the rpcbind daemon returned success. Otherwise, returns an errno
+ * value that reflects the nature of the error (request could not be
+ * dispatched, timed out, or rpcbind returned an error).
*
* RPC services invoke this function to advertise their contact
* information via the system's rpcbind daemon. RPC services
@@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
* to zero. Callers pass a netid of "" to unregister all
* transport netids associated with [program, version, address].
*
- * Returns zero if the registration request was dispatched
- * successfully and a reply was received. The rpcbind daemon's
- * result code is stored in *result.
- *
- * Returns an errno value and sets *result to zero if there was
- * some problem that prevented the rpcbind request from being
- * dispatched, or if the rpcbind daemon did not respond within
- * the timeout.
- *
* This function uses rpcbind protocol version 4 to contact the
* local rpcbind daemon. The local rpcbind daemon must support
* version 4 of the rpcbind protocol in order for these functions
@@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
* advertises the service on all IPv4 and IPv6 addresses.
*/
int rpcb_v4_register(const u32 program, const u32 version,
- const struct sockaddr *address, const char *netid,
- int *result)
+ const struct sockaddr *address, const char *netid)
{
struct rpcbind_args map = {
.r_prog = program,
@@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version,
};
struct rpc_message msg = {
.rpc_argp = &map,
- .rpc_resp = result,
};
- *result = 0;
-
switch (address->sa_family) {
case AF_INET:
return rpcb_register_netid4((struct sockaddr_in *)address,
@@ -469,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
return rpc_run_task(&task_setup_data);
}
+/*
+ * In the case where rpc clients have been cloned, we want to make
+ * sure that we use the program number/version etc of the actual
+ * owner of the xprt. To do so, we walk back up the tree of parents
+ * to find whoever created the transport and/or whoever has the
+ * autobind flag set.
+ */
+static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
+{
+ struct rpc_clnt *parent = clnt->cl_parent;
+
+ while (parent != clnt) {
+ if (parent->cl_xprt != clnt->cl_xprt)
+ break;
+ if (clnt->cl_autobind)
+ break;
+ clnt = parent;
+ parent = parent->cl_parent;
+ }
+ return clnt;
+}
+
/**
* rpcb_getport_async - obtain the port for a given RPC service on a given host
* @task: task that is waiting for portmapper request
@@ -478,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
*/
void rpcb_getport_async(struct rpc_task *task)
{
- struct rpc_clnt *clnt = task->tk_client;
+ struct rpc_clnt *clnt;
struct rpc_procinfo *proc;
u32 bind_version;
- struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_xprt *xprt;
struct rpc_clnt *rpcb_clnt;
static struct rpcbind_args *map;
struct rpc_task *child;
@@ -490,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task)
size_t salen;
int status;
+ clnt = rpcb_find_transport_owner(task->tk_client);
+ xprt = clnt->cl_xprt;
+
dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
task->tk_pid, __func__,
clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
- /* Autobind on cloned rpc clients is discouraged */
- BUG_ON(clnt->cl_parent != clnt);
-
/* Put self on the wait queue to ensure we get notified if
* some other task is already attempting to bind the port */
rpc_sleep_on(&xprt->binding, task, NULL);
@@ -558,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task)
status = -ENOMEM;
dprintk("RPC: %5u %s: no memory available\n",
task->tk_pid, __func__);
- goto bailout_nofree;
+ goto bailout_release_client;
}
map->r_prog = clnt->cl_prog;
map->r_vers = clnt->cl_vers;
@@ -578,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task)
task->tk_pid, __func__);
return;
}
- rpc_put_task(child);
- task->tk_xprt->stat.bind_count++;
+ xprt->stat.bind_count++;
+ rpc_put_task(child);
return;
+bailout_release_client:
+ rpc_release_client(rpcb_clnt);
bailout_nofree:
rpcb_wake_rpcbind_waiters(xprt, status);
task->tk_status = status;
@@ -633,7 +648,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
struct rpcbind_args *rpcb)
{
- dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n",
+ dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n",
rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
*p++ = htonl(rpcb->r_prog);
*p++ = htonl(rpcb->r_vers);
@@ -648,7 +663,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
unsigned short *portp)
{
*portp = (unsigned short) ntohl(*p++);
- dprintk("RPC: rpcb_decode_getport result %u\n",
+ dprintk("RPC: rpcb getport result: %u\n",
*portp);
return 0;
}
@@ -657,7 +672,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
unsigned int *boolp)
{
*boolp = (unsigned int) ntohl(*p++);
- dprintk("RPC: rpcb_decode_set: call %s\n",
+ dprintk("RPC: rpcb set/unset call %s\n",
(*boolp ? "succeeded" : "failed"));
return 0;
}
@@ -665,7 +680,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
struct rpcbind_args *rpcb)
{
- dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n",
+ dprintk("RPC: encoding rpcb request (%u, %u, %s)\n",
rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
*p++ = htonl(rpcb->r_prog);
*p++ = htonl(rpcb->r_vers);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5a32cb7c4bb..54c98d87684 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -28,6 +28,8 @@
#define RPCDBG_FACILITY RPCDBG_SVCDSP
+static void svc_unregister(const struct svc_serv *serv);
+
#define svc_serv_is_pooled(serv) ((serv)->sv_function)
/*
@@ -357,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- void (*shutdown)(struct svc_serv *serv))
+ sa_family_t family, void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
unsigned int vers;
@@ -366,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
+ serv->sv_family = family;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
serv->sv_nrthreads = 1;
@@ -416,30 +419,29 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
spin_lock_init(&pool->sp_lock);
}
-
/* Remove any stale portmap registrations */
- svc_register(serv, 0, 0);
+ svc_unregister(serv);
return serv;
}
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
- void (*shutdown)(struct svc_serv *serv))
+ sa_family_t family, void (*shutdown)(struct svc_serv *serv))
{
- return __svc_create(prog, bufsize, /*npools*/1, shutdown);
+ return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
}
EXPORT_SYMBOL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- void (*shutdown)(struct svc_serv *serv),
+ sa_family_t family, void (*shutdown)(struct svc_serv *serv),
svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, shutdown);
+ serv = __svc_create(prog, bufsize, npools, family, shutdown);
if (serv != NULL) {
serv->sv_function = func;
@@ -486,8 +488,7 @@ svc_destroy(struct svc_serv *serv)
if (svc_serv_is_pooled(serv))
svc_pool_map_put();
- /* Unregister service with the portmapper */
- svc_register(serv, 0, 0);
+ svc_unregister(serv);
kfree(serv->sv_pools);
kfree(serv);
}
@@ -718,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL(svc_exit_thread);
+#ifdef CONFIG_SUNRPC_REGISTER_V4
+
/*
- * Register an RPC service with the local portmapper.
- * To unregister a service, call this routine with
- * proto and port == 0.
+ * Register an "inet" protocol family netid with the local
+ * rpcbind daemon via an rpcbind v4 SET request.
+ *
+ * No netconfig infrastructure is available in the kernel, so
+ * we map IP_ protocol numbers to netids by hand.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
*/
-int
-svc_register(struct svc_serv *serv, int proto, unsigned short port)
+static int __svc_rpcb_register4(const u32 program, const u32 version,
+ const unsigned short protocol,
+ const unsigned short port)
+{
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = htons(port),
+ };
+ char *netid;
+
+ switch (protocol) {
+ case IPPROTO_UDP:
+ netid = RPCBIND_NETID_UDP;
+ break;
+ case IPPROTO_TCP:
+ netid = RPCBIND_NETID_TCP;
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ return rpcb_v4_register(program, version,
+ (struct sockaddr *)&sin, netid);
+}
+
+/*
+ * Register an "inet6" protocol family netid with the local
+ * rpcbind daemon via an rpcbind v4 SET request.
+ *
+ * No netconfig infrastructure is available in the kernel, so
+ * we map IP_ protocol numbers to netids by hand.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
+static int __svc_rpcb_register6(const u32 program, const u32 version,
+ const unsigned short protocol,
+ const unsigned short port)
+{
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ .sin6_port = htons(port),
+ };
+ char *netid;
+
+ switch (protocol) {
+ case IPPROTO_UDP:
+ netid = RPCBIND_NETID_UDP6;
+ break;
+ case IPPROTO_TCP:
+ netid = RPCBIND_NETID_TCP6;
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ return rpcb_v4_register(program, version,
+ (struct sockaddr *)&sin6, netid);
+}
+
+/*
+ * Register a kernel RPC service via rpcbind version 4.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
+static int __svc_register(const u32 program, const u32 version,
+ const sa_family_t family,
+ const unsigned short protocol,
+ const unsigned short port)
+{
+ int error;
+
+ switch (family) {
+ case AF_INET:
+ return __svc_rpcb_register4(program, version,
+ protocol, port);
+ case AF_INET6:
+ error = __svc_rpcb_register6(program, version,
+ protocol, port);
+ if (error < 0)
+ return error;
+
+ /*
+ * Work around bug in some versions of Linux rpcbind
+ * which don't allow registration of both inet and
+ * inet6 netids.
+ *
+ * Error return ignored for now.
+ */
+ __svc_rpcb_register4(program, version,
+ protocol, port);
+ return 0;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+#else /* CONFIG_SUNRPC_REGISTER_V4 */
+
+/*
+ * Register a kernel RPC service via rpcbind version 2.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
+static int __svc_register(const u32 program, const u32 version,
+ sa_family_t family,
+ const unsigned short protocol,
+ const unsigned short port)
+{
+ if (family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ return rpcb_register(program, version, protocol, port);
+}
+
+#endif /* CONFIG_SUNRPC_REGISTER_V4 */
+
+/**
+ * svc_register - register an RPC service with the local portmapper
+ * @serv: svc_serv struct for the service to register
+ * @proto: transport protocol number to advertise
+ * @port: port to advertise
+ *
+ * Service is registered for any address in serv's address family
+ */
+int svc_register(const struct svc_serv *serv, const unsigned short proto,
+ const unsigned short port)
{
struct svc_program *progp;
- unsigned long flags;
unsigned int i;
- int error = 0, dummy;
+ int error = 0;
- if (!port)
- clear_thread_flag(TIF_SIGPENDING);
+ BUG_ON(proto == 0 && port == 0);
for (progp = serv->sv_program; progp; progp = progp->pg_next) {
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
continue;
- dprintk("svc: svc_register(%s, %s, %d, %d)%s\n",
+ dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
progp->pg_name,
+ i,
proto == IPPROTO_UDP? "udp" : "tcp",
port,
- i,
+ serv->sv_family,
progp->pg_vers[i]->vs_hidden?
" (but not telling portmap)" : "");
if (progp->pg_vers[i]->vs_hidden)
continue;
- error = rpcb_register(progp->pg_prog, i, proto, port, &dummy);
+ error = __svc_register(progp->pg_prog, i,
+ serv->sv_family, proto, port);
if (error < 0)
break;
- if (port && !dummy) {
- error = -EACCES;
- break;
- }
}
}
- if (!port) {
- spin_lock_irqsave(&current->sighand->siglock, flags);
- recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
+ return error;
+}
+
+#ifdef CONFIG_SUNRPC_REGISTER_V4
+
+static void __svc_unregister(const u32 program, const u32 version,
+ const char *progname)
+{
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ .sin6_port = 0,
+ };
+ int error;
+
+ error = rpcb_v4_register(program, version,
+ (struct sockaddr *)&sin6, "");
+ dprintk("svc: %s(%sv%u), error %d\n",
+ __func__, progname, version, error);
+}
+
+#else /* CONFIG_SUNRPC_REGISTER_V4 */
+
+static void __svc_unregister(const u32 program, const u32 version,
+ const char *progname)
+{
+ int error;
+
+ error = rpcb_register(program, version, 0, 0);
+ dprintk("svc: %s(%sv%u), error %d\n",
+ __func__, progname, version, error);
+}
+
+#endif /* CONFIG_SUNRPC_REGISTER_V4 */
+
+/*
+ * All netids, bind addresses and ports registered for [program, version]
+ * are removed from the local rpcbind database (if the service is not
+ * hidden) to make way for a new instance of the service.
+ *
+ * The result of unregistration is reported via dprintk for those who want
+ * verification of the result, but is otherwise not important.
+ */
+static void svc_unregister(const struct svc_serv *serv)
+{
+ struct svc_program *progp;
+ unsigned long flags;
+ unsigned int i;
+
+ clear_thread_flag(TIF_SIGPENDING);
+
+ for (progp = serv->sv_program; progp; progp = progp->pg_next) {
+ for (i = 0; i < progp->pg_nvers; i++) {
+ if (progp->pg_vers[i] == NULL)
+ continue;
+ if (progp->pg_vers[i]->vs_hidden)
+ continue;
+
+ __svc_unregister(progp->pg_prog, i, progp->pg_name);
+ }
}
- return error;
+ spin_lock_irqsave(&current->sighand->siglock, flags);
+ recalc_sigpending();
+ spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
/*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e46c825f495..bf5b5cdafeb 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
- int flags)
+static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
+ struct svc_serv *serv,
+ unsigned short port, int flags)
{
- struct svc_xprt_class *xcl;
struct sockaddr_in sin = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
.sin_port = htons(port),
};
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ .sin6_port = htons(port),
+ };
+ struct sockaddr *sap;
+ size_t len;
+
+ switch (serv->sv_family) {
+ case AF_INET:
+ sap = (struct sockaddr *)&sin;
+ len = sizeof(sin);
+ break;
+ case AF_INET6:
+ sap = (struct sockaddr *)&sin6;
+ len = sizeof(sin6);
+ break;
+ default:
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
+ return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
+}
+
+int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+ int flags)
+{
+ struct svc_xprt_class *xcl;
+
dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
spin_lock(&svc_xprt_class_lock);
list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
@@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = xcl->xcl_ops->
- xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
- flags);
+ newxprt = __svc_xpo_create(xcl, serv, port, flags);
if (IS_ERR(newxprt)) {
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3e65719f1ef..95293f549e9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+ int val;
dprintk("svc: svc_setup_socket %p\n", sock);
if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
+ /*
+ * We start one listener per sv_serv. We want AF_INET
+ * requests to be automatically shunted to our AF_INET6
+ * listener using a mapped IPv4 address. Make sure
+ * no-one starts an equivalent IPv4 listener, which
+ * would steal our incoming connections.
+ */
+ val = 0;
+ if (serv->sv_family == AF_INET6)
+ kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
+ (char *)&val, sizeof(val));
+
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
@@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
int svc_addsock(struct svc_serv *serv,
int fd,
- char *name_return,
- int *proto)
+ char *name_return)
{
int err = 0;
struct socket *so = sockfd_lookup(fd, &err);
@@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv,
sockfd_put(so);
return err;
}
- if (proto) *proto = so->sk->sk_protocol;
return one_sock_name(name_return, svsk);
}
EXPORT_SYMBOL_GPL(svc_addsock);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 99a52aabe33..29e401bb612 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport)
goto out;
}
- result = -EINVAL;
- if (try_module_get(THIS_MODULE)) {
- list_add_tail(&transport->list, &xprt_list);
- printk(KERN_INFO "RPC: Registered %s transport module.\n",
- transport->name);
- result = 0;
- }
+ list_add_tail(&transport->list, &xprt_list);
+ printk(KERN_INFO "RPC: Registered %s transport module.\n",
+ transport->name);
+ result = 0;
out:
spin_unlock(&xprt_list_lock);
@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport)
"RPC: Unregistered %s transport module.\n",
transport->name);
list_del_init(&transport->list);
- module_put(THIS_MODULE);
goto out;
}
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 5c1954d28d0..14106d26bb9 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
}
if (xdrbuf->tail[0].iov_len) {
+ /* the rpcrdma protocol allows us to omit any trailing
+ * xdr pad bytes, saving the server an RDMA operation. */
+ if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
+ return n;
if (n == nsegs)
return 0;
seg[n].mr_page = NULL;
@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
if (hdrlen == 0)
return -1;
- dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
- " headerp 0x%p base 0x%p lkey 0x%x\n",
+ dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
+ " headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey);
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
* Scatter inline received data back into provided iov's.
*/
static void
-rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
+rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
int i, npages, curlen, olen;
char *destp;
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
} else
rqst->rq_rcv_buf.tail[0].iov_len = 0;
+ if (pad) {
+ /* implicit padding on terminal chunk */
+ unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
+ while (pad--)
+ p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
+ }
+
if (copy_len)
dprintk("RPC: %s: %d bytes in"
" %d extra segments (%d lost)\n",
@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock);
+ if (++xprt->connect_cookie == 0) /* maintain a reserved value */
+ ++xprt->connect_cookie;
if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0);
} else {
if (xprt_test_and_clear_connected(xprt))
- xprt_wake_pending_tasks(xprt, ep->rep_connected);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
spin_unlock_bh(&xprt->transport_lock);
}
@@ -792,14 +805,20 @@ repost:
((unsigned char *)iptr - (unsigned char *)headerp);
status = rep->rr_len + rdmalen;
r_xprt->rx_stats.total_rdma_reply += rdmalen;
+ /* special case - last chunk may omit padding */
+ if (rdmalen &= 3) {
+ rdmalen = 4 - rdmalen;
+ status += rdmalen;
+ }
} else {
/* else ordinary inline */
+ rdmalen = 0;
iptr = (__be32 *)((unsigned char *)headerp + 28);
rep->rr_len -= 28; /*sizeof *headerp;*/
status = rep->rr_len;
}
/* Fix up the rpc results for upper layer */
- rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
+ rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
break;
case htonl(RDMA_NOMSG):
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 74de31a0661..a4756576d68 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
*
* Assumptions:
* - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contigous and consists of
+ * - pages[] is not physically or virtually contiguous and consists of
* PAGE_SIZE elements.
*
* Output:
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
* chunk in the read list
*
*/
-static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
+static int map_read_chunks(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
struct rpcrdma_msg *rmsgp,
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
return sge_no;
}
-static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
- struct svc_rdma_op_ctxt *ctxt,
- struct kvec *vec,
- u64 *sgl_offset,
- int count)
+/* Map a read-chunk-list to an XDR and fast register the page-list.
+ *
+ * Assumptions:
+ * - chunk[0] position points to pages[0] at an offset of 0
+ * - pages[] will be made physically contiguous by creating a one-off memory
+ * region using the fastreg verb.
+ * - byte_count is # of bytes in read-chunk-list
+ * - ch_count is # of chunks in read-chunk-list
+ *
+ * Output:
+ * - sge array pointing into pages[] array.
+ * - chunk_sge array specifying sge index and count for each
+ * chunk in the read list
+ */
+static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ struct rpcrdma_msg *rmsgp,
+ struct svc_rdma_req_map *rpl_map,
+ struct svc_rdma_req_map *chl_map,
+ int ch_count,
+ int byte_count)
+{
+ int page_no;
+ int ch_no;
+ u32 offset;
+ struct rpcrdma_read_chunk *ch;
+ struct svc_rdma_fastreg_mr *frmr;
+ int ret = 0;
+
+ frmr = svc_rdma_get_frmr(xprt);
+ if (IS_ERR(frmr))
+ return -ENOMEM;
+
+ head->frmr = frmr;
+ head->arg.head[0] = rqstp->rq_arg.head[0];
+ head->arg.tail[0] = rqstp->rq_arg.tail[0];
+ head->arg.pages = &head->pages[head->count];
+ head->hdr_count = head->count; /* save count of hdr pages */
+ head->arg.page_base = 0;
+ head->arg.page_len = byte_count;
+ head->arg.len = rqstp->rq_arg.len + byte_count;
+ head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+
+ /* Fast register the page list */
+ frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+ frmr->direction = DMA_FROM_DEVICE;
+ frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
+ frmr->map_len = byte_count;
+ frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
+ for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+ frmr->page_list->page_list[page_no] =
+ ib_dma_map_single(xprt->sc_cm_id->device,
+ page_address(rqstp->rq_arg.pages[page_no]),
+ PAGE_SIZE, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ frmr->page_list->page_list[page_no]))
+ goto fatal_err;
+ atomic_inc(&xprt->sc_dma_used);
+ head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
+ }
+ head->count += page_no;
+
+ /* rq_respages points one past arg pages */
+ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
+
+ /* Create the reply and chunk maps */
+ offset = 0;
+ ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+ for (ch_no = 0; ch_no < ch_count; ch_no++) {
+ rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
+ rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
+ chl_map->ch[ch_no].count = 1;
+ chl_map->ch[ch_no].start = ch_no;
+ offset += ch->rc_target.rs_length;
+ ch++;
+ }
+
+ ret = svc_rdma_fastreg(xprt, frmr);
+ if (ret)
+ goto fatal_err;
+
+ return ch_no;
+
+ fatal_err:
+ printk("svcrdma: error fast registering xdr for xprt %p", xprt);
+ svc_rdma_put_frmr(xprt, frmr);
+ return -EIO;
+}
+
+static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+ struct svc_rdma_op_ctxt *ctxt,
+ struct svc_rdma_fastreg_mr *frmr,
+ struct kvec *vec,
+ u64 *sgl_offset,
+ int count)
{
int i;
ctxt->count = count;
ctxt->direction = DMA_FROM_DEVICE;
for (i = 0; i < count; i++) {
- atomic_inc(&xprt->sc_dma_used);
- ctxt->sge[i].addr =
- ib_dma_map_single(xprt->sc_cm_id->device,
- vec[i].iov_base, vec[i].iov_len,
- DMA_FROM_DEVICE);
+ ctxt->sge[i].length = 0; /* in case map fails */
+ if (!frmr) {
+ ctxt->sge[i].addr =
+ ib_dma_map_single(xprt->sc_cm_id->device,
+ vec[i].iov_base,
+ vec[i].iov_len,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ ctxt->sge[i].addr))
+ return -EINVAL;
+ ctxt->sge[i].lkey = xprt->sc_dma_lkey;
+ atomic_inc(&xprt->sc_dma_used);
+ } else {
+ ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
+ ctxt->sge[i].lkey = frmr->mr->lkey;
+ }
ctxt->sge[i].length = vec[i].iov_len;
- ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
*sgl_offset = *sgl_offset + vec[i].iov_len;
}
+ return 0;
}
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
struct svc_rdma_op_ctxt *hdr_ctxt)
{
struct ib_send_wr read_wr;
+ struct ib_send_wr inv_wr;
int err = 0;
int ch_no;
int ch_count;
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
- sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
- rpl_map, chl_map,
- ch_count, byte_count);
+
+ if (!xprt->sc_frmr_pg_list_len)
+ sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+ rpl_map, chl_map, ch_count,
+ byte_count);
+ else
+ sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+ rpl_map, chl_map, ch_count,
+ byte_count);
+ if (sge_count < 0) {
+ err = -EIO;
+ goto out;
+ }
+
sgl_offset = 0;
ch_no = 0;
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
next_sge:
ctxt = svc_rdma_get_context(xprt);
ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->frmr = hdr_ctxt->frmr;
+ ctxt->read_hdr = NULL;
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/* Prepare READ WR */
memset(&read_wr, 0, sizeof read_wr);
- ctxt->wr_op = IB_WR_RDMA_READ;
read_wr.wr_id = (unsigned long)ctxt;
read_wr.opcode = IB_WR_RDMA_READ;
+ ctxt->wr_op = read_wr.opcode;
read_wr.send_flags = IB_SEND_SIGNALED;
read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
read_wr.wr.rdma.remote_addr =
@@ -327,10 +444,15 @@ next_sge:
read_wr.sg_list = ctxt->sge;
read_wr.num_sge =
rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
- rdma_set_ctxt_sge(xprt, ctxt,
- &rpl_map->sge[chl_map->ch[ch_no].start],
- &sgl_offset,
- read_wr.num_sge);
+ err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
+ &rpl_map->sge[chl_map->ch[ch_no].start],
+ &sgl_offset,
+ read_wr.num_sge);
+ if (err) {
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 0);
+ goto out;
+ }
if (((ch+1)->rc_discrim == 0) &&
(read_wr.num_sge == chl_map->ch[ch_no].count)) {
/*
@@ -339,6 +461,29 @@ next_sge:
* the client and the RPC needs to be enqueued.
*/
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ if (hdr_ctxt->frmr) {
+ set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+ /*
+ * Invalidate the local MR used to map the data
+ * sink.
+ */
+ if (xprt->sc_dev_caps &
+ SVCRDMA_DEVCAP_READ_W_INV) {
+ read_wr.opcode =
+ IB_WR_RDMA_READ_WITH_INV;
+ ctxt->wr_op = read_wr.opcode;
+ read_wr.ex.invalidate_rkey =
+ ctxt->frmr->mr->lkey;
+ } else {
+ /* Prepare INVALIDATE WR */
+ memset(&inv_wr, 0, sizeof inv_wr);
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED;
+ inv_wr.ex.invalidate_rkey =
+ hdr_ctxt->frmr->mr->lkey;
+ read_wr.next = &inv_wr;
+ }
+ }
ctxt->read_hdr = hdr_ctxt;
}
/* Post the read */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 84d328329d9..9a7a8e7ae03 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -69,9 +69,127 @@
* array is only concerned with the reply we are assured that we have
* on extra page for the RPCRMDA header.
*/
-static void xdr_to_sge(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
+int fast_reg_xdr(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
+{
+ int sge_no;
+ u32 sge_bytes;
+ u32 page_bytes;
+ u32 page_off;
+ int page_no = 0;
+ u8 *frva;
+ struct svc_rdma_fastreg_mr *frmr;
+
+ frmr = svc_rdma_get_frmr(xprt);
+ if (IS_ERR(frmr))
+ return -ENOMEM;
+ vec->frmr = frmr;
+
+ /* Skip the RPCRDMA header */
+ sge_no = 1;
+
+ /* Map the head. */
+ frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
+ vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
+ vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
+ vec->count = 2;
+ sge_no++;
+
+ /* Build the FRMR */
+ frmr->kva = frva;
+ frmr->direction = DMA_TO_DEVICE;
+ frmr->access_flags = 0;
+ frmr->map_len = PAGE_SIZE;
+ frmr->page_list_len = 1;
+ frmr->page_list->page_list[page_no] =
+ ib_dma_map_single(xprt->sc_cm_id->device,
+ (void *)xdr->head[0].iov_base,
+ PAGE_SIZE, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ frmr->page_list->page_list[page_no]))
+ goto fatal_err;
+ atomic_inc(&xprt->sc_dma_used);
+
+ page_off = xdr->page_base;
+ page_bytes = xdr->page_len + page_off;
+ if (!page_bytes)
+ goto encode_tail;
+
+ /* Map the pages */
+ vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+ vec->sge[sge_no].iov_len = page_bytes;
+ sge_no++;
+ while (page_bytes) {
+ struct page *page;
+
+ page = xdr->pages[page_no++];
+ sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
+ page_bytes -= sge_bytes;
+
+ frmr->page_list->page_list[page_no] =
+ ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
+ PAGE_SIZE, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ frmr->page_list->page_list[page_no]))
+ goto fatal_err;
+
+ atomic_inc(&xprt->sc_dma_used);
+ page_off = 0; /* reset for next time through loop */
+ frmr->map_len += PAGE_SIZE;
+ frmr->page_list_len++;
+ }
+ vec->count++;
+
+ encode_tail:
+ /* Map tail */
+ if (0 == xdr->tail[0].iov_len)
+ goto done;
+
+ vec->count++;
+ vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
+
+ if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
+ ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
+ /*
+ * If head and tail use the same page, we don't need
+ * to map it again.
+ */
+ vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
+ } else {
+ void *va;
+
+ /* Map another page for the tail */
+ page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+ va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
+ vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+
+ frmr->page_list->page_list[page_no] =
+ ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ frmr->page_list->page_list[page_no]))
+ goto fatal_err;
+ atomic_inc(&xprt->sc_dma_used);
+ frmr->map_len += PAGE_SIZE;
+ frmr->page_list_len++;
+ }
+
+ done:
+ if (svc_rdma_fastreg(xprt, frmr))
+ goto fatal_err;
+
+ return 0;
+
+ fatal_err:
+ printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
+ svc_rdma_put_frmr(xprt, frmr);
+ return -EIO;
+}
+
+static int map_xdr(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
{
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no;
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
+ if (xprt->sc_frmr_pg_list_len)
+ return fast_reg_xdr(xprt, xdr, vec);
+
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
BUG_ON(sge_no > sge_max);
vec->count = sge_no;
+ return 0;
}
/* Assumptions:
+ * - We are using FRMR
+ * - or -
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_no = 0;
/* Copy the remaining SGE */
- while (bc != 0 && xdr_sge_no < vec->count) {
- sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
- sge_bytes = min((size_t)bc,
- (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
+ while (bc != 0) {
+ sge_bytes = min_t(size_t,
+ bc, vec->sge[xdr_sge_no].iov_len-sge_off);
sge[sge_no].length = sge_bytes;
- atomic_inc(&xprt->sc_dma_used);
- sge[sge_no].addr =
- ib_dma_map_single(xprt->sc_cm_id->device,
- (void *)
- vec->sge[xdr_sge_no].iov_base + sge_off,
- sge_bytes, DMA_TO_DEVICE);
- if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
- sge[sge_no].addr))
- goto err;
+ if (!vec->frmr) {
+ sge[sge_no].addr =
+ ib_dma_map_single(xprt->sc_cm_id->device,
+ (void *)
+ vec->sge[xdr_sge_no].iov_base + sge_off,
+ sge_bytes, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ sge[sge_no].addr))
+ goto err;
+ atomic_inc(&xprt->sc_dma_used);
+ sge[sge_no].lkey = xprt->sc_dma_lkey;
+ } else {
+ sge[sge_no].addr = (unsigned long)
+ vec->sge[xdr_sge_no].iov_base + sge_off;
+ sge[sge_no].lkey = vec->frmr->mr->lkey;
+ }
+ ctxt->count++;
+ ctxt->frmr = vec->frmr;
sge_off = 0;
sge_no++;
- ctxt->count++;
xdr_sge_no++;
+ BUG_ON(xdr_sge_no > vec->count);
bc -= sge_bytes;
}
- BUG_ON(bc != 0);
- BUG_ON(xdr_sge_no > vec->count);
-
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
ctxt->wr_op = IB_WR_RDMA_WRITE;
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
- max_write = xprt->sc_max_sge * PAGE_SIZE;
+ if (vec->frmr)
+ max_write = vec->frmr->map_len;
+ else
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
- max_write = xprt->sc_max_sge * PAGE_SIZE;
+ if (vec->frmr)
+ max_write = vec->frmr->map_len;
+ else
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
/* xdr offset starts at RPC message */
for (xdr_off = 0, chunk_no = 0;
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, ch->rs_length);
-
/* Prepare the reply chunk given the length actually
* written */
rs_offset = get_unaligned(&(ch->rs_offset));
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
int byte_count)
{
struct ib_send_wr send_wr;
+ struct ib_send_wr inv_wr;
int sge_no;
int sge_bytes;
int page_no;
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
+ ctxt->frmr = vec->frmr;
+ if (vec->frmr)
+ set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+ else
+ clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/* Prepare the SGE for the RPCRDMA Header */
- atomic_inc(&rdma->sc_dma_used);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device,
page, 0, PAGE_SIZE, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+ goto err;
+ atomic_inc(&rdma->sc_dma_used);
+
ctxt->direction = DMA_TO_DEVICE;
+
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
- ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
+ ctxt->sge[0].lkey = rdma->sc_dma_lkey;
/* Determine how many of our SGE are to be transmitted */
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
- atomic_inc(&rdma->sc_dma_used);
- ctxt->sge[sge_no].addr =
- ib_dma_map_single(rdma->sc_cm_id->device,
- vec->sge[sge_no].iov_base,
- sge_bytes, DMA_TO_DEVICE);
+ if (!vec->frmr) {
+ ctxt->sge[sge_no].addr =
+ ib_dma_map_single(rdma->sc_cm_id->device,
+ vec->sge[sge_no].iov_base,
+ sge_bytes, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+ ctxt->sge[sge_no].addr))
+ goto err;
+ atomic_inc(&rdma->sc_dma_used);
+ ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+ } else {
+ ctxt->sge[sge_no].addr = (unsigned long)
+ vec->sge[sge_no].iov_base;
+ ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
+ }
ctxt->sge[sge_no].length = sge_bytes;
- ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
}
BUG_ON(byte_count != 0);
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
- /* If there are more pages than SGE, terminate SGE list */
+ /*
+ * If there are more pages than SGE, terminate SGE
+ * list so that svc_rdma_unmap_dma doesn't attempt to
+ * unmap garbage.
+ */
if (page_no+1 >= sge_no)
ctxt->sge[page_no+1].length = 0;
}
BUG_ON(sge_no > rdma->sc_max_sge);
+ BUG_ON(sge_no > ctxt->count);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
+ if (vec->frmr) {
+ /* Prepare INVALIDATE WR */
+ memset(&inv_wr, 0, sizeof inv_wr);
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED;
+ inv_wr.ex.invalidate_rkey =
+ vec->frmr->mr->lkey;
+ send_wr.next = &inv_wr;
+ }
ret = svc_rdma_send(rdma, &send_wr);
if (ret)
- svc_rdma_put_context(ctxt, 1);
+ goto err;
- return ret;
+ return 0;
+
+ err:
+ svc_rdma_put_frmr(rdma, vec->frmr);
+ svc_rdma_put_context(ctxt, 1);
+ return -EIO;
}
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
vec = svc_rdma_get_req_map();
- xdr_to_sge(rdma, &rqstp->rq_res, vec);
-
+ ret = map_xdr(rdma, &rqstp->rq_res, vec);
+ if (ret)
+ goto err0;
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
ret);
- goto error;
+ goto err1;
}
inline_bytes -= ret;
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
ret);
- goto error;
+ goto err1;
}
inline_bytes -= ret;
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
svc_rdma_put_req_map(vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
- error:
+
+ err1:
+ put_page(res_page);
+ err0:
svc_rdma_put_req_map(vec);
svc_rdma_put_context(ctxt, 0);
- put_page(res_page);
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 900cb69728c..6fb493cbd29 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q);
ctxt->count = 0;
+ ctxt->frmr = NULL;
atomic_inc(&xprt->sc_ctxt_used);
return ctxt;
}
-static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
+void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
{
struct svcxprt_rdma *xprt = ctxt->xprt;
int i;
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
- atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_single(xprt->sc_cm_id->device,
- ctxt->sge[i].addr,
- ctxt->sge[i].length,
- ctxt->direction);
+ /*
+ * Unmap the DMA addr in the SGE if the lkey matches
+ * the sc_dma_lkey, otherwise, ignore it since it is
+ * an FRMR lkey and will be unmapped later when the
+ * last WR that uses it completes.
+ */
+ if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+ atomic_dec(&xprt->sc_dma_used);
+ ib_dma_unmap_single(xprt->sc_cm_id->device,
+ ctxt->sge[i].addr,
+ ctxt->sge[i].length,
+ ctxt->direction);
+ }
}
}
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
map->count = 0;
+ map->frmr = NULL;
return map;
}
@@ -316,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
}
/*
+ * Processs a completion context
+ */
+static void process_context(struct svcxprt_rdma *xprt,
+ struct svc_rdma_op_ctxt *ctxt)
+{
+ svc_rdma_unmap_dma(ctxt);
+
+ switch (ctxt->wr_op) {
+ case IB_WR_SEND:
+ if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+ svc_rdma_put_frmr(xprt, ctxt->frmr);
+ svc_rdma_put_context(ctxt, 1);
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ svc_rdma_put_context(ctxt, 0);
+ break;
+
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+ struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+ BUG_ON(!read_hdr);
+ if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+ svc_rdma_put_frmr(xprt, ctxt->frmr);
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ list_add_tail(&read_hdr->dto_q,
+ &xprt->sc_read_complete_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ svc_xprt_enqueue(&xprt->sc_xprt);
+ }
+ svc_rdma_put_context(ctxt, 0);
+ break;
+
+ default:
+ printk(KERN_ERR "svcrdma: unexpected completion type, "
+ "opcode=%d\n",
+ ctxt->wr_op);
+ break;
+ }
+}
+
+/*
* Send Queue Completion Handler - potentially called on interrupt context.
*
* Note that caller must hold a transport reference.
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
struct ib_cq *cq = xprt->sc_sq_cq;
int ret;
-
if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
return;
ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_sq_poll);
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
- ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
- xprt = ctxt->xprt;
-
- svc_rdma_unmap_dma(ctxt);
if (wc.status != IB_WC_SUCCESS)
/* Close the transport */
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
atomic_dec(&xprt->sc_sq_count);
wake_up(&xprt->sc_send_wait);
- switch (ctxt->wr_op) {
- case IB_WR_SEND:
- svc_rdma_put_context(ctxt, 1);
- break;
-
- case IB_WR_RDMA_WRITE:
- svc_rdma_put_context(ctxt, 0);
- break;
-
- case IB_WR_RDMA_READ:
- if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
- struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
- BUG_ON(!read_hdr);
- spin_lock_bh(&xprt->sc_rq_dto_lock);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- list_add_tail(&read_hdr->dto_q,
- &xprt->sc_read_complete_q);
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
- svc_xprt_enqueue(&xprt->sc_xprt);
- }
- svc_rdma_put_context(ctxt, 0);
- break;
+ ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
+ if (ctxt)
+ process_context(xprt, ctxt);
- default:
- printk(KERN_ERR "svcrdma: unexpected completion type, "
- "opcode=%d, status=%d\n",
- wc.opcode, wc.status);
- break;
- }
svc_xprt_put(&xprt->sc_xprt);
}
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
+ INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+ spin_lock_init(&cma_xprt->sc_frmr_q_lock);
cma_xprt->sc_ord = svcrdma_ord;
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
struct ib_recv_wr recv_wr, *bad_recv_wr;
struct svc_rdma_op_ctxt *ctxt;
struct page *page;
- unsigned long pa;
+ dma_addr_t pa;
int sge_no;
int buflen;
int ret;
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
BUG_ON(sge_no >= xprt->sc_max_sge);
page = svc_rdma_get_page();
ctxt->pages[sge_no] = page;
- atomic_inc(&xprt->sc_dma_used);
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
+ goto err_put_ctxt;
+ atomic_inc(&xprt->sc_dma_used);
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
- ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+ ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
buflen += PAGE_SIZE;
}
ctxt->count = sge_no;
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
svc_rdma_put_context(ctxt, 1);
}
return ret;
+
+ err_put_ctxt:
+ svc_rdma_put_context(ctxt, 1);
+ return -ENOMEM;
}
/*
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
"event=%d\n", cma_id, cma_id->context, event->event);
handle_connect_req(cma_id,
- event->param.conn.responder_resources);
+ event->param.conn.initiator_depth);
break;
case RDMA_CM_EVENT_ESTABLISHED:
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
return ERR_PTR(ret);
}
+static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
+{
+ struct ib_mr *mr;
+ struct ib_fast_reg_page_list *pl;
+ struct svc_rdma_fastreg_mr *frmr;
+
+ frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
+ if (!frmr)
+ goto err;
+
+ mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+ if (!mr)
+ goto err_free_frmr;
+
+ pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
+ RPCSVC_MAXPAGES);
+ if (!pl)
+ goto err_free_mr;
+
+ frmr->mr = mr;
+ frmr->page_list = pl;
+ INIT_LIST_HEAD(&frmr->frmr_list);
+ return frmr;
+
+ err_free_mr:
+ ib_dereg_mr(mr);
+ err_free_frmr:
+ kfree(frmr);
+ err:
+ return ERR_PTR(-ENOMEM);
+}
+
+static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_fastreg_mr *frmr;
+
+ while (!list_empty(&xprt->sc_frmr_q)) {
+ frmr = list_entry(xprt->sc_frmr_q.next,
+ struct svc_rdma_fastreg_mr, frmr_list);
+ list_del_init(&frmr->frmr_list);
+ ib_dereg_mr(frmr->mr);
+ ib_free_fast_reg_page_list(frmr->page_list);
+ kfree(frmr);
+ }
+}
+
+struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_fastreg_mr *frmr = NULL;
+
+ spin_lock_bh(&rdma->sc_frmr_q_lock);
+ if (!list_empty(&rdma->sc_frmr_q)) {
+ frmr = list_entry(rdma->sc_frmr_q.next,
+ struct svc_rdma_fastreg_mr, frmr_list);
+ list_del_init(&frmr->frmr_list);
+ frmr->map_len = 0;
+ frmr->page_list_len = 0;
+ }
+ spin_unlock_bh(&rdma->sc_frmr_q_lock);
+ if (frmr)
+ return frmr;
+
+ return rdma_alloc_frmr(rdma);
+}
+
+static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
+ struct svc_rdma_fastreg_mr *frmr)
+{
+ int page_no;
+ for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+ dma_addr_t addr = frmr->page_list->page_list[page_no];
+ if (ib_dma_mapping_error(frmr->mr->device, addr))
+ continue;
+ atomic_dec(&xprt->sc_dma_used);
+ ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
+ frmr->direction);
+ }
+}
+
+void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
+ struct svc_rdma_fastreg_mr *frmr)
+{
+ if (frmr) {
+ frmr_unmap_dma(rdma, frmr);
+ spin_lock_bh(&rdma->sc_frmr_q_lock);
+ BUG_ON(!list_empty(&frmr->frmr_list));
+ list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
+ spin_unlock_bh(&rdma->sc_frmr_q_lock);
+ }
+}
+
/*
* This is the xpo_recvfrom function for listening endpoints. Its
* purpose is to accept incoming connections. The CMA callback handler
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct ib_device_attr devattr;
+ int dma_mr_acc;
+ int need_dma_mr;
int ret;
int i;
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
newxprt->sc_qp = newxprt->sc_cm_id->qp;
- /* Register all of physical memory */
- newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd,
- IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE);
- if (IS_ERR(newxprt->sc_phys_mr)) {
- dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
+ /*
+ * Use the most secure set of MR resources based on the
+ * transport type and available memory management features in
+ * the device. Here's the table implemented below:
+ *
+ * Fast Global DMA Remote WR
+ * Reg LKEY MR Access
+ * Sup'd Sup'd Needed Needed
+ *
+ * IWARP N N Y Y
+ * N Y Y Y
+ * Y N Y N
+ * Y Y N -
+ *
+ * IB N N Y N
+ * N Y N -
+ * Y N Y N
+ * Y Y N -
+ *
+ * NB: iWARP requires remote write access for the data sink
+ * of an RDMA_READ. IB does not.
+ */
+ if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ newxprt->sc_frmr_pg_list_len =
+ devattr.max_fast_reg_page_list_len;
+ newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+ }
+
+ /*
+ * Determine if a DMA MR is required and if so, what privs are required
+ */
+ switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
+ case RDMA_TRANSPORT_IWARP:
+ newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
+ if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+ need_dma_mr = 1;
+ dma_mr_acc =
+ (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE);
+ } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+ need_dma_mr = 1;
+ dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+ } else
+ need_dma_mr = 0;
+ break;
+ case RDMA_TRANSPORT_IB:
+ if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+ need_dma_mr = 1;
+ dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+ } else
+ need_dma_mr = 0;
+ break;
+ default:
goto errout;
}
+ /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
+ if (need_dma_mr) {
+ /* Register all of physical memory */
+ newxprt->sc_phys_mr =
+ ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
+ if (IS_ERR(newxprt->sc_phys_mr)) {
+ dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
+ ret);
+ goto errout;
+ }
+ newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
+ } else
+ newxprt->sc_dma_lkey =
+ newxprt->sc_cm_id->device->local_dma_lkey;
+
/* Post receive buffers */
for (i = 0; i < newxprt->sc_max_requests; i++) {
ret = svc_rdma_post_recv(newxprt);
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work)
WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
+ /* De-allocate fastreg mr */
+ rdma_dealloc_frmr_q(rdma);
+
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_destroy_qp(rdma->sc_qp);
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
+/*
+ * Attempt to register the kvec representing the RPC memory with the
+ * device.
+ *
+ * Returns:
+ * NULL : The device does not support fastreg or there were no more
+ * fastreg mr.
+ * frmr : The kvec register request was successfully posted.
+ * <0 : An error was encountered attempting to register the kvec.
+ */
+int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
+ struct svc_rdma_fastreg_mr *frmr)
+{
+ struct ib_send_wr fastreg_wr;
+ u8 key;
+
+ /* Bump the key */
+ key = (u8)(frmr->mr->lkey & 0x000000FF);
+ ib_update_fast_reg_key(frmr->mr, ++key);
+
+ /* Prepare FASTREG WR */
+ memset(&fastreg_wr, 0, sizeof fastreg_wr);
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.send_flags = IB_SEND_SIGNALED;
+ fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+ fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+ fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+ fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ fastreg_wr.wr.fast_reg.length = frmr->map_len;
+ fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+ fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+ return svc_rdma_send(xprt, &fastreg_wr);
+}
+
int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
{
- struct ib_send_wr *bad_wr;
+ struct ib_send_wr *bad_wr, *n_wr;
+ int wr_count;
+ int i;
int ret;
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
return -ENOTCONN;
BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
- BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
- wr->opcode);
+ wr_count = 1;
+ for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
+ wr_count++;
+
/* If the SQ is full, wait until an SQ entry is available */
while (1) {
spin_lock_bh(&xprt->sc_lock);
- if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
+ if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
spin_unlock_bh(&xprt->sc_lock);
atomic_inc(&rdma_stat_sq_starve);
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
return 0;
continue;
}
- /* Bumped used SQ WR count and post */
- svc_xprt_get(&xprt->sc_xprt);
+ /* Take a transport ref for each WR posted */
+ for (i = 0; i < wr_count; i++)
+ svc_xprt_get(&xprt->sc_xprt);
+
+ /* Bump used SQ WR count and post */
+ atomic_add(wr_count, &xprt->sc_sq_count);
ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
- if (!ret)
- atomic_inc(&xprt->sc_sq_count);
- else {
- svc_xprt_put(&xprt->sc_xprt);
+ if (ret) {
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ atomic_sub(wr_count, &xprt->sc_sq_count);
+ for (i = 0; i < wr_count; i ++)
+ svc_xprt_put(&xprt->sc_xprt);
dprintk("svcrdma: failed to post SQ WR rc=%d, "
"sc_sq_count=%d, sc_sq_depth=%d\n",
ret, atomic_read(&xprt->sc_sq_count),
xprt->sc_sq_depth);
}
spin_unlock_bh(&xprt->sc_lock);
+ if (ret)
+ wake_up(&xprt->sc_send_wait);
break;
}
return ret;
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
/* Prepare SGE for local address */
- atomic_inc(&xprt->sc_dma_used);
sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
- sge.lkey = xprt->sc_phys_mr->lkey;
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
+ put_page(p);
+ return;
+ }
+ atomic_inc(&xprt->sc_dma_used);
+ sge.lkey = xprt->sc_dma_lkey;
sge.length = length;
ctxt = svc_rdma_get_context(xprt);
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
if (ret) {
dprintk("svcrdma: Error %d posting send for protocol error\n",
ret);
+ ib_dma_unmap_page(xprt->sc_cm_id->device,
+ sge.addr, PAGE_SIZE,
+ DMA_FROM_DEVICE);
svc_rdma_put_context(ctxt, 1);
}
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a564c1a39ec..9839c3d9414 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
-#if !RPCRDMA_PERSISTENT_REGISTRATION
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
-#else
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
-#endif
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+ int xprt_rdma_pad_optimize = 0;
#ifdef RPC_DEBUG
@@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = {
.extra2 = &max_memreg,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "rdma_pad_optimize",
+ .data = &xprt_rdma_pad_optimize,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = 0,
},
};
@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: closing\n", __func__);
+ if (r_xprt->rx_ep.rep_connected > 0)
+ xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
}
@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task)
/* Reconnect */
schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout);
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > (30 * HZ))
+ xprt->reestablish_timeout = (30 * HZ);
+ else if (xprt->reestablish_timeout < (5 * HZ))
+ xprt->reestablish_timeout = (5 * HZ);
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
out:
+ req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
outfail:
@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task)
req->rl_reply->rr_xprt = xprt;
}
- if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
- xprt_disconnect_done(xprt);
- return -ENOTCONN; /* implies disconnect */
- }
+ /* Must suppress retransmit to maintain credits */
+ if (req->rl_connect_cookie == xprt->connect_cookie)
+ goto drop_connection;
+ req->rl_connect_cookie = xprt->connect_cookie;
+
+ if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ goto drop_connection;
+ task->tk_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
return 0;
+
+drop_connection:
+ xprt_disconnect_done(xprt);
+ return -ENOTCONN; /* implies disconnect */
}
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void)
{
int rc;
- dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+ dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#ifdef RPC_DEBUG
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8ea283ecc52..a5fef5e6c32 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ ia->ri_async_rc = 0;
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
@@ -338,13 +339,32 @@ connected:
wake_up_all(&ep->rep_connect_wait);
break;
default:
- ia->ri_async_rc = -EINVAL;
- dprintk("RPC: %s: unexpected CM event %X\n",
+ dprintk("RPC: %s: unexpected CM event %d\n",
__func__, event->event);
- complete(&ia->ri_done);
break;
}
+#ifdef RPC_DEBUG
+ if (connstate == 1) {
+ int ird = attr.max_dest_rd_atomic;
+ int tird = ep->rep_remote_cma.responder_resources;
+ printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
+ "on %s, memreg %d slots %d ird %d%s\n",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port),
+ ia->ri_id->device->name,
+ ia->ri_memreg_strategy,
+ xprt->rx_buf.rb_max_requests,
+ ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
+ } else if (connstate < 0) {
+ printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
+ "closed (%d)\n",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port),
+ connstate);
+ }
+#endif
+
return 0;
}
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
struct rdma_cm_id *id;
int rc;
+ init_completion(&ia->ri_done);
+
id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
return id;
}
- ia->ri_async_rc = 0;
+ ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc);
goto out;
}
- wait_for_completion(&ia->ri_done);
+ wait_for_completion_interruptible_timeout(&ia->ri_done,
+ msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc;
if (rc)
goto out;
- ia->ri_async_rc = 0;
+ ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
__func__, rc);
goto out;
}
- wait_for_completion(&ia->ri_done);
+ wait_for_completion_interruptible_timeout(&ia->ri_done,
+ msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc;
if (rc)
goto out;
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq)
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
- int rc;
+ int rc, mem_priv;
+ struct ib_device_attr devattr;
struct rpcrdma_ia *ia = &xprt->rx_ia;
- init_completion(&ia->ri_done);
-
ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id);
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
}
/*
+ * Query the device to determine if the requested memory
+ * registration strategy is supported. If it isn't, set the
+ * strategy to a globally supported model.
+ */
+ rc = ib_query_device(ia->ri_id->device, &devattr);
+ if (rc) {
+ dprintk("RPC: %s: ib_query_device failed %d\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
+ ia->ri_have_dma_lkey = 1;
+ ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
+ }
+
+ switch (memreg) {
+ case RPCRDMA_MEMWINDOWS:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
+ dprintk("RPC: %s: MEMWINDOWS registration "
+ "specified but not supported by adapter, "
+ "using slower RPCRDMA_REGISTER\n",
+ __func__);
+ memreg = RPCRDMA_REGISTER;
+ }
+ break;
+ case RPCRDMA_MTHCAFMR:
+ if (!ia->ri_id->device->alloc_fmr) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ dprintk("RPC: %s: MTHCAFMR registration "
+ "specified but not supported by adapter, "
+ "using riskier RPCRDMA_ALLPHYSICAL\n",
+ __func__);
+ memreg = RPCRDMA_ALLPHYSICAL;
+#else
+ dprintk("RPC: %s: MTHCAFMR registration "
+ "specified but not supported by adapter, "
+ "using slower RPCRDMA_REGISTER\n",
+ __func__);
+ memreg = RPCRDMA_REGISTER;
+#endif
+ }
+ break;
+ case RPCRDMA_FRMR:
+ /* Requires both frmr reg and local dma lkey */
+ if ((devattr.device_cap_flags &
+ (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
+ (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ dprintk("RPC: %s: FRMR registration "
+ "specified but not supported by adapter, "
+ "using riskier RPCRDMA_ALLPHYSICAL\n",
+ __func__);
+ memreg = RPCRDMA_ALLPHYSICAL;
+#else
+ dprintk("RPC: %s: FRMR registration "
+ "specified but not supported by adapter, "
+ "using slower RPCRDMA_REGISTER\n",
+ __func__);
+ memreg = RPCRDMA_REGISTER;
+#endif
+ }
+ break;
+ }
+
+ /*
* Optionally obtain an underlying physical identity mapping in
* order to do a memory window-based bind. This base registration
* is protected from remote access - that is enabled only by binding
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
* revoked after the corresponding completion similar to a storage
* adapter.
*/
- if (memreg > RPCRDMA_REGISTER) {
- int mem_priv = IB_ACCESS_LOCAL_WRITE;
- switch (memreg) {
+ switch (memreg) {
+ case RPCRDMA_BOUNCEBUFFERS:
+ case RPCRDMA_REGISTER:
+ case RPCRDMA_FRMR:
+ break;
#if RPCRDMA_PERSISTENT_REGISTRATION
- case RPCRDMA_ALLPHYSICAL:
- mem_priv |= IB_ACCESS_REMOTE_WRITE;
- mem_priv |= IB_ACCESS_REMOTE_READ;
- break;
+ case RPCRDMA_ALLPHYSICAL:
+ mem_priv = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+ goto register_setup;
#endif
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- mem_priv |= IB_ACCESS_MW_BIND;
- break;
- default:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ mem_priv = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_MW_BIND;
+ goto register_setup;
+ case RPCRDMA_MTHCAFMR:
+ if (ia->ri_have_dma_lkey)
break;
- }
+ mem_priv = IB_ACCESS_LOCAL_WRITE;
+ register_setup:
ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
if (IS_ERR(ia->ri_bind_mem)) {
printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
memreg = RPCRDMA_REGISTER;
ia->ri_bind_mem = NULL;
}
+ break;
+ default:
+ printk(KERN_ERR "%s: invalid memory registration mode %d\n",
+ __func__, memreg);
+ rc = -EINVAL;
+ goto out2;
}
+ dprintk("RPC: %s: memory registration strategy is %d\n",
+ __func__, memreg);
/* Else will do memory reg/dereg for each chunk */
ia->ri_memreg_strategy = memreg;
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
return 0;
out2:
rdma_destroy_id(ia->ri_id);
+ ia->ri_id = NULL;
out1:
return rc;
}
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
dprintk("RPC: %s: ib_dereg_mr returned %i\n",
__func__, rc);
}
- if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
- rdma_destroy_qp(ia->ri_id);
+ if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
+ if (ia->ri_id->qp)
+ rdma_destroy_qp(ia->ri_id);
+ rdma_destroy_id(ia->ri_id);
+ ia->ri_id = NULL;
+ }
if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
rc = ib_dealloc_pd(ia->ri_pd);
dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
__func__, rc);
}
- if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
- rdma_destroy_id(ia->ri_id);
}
/*
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ /* Add room for frmr register and invalidate WRs */
+ ep->rep_attr.cap.max_send_wr *= 3;
+ if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
+ return -EINVAL;
+ break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
/* Add room for mw_binds+unbinds - overkill! */
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_remote_cma.private_data_len = 0;
/* Client offers RDMA Read but does not initiate */
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_BOUNCEBUFFERS:
+ ep->rep_remote_cma.initiator_depth = 0;
+ if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
ep->rep_remote_cma.responder_resources = 0;
- break;
- case RPCRDMA_MTHCAFMR:
- case RPCRDMA_REGISTER:
- ep->rep_remote_cma.responder_resources = cdata->max_requests *
- (RPCRDMA_MAX_DATA_SEGS / 8);
- break;
- case RPCRDMA_MEMWINDOWS:
- case RPCRDMA_MEMWINDOWS_ASYNC:
-#if RPCRDMA_PERSISTENT_REGISTRATION
- case RPCRDMA_ALLPHYSICAL:
-#endif
- ep->rep_remote_cma.responder_resources = cdata->max_requests *
- (RPCRDMA_MAX_DATA_SEGS / 2);
- break;
- default:
- break;
- }
- if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
+ else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ ep->rep_remote_cma.responder_resources = 32;
+ else
ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
- ep->rep_remote_cma.initiator_depth = 0;
ep->rep_remote_cma.retry_count = 7;
ep->rep_remote_cma.flow_control = 0;
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if (rc)
dprintk("RPC: %s: rpcrdma_ep_disconnect"
" returned %i\n", __func__, rc);
+ rdma_destroy_qp(ia->ri_id);
+ ia->ri_id->qp = NULL;
}
- ep->rep_func = NULL;
-
/* padding - could be done in rpcrdma_buffer_destroy... */
if (ep->rep_pad_mr) {
rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
ep->rep_pad_mr = NULL;
}
- if (ia->ri_id->qp) {
- rdma_destroy_qp(ia->ri_id);
- ia->ri_id->qp = NULL;
- }
-
rpcrdma_clean_cq(ep->rep_cq);
rc = ib_destroy_cq(ep->rep_cq);
if (rc)
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
struct rdma_cm_id *id;
int rc = 0;
int retry_count = 0;
- int reconnect = (ep->rep_connected != 0);
- if (reconnect) {
+ if (ep->rep_connected != 0) {
struct rpcrdma_xprt *xprt;
retry:
rc = rpcrdma_ep_disconnect(ep, ia);
@@ -745,6 +836,7 @@ retry:
goto out;
}
/* END TEMP */
+ rdma_destroy_qp(ia->ri_id);
rdma_destroy_id(ia->ri_id);
ia->ri_id = id;
}
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
}
}
- /* Theoretically a client initiator_depth > 0 is not needed,
- * but many peers fail to complete the connection unless they
- * == responder_resources! */
- if (ep->rep_remote_cma.initiator_depth !=
- ep->rep_remote_cma.responder_resources)
- ep->rep_remote_cma.initiator_depth =
- ep->rep_remote_cma.responder_resources;
-
ep->rep_connected = 0;
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
goto out;
}
- if (reconnect)
- return 0;
-
wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
/*
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
if (ep->rep_connected <= 0) {
/* Sometimes, the only way to reliably connect to remote
* CMs is to use same nonzero values for ORD and IRD. */
- ep->rep_remote_cma.initiator_depth =
- ep->rep_remote_cma.responder_resources;
- if (ep->rep_remote_cma.initiator_depth == 0)
- ++ep->rep_remote_cma.initiator_depth;
- if (ep->rep_remote_cma.responder_resources == 0)
- ++ep->rep_remote_cma.responder_resources;
- if (retry_count++ == 0)
+ if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
+ (ep->rep_remote_cma.responder_resources == 0 ||
+ ep->rep_remote_cma.initiator_depth !=
+ ep->rep_remote_cma.responder_resources)) {
+ if (ep->rep_remote_cma.responder_resources == 0)
+ ep->rep_remote_cma.responder_resources = 1;
+ ep->rep_remote_cma.initiator_depth =
+ ep->rep_remote_cma.responder_resources;
goto retry;
+ }
rc = ep->rep_connected;
} else {
dprintk("RPC: %s: connected\n", __func__);
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
char *p;
size_t len;
int i, rc;
+ struct rpcrdma_mw *r;
buf->rb_max_requests = cdata->max_requests;
spin_lock_init(&buf->rb_lock);
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* 2. arrays of struct rpcrdma_req to fill in pointers
* 3. array of struct rpcrdma_rep for replies
* 4. padding, if any
- * 5. mw's, if any
+ * 5. mw's, fmr's or frmr's, if any
* Send/recv buffers in req/rep need to be registered
*/
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
len += cdata->padding;
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
+ sizeof(struct rpcrdma_mw);
+ break;
case RPCRDMA_MTHCAFMR:
/* TBD we are perhaps overallocating here */
len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* and also reduce unbind-to-bind collision.
*/
INIT_LIST_HEAD(&buf->rb_mws);
+ r = (struct rpcrdma_mw *)p;
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
+ r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
+ RPCRDMA_MAX_SEGS);
+ if (IS_ERR(r->r.frmr.fr_mr)) {
+ rc = PTR_ERR(r->r.frmr.fr_mr);
+ dprintk("RPC: %s: ib_alloc_fast_reg_mr"
+ " failed %i\n", __func__, rc);
+ goto out;
+ }
+ r->r.frmr.fr_pgl =
+ ib_alloc_fast_reg_page_list(ia->ri_id->device,
+ RPCRDMA_MAX_SEGS);
+ if (IS_ERR(r->r.frmr.fr_pgl)) {
+ rc = PTR_ERR(r->r.frmr.fr_pgl);
+ dprintk("RPC: %s: "
+ "ib_alloc_fast_reg_page_list "
+ "failed %i\n", __func__, rc);
+ goto out;
+ }
+ list_add(&r->mw_list, &buf->rb_mws);
+ ++r;
+ }
+ break;
case RPCRDMA_MTHCAFMR:
- {
- struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
- struct ib_fmr_attr fa = {
- RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
- };
/* TBD we are perhaps overallocating here */
for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+ static struct ib_fmr_attr fa =
+ { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
r->r.fmr = ib_alloc_fmr(ia->ri_pd,
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
&fa);
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
list_add(&r->mw_list, &buf->rb_mws);
++r;
}
- }
break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
- {
- struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
/* Allocate one extra request's worth, for full cycling */
for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
r->r.mw = ib_alloc_mw(ia->ri_pd);
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
list_add(&r->mw_list, &buf->rb_mws);
++r;
}
- }
break;
default:
break;
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
int rc, i;
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+ struct rpcrdma_mw *r;
/* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree)
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
}
if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
while (!list_empty(&buf->rb_mws)) {
- struct rpcrdma_mw *r;
r = list_entry(buf->rb_mws.next,
struct rpcrdma_mw, mw_list);
list_del(&r->mw_list);
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dereg_mr"
+ " failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+ break;
case RPCRDMA_MTHCAFMR:
rc = ib_dealloc_fmr(r->r.fmr);
if (rc)
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
struct rpcrdma_req *req;
unsigned long flags;
+ int i;
+ struct rpcrdma_mw *r;
spin_lock_irqsave(&buffers->rb_lock, flags);
if (buffers->rb_send_index == buffers->rb_max_requests) {
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
}
buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
if (!list_empty(&buffers->rb_mws)) {
- int i = RPCRDMA_MAX_SEGS - 1;
+ i = RPCRDMA_MAX_SEGS - 1;
do {
- struct rpcrdma_mw *r;
r = list_entry(buffers->rb_mws.next,
struct rpcrdma_mw, mw_list);
list_del(&r->mw_list);
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
req->rl_reply = NULL;
}
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
case RPCRDMA_MTHCAFMR:
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
va, len, DMA_BIDIRECTIONAL);
iov->length = len;
- if (ia->ri_bind_mem != NULL) {
+ if (ia->ri_have_dma_lkey) {
+ *mrp = NULL;
+ iov->lkey = ia->ri_dma_lkey;
+ return 0;
+ } else if (ia->ri_bind_mem != NULL) {
*mrp = NULL;
iov->lkey = ia->ri_bind_mem->lkey;
return 0;
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
+static int
+rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
+ int *nsegs, int writing, struct rpcrdma_ia *ia,
+ struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ struct ib_send_wr frmr_wr, *bad_wr;
+ u8 key;
+ int len, pageoff;
+ int i, rc;
+
+ pageoff = offset_in_page(seg1->mr_offset);
+ seg1->mr_offset -= pageoff; /* start of page */
+ seg1->mr_len += pageoff;
+ len = -pageoff;
+ if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+ *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (i = 0; i < *nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+ break;
+ }
+ dprintk("RPC: %s: Using frmr %p to map %d segments\n",
+ __func__, seg1->mr_chunk.rl_mw, i);
+
+ /* Bump the key */
+ key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+
+ /* Prepare FRMR WR */
+ memset(&frmr_wr, 0, sizeof frmr_wr);
+ frmr_wr.opcode = IB_WR_FAST_REG_MR;
+ frmr_wr.send_flags = 0; /* unsignaled */
+ frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
+ frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
+ frmr_wr.wr.fast_reg.page_list_len = i;
+ frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
+ frmr_wr.wr.fast_reg.access_flags = (writing ?
+ IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
+ frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+
+ rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
+
+ if (rc) {
+ dprintk("RPC: %s: failed ib_post_send for register,"
+ " status %i\n", __func__, rc);
+ while (i--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ seg1->mr_base = seg1->mr_dma + pageoff;
+ seg1->mr_nsegs = i;
+ seg1->mr_len = len;
+ }
+ *nsegs = i;
+ return rc;
+}
+
+static int
+rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ struct ib_send_wr invalidate_wr, *bad_wr;
+ int rc;
+
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+
+ memset(&invalidate_wr, 0, sizeof invalidate_wr);
+ invalidate_wr.opcode = IB_WR_LOCAL_INV;
+ invalidate_wr.send_flags = 0; /* unsignaled */
+ invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+
+ rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+ if (rc)
+ dprintk("RPC: %s: failed ib_post_send for invalidate,"
+ " status %i\n", __func__, rc);
+ return rc;
+}
+
+static int
+rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
+ int *nsegs, int writing, struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+ int len, pageoff, i, rc;
+
+ pageoff = offset_in_page(seg1->mr_offset);
+ seg1->mr_offset -= pageoff; /* start of page */
+ seg1->mr_len += pageoff;
+ len = -pageoff;
+ if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+ *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (i = 0; i < *nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ physaddrs[i] = seg->mr_dma;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+ break;
+ }
+ rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
+ physaddrs, i, seg1->mr_dma);
+ if (rc) {
+ dprintk("RPC: %s: failed ib_map_phys_fmr "
+ "%u@0x%llx+%i (%d)... status %i\n", __func__,
+ len, (unsigned long long)seg1->mr_dma,
+ pageoff, i, rc);
+ while (i--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
+ seg1->mr_base = seg1->mr_dma + pageoff;
+ seg1->mr_nsegs = i;
+ seg1->mr_len = len;
+ }
+ *nsegs = i;
+ return rc;
+}
+
+static int
+rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ LIST_HEAD(l);
+ int rc;
+
+ list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
+ rc = ib_unmap_fmr(&l);
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ if (rc)
+ dprintk("RPC: %s: failed ib_unmap_fmr,"
+ " status %i\n", __func__, rc);
+ return rc;
+}
+
+static int
+rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
+ int *nsegs, int writing, struct rpcrdma_ia *ia,
+ struct rpcrdma_xprt *r_xprt)
+{
+ int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+ IB_ACCESS_REMOTE_READ);
+ struct ib_mw_bind param;
+ int rc;
+
+ *nsegs = 1;
+ rpcrdma_map_one(ia, seg, writing);
+ param.mr = ia->ri_bind_mem;
+ param.wr_id = 0ULL; /* no send cookie */
+ param.addr = seg->mr_dma;
+ param.length = seg->mr_len;
+ param.send_flags = 0;
+ param.mw_access_flags = mem_priv;
+
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+ rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
+ if (rc) {
+ dprintk("RPC: %s: failed ib_bind_mw "
+ "%u@0x%llx status %i\n",
+ __func__, seg->mr_len,
+ (unsigned long long)seg->mr_dma, rc);
+ rpcrdma_unmap_one(ia, seg);
+ } else {
+ seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
+ seg->mr_base = param.addr;
+ seg->mr_nsegs = 1;
+ }
+ return rc;
+}
+
+static int
+rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_ia *ia,
+ struct rpcrdma_xprt *r_xprt, void **r)
+{
+ struct ib_mw_bind param;
+ LIST_HEAD(l);
+ int rc;
+
+ BUG_ON(seg->mr_nsegs != 1);
+ param.mr = ia->ri_bind_mem;
+ param.addr = 0ULL; /* unbind */
+ param.length = 0;
+ param.mw_access_flags = 0;
+ if (*r) {
+ param.wr_id = (u64) (unsigned long) *r;
+ param.send_flags = IB_SEND_SIGNALED;
+ INIT_CQCOUNT(&r_xprt->rx_ep);
+ } else {
+ param.wr_id = 0ULL;
+ param.send_flags = 0;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+ }
+ rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
+ rpcrdma_unmap_one(ia, seg);
+ if (rc)
+ dprintk("RPC: %s: failed ib_(un)bind_mw,"
+ " status %i\n", __func__, rc);
+ else
+ *r = NULL; /* will upcall on completion */
+ return rc;
+}
+
+static int
+rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
+ int *nsegs, int writing, struct rpcrdma_ia *ia)
+{
+ int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+ IB_ACCESS_REMOTE_READ);
+ struct rpcrdma_mr_seg *seg1 = seg;
+ struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
+ int len, i, rc = 0;
+
+ if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+ *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (len = 0, i = 0; i < *nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ ipb[i].addr = seg->mr_dma;
+ ipb[i].size = seg->mr_len;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+ break;
+ }
+ seg1->mr_base = seg1->mr_dma;
+ seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
+ ipb, i, mem_priv, &seg1->mr_base);
+ if (IS_ERR(seg1->mr_chunk.rl_mr)) {
+ rc = PTR_ERR(seg1->mr_chunk.rl_mr);
+ dprintk("RPC: %s: failed ib_reg_phys_mr "
+ "%u@0x%llx (%d)... status %i\n",
+ __func__, len,
+ (unsigned long long)seg1->mr_dma, i, rc);
+ while (i--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
+ seg1->mr_nsegs = i;
+ seg1->mr_len = len;
+ }
+ *nsegs = i;
+ return rc;
+}
+
+static int
+rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ int rc;
+
+ rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
+ seg1->mr_chunk.rl_mr = NULL;
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ if (rc)
+ dprintk("RPC: %s: failed ib_dereg_mr,"
+ " status %i\n", __func__, rc);
+ return rc;
+}
+
int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
- IB_ACCESS_REMOTE_READ);
- struct rpcrdma_mr_seg *seg1 = seg;
- int i;
int rc = 0;
switch (ia->ri_memreg_strategy) {
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
break;
#endif
- /* Registration using fast memory registration */
+ /* Registration using frmr registration */
+ case RPCRDMA_FRMR:
+ rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
+ break;
+
+ /* Registration using fmr memory registration */
case RPCRDMA_MTHCAFMR:
- {
- u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
- int len, pageoff = offset_in_page(seg->mr_offset);
- seg1->mr_offset -= pageoff; /* start of page */
- seg1->mr_len += pageoff;
- len = -pageoff;
- if (nsegs > RPCRDMA_MAX_DATA_SEGS)
- nsegs = RPCRDMA_MAX_DATA_SEGS;
- for (i = 0; i < nsegs;) {
- rpcrdma_map_one(ia, seg, writing);
- physaddrs[i] = seg->mr_dma;
- len += seg->mr_len;
- ++seg;
- ++i;
- /* Check for holes */
- if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
- offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
- break;
- }
- nsegs = i;
- rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
- physaddrs, nsegs, seg1->mr_dma);
- if (rc) {
- dprintk("RPC: %s: failed ib_map_phys_fmr "
- "%u@0x%llx+%i (%d)... status %i\n", __func__,
- len, (unsigned long long)seg1->mr_dma,
- pageoff, nsegs, rc);
- while (nsegs--)
- rpcrdma_unmap_one(ia, --seg);
- } else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
- seg1->mr_base = seg1->mr_dma + pageoff;
- seg1->mr_nsegs = nsegs;
- seg1->mr_len = len;
- }
- }
+ rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
break;
/* Registration using memory windows */
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
- {
- struct ib_mw_bind param;
- rpcrdma_map_one(ia, seg, writing);
- param.mr = ia->ri_bind_mem;
- param.wr_id = 0ULL; /* no send cookie */
- param.addr = seg->mr_dma;
- param.length = seg->mr_len;
- param.send_flags = 0;
- param.mw_access_flags = mem_priv;
-
- DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_bind_mw(ia->ri_id->qp,
- seg->mr_chunk.rl_mw->r.mw, &param);
- if (rc) {
- dprintk("RPC: %s: failed ib_bind_mw "
- "%u@0x%llx status %i\n",
- __func__, seg->mr_len,
- (unsigned long long)seg->mr_dma, rc);
- rpcrdma_unmap_one(ia, seg);
- } else {
- seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
- seg->mr_base = param.addr;
- seg->mr_nsegs = 1;
- nsegs = 1;
- }
- }
+ rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
break;
/* Default registration each time */
default:
- {
- struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
- int len = 0;
- if (nsegs > RPCRDMA_MAX_DATA_SEGS)
- nsegs = RPCRDMA_MAX_DATA_SEGS;
- for (i = 0; i < nsegs;) {
- rpcrdma_map_one(ia, seg, writing);
- ipb[i].addr = seg->mr_dma;
- ipb[i].size = seg->mr_len;
- len += seg->mr_len;
- ++seg;
- ++i;
- /* Check for holes */
- if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
- offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
- break;
- }
- nsegs = i;
- seg1->mr_base = seg1->mr_dma;
- seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
- ipb, nsegs, mem_priv, &seg1->mr_base);
- if (IS_ERR(seg1->mr_chunk.rl_mr)) {
- rc = PTR_ERR(seg1->mr_chunk.rl_mr);
- dprintk("RPC: %s: failed ib_reg_phys_mr "
- "%u@0x%llx (%d)... status %i\n",
- __func__, len,
- (unsigned long long)seg1->mr_dma, nsegs, rc);
- while (nsegs--)
- rpcrdma_unmap_one(ia, --seg);
- } else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
- seg1->mr_nsegs = nsegs;
- seg1->mr_len = len;
- }
- }
+ rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
break;
}
if (rc)
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_xprt *r_xprt, void *r)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_mr_seg *seg1 = seg;
int nsegs = seg->mr_nsegs, rc;
switch (ia->ri_memreg_strategy) {
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
break;
#endif
+ case RPCRDMA_FRMR:
+ rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
+ break;
+
case RPCRDMA_MTHCAFMR:
- {
- LIST_HEAD(l);
- list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
- rc = ib_unmap_fmr(&l);
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- }
- if (rc)
- dprintk("RPC: %s: failed ib_unmap_fmr,"
- " status %i\n", __func__, rc);
+ rc = rpcrdma_deregister_fmr_external(seg, ia);
break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
- {
- struct ib_mw_bind param;
- BUG_ON(nsegs != 1);
- param.mr = ia->ri_bind_mem;
- param.addr = 0ULL; /* unbind */
- param.length = 0;
- param.mw_access_flags = 0;
- if (r) {
- param.wr_id = (u64) (unsigned long) r;
- param.send_flags = IB_SEND_SIGNALED;
- INIT_CQCOUNT(&r_xprt->rx_ep);
- } else {
- param.wr_id = 0ULL;
- param.send_flags = 0;
- DECR_CQCOUNT(&r_xprt->rx_ep);
- }
- rc = ib_bind_mw(ia->ri_id->qp,
- seg->mr_chunk.rl_mw->r.mw, &param);
- rpcrdma_unmap_one(ia, seg);
- }
- if (rc)
- dprintk("RPC: %s: failed ib_(un)bind_mw,"
- " status %i\n", __func__, rc);
- else
- r = NULL; /* will upcall on completion */
+ rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
break;
default:
- rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
- seg1->mr_chunk.rl_mr = NULL;
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- if (rc)
- dprintk("RPC: %s: failed ib_dereg_mr,"
- " status %i\n", __func__, rc);
+ rc = rpcrdma_deregister_default_external(seg, ia);
break;
}
if (r) {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2427822f8bd..c7a7eba991b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,9 @@
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
+#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
+
/*
* Interface Adapter -- one per transport instance
*/
@@ -58,6 +61,8 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
+ u32 ri_dma_lkey;
+ int ri_have_dma_lkey;
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
union {
struct ib_mw *mw;
struct ib_fmr *fmr;
+ struct {
+ struct ib_fast_reg_page_list *fr_pgl;
+ struct ib_mr *fr_mr;
+ } frmr;
} r;
struct list_head mw_list;
} *rl_mw;
@@ -175,6 +184,7 @@ struct rpcrdma_req {
size_t rl_size; /* actual length of buffer */
unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */
+ unsigned int rl_connect_cookie; /* retry detection */
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
@@ -198,7 +208,7 @@ struct rpcrdma_buffer {
atomic_t rb_credits; /* most recent server credits */
unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
int rb_max_requests;/* client max requests */
- struct list_head rb_mws; /* optional memory windows/fmrs */
+ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
int rb_send_index;
struct rpcrdma_req **rb_send_bufs;
int rb_recv_index;
@@ -273,6 +283,11 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+/* Setting this to 0 ensures interoperability with early servers.
+ * Setting this to 1 enhances certain unaligned read/write performance.
+ * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
+extern int xprt_rdma_pad_optimize;
+
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4486c59c3ac..9a288d5eea6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -3,8 +3,8 @@
*
* Client-side transport implementation for sockets.
*
- * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
- * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
+ * TCP callback races fixes (C) 1998 Red Hat
+ * TCP send fixes (C) 1998 Red Hat
* TCP NFS related read + write fixes
* (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
*
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 015606b54d9..c647aab8d41 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1,7 +1,7 @@
/*
* NET4: Implementation of BSD Unix domain sockets.
*
- * Authors: Alan Cox, <alan.cox@linux.org>
+ * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5cadbeb76a1..5031db7b275 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -13,7 +13,6 @@
#include <linux/debugfs.h>
#include <linux/notifier.h>
#include <linux/device.h>
-#include <linux/list.h>
#include <net/genetlink.h>
#include <net/cfg80211.h>
#include <net/wireless.h>
@@ -185,7 +184,8 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
if (result)
goto out_unlock;
- if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
+ if (rdev->wiphy.debugfsdir &&
+ !debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
rdev->wiphy.debugfsdir,
rdev->wiphy.debugfsdir->d_parent,
newname))
@@ -318,6 +318,8 @@ int wiphy_register(struct wiphy *wiphy)
drv->wiphy.debugfsdir =
debugfs_create_dir(wiphy_name(&drv->wiphy),
ieee80211_debugfs_dir);
+ if (IS_ERR(drv->wiphy.debugfsdir))
+ drv->wiphy.debugfsdir = NULL;
res = 0;
out_unlock: