From c13c8260da3155f2cefb63b0d1b7dcdcb405c644 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 17:18:44 -0700 Subject: [I/OAT]: DMA memcpy subsystem Provides an API for offloading memory copies to DMA devices Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/dmaengine.h | 337 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 include/linux/dmaengine.h (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h new file mode 100644 index 00000000000..30781546ac9 --- /dev/null +++ b/include/linux/dmaengine.h @@ -0,0 +1,337 @@ +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef DMAENGINE_H +#define DMAENGINE_H +#include +#ifdef CONFIG_DMA_ENGINE + +#include +#include +#include +#include +#include + +/** + * enum dma_event - resource PNP/power managment events + * @DMA_RESOURCE_SUSPEND: DMA device going into low power state + * @DMA_RESOURCE_RESUME: DMA device returning to full power + * @DMA_RESOURCE_ADDED: DMA device added to the system + * @DMA_RESOURCE_REMOVED: DMA device removed from the system + */ +enum dma_event { + DMA_RESOURCE_SUSPEND, + DMA_RESOURCE_RESUME, + DMA_RESOURCE_ADDED, + DMA_RESOURCE_REMOVED, +}; + +/** + * typedef dma_cookie_t + * + * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code + */ +typedef s32 dma_cookie_t; + +#define dma_submit_error(cookie) ((cookie) < 0 ? 1 : 0) + +/** + * enum dma_status - DMA transaction status + * @DMA_SUCCESS: transaction completed successfully + * @DMA_IN_PROGRESS: transaction not yet processed + * @DMA_ERROR: transaction failed + */ +enum dma_status { + DMA_SUCCESS, + DMA_IN_PROGRESS, + DMA_ERROR, +}; + +/** + * struct dma_chan_percpu - the per-CPU part of struct dma_chan + * @refcount: local_t used for open-coded "bigref" counting + * @memcpy_count: transaction counter + * @bytes_transferred: byte counter + */ + +struct dma_chan_percpu { + local_t refcount; + /* stats */ + unsigned long memcpy_count; + unsigned long bytes_transferred; +}; + +/** + * struct dma_chan - devices supply DMA channels, clients use them + * @client: ptr to the client user of this chan, will be NULL when unused + * @device: ptr to the dma device who supplies this channel, always !NULL + * @cookie: last cookie value returned to client + * @chan_id: + * @class_dev: + * @refcount: kref, used in "bigref" slow-mode + * @slow_ref: + * @rcu: + * @client_node: used to add this to the client chan list + * @device_node: used to add this to the device chan list + * @local: per-cpu pointer to a struct dma_chan_percpu + */ +struct dma_chan { + struct dma_client *client; + struct dma_device *device; + dma_cookie_t cookie; + + /* sysfs */ + int chan_id; + struct class_device class_dev; + + struct kref refcount; + int slow_ref; + struct rcu_head rcu; + + struct list_head client_node; + struct list_head device_node; + struct dma_chan_percpu *local; +}; + +void dma_chan_cleanup(struct kref *kref); + +static inline void dma_chan_get(struct dma_chan *chan) +{ + if (unlikely(chan->slow_ref)) + kref_get(&chan->refcount); + else { + local_inc(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); + put_cpu(); + } +} + +static inline void dma_chan_put(struct dma_chan *chan) +{ + if (unlikely(chan->slow_ref)) + kref_put(&chan->refcount, dma_chan_cleanup); + else { + local_dec(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); + put_cpu(); + } +} + +/* + * typedef dma_event_callback - function pointer to a DMA event callback + */ +typedef void (*dma_event_callback) (struct dma_client *client, + struct dma_chan *chan, enum dma_event event); + +/** + * struct dma_client - info on the entity making use of DMA services + * @event_callback: func ptr to call when something happens + * @chan_count: number of chans allocated + * @chans_desired: number of chans requested. Can be +/- chan_count + * @lock: protects access to the channels list + * @channels: the list of DMA channels allocated + * @global_node: list_head for global dma_client_list + */ +struct dma_client { + dma_event_callback event_callback; + unsigned int chan_count; + unsigned int chans_desired; + + spinlock_t lock; + struct list_head channels; + struct list_head global_node; +}; + +/** + * struct dma_device - info on the entity supplying DMA services + * @chancnt: how many DMA channels are supported + * @channels: the list of struct dma_chan + * @global_node: list_head for global dma_device_list + * @refcount: + * @done: + * @dev_id: + * Other func ptrs: used to make use of this device's capabilities + */ +struct dma_device { + + unsigned int chancnt; + struct list_head channels; + struct list_head global_node; + + struct kref refcount; + struct completion done; + + int dev_id; + + int (*device_alloc_chan_resources)(struct dma_chan *chan); + void (*device_free_chan_resources)(struct dma_chan *chan); + dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan, + void *dest, void *src, size_t len); + dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan, + struct page *page, unsigned int offset, void *kdata, + size_t len); + dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan, + struct page *dest_pg, unsigned int dest_off, + struct page *src_pg, unsigned int src_off, size_t len); + enum dma_status (*device_memcpy_complete)(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, + dma_cookie_t *used); + void (*device_memcpy_issue_pending)(struct dma_chan *chan); +}; + +/* --- public DMA engine API --- */ + +struct dma_client *dma_async_client_register(dma_event_callback event_callback); +void dma_async_client_unregister(struct dma_client *client); +void dma_async_client_chan_request(struct dma_client *client, + unsigned int number); + +/** + * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses + * @chan: DMA channel to offload copy to + * @dest: destination address (virtual) + * @src: source address (virtual) + * @len: length + * + * Both @dest and @src must be mappable to a bus address according to the + * DMA mapping API rules for streaming mappings. + * Both @dest and @src must stay memory resident (kernel memory or locked + * user space pages) + */ +static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, + void *dest, void *src, size_t len) +{ + int cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len); +} + +/** + * dma_async_memcpy_buf_to_pg - offloaded copy + * @chan: DMA channel to offload copy to + * @page: destination page + * @offset: offset in page to copy to + * @kdata: source address (virtual) + * @len: length + * + * Both @page/@offset and @kdata must be mappable to a bus address according + * to the DMA mapping API rules for streaming mappings. + * Both @page/@offset and @kdata must stay memory resident (kernel memory or + * locked user space pages) + */ +static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, + struct page *page, unsigned int offset, void *kdata, size_t len) +{ + int cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return chan->device->device_memcpy_buf_to_pg(chan, page, offset, + kdata, len); +} + +/** + * dma_async_memcpy_buf_to_pg - offloaded copy + * @chan: DMA channel to offload copy to + * @dest_page: destination page + * @dest_off: offset in page to copy to + * @src_page: source page + * @src_off: offset in page to copy from + * @len: length + * + * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus + * address according to the DMA mapping API rules for streaming mappings. + * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident + * (kernel memory or locked user space pages) + */ +static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, + struct page *dest_pg, unsigned int dest_off, struct page *src_pg, + unsigned int src_off, size_t len) +{ + int cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off, + src_pg, src_off, len); +} + +/** + * dma_async_memcpy_issue_pending - flush pending copies to HW + * @chan: + * + * This allows drivers to push copies to HW in batches, + * reducing MMIO writes where possible. + */ +static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan) +{ + return chan->device->device_memcpy_issue_pending(chan); +} + +/** + * dma_async_memcpy_complete - poll for transaction completion + * @chan: DMA channel + * @cookie: transaction identifier to check status of + * @last: returns last completed cookie, can be NULL + * @used: returns last issued cookie, can be NULL + * + * If @last and @used are passed in, upon return they reflect the driver + * internal state and can be used with dma_async_is_complete() to check + * the status of multiple cookies without re-checking hardware state. + */ +static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) +{ + return chan->device->device_memcpy_complete(chan, cookie, last, used); +} + +/** + * dma_async_is_complete - test a cookie against chan state + * @cookie: transaction identifier to test status of + * @last_complete: last know completed transaction + * @last_used: last cookie value handed out + * + * dma_async_is_complete() is used in dma_async_memcpy_complete() + * the test logic is seperated for lightweight testing of multiple cookies + */ +static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, + dma_cookie_t last_complete, dma_cookie_t last_used) +{ + if (last_complete <= last_used) { + if ((cookie <= last_complete) || (cookie > last_used)) + return DMA_SUCCESS; + } else { + if ((cookie <= last_complete) && (cookie > last_used)) + return DMA_SUCCESS; + } + return DMA_IN_PROGRESS; +} + + +/* --- DMA device --- */ + +int dma_async_device_register(struct dma_device *device); +void dma_async_device_unregister(struct dma_device *device); + +#endif /* CONFIG_DMA_ENGINE */ +#endif /* DMAENGINE_H */ -- cgit v1.2.3 From 57c651f74cd8383df10a648e677902849de1bc0b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 May 2006 17:39:49 -0700 Subject: [I/OAT]: Move PCI_DEVICE_ID_INTEL_IOAT to linux/pci_ids.h Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 590dc6dca31..819c8e1324d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2043,6 +2043,7 @@ #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 +#define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 #define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413 -- cgit v1.2.3 From db21733488f84a596faaad0d05430b3f51804692 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Sat, 17 Jun 2006 21:24:58 -0700 Subject: [I/OAT]: Setup the networking subsystem as a DMA client Attempts to allocate per-CPU DMA channels Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f4169bbb60e..b5760c67af9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -37,6 +37,7 @@ #include #include #include +#include struct divert_blk; struct vlan_group; @@ -593,6 +594,9 @@ struct softnet_data struct sk_buff *completion_queue; struct net_device backlog_dev; /* Sorry. 8) */ +#ifdef CONFIG_NET_DMA + struct dma_chan *net_dma; +#endif }; DECLARE_PER_CPU(struct softnet_data,softnet_data); -- cgit v1.2.3 From de5506e155276d385712c2aa1c2d9a27cd4ed947 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 17:50:37 -0700 Subject: [I/OAT]: Utility functions for offloading sk_buff to iovec copies Provides for pinning user space pages in memory, copying to iovecs, and copying from sk_buffs including fragmented and chained sk_buffs. Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/dmaengine.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 30781546ac9..78b236ca04f 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -333,5 +333,27 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); +/* --- Helper iov-locking functions --- */ + +struct dma_page_list { + char *base_address; + int nr_pages; + struct page **pages; +}; + +struct dma_pinned_list { + int nr_iovecs; + struct dma_page_list page_list[0]; +}; + +struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len); +void dma_unpin_iovec_pages(struct dma_pinned_list* pinned_list); + +dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, + struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len); +dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, + struct dma_pinned_list *pinned_list, struct page *page, + unsigned int offset, size_t len); + #endif /* CONFIG_DMA_ENGINE */ #endif /* DMAENGINE_H */ -- cgit v1.2.3 From 97fc2f0848c928c63c2ae619deee61a0b1107b69 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 17:55:33 -0700 Subject: [I/OAT]: Structure changes for TCP recv offload to I/OAT Adds an async_wait_queue and some additional fields to tcp_sock, and a dma_cookie_t to sk_buff. Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ include/linux/tcp.h | 8 ++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f8f234708b9..23bad3bf3c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -29,6 +29,7 @@ #include #include #include +#include #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ @@ -285,6 +286,9 @@ struct sk_buff { __u16 tc_verd; /* traffic control verdict */ #endif #endif +#ifdef CONFIG_NET_DMA + dma_cookie_t dma_cookie; +#endif /* These elements must be at the end, see alloc_skb() for details. */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 542d39596bd..c90daa5da6c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -18,6 +18,7 @@ #define _LINUX_TCP_H #include +#include #include struct tcphdr { @@ -233,6 +234,13 @@ struct tcp_sock { struct iovec *iov; int memory; int len; +#ifdef CONFIG_NET_DMA + /* members for async copy */ + struct dma_chan *dma_chan; + int wakeup; + struct dma_pinned_list *pinned_list; + dma_cookie_t dma_cookie; +#endif } ucopy; __u32 snd_wl1; /* Sequence for window update */ -- cgit v1.2.3 From 9593782585e0cf70babe787a8463d492a68b1744 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 18:02:55 -0700 Subject: [I/OAT]: Add a sysctl for tuning the I/OAT offloaded I/O threshold Any socket recv of less than this ammount will not be offloaded Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 76eaeff76f8..cd9e7c0825a 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -403,6 +403,7 @@ enum NET_TCP_MTU_PROBING=113, NET_TCP_BASE_MSS=114, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, + NET_TCP_DMA_COPYBREAK=116, }; enum { -- cgit v1.2.3 From 30b6c28d2aca4669f2e609ad5d77ea2a6cf0dd3a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 26 May 2006 17:44:45 -0700 Subject: [TG3]: Add 5786 PCI ID Add PCI ID for BCM5786 which is a variant of 5787. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 819c8e1324d..cf45e8bb69a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1887,6 +1887,7 @@ #define PCI_DEVICE_ID_TIGON3_5751F 0x167e #define PCI_DEVICE_ID_TIGON3_5787M 0x1693 #define PCI_DEVICE_ID_TIGON3_5782 0x1696 +#define PCI_DEVICE_ID_TIGON3_5786 0x169a #define PCI_DEVICE_ID_TIGON3_5787 0x169b #define PCI_DEVICE_ID_TIGON3_5788 0x169c #define PCI_DEVICE_ID_TIGON3_5789 0x169d -- cgit v1.2.3 From b59f45d0b2878ab76f8053b0973654e6621828ee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 May 2006 23:05:54 -0700 Subject: [IPSEC] xfrm: Abstract out encapsulation modes This patch adds the structure xfrm_mode. It is meant to represent the operations carried out by transport/tunnel modes. By doing this we allow additional encapsulation modes to be added without clogging up the xfrm_input/xfrm_output paths. Candidate modes include 4-to-6 tunnel mode, 6-to-4 tunnel mode, and BEET modes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/xfrm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 6b42cc474c0..46a15c7a1a1 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -118,6 +118,10 @@ enum XFRM_SHARE_UNIQUE /* Use once */ }; +#define XFRM_MODE_TRANSPORT 0 +#define XFRM_MODE_TUNNEL 1 +#define XFRM_MODE_MAX 2 + /* Netlink configuration messages. */ enum { XFRM_MSG_BASE = 0x10, -- cgit v1.2.3 From 62b7743483b402f8fb73545d5d487ca714e82766 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:20:32 -0700 Subject: [NETFILTER]: x_tables: add quota match Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_quota.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/netfilter/xt_quota.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h new file mode 100644 index 00000000000..acd7fd77bbe --- /dev/null +++ b/include/linux/netfilter/xt_quota.h @@ -0,0 +1,16 @@ +#ifndef _XT_QUOTA_H +#define _XT_QUOTA_H + +enum xt_quota_flags { + XT_QUOTA_INVERT = 0x1, +}; +#define XT_QUOTA_MASK 0x1 + +struct xt_quota_info { + u_int32_t flags; + u_int32_t pad; + aligned_u64 quota; + struct xt_quota_info *master; +}; + +#endif /* _XT_QUOTA_H */ -- cgit v1.2.3 From f3389805e53a13bd969ee1c8fc5a4137b7c6c167 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:21:00 -0700 Subject: [NETFILTER]: x_tables: add statistic match Add statistic match which is a combination of the nth and random matches. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_statistic.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/netfilter/xt_statistic.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_statistic.h b/include/linux/netfilter/xt_statistic.h new file mode 100644 index 00000000000..c344e9916e2 --- /dev/null +++ b/include/linux/netfilter/xt_statistic.h @@ -0,0 +1,32 @@ +#ifndef _XT_STATISTIC_H +#define _XT_STATISTIC_H + +enum xt_statistic_mode { + XT_STATISTIC_MODE_RANDOM, + XT_STATISTIC_MODE_NTH, + __XT_STATISTIC_MODE_MAX +}; +#define XT_STATISTIC_MODE_MAX (__XT_STATISTIC_MODE_MAX - 1) + +enum xt_statistic_flags { + XT_STATISTIC_INVERT = 0x1, +}; +#define XT_STATISTIC_MASK 0x1 + +struct xt_statistic_info { + u_int16_t mode; + u_int16_t flags; + union { + struct { + u_int32_t probability; + } random; + struct { + u_int32_t every; + u_int32_t packet; + u_int32_t count; + } nth; + } u; + struct xt_statistic_info *master __attribute__((aligned(8))); +}; + +#endif /* _XT_STATISTIC_H */ -- cgit v1.2.3 From 39a27a35c5c1b5be499a0576a35c45a011788bf8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:23:54 -0700 Subject: [NETFILTER]: conntrack: add sysctl to disable checksumming Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 1 + include/linux/sysctl.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index d54d7b278e9..5473c01f69e 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -293,6 +293,7 @@ static inline int is_dying(struct ip_conntrack *ct) } extern unsigned int ip_conntrack_htable_size; +extern int ip_conntrack_checksum; #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index cd9e7c0825a..98338ed2c0b 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -313,6 +313,7 @@ enum NET_NF_CONNTRACK_FRAG6_TIMEOUT=29, NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30, NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31, + NET_NF_CONNTRACK_CHECKSUM=32, }; /* /proc/sys/net/ipv4 */ @@ -492,6 +493,7 @@ enum NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, NET_IPV4_NF_CONNTRACK_COUNT=27, + NET_IPV4_NF_CONNTRACK_CHECKSUM=28, }; /* /proc/sys/net/ipv6 */ -- cgit v1.2.3 From 997ae831ade74bdaed4172b1c02060b9efd6e206 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 29 May 2006 18:24:20 -0700 Subject: [NETFILTER]: conntrack: add fixed timeout flag in connection tracking Add a flag in a connection status to have a non updated timeout. This permits to have connection that automatically die at a given time. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_common.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 3ff88c87830..d2e4bd7a7a1 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -69,6 +69,10 @@ enum ip_conntrack_status { /* Connection is dying (removed from lists), can not be unset. */ IPS_DYING_BIT = 9, IPS_DYING = (1 << IPS_DYING_BIT), + + /* Connection has fixed timeout. */ + IPS_FIXED_TIMEOUT_BIT = 10, + IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), }; /* Connection tracking event bits */ -- cgit v1.2.3 From 3726add76643c715d437aceda320d319153b6113 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:24:39 -0700 Subject: [NETFILTER]: ctnetlink: fix NAT configuration The current configuration only allows to configure one manip and overloads conntrack status flags with netlink semantic. Signed-off-by: Patrick Mchardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 668ec946c8e..b5883ccee29 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -27,13 +27,15 @@ enum ctattr_type { CTA_STATUS, CTA_PROTOINFO, CTA_HELP, - CTA_NAT, + CTA_NAT_SRC, +#define CTA_NAT CTA_NAT_SRC /* backwards compatibility */ CTA_TIMEOUT, CTA_MARK, CTA_COUNTERS_ORIG, CTA_COUNTERS_REPLY, CTA_USE, CTA_ID, + CTA_NAT_DST, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) -- cgit v1.2.3 From c0d4cfd96dd0cc0dbf49435898808b5553af4822 Mon Sep 17 00:00:00 2001 From: Jing Min Zhao Date: Mon, 29 May 2006 18:26:27 -0700 Subject: [NETFILTER]: H.323 helper: Add support for Call Forwarding Signed-off-by: Jing Min Zhao Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 1 + include/linux/netfilter_ipv4/ip_conntrack_h323.h | 7 +++++++ include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 5473c01f69e..17d7ef938a0 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -154,6 +154,7 @@ struct ip_conntrack_expect unsigned int flags; #ifdef CONFIG_IP_NF_NAT_NEEDED + u_int32_t saved_ip; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ union ip_conntrack_manip_proto saved_proto; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_h323.h b/include/linux/netfilter_ipv4/ip_conntrack_h323.h index eace86bd2ad..3cbff737900 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_h323.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_h323.h @@ -71,6 +71,13 @@ extern int (*nat_h245_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, unsigned char **data, int dataoff, TransportAddress * addr, u_int16_t port, struct ip_conntrack_expect * exp); +extern int (*nat_callforwarding_hook) (struct sk_buff ** pskb, + struct ip_conntrack * ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress * addr, + u_int16_t port, + struct ip_conntrack_expect * exp); extern int (*nat_q931_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress * addr, diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h b/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h index cc98f7aa5ab..3d4a773799f 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h @@ -1,4 +1,4 @@ -/* Generated by Jing Min Zhao's ASN.1 parser, Mar 15 2006 +/* Generated by Jing Min Zhao's ASN.1 parser, Apr 20 2006 * * Copyright (c) 2006 Jing Min Zhao * @@ -412,6 +412,7 @@ typedef struct Facility_UUIE { /* SEQUENCE */ eFacility_UUIE_destinationInfo = (1 << 14), eFacility_UUIE_h245SecurityMode = (1 << 13), } options; + TransportAddress alternativeAddress; FacilityReason reason; TransportAddress h245Address; Facility_UUIE_fastStart fastStart; -- cgit v1.2.3 From ae5b7d8ba2c28d7d9835856fe0ca5f6ec95ea768 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:27:09 -0700 Subject: [NETFILTER]: Add SIP connection tracking helper Add SIP connection tracking helper. Originally written by Christian Hentschel , some cleanup, minor fixes and bidirectional SIP support added by myself. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_sip.h | 44 +++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_sip.h (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_sip.h b/include/linux/netfilter_ipv4/ip_conntrack_sip.h new file mode 100644 index 00000000000..913dad66c0f --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_sip.h @@ -0,0 +1,44 @@ +#ifndef __IP_CONNTRACK_SIP_H__ +#define __IP_CONNTRACK_SIP_H__ +#ifdef __KERNEL__ + +#define SIP_PORT 5060 +#define SIP_TIMEOUT 3600 + +#define POS_VIA 0 +#define POS_CONTACT 1 +#define POS_CONTENT 2 +#define POS_MEDIA 3 +#define POS_OWNER 4 +#define POS_CONNECTION 5 +#define POS_REQ_HEADER 6 +#define POS_SDP_HEADER 7 + +struct sip_header_nfo { + const char *lname; + const char *sname; + const char *ln_str; + size_t lnlen; + size_t snlen; + size_t ln_strlen; + int (*match_len)(const char *, const char *, int *); +}; + +extern unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, + const char **dptr); +extern unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *exp, + const char *dptr); + +extern int ct_sip_get_info(const char *dptr, size_t dlen, + unsigned int *matchoff, + unsigned int *matchlen, + struct sip_header_nfo *hnfo); +extern int ct_sip_lnlen(const char *line, const char *limit); +extern const char *ct_sip_search(const char *needle, const char *haystack, + size_t needle_len, size_t haystack_len); +#endif /* __KERNEL__ */ +#endif /* __IP_CONNTRACK_SIP_H__ */ -- cgit v1.2.3 From 338fcf9886df9ad2873772197a73a57818973316 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Jun 2006 21:04:39 -0700 Subject: [IPV4] igmp: Fixup struct ip_mc_list::multiaddr type All users except two expect 32-bit big-endian value. One is of ->multiaddr = ->multiaddr variety. And last one is "%08lX". Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 28f4f3b3695..899c3d4776f 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -169,7 +169,7 @@ struct ip_sf_list struct ip_mc_list { struct in_device *interface; - unsigned long multiaddr; + __be32 multiaddr; struct ip_sf_list *sources; struct ip_sf_list *tomb; unsigned int sfmode; -- cgit v1.2.3 From c8c05a8eec6f1258f6d5cb71a44ee5dc1e989b63 Mon Sep 17 00:00:00 2001 From: Catherine Zhang Date: Thu, 8 Jun 2006 23:39:49 -0700 Subject: [LSM-IPsec]: SELinux Authorize This patch contains a fix for the previous patch that adds security contexts to IPsec policies and security associations. In the previous patch, no authorization (besides the check for write permissions to SAD and SPD) is required to delete IPsec policies and security assocations with security contexts. Thus a user authorized to change SAD and SPD can bypass the IPsec policy authorization by simply deleteing policies with security contexts. To fix this security hole, an additional authorization check is added for removing security policies and security associations with security contexts. Note that if no security context is supplied on add or present on policy to be deleted, the SELinux module allows the change unconditionally. The hook is called on deletion when no context is present, which we may want to change. At present, I left it up to the module. LSM changes: The patch adds two new LSM hooks: xfrm_policy_delete and xfrm_state_delete. The new hooks are necessary to authorize deletion of IPsec policies that have security contexts. The existing hooks xfrm_policy_free and xfrm_state_free lack the context to do the authorization, so I decided to split authorization of deletion and memory management of security data, as is typical in the LSM interface. Use: The new delete hooks are checked when xfrm_policy or xfrm_state are deleted by either the xfrm_user interface (xfrm_get_policy, xfrm_del_sa) or the pfkey interface (pfkey_spddelete, pfkey_delete). SELinux changes: The new policy_delete and state_delete functions are added. Signed-off-by: Catherine Zhang Signed-off-by: Trent Jaeger Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/security.h | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 1bab48f6aea..14c9bd05060 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -805,31 +805,37 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->selector.security field. + * Allocate a security structure to the xp->security field. * The security field is initialized to NULL when the xfrm_policy is * allocated. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. * @new contains a new xfrm_policy being cloned from old. - * Allocate a security structure to the new->selector.security field - * that contains the information from the old->selector.security field. + * Allocate a security structure to the new->security field + * that contains the information from the old->security field. * Return 0 if operation was successful (memory to allocate). * @xfrm_policy_free_security: * @xp contains the xfrm_policy - * Deallocate xp->selector.security. + * Deallocate xp->security. + * @xfrm_policy_delete_security: + * @xp contains the xfrm_policy. + * Authorize deletion of xp->security. * @xfrm_state_alloc_security: * @x contains the xfrm_state being added to the Security Association * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * Allocate a security structure to the x->sel.security field. The + * Allocate a security structure to the x->security field. The * security field is initialized to NULL when the xfrm_state is * allocated. * Return 0 if operation was successful (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. - * Deallocate x>sel.security. + * Deallocate x->security. + * @xfrm_state_delete_security: + * @x contains the xfrm_state. + * Authorize deletion of x->security. * @xfrm_policy_lookup: * @xp contains the xfrm_policy for which the access control is being * checked. @@ -1298,8 +1304,10 @@ struct security_operations { int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); + int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); void (*xfrm_state_free_security) (struct xfrm_state *x); + int (*xfrm_state_delete_security) (struct xfrm_state *x); int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -2934,11 +2942,21 @@ static inline void security_xfrm_policy_free(struct xfrm_policy *xp) security_ops->xfrm_policy_free_security(xp); } +static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) +{ + return security_ops->xfrm_policy_delete_security(xp); +} + static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { return security_ops->xfrm_state_alloc_security(x, sec_ctx); } +static inline int security_xfrm_state_delete(struct xfrm_state *x) +{ + return security_ops->xfrm_state_delete_security(x); +} + static inline void security_xfrm_state_free(struct xfrm_state *x) { security_ops->xfrm_state_free_security(x); @@ -2963,6 +2981,11 @@ static inline void security_xfrm_policy_free(struct xfrm_policy *xp) { } +static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) +{ + return 0; +} + static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { return 0; @@ -2972,6 +2995,11 @@ static inline void security_xfrm_state_free(struct xfrm_state *x) { } +static inline int security_xfrm_state_delete(struct xfrm_policy *xp) +{ + return 0; +} + static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { return 0; -- cgit v1.2.3 From 6f68dc37759b1d6ff3b4d4a9d097605a09f8f043 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 8 Jun 2006 23:58:52 -0700 Subject: [NET]: Fix warnings after LSM-IPSEC changes. Assignment used as truth value in xfrm_del_sa() and xfrm_get_policy(). Wrong argument type declared for security_xfrm_state_delete() when SELINUX is disabled. Signed-off-by: David S. Miller --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 14c9bd05060..4dfb1b84a9b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2995,7 +2995,7 @@ static inline void security_xfrm_state_free(struct xfrm_state *x) { } -static inline int security_xfrm_state_delete(struct xfrm_policy *xp) +static inline int security_xfrm_state_delete(struct xfrm_state *x) { return 0; } -- cgit v1.2.3 From c749b29fae74ed59c507d84025b3298202b42609 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:28:25 -0700 Subject: [SECMARK]: Add SELinux exports Add and export new functions to the in-kernel SELinux API in support of the new secmark-based packet controls. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/selinux.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 4047bcde448..aad4e390d6a 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -118,6 +118,27 @@ void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid); */ void selinux_get_task_sid(struct task_struct *tsk, u32 *sid); +/** + * selinux_string_to_sid - map a security context string to a security ID + * @str: the security context string to be mapped + * @sid: ID value returned via this. + * + * Returns 0 if successful, with the SID stored in sid. A value + * of zero for sid indicates no SID could be determined (but no error + * occurred). + */ +int selinux_string_to_sid(char *str, u32 *sid); + +/** + * selinux_relabel_packet_permission - check permission to relabel a packet + * @sid: ID value to be applied to network packet (via SECMARK, most likely) + * + * Returns 0 if the current task is allowed to label packets with the + * supplied security ID. Note that it is implicit that the packet is always + * being relabeled from the default unlabled value, and that the access + * control decision is made in the AVC. + */ +int selinux_relabel_packet_permission(u32 sid); #else @@ -172,6 +193,17 @@ static inline void selinux_get_task_sid(struct task_struct *tsk, u32 *sid) *sid = 0; } +static inline int selinux_string_to_sid(const char *str, u32 *sid) +{ + *sid = 0; + return 0; +} + +static inline int selinux_relabel_packet_permission(u32 sid) +{ + return 0; +} + #endif /* CONFIG_SECURITY_SELINUX */ #endif /* _LINUX_SELINUX_H */ -- cgit v1.2.3 From 984bc16cc92ea3c247bf34ad667cfb95331b9d3c Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:29:17 -0700 Subject: [SECMARK]: Add secmark support to core networking. Add a secmark field to the skbuff structure, to allow security subsystems to place security markings on network packets. This is similar to the nfmark field, except is intended for implementing security policy, rather than than networking policy. This patch was already acked in principle by Dave Miller. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/skbuff.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 23bad3bf3c9..fe2c58e5306 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -210,6 +210,7 @@ enum { * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @tc_index: Traffic control index * @tc_verd: traffic control verdict + * @secmark: security marking */ struct sk_buff { @@ -289,6 +290,9 @@ struct sk_buff { #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; #endif +#ifdef CONFIG_NETWORK_SECMARK + __u32 secmark; +#endif /* These elements must be at the end, see alloc_skb() for details. */ @@ -1400,5 +1404,23 @@ static inline void nf_reset(struct sk_buff *skb) static inline void nf_reset(struct sk_buff *skb) {} #endif /* CONFIG_NETFILTER */ +#ifdef CONFIG_NETWORK_SECMARK +static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) +{ + to->secmark = from->secmark; +} + +static inline void skb_init_secmark(struct sk_buff *skb) +{ + skb->secmark = 0; +} +#else +static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) +{ } + +static inline void skb_init_secmark(struct sk_buff *skb) +{ } +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 5e6874cdb8de94cd3c15d853a8ef9c6f4c305055 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:30:57 -0700 Subject: [SECMARK]: Add xtables SECMARK target Add a SECMARK target to xtables, allowing the admin to apply security marks to packets via both iptables and ip6tables. The target currently handles SELinux security marking, but can be extended for other purposes as needed. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/netfilter/xt_SECMARK.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/linux/netfilter/xt_SECMARK.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h new file mode 100644 index 00000000000..c53fbffa997 --- /dev/null +++ b/include/linux/netfilter/xt_SECMARK.h @@ -0,0 +1,26 @@ +#ifndef _XT_SECMARK_H_target +#define _XT_SECMARK_H_target + +/* + * This is intended for use by various security subsystems (but not + * at the same time). + * + * 'mode' refers to the specific security subsystem which the + * packets are being marked for. + */ +#define SECMARK_MODE_SEL 0x01 /* SELinux */ +#define SECMARK_SELCTX_MAX 256 + +struct xt_secmark_target_selinux_info { + u_int32_t selsid; + char selctx[SECMARK_SELCTX_MAX]; +}; + +struct xt_secmark_target_info { + u_int8_t mode; + union { + struct xt_secmark_target_selinux_info sel; + } u; +}; + +#endif /*_XT_SECMARK_H_target */ -- cgit v1.2.3 From 7c9728c393dceb724d66d696cfabce82151a78e5 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:31:46 -0700 Subject: [SECMARK]: Add secmark support to conntrack Add a secmark field to IP and NF conntracks, so that security markings on packets can be copied to their associated connections, and also copied back to packets as required. This is similar to the network mark field currently used with conntrack, although it is intended for enforcement of security policy rather than network policy. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 17d7ef938a0..e0e9951eb8c 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -121,6 +121,10 @@ struct ip_conntrack u_int32_t mark; #endif +#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK + u_int32_t secmark; +#endif + /* Traversed often, so hopefully in different cacheline to top */ /* These are my tuples; original and reply */ struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; -- cgit v1.2.3 From 100468e9c05c10fb6872751c1af523b996d6afa9 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:32:39 -0700 Subject: [SECMARK]: Add CONNSECMARK xtables target Add a new xtables target, CONNSECMARK, which is used to specify rules for copying security marks from packets to connections, and for copyying security marks back from connections to packets. This is similar to the CONNMARK target, but is more limited in scope in that it only allows copying of security marks to and from packets, as this is all it needs to do. A typical scenario would be to apply a security mark to a 'new' packet with SECMARK, then copy that to its conntrack via CONNMARK, and then restore the security mark from the connection to established and related packets on that connection. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/netfilter/xt_CONNSECMARK.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/netfilter/xt_CONNSECMARK.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_CONNSECMARK.h b/include/linux/netfilter/xt_CONNSECMARK.h new file mode 100644 index 00000000000..c6bd75469ba --- /dev/null +++ b/include/linux/netfilter/xt_CONNSECMARK.h @@ -0,0 +1,13 @@ +#ifndef _XT_CONNSECMARK_H_target +#define _XT_CONNSECMARK_H_target + +enum { + CONNSECMARK_SAVE = 1, + CONNSECMARK_RESTORE, +}; + +struct xt_connsecmark_target_info { + u_int8_t mode; +}; + +#endif /*_XT_CONNSECMARK_H_target */ -- cgit v1.2.3 From 932ff279a43ab7257942cddff2595acd541cc49b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Jun 2006 12:20:56 -0700 Subject: [NET]: Add netif_tx_lock Various drivers use xmit_lock internally to synchronise with their transmission routines. They do so without setting xmit_lock_owner. This is fine as long as netpoll is not in use. With netpoll it is possible for deadlocks to occur if xmit_lock_owner isn't set. This is because if a printk occurs while xmit_lock is held and xmit_lock_owner is not set can cause netpoll to attempt to take xmit_lock recursively. While it is possible to resolve this by getting netpoll to use trylock, it is suboptimal because netpoll's sole objective is to maximise the chance of getting the printk out on the wire. So delaying or dropping the message is to be avoided as much as possible. So the only alternative is to always set xmit_lock_owner. The following patch does this by introducing the netif_tx_lock family of functions that take care of setting/unsetting xmit_lock_owner. I renamed xmit_lock to _xmit_lock to indicate that it should not be used directly. I didn't provide irq versions of the netif_tx_lock functions since xmit_lock is meant to be a BH-disabling lock. This is pretty much a straight text substitution except for a small bug fix in winbond. It currently uses netif_stop_queue/spin_unlock_wait to stop transmission. This is unsafe as an IRQ can potentially wake up the queue. So it is safer to use netif_tx_disable. The hamradio bits used spin_lock_irq but it is unnecessary as xmit_lock must never be taken in an IRQ handler. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5760c67af9..067b9ccafd8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -407,7 +407,7 @@ struct net_device * One part is mostly used on xmit path (device) */ /* hard_start_xmit synchronizer */ - spinlock_t xmit_lock ____cacheline_aligned_in_smp; + spinlock_t _xmit_lock ____cacheline_aligned_in_smp; /* cpu id of processor entered to hard_start_xmit or -1, if nobody entered there. */ @@ -893,11 +893,43 @@ static inline void __netif_rx_complete(struct net_device *dev) clear_bit(__LINK_STATE_RX_SCHED, &dev->state); } +static inline void netif_tx_lock(struct net_device *dev) +{ + spin_lock(&dev->_xmit_lock); + dev->xmit_lock_owner = smp_processor_id(); +} + +static inline void netif_tx_lock_bh(struct net_device *dev) +{ + spin_lock_bh(&dev->_xmit_lock); + dev->xmit_lock_owner = smp_processor_id(); +} + +static inline int netif_tx_trylock(struct net_device *dev) +{ + int err = spin_trylock(&dev->_xmit_lock); + if (!err) + dev->xmit_lock_owner = smp_processor_id(); + return err; +} + +static inline void netif_tx_unlock(struct net_device *dev) +{ + dev->xmit_lock_owner = -1; + spin_unlock(&dev->_xmit_lock); +} + +static inline void netif_tx_unlock_bh(struct net_device *dev) +{ + dev->xmit_lock_owner = -1; + spin_unlock_bh(&dev->_xmit_lock); +} + static inline void netif_tx_disable(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); netif_stop_queue(dev); - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); } /* These functions live elsewhere (drivers/net/net_init.c, but related) */ -- cgit v1.2.3 From 364c6badde0dd62a0a38e5ed67f85d87d6665780 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Jun 2006 16:10:40 -0700 Subject: [NET]: Clean up skb_linearize The linearisation operation doesn't need to be super-optimised. So we can replace __skb_linearize with __pskb_pull_tail which does the same thing but is more general. Also, most users of skb_linearize end up testing whether the skb is linear or not so it helps to make skb_linearize do just that. Some callers of skb_linearize also use it to copy cloned data, so it's useful to have a new function skb_linearize_cow to copy the data if it's either non-linear or cloned. Last but not least, I've removed the gfp argument since nobody uses it anymore. If it's ever needed we can easily add it back. Misc bugs fixed by this patch: * via-velocity error handling (also, no SG => no frags) Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe2c58e5306..830f58fa03a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1169,18 +1169,34 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, return 0; } +static inline int __skb_linearize(struct sk_buff *skb) +{ + return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; +} + /** * skb_linearize - convert paged skb to linear one * @skb: buffer to linarize - * @gfp: allocation mode * * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ -extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); -static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) +static inline int skb_linearize(struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; +} + +/** + * skb_linearize_cow - make sure skb is linear and writable + * @skb: buffer to process + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. + */ +static inline int skb_linearize_cow(struct sk_buff *skb) { - return __skb_linearize(skb, gfp); + return skb_is_nonlinear(skb) || skb_cloned(skb) ? + __skb_linearize(skb) : 0; } /** -- cgit v1.2.3 From 3cc0e873986fe594d0e96d07259b11f755325cb2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Jun 2006 16:13:38 -0700 Subject: [NET]: Warn in __skb_trim if skb is paged It's better to warn and fail rather than rarely triggering BUG on paths that incorrectly call skb_trim/__skb_trim on a non-linear skb. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 830f58fa03a..93e4db22158 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -975,15 +975,16 @@ static inline void skb_reserve(struct sk_buff *skb, int len) #define NET_SKB_PAD 16 #endif -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data + len; - } else - ___pskb_trim(skb, len, 0); + if (unlikely(skb->data_len)) { + WARN_ON(1); + return; + } + skb->len = len; + skb->tail = skb->data + len; } /** @@ -993,6 +994,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) * * Cut the length of a buffer down by removing data from the tail. If * the buffer is already under the length specified it is not modified. + * The skb must be linear. */ static inline void skb_trim(struct sk_buff *skb, unsigned int len) { @@ -1003,12 +1005,10 @@ static inline void skb_trim(struct sk_buff *skb, unsigned int len) static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) { - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - return 0; - } - return ___pskb_trim(skb, len, 1); + if (skb->data_len) + return ___pskb_trim(skb, len); + __skb_trim(skb, len); + return 0; } static inline int pskb_trim(struct sk_buff *skb, unsigned int len) -- cgit v1.2.3 From 35089bb203f44e33b6bbb6c4de0b0708f9a48921 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Jun 2006 22:33:04 -0700 Subject: [TCP]: Add tcp_slow_start_after_idle sysctl. A lot of people have asked for a way to disable tcp_cwnd_restart(), and it seems reasonable to add a sysctl to do that. Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 98338ed2c0b..cee944dbdcd 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -405,6 +405,7 @@ enum NET_TCP_BASE_MSS=114, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, NET_TCP_DMA_COPYBREAK=116, + NET_TCP_SLOW_START_AFTER_IDLE=117, }; enum { -- cgit v1.2.3 From 8648b3053bff39a7ee4c711d74268079c928a657 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Jun 2006 22:06:05 -0700 Subject: [NET]: Add NETIF_F_GEN_CSUM and NETIF_F_ALL_CSUM The current stack treats NETIF_F_HW_CSUM and NETIF_F_NO_CSUM identically so we test for them in quite a few places. For the sake of brevity, I'm adding the macro NETIF_F_GEN_CSUM for these two. We also test the disjunct of NETIF_F_IP_CSUM and the other two in various places, for that purpose I've added NETIF_F_ALL_CSUM. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 067b9ccafd8..e432b743dda 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -312,6 +312,9 @@ struct net_device #define NETIF_F_LLTX 4096 /* LockLess TX */ #define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/ +#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) +#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) + struct net_device *next_sched; /* Interface index. Unique device identifier */ -- cgit v1.2.3 From c7ce1ae21223fe1f905feba272bc14b87994a57d Mon Sep 17 00:00:00 2001 From: Tushar Gohad Date: Sat, 17 Jun 2006 22:54:03 -0700 Subject: [PFKEYV2]: Fix inconsistent typing in struct sadb_x_kmprivate. Signed-off-by: Tushar Gohad Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index bac0fb389cf..d5dd471da22 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -159,7 +159,7 @@ struct sadb_spirange { struct sadb_x_kmprivate { uint16_t sadb_x_kmprivate_len; uint16_t sadb_x_kmprivate_exttype; - u_int32_t sadb_x_kmprivate_reserved; + uint32_t sadb_x_kmprivate_reserved; } __attribute__((packed)); /* sizeof(struct sadb_x_kmprivate) == 8 */ -- cgit v1.2.3