From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Apr 2009 14:28:37 -0700 Subject: async_tx: rename zero_sum to val 'zero_sum' does not properly describe the operation of generating parity and checking that it validates against an existing buffer. Change the name of the operation to 'val' (for 'validate'). This is in anticipation of the p+q case where it is a requirement to identify the target parity buffers separately from the source buffers, because the target parity buffers will not have corresponding pq coefficients. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97f..513150d8c25 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -117,7 +117,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, dma_async_tx_callback cb_fn, void *cb_fn_param); struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, u32 *result, enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2e2aa3df170..6768727d00d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -55,8 +55,8 @@ enum dma_transaction_type { DMA_PQ_XOR, DMA_DUAL_XOR, DMA_PQ_UPDATE, - DMA_ZERO_SUM, - DMA_PQ_ZERO_SUM, + DMA_XOR_VAL, + DMA_PQ_VAL, DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, @@ -214,7 +214,7 @@ struct dma_async_tx_descriptor { * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation - * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -243,7 +243,7 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, u32 *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( -- cgit v1.2.3 From 88ba2aa586c874681c072101287e15d40de7e6e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Apr 2009 16:16:18 -0700 Subject: async_tx: kill ASYNC_TX_DEP_ACK flag In support of inter-channel chaining async_tx utilizes an ack flag to gate whether a dependent operation can be chained to another. While the flag is not set the chain can be considered open for appending. Setting the ack flag closes the chain and flags the descriptor for garbage collection. The ASYNC_TX_DEP_ACK flag essentially means "close the chain after adding this dependency". Since each operation can only have one child the api now implicitly sets the ack flag at dependency submission time. This removes an unnecessary management burden from clients of the api. [ Impact: clean up and enforce one dependency per operation ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 513150d8c25..9f14cd540cd 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,13 +58,11 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), }; #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From a08abd8ca890a377521d65d493d174bebcaf694b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 11:43:59 -0700 Subject: async_tx: structify submission arguments, add scribble Prepare the api for the arrival of a new parameter, 'scribble'. This will allow callers to identify scratchpad memory for dma address or page address conversions. As this adds yet another parameter, take this opportunity to convert the common submission parameters (flags, dependency, callback, and callback argument) into an object that is passed by reference. Also, take this opportunity to fix up the kerneldoc and add notes about the relevant ASYNC_TX_* flags for each routine. [ Impact: moves api pass-by-value parameters to a pass-by-reference struct ] Signed-off-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 84 +++++++++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 9f14cd540cd..00cfb637ddf 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -65,6 +65,22 @@ enum async_tx_flags { ASYNC_TX_ACK = (1 << 2), }; +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; +}; + #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL @@ -73,8 +89,8 @@ enum async_tx_flags { #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -83,9 +99,10 @@ static inline void async_tx_issue_pending_all(void) } static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -97,46 +114,53 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) +{ + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); +} + +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) { - if (cb_fn) - cb_fn(cb_fn_param); + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); -struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From ad283ea4a3ce82cda2efe33163748a397b31b1eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:09:26 -0700 Subject: async_tx: add sum check flags Replace the flat zero_sum_result with a collection of flags to contain the P (xor) zero-sum result, and the soon to be utilized Q (raid6 reed solomon syndrome) zero-sum result. Use the SUM_CHECK_ namespace instead of DMA_ since these flags will be used on non-dma-zero-sum enabled platforms. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 00cfb637ddf..3d21a251751 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -148,7 +148,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6768727d00d..02447afceba 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -86,6 +86,25 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), }; +/** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + /** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h @@ -245,7 +264,7 @@ struct dma_device { unsigned int src_cnt, size_t len, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); -- cgit v1.2.3 From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:19:02 -0700 Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx We currently walk the parent chain when waiting for a given tx to complete however this walk may race with the driver cleanup routine. The routines in async_raid6_recov.c may fall back to the synchronous path at any point so we need to be prepared to call async_tx_quiesce() (which calls dma_wait_for_async_tx). To remove the ->parent walk we guarantee that every time a dependency is attached ->issue_pending() is invoked, then we can simply poll the initial descriptor until completion. This also allows for a lighter weight 'issue pending' implementation as there is no longer a requirement to iterate through all the channels' ->issue_pending() routines as long as operations have been submitted in an ordered chain. async_tx_issue_pending() is added for this case. Signed-off-by: Dan Williams --- include/linux/async_tx.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 3d21a251751..12a2efcbd56 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -83,6 +83,24 @@ struct async_submit_ctl { #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -98,6 +116,11 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * async_tx_find_channel(struct async_submit_ctl *submit, enum dma_transaction_type tx_type, struct page **dst, -- cgit v1.2.3 From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:36 -0700 Subject: async_tx: add support for asynchronous GF multiplication [ Based on an original patch by Yuri Tikhonov ] This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API: async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources. async_syndrome_val() validates the given source buffers against known P and Q values. When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previous generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. For example to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken: p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) p' = p + q + q + src4 = p + src4 q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 Note: 4 is the minimum acceptable maxpq otherwise we punt to synchronous-software path. The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients. Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag. Note2: The api supports disabling the generation of P when generating Q, this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source. Cc: H. Peter Anvin Cc: David Woodhouse Signed-off-by: Yuri Tikhonov Signed-off-by: Ilya Yanok Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 9 +++++ include/linux/dmaengine.h | 87 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 90 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 12a2efcbd56..e6ce5f004f9 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 02447afceba..ce010cd991d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -52,7 +52,7 @@ enum dma_status { enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, + DMA_PQ, DMA_DUAL_XOR, DMA_PQ_UPDATE, DMA_XOR_VAL, @@ -70,20 +70,28 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_PREP_PQ_DISABLE_P = (1 << 4), + DMA_PREP_PQ_DISABLE_Q = (1 << 5), + DMA_PREP_CONTINUE = (1 << 6), }; /** @@ -226,6 +234,7 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -234,6 +243,8 @@ struct dma_async_tx_descriptor { * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -248,7 +259,9 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -265,6 +278,14 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -283,6 +304,60 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From 0a82a6239beecc95db6e05fe43ee62d16b381d38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: add support for asynchronous RAID6 recovery operations async_raid6_2data_recov() recovers two data disk failures async_raid6_datap_recov() recovers a data disk and the P disk These routines are a port of the synchronous versions found in drivers/md/raid6recov.c. The primary difference is breaking out the xor operations into separate calls to async_xor. Two helper routines are introduced to perform scalar multiplication where needed. async_sum_product() multiplies two sources by scalar coefficients and then sums (xor) the result. async_mult() simply multiplies a single source by a scalar. This implemention also includes, in contrast to the original synchronous-only code, special case handling for the 4-disk and 5-disk array cases. In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. [ Impact: asynchronous raid6 recovery routines for 2data and datap cases ] Cc: Yuri Tikhonov Cc: Ilya Yanok Cc: H. Peter Anvin Cc: David Woodhouse Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index e6ce5f004f9..866e61c4e2e 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -194,5 +194,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From c746b5519a88b8803d43946ad06326ece4829116 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 5 Sep 2009 14:09:21 +0100 Subject: leds: Add WM831x status LED driver The WM831x devices feature two software controlled status LEDs with hardware assisted blinking. The device can also autonomously control the LEDs based on a selection of sources. This can be configured at boot time using either platform data or the chip OTP. A sysfs file in the style of that for triggers allowing the control source to be configured at run time. Triggers can't be used here since they can't depend on the implementation details of a specific LED type. Signed-off-by: Mark Brown Signed-off-by: Richard Purdie --- include/linux/mfd/wm831x/status.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/mfd/wm831x/status.h (limited to 'include/linux') diff --git a/include/linux/mfd/wm831x/status.h b/include/linux/mfd/wm831x/status.h new file mode 100644 index 00000000000..6bc090d0e3a --- /dev/null +++ b/include/linux/mfd/wm831x/status.h @@ -0,0 +1,34 @@ +/* + * include/linux/mfd/wm831x/status.h -- Status LEDs for WM831x + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __MFD_WM831X_STATUS_H__ +#define __MFD_WM831X_STATUS_H__ + +#define WM831X_LED_SRC_MASK 0xC000 /* LED_SRC - [15:14] */ +#define WM831X_LED_SRC_SHIFT 14 /* LED_SRC - [15:14] */ +#define WM831X_LED_SRC_WIDTH 2 /* LED_SRC - [15:14] */ +#define WM831X_LED_MODE_MASK 0x0300 /* LED_MODE - [9:8] */ +#define WM831X_LED_MODE_SHIFT 8 /* LED_MODE - [9:8] */ +#define WM831X_LED_MODE_WIDTH 2 /* LED_MODE - [9:8] */ +#define WM831X_LED_SEQ_LEN_MASK 0x0030 /* LED_SEQ_LEN - [5:4] */ +#define WM831X_LED_SEQ_LEN_SHIFT 4 /* LED_SEQ_LEN - [5:4] */ +#define WM831X_LED_SEQ_LEN_WIDTH 2 /* LED_SEQ_LEN - [5:4] */ +#define WM831X_LED_DUR_MASK 0x000C /* LED_DUR - [3:2] */ +#define WM831X_LED_DUR_SHIFT 2 /* LED_DUR - [3:2] */ +#define WM831X_LED_DUR_WIDTH 2 /* LED_DUR - [3:2] */ +#define WM831X_LED_DUTY_CYC_MASK 0x0003 /* LED_DUTY_CYC - [1:0] */ +#define WM831X_LED_DUTY_CYC_SHIFT 0 /* LED_DUTY_CYC - [1:0] */ +#define WM831X_LED_DUTY_CYC_WIDTH 2 /* LED_DUTY_CYC - [1:0] */ + +#endif -- cgit v1.2.3 From 5036cc41e07d6614350e329666ee8a79cea6f793 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 6 Aug 2009 16:07:10 -0700 Subject: backlight: spi driver for LMS283GF05 LCD ADd support for the SPI part of LMS283GF05 LCD. The LCD uses SPI for initialization and powerdown sequences. No further defails are specified in the datasheet about the initialization/powerdown sequence, just the magic numbers that have to be sent over SPI bus. This LCD can be found in the Aeronix Zipit Z2 handheld. Signed-off-by: Marek Vasut Signed-off-by: Andrew Morton Signed-off-by: Richard Purdie --- include/linux/spi/lms283gf05.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/spi/lms283gf05.h (limited to 'include/linux') diff --git a/include/linux/spi/lms283gf05.h b/include/linux/spi/lms283gf05.h new file mode 100644 index 00000000000..555d254e660 --- /dev/null +++ b/include/linux/spi/lms283gf05.h @@ -0,0 +1,28 @@ +/* + * lms283gf05.h - Platform glue for Samsung LMS283GF05 LCD + * + * Copyright (C) 2009 Marek Vasut + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef _INCLUDE_LINUX_SPI_LMS283GF05_H_ +#define _INCLUDE_LINUX_SPI_LMS283GF05_H_ + +struct lms283gf05_pdata { + unsigned long reset_gpio; + bool reset_inverted; +}; + +#endif /* _INCLUDE_LINUX_SPI_LMS283GF05_H_ */ -- cgit v1.2.3 From 0403e3827788d878163f9ef0541b748b0f88ca5d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:50 -0700 Subject: dmaengine: add fence support Some engines optimize operation by reading ahead in the descriptor chain such that descriptor2 may start execution before descriptor1 completes. If descriptor2 depends on the result from descriptor1 then a fence is required (on descriptor2) to disable this optimization. The async_tx api could implicitly identify dependencies via the 'depend_tx' parameter, but that would constrain cases where the dependency chain only specifies a completion order rather than a data dependency. So, provide an ASYNC_TX_FENCE to explicitly identify data dependencies. Signed-off-by: Dan Williams --- include/linux/async_tx.h | 3 +++ include/linux/dmaengine.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 866e61c4e2e..a1c486a88e8 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,11 +58,14 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain + * @ASYNC_TX_FENCE: specify that the next operation in the dependency + * chain uses this operation's result as an input */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), ASYNC_TX_ACK = (1 << 2), + ASYNC_TX_FENCE = (1 << 3), }; /** diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1012f1abcb5..4d6c1c925fd 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -87,6 +87,8 @@ enum dma_transaction_type { * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as * sources that were the result of a previous operation, in the case of a PQ * operation it continues the calculation with new sources + * @DMA_PREP_FENCE - tell the driver that subsequent operations depend + * on the result of this operation */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -98,6 +100,7 @@ enum dma_ctrl_flags { DMA_PREP_PQ_DISABLE_P = (1 << 6), DMA_PREP_PQ_DISABLE_Q = (1 << 7), DMA_PREP_CONTINUE = (1 << 8), + DMA_PREP_FENCE = (1 << 9), }; /** -- cgit v1.2.3 From 138f4c359d23d2ec38d18bd70dd9613ae515fe93 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:51 -0700 Subject: dmaengine, async_tx: add a "no channel switch" allocator Channel switching is problematic for some dmaengine drivers as the architecture precludes separating the ->prep from ->submit. In these cases the driver can select ASYNC_TX_DISABLE_CHANNEL_SWITCH to modify the async_tx allocator to only return channels that support all of the required asynchronous operations. For example MD_RAID456=y selects support for asynchronous xor, xor validate, pq, pq validate, and memcpy. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=y any channel with all these capabilities is marked DMA_ASYNC_TX allowing async_tx_find_channel() to quickly locate compatible channels with the guarantee that dependency chains will remain on one channel. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=n async_tx_find_channel() may select channels that lead to operation chains that need to cross channel boundaries using the async_tx channel switch capability. Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 4d6c1c925fd..86853ed7970 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -48,6 +48,9 @@ enum dma_status { /** * enum dma_transaction_type - DMA transaction types/indexes + * + * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is + * automatically set as dma devices are registered. */ enum dma_transaction_type { DMA_MEMCPY, @@ -61,6 +64,7 @@ enum dma_transaction_type { DMA_MEMCPY_CRC32C, DMA_INTERRUPT, DMA_PRIVATE, + DMA_ASYNC_TX, DMA_SLAVE, }; @@ -396,7 +400,11 @@ static inline void net_dmaengine_put(void) #ifdef CONFIG_ASYNC_TX_DMA #define async_dmaengine_get() dmaengine_get() #define async_dmaengine_put() dmaengine_put() +#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH +#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX) +#else #define async_dma_find_channel(type) dma_find_channel(type) +#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */ #else static inline void async_dmaengine_get(void) { @@ -409,7 +417,7 @@ async_dma_find_channel(enum dma_transaction_type type) { return NULL; } -#endif +#endif /* CONFIG_ASYNC_TX_DMA */ dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); -- cgit v1.2.3 From 9308add6ea4fedeba37b0d7c4630a542bd34f214 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:52 -0700 Subject: dmaengine: cleanup unused transaction types No drivers currently implement these operation types, so they can be deleted. Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 86853ed7970..db23fd583f9 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -56,12 +56,9 @@ enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, DMA_PQ, - DMA_DUAL_XOR, - DMA_PQ_UPDATE, DMA_XOR_VAL, DMA_PQ_VAL, DMA_MEMSET, - DMA_MEMCPY_CRC32C, DMA_INTERRUPT, DMA_PRIVATE, DMA_ASYNC_TX, -- cgit v1.2.3 From 83544ae9f3991bfc7d5e0fe9a3008cd05a8d57b7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:53 -0700 Subject: dmaengine, async_tx: support alignment checks Some engines have transfer size and address alignment restrictions. Add a per-operation alignment property to struct dma_device that the async routines and dmatest can use to check alignment capabilities. Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index db23fd583f9..835b9c7bf1c 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -245,6 +245,10 @@ struct dma_async_tx_descriptor { * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability * @max_pq: maximum number of PQ sources and PQ-continue capability + * @copy_align: alignment shift for memcpy operations + * @xor_align: alignment shift for xor operations + * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -271,6 +275,10 @@ struct dma_device { dma_cap_mask_t cap_mask; unsigned short max_xor; unsigned short max_pq; + u8 copy_align; + u8 xor_align; + u8 pq_align; + u8 fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -314,6 +322,42 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) +{ + size_t mask; + + if (!align) + return true; + mask = (1 << align) - 1; + if (mask & (off1 | off2 | len)) + return false; + return true; +} + +static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->copy_align, off1, off2, len); +} + +static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->xor_align, off1, off2, len); +} + +static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->pq_align, off1, off2, len); +} + +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + static inline void dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) { -- cgit v1.2.3 From b265b11fc1a0bd6ae5a7fde12e374583a52ab326 Mon Sep 17 00:00:00 2001 From: Tom Picard Date: Tue, 8 Sep 2009 17:43:01 -0700 Subject: ioat3: ioat3.2 pci ids for Jasper Forest Jasper Forest introduces raid offload support via ioat3.2 support. When raid offload is enabled two (out of 8 channels) will report raid5/raid6 offload capabilities. The remaining channels will only report ioat3.0 capabilities (memcpy). Signed-off-by: Tom Picard Signed-off-by: Dan Williams --- include/linux/pci_ids.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f71812d67d..2b4b8ce5325 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2529,6 +2529,16 @@ #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c +#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 -- cgit v1.2.3 From 0803172778901e24a75ab074798d98c2b7411559 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:04 -0700 Subject: dmaengine: kill tx_list The tx_list attribute of struct dma_async_tx_descriptor is common to most, but not all dma driver implementations. None of the upper level code (dmaengine/async_tx) uses it, so allow drivers to implement it locally if they need it. This saves sizeof(struct list_head) bytes for drivers that do not manage descriptors with a linked list (e.g.: ioatdma v2,3). Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ffefba81c81..f114bc7790b 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -180,8 +180,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param); * @flags: flags to augment operation preparation, control completion, and * communicate status * @phys: physical address of the descriptor - * @tx_list: driver common field for operations that require multiple - * descriptors * @chan: target channel for this operation * @tx_submit: set the prepared descriptor(s) to be executed by the engine * @callback: routine to call after this operation is complete @@ -195,7 +193,6 @@ struct dma_async_tx_descriptor { dma_cookie_t cookie; enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */ dma_addr_t phys; - struct list_head tx_list; struct dma_chan *chan; dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); dma_async_tx_callback callback; -- cgit v1.2.3 From 1a5aeeecd550ee4344cfba1791f1134739b16dc6 Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Thu, 10 Sep 2009 15:05:58 +0200 Subject: dca: registering requesters in multiple dca domains This patch enables DCA support on multiple-IOH/multiple-IIO architectures. It modifies dca module by replacing single dca_providers list with dca_domains list, each domain containing separate list of providers. This approach lets dca driver manage multiple domains, i.e. sets of providers and requesters mapped back to the same PCI root complex device. The driver takes care to register each requester to a provider from the same domain. Signed-off-by: Dan Williams Signed-off-by: Maciej Sosnowski --- include/linux/dca.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dca.h b/include/linux/dca.h index 9c20c7e87d0..d27a7a05718 100644 --- a/include/linux/dca.h +++ b/include/linux/dca.h @@ -20,6 +20,9 @@ */ #ifndef DCA_H #define DCA_H + +#include + /* DCA Provider API */ /* DCA Notifier Interface */ @@ -36,6 +39,12 @@ struct dca_provider { int id; }; +struct dca_domain { + struct list_head node; + struct list_head dca_providers; + struct pci_bus *pci_rc; +}; + struct dca_ops { int (*add_requester) (struct dca_provider *, struct device *); int (*remove_requester) (struct dca_provider *, struct device *); @@ -47,7 +56,7 @@ struct dca_ops { struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); void free_dca_provider(struct dca_provider *dca); int register_dca_provider(struct dca_provider *dca, struct device *dev); -void unregister_dca_provider(struct dca_provider *dca); +void unregister_dca_provider(struct dca_provider *dca, struct device *dev); static inline void *dca_priv(struct dca_provider *dca) { -- cgit v1.2.3 From 52a7a1cec88acdaf3f8b36a6b1fe904f6eca7ee5 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 10 Sep 2009 15:26:30 +0200 Subject: [ARM] pxafb: add accelerator ID for PXA3xx GCU Add ID 99 for PXA3xx frame buffers and report it in the pxa frame buffer conditionally, depending on a new flag in struct pxafb_mach_info. Signed-off-by: Daniel Mack Cc: linux-fbdev-devel@lists.sourceforge.net Cc: Dennis Oliver Kropp Cc: Sven Neumann Cc: Guennadi Liakhovetski Signed-off-by: Eric Miao --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index f847df9e99b..a34bdf5a9d2 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -133,6 +133,7 @@ struct dentry; #define FB_ACCEL_NEOMAGIC_NM2230 96 /* NeoMagic NM2230 */ #define FB_ACCEL_NEOMAGIC_NM2360 97 /* NeoMagic NM2360 */ #define FB_ACCEL_NEOMAGIC_NM2380 98 /* NeoMagic NM2380 */ +#define FB_ACCEL_PXA3XX 99 /* PXA3xx */ #define FB_ACCEL_SAVAGE4 0x80 /* S3 Savage4 */ #define FB_ACCEL_SAVAGE3D 0x81 /* S3 Savage3D */ -- cgit v1.2.3 From d466f2fcb32cd97fd586bfa33f5dba3ac78aadb0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:03 +0200 Subject: HWPOISON: Add page flag for poisoned pages Hardware poisoned pages need special handling in the VM and shouldn't be touched again. This requires a new page flag. Define it here. The page flags wars seem to be over, so it shouldn't be a problem to get a new one. v2: Add TestSetHWPoison (suggested by Johannes Weiner) Acked-by: Christoph Lameter Signed-off-by: Andi Kleen --- include/linux/page-flags.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2b87acfc5f8..9bc5fd9fdbf 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -51,6 +51,9 @@ * PG_buddy is set to indicate that the page is free and in the buddy system * (see mm/page_alloc.c). * + * PG_hwpoison indicates that a page got corrupted in hardware and contains + * data with incorrect ECC bits that triggered a machine check. Accessing is + * not safe since it may cause another machine check. Don't touch! */ /* @@ -101,6 +104,9 @@ enum pageflags { #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ +#endif +#ifdef CONFIG_MEMORY_FAILURE + PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif __NR_PAGEFLAGS, @@ -263,6 +269,15 @@ PAGEFLAG(Uncached, uncached) PAGEFLAG_FALSE(Uncached) #endif +#ifdef CONFIG_MEMORY_FAILURE +PAGEFLAG(HWPoison, hwpoison) +TESTSETFLAG(HWPoison, hwpoison) +#define __PG_HWPOISON (1UL << PG_hwpoison) +#else +PAGEFLAG_FALSE(HWPoison) +#define __PG_HWPOISON 0 +#endif + static inline int PageUptodate(struct page *page) { int ret = test_bit(PG_uptodate, &(page)->flags); @@ -387,7 +402,7 @@ static inline void __ClearPageTail(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED) + 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON) /* * Flags checked when a page is prepped for return by the page allocator. -- cgit v1.2.3 From 10be22dfe1e6ad978269dc275147e0ed049187bb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:04 +0200 Subject: HWPOISON: Export some rmap vma locking to outside world Needed for later patch that walks rmap entries on its own. This used to be very frowned upon, but memory-failure.c does some rather specialized rmap walking and rmap has been stable for quite some time, so I think it's ok now to export it. Signed-off-by: Andi Kleen --- include/linux/rmap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf116d0dbf2..8dff2ffab82 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -112,6 +112,12 @@ int page_mkclean(struct page *); */ int try_to_munlock(struct page *); +/* + * Called by memory-failure.c to kill processes. + */ +struct anon_vma *page_lock_anon_vma(struct page *page); +void page_unlock_anon_vma(struct anon_vma *anon_vma); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) -- cgit v1.2.3 From a7420aa54dbf699a5a05feba3c859b6baaa3938c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:05 +0200 Subject: HWPOISON: Add support for poison swap entries v2 Memory migration uses special swap entry types to trigger special actions on page faults. Extend this mechanism to also support poisoned swap entries, to trigger poison handling on page faults. This allows follow-on patches to prevent processes from faulting in poisoned pages again. v2: Fix overflow in MAX_SWAPFILES (Fengguang Wu) v3: Better overflow fix (Hidehiro Kawai) Signed-off-by: Andi Kleen --- include/linux/swap.h | 34 ++++++++++++++++++++++++++++------ include/linux/swapops.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7c15334f3ff..f077e454c65 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -34,15 +34,37 @@ static inline int current_is_kswapd(void) * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 -#ifndef CONFIG_MIGRATION -#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) + +/* + * Use some of the swap files numbers for other purposes. This + * is a convenient way to hook into the VM to trigger special + * actions on faults. + */ + +/* + * NUMA node memory migration support + */ +#ifdef CONFIG_MIGRATION +#define SWP_MIGRATION_NUM 2 +#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) +#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #else -/* Use last two entries for page migration swap entries */ -#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2) -#define SWP_MIGRATION_READ MAX_SWAPFILES -#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1) +#define SWP_MIGRATION_NUM 0 #endif +/* + * Handling of hardware poisoned pages with memory corruption. + */ +#ifdef CONFIG_MEMORY_FAILURE +#define SWP_HWPOISON_NUM 1 +#define SWP_HWPOISON MAX_SWAPFILES +#else +#define SWP_HWPOISON_NUM 0 +#endif + +#define MAX_SWAPFILES \ + ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) + /* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6ec39ab27b4..cd42e30b7c6 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +static inline int non_swap_entry(swp_entry_t entry) +{ + return swp_type(entry) >= MAX_SWAPFILES; +} +#else +static inline int non_swap_entry(swp_entry_t entry) +{ + return 0; +} +#endif -- cgit v1.2.3 From d1737fdbec7f90edc52dd0c5c3767457f28e78d8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:06 +0200 Subject: HWPOISON: Add basic support for poisoned pages in fault handler v3 - Add a new VM_FAULT_HWPOISON error code to handle_mm_fault. Right now architectures have to explicitely enable poison page support, so this is forward compatible to all architectures. They only need to add it when they enable poison page support. - Add poison page handling in swap in fault code v2: Add missing delayacct_clear_flag (Hidehiro Kawai) v3: Really use delayacct_clear_flag (Hidehiro Kawai) Signed-off-by: Andi Kleen --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9a72cc78e6b..082b68cb5ff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -685,11 +685,12 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_MAJOR 0x0004 #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ +#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */ #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON) /* * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. -- cgit v1.2.3 From 14fa31b89c5ae79e4131da41761378a6df674352 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:10 +0200 Subject: HWPOISON: Use bitmask/action code for try_to_unmap behaviour try_to_unmap currently has multiple modi (migration, munlock, normal unmap) which are selected by magic flag variables. The logic is not very straight forward, because each of these flag change multiple behaviours (e.g. migration turns off aging, not only sets up migration ptes etc.) Also the different flags interact in magic ways. A later patch in this series adds another mode to try_to_unmap, so this becomes quickly unmanageable. Replace the different flags with a action code (migration, munlock, munmap) and some additional flags as modifiers (ignore mlock, ignore aging). This makes the logic more straight forward and allows easier extension to new behaviours. Change all the caller to declare what they want to do. This patch is supposed to be a nop in behaviour. If anyone can prove it is not that would be a bug. Cc: Lee.Schermerhorn@hp.com Cc: npiggin@suse.de Signed-off-by: Andi Kleen --- include/linux/rmap.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 8dff2ffab82..4c4a2d4d289 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -85,7 +85,18 @@ static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma, */ int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt, unsigned long *vm_flags); -int try_to_unmap(struct page *, int ignore_refs); +enum ttu_flags { + TTU_UNMAP = 0, /* unmap mode */ + TTU_MIGRATION = 1, /* migration mode */ + TTU_MUNLOCK = 2, /* munlock mode */ + TTU_ACTION_MASK = 0xff, + + TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ + TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ +}; +#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) + +int try_to_unmap(struct page *, enum ttu_flags flags); /* * Called from mm/filemap_xip.c to unmap empty zero page -- cgit v1.2.3 From 888b9f7c58ebe8303bad817cd554df887a683957 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:11 +0200 Subject: HWPOISON: Handle hardware poisoned pages in try_to_unmap When a page has the poison bit set replace the PTE with a poison entry. This causes the right error handling to be done later when a process runs into it. v2: add a new flag to not do that (needed for the memory-failure handler later) (Fengguang) v3: remove unnecessary is_migration_entry() test (Fengguang, Minchan) Reviewed-by: Minchan Kim Reviewed-by: Wu Fengguang Signed-off-by: Andi Kleen --- include/linux/rmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 4c4a2d4d289..ce989f1fc2e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -93,6 +93,7 @@ enum ttu_flags { TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ + TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */ }; #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) -- cgit v1.2.3 From 750b4987b0cd4d408e54cb83a80a067cbe690feb Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 16 Sep 2009 11:50:12 +0200 Subject: HWPOISON: Refactor truncate to allow direct truncating of page v2 Extract out truncate_inode_page() out of the truncate path so that it can be used by memory-failure.c [AK: description, headers, fix typos] v2: Some white space changes from Fengguang Wu Signed-off-by: Andi Kleen --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 082b68cb5ff..8cbc0aafd5b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -794,6 +794,8 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +int truncate_inode_page(struct address_space *mapping, struct page *page); + #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); -- cgit v1.2.3 From 83f786680aec8d030184f7ced1a0a3dd8ac81764 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Sep 2009 11:50:13 +0200 Subject: HWPOISON: Add invalidate_inode_page Add a simple way to invalidate a single page This is just a refactoring of the truncate.c code. Originally from Fengguang, modified by Andi Kleen. Signed-off-by: Andi Kleen --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8cbc0aafd5b..b05bbde0296 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -796,6 +796,8 @@ extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); +int invalidate_inode_page(struct page *page); + #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); -- cgit v1.2.3 From 257187362123f15d9d1e09918cf87cebbea4e786 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:13 +0200 Subject: HWPOISON: Define a new error_remove_page address space op for async truncation Truncating metadata pages is not safe right now before we haven't audited all file systems. To enable truncation only for data address space define a new address_space callback error_remove_page. This is used for memory_failure.c memory error handling. This can be then set to truncate_inode_page() This patch just defines the new operation and adds documentation. Callers and users come in followon patches. Signed-off-by: Andi Kleen --- include/linux/fs.h | 1 + include/linux/mm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b21cf6b9c80..4f47afd3764 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -595,6 +595,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + int (*error_remove_page)(struct address_space *, struct page *); }; /* diff --git a/include/linux/mm.h b/include/linux/mm.h index b05bbde0296..a16018f7d61 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -795,6 +795,7 @@ extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); +int generic_error_remove_page(struct address_space *mapping, struct page *page); int invalidate_inode_page(struct page *page); -- cgit v1.2.3 From 4db96cf077aa938b11fe7ac79ecc9b29ec00fbab Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:14 +0200 Subject: HWPOISON: Add PR_MCE_KILL prctl to control early kill behaviour per process This allows processes to override their early/late kill behaviour on hardware memory errors. Typically applications which are memory error aware is better of with early kill (see the error as soon as possible), all others with late kill (only see the error when the error is really impacting execution) There's a global sysctl, but this way an application can set its specific policy. We're using two bits, one to signify that the process stated its intention and that I also made the prctl future proof by enforcing the unused arguments are 0. The state is inherited to children. Note this makes us officially run out of process flags on 32bit, but the next patch can easily add another field. Manpage patch will be supplied separately. Signed-off-by: Andi Kleen --- include/linux/prctl.h | 2 ++ include/linux/sched.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index b00df4c79c6..3dc303197e6 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -88,4 +88,6 @@ #define PR_TASK_PERF_COUNTERS_DISABLE 31 #define PR_TASK_PERF_COUNTERS_ENABLE 32 +#define PR_MCE_KILL 33 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index f3d74bd04d1..29eae73c951 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1687,6 +1687,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ @@ -1706,6 +1707,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ -- cgit v1.2.3 From 6a46079cf57a7f7758e8b926980a4f852f89b34d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:15 +0200 Subject: HWPOISON: The high level memory error handler in the VM v7 Add the high level memory handler that poisons pages that got corrupted by hardware (typically by a two bit flip in a DIMM or a cache) on the Linux level. The goal is to prevent everyone from accessing these pages in the future. This done at the VM level by marking a page hwpoisoned and doing the appropriate action based on the type of page it is. The code that does this is portable and lives in mm/memory-failure.c To quote the overview comment: High level machine check handler. Handles pages reported by the hardware as being corrupted usually due to a 2bit ECC memory or cache failure. This focuses on pages detected as corrupted in the background. When the current CPU tries to consume corruption the currently running process can just be killed directly instead. This implies that if the error cannot be handled for some reason it's safe to just ignore it because no corruption has been consumed yet. Instead when that happens another machine check will happen. Handles page cache pages in various states. The tricky part here is that we can access any page asynchronous to other VM users, because memory failures could happen anytime and anywhere, possibly violating some of their assumptions. This is why this code has to be extremely careful. Generally it tries to use normal locking rules, as in get the standard locks, even if that means the error handling takes potentially a long time. Some of the operations here are somewhat inefficient and have non linear algorithmic complexity, because the data structures have not been optimized for this case. This is in particular the case for the mapping from a vma to a process. Since this case is expected to be rare we hope we can get away with this. There are in principle two strategies to kill processes on poison: - just unmap the data and wait for an actual reference before killing - kill as soon as corruption is detected. Both have advantages and disadvantages and should be used in different situations. Right now both are implemented and can be switched with a new sysctl vm.memory_failure_early_kill The default is early kill. The patch does some rmap data structure walking on its own to collect processes to kill. This is unusual because normally all rmap data structure knowledge is in rmap.c only. I put it here for now to keep everything together and rmap knowledge has been seeping out anyways Includes contributions from Johannes Weiner, Chris Mason, Fengguang Wu, Nick Piggin (who did a lot of great work) and others. Cc: npiggin@suse.de Cc: riel@redhat.com Signed-off-by: Andi Kleen Acked-by: Rik van Riel Reviewed-by: Hidehiro Kawai --- include/linux/mm.h | 7 +++++++ include/linux/rmap.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a16018f7d61..1ffca03f34b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1309,5 +1309,12 @@ void vmemmap_populate_print_last(void); extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size); + +extern void memory_failure(unsigned long pfn, int trapno); +extern int __memory_failure(unsigned long pfn, int trapno, int ref); +extern int sysctl_memory_failure_early_kill; +extern int sysctl_memory_failure_recovery; +extern atomic_long_t mce_bad_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index ce989f1fc2e..3c1004e5074 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -129,6 +129,7 @@ int try_to_munlock(struct page *); */ struct anon_vma *page_lock_anon_vma(struct page *page); void page_unlock_anon_vma(struct anon_vma *anon_vma); +int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); #else /* !CONFIG_MMU */ -- cgit v1.2.3 From e2d4304b7d2b85c45de89ec420037d6b9261a12d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 17 Sep 2009 09:40:31 -0700 Subject: PCI: fix VGA arbiter header file Remove reference to vgaarb.c and replace it with a comment about the arbiter itself. Reported-by: Tiago Vignatti Signed-off-by: Jesse Barnes --- include/linux/vgaarb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index e81c64af80c..a1fa1da8dc1 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -1,5 +1,6 @@ /* - * vgaarb.c + * The VGA aribiter manages VGA space routing and VGA resource decode to + * allow multiple VGA devices to be used in a system in a safe way. * * (C) Copyright 2005 Benjamin Herrenschmidt * (C) Copyright 2007 Paulo R. Zanoni -- cgit v1.2.3 From 181d683d752c432635eda0f182ee71548c1f1820 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 16 Sep 2009 01:06:43 -0700 Subject: Input: libps2 - additional locking for i8042 ports The serio ports on i8042 are not completely isolated; while we provide enough locking to ensure proper serialization when accessing control and data registers AUX and KBD ports can still have an effect on each other on PS/2 protocol level. The most prominent effect is that issuing a command for the device connected to one port may cause abort of the command currently executing by the device connected to another port. Since i8042 nor serio subsystem are not aware of the details of the PS/2 protocol (length of the commands and their replies and so on) the locking should be done on libps2 level by adding special handling when we see that we are dealing with serio port on i8042. Signed-off-by: Dmitry Torokhov --- include/linux/i8042.h | 30 ++++++++++++++++++++++++++++++ include/linux/libps2.h | 2 ++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 7907a72403e..60c3360ef6a 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -7,6 +7,7 @@ * the Free Software Foundation. */ +#include /* * Standard commands. @@ -30,6 +31,35 @@ #define I8042_CMD_MUX_PFX 0x0090 #define I8042_CMD_MUX_SEND 0x1090 +struct serio; + +#if defined(CONFIG_SERIO_I8042) || defined(CONFIG_SERIO_I8042_MODULE) + +void i8042_lock_chip(void); +void i8042_unlock_chip(void); int i8042_command(unsigned char *param, int command); +bool i8042_check_port_owner(const struct serio *); + +#else + +void i8042_lock_chip(void) +{ +} + +void i8042_unlock_chip(void) +{ +} + +int i8042_command(unsigned char *param, int command) +{ + return -ENOSYS; +} + +bool i8042_check_port_owner(const struct serio *serio) +{ + return false; +} + +#endif #endif diff --git a/include/linux/libps2.h b/include/linux/libps2.h index fcf5fbe6a50..79603a6c356 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -44,6 +44,8 @@ struct ps2dev { void ps2_init(struct ps2dev *ps2dev, struct serio *serio); int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout); void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout); +void ps2_begin_command(struct ps2dev *ps2dev); +void ps2_end_command(struct ps2dev *ps2dev); int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data); -- cgit v1.2.3 From ffd0db97196c1057f09c2ab42dd5b30e94e511d9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 16 Sep 2009 01:06:43 -0700 Subject: Input: add generic suspend and resume for input devices Automatically turn off leds and sound effects as part of suspend process and restore led state, sounds and repeat rate at resume. Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 8b3bc3e0d14..0ccfc30cd40 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1123,7 +1123,7 @@ struct input_dev { struct mutex mutex; unsigned int users; - int going_away; + bool going_away; struct device dev; -- cgit v1.2.3 From 38e783b38148531c0840ac130b97eb8158f84b48 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Thu, 17 Sep 2009 22:35:45 -0700 Subject: Input: add touchscreen driver for MELFAS MCS-5000 controller The MELPAS MCS-5000 is the touchscreen controller. The overview of this controller can see at the following website: http://www.melfas.com/product/product01.asp?k_r=eng_ This driver is tested on s3c6410 NCP board and supports only the i2c interface. Signed-off-by: Joonyoung Shim Signed-off-by: Dmitry Torokhov --- include/linux/i2c/mcs5000_ts.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/linux/i2c/mcs5000_ts.h (limited to 'include/linux') diff --git a/include/linux/i2c/mcs5000_ts.h b/include/linux/i2c/mcs5000_ts.h new file mode 100644 index 00000000000..5a117b5ca15 --- /dev/null +++ b/include/linux/i2c/mcs5000_ts.h @@ -0,0 +1,24 @@ +/* + * mcs5000_ts.h + * + * Copyright (C) 2009 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __LINUX_MCS5000_TS_H +#define __LINUX_MCS5000_TS_H + +/* platform data for the MELFAS MCS-5000 touchscreen driver */ +struct mcs5000_ts_platform_data { + void (*cfg_pin)(void); + int x_size; + int y_size; +}; + +#endif /* __LINUX_MCS5000_TS_H */ -- cgit v1.2.3 From 88751dd6ce1fb0627c36c4ab08a40730e5a50d3e Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Thu, 17 Sep 2009 22:39:38 -0700 Subject: Input: add driver for ADP5588 QWERTY I2C Keypad Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu Signed-off-by: Mike Frysinger Signed-off-by: Dmitry Torokhov --- include/linux/i2c/adp5588.h | 92 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 include/linux/i2c/adp5588.h (limited to 'include/linux') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h new file mode 100644 index 00000000000..fc5db826b48 --- /dev/null +++ b/include/linux/i2c/adp5588.h @@ -0,0 +1,92 @@ +/* + * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller + * + * Copyright 2009 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef _ADP5588_H +#define _ADP5588_H + +#define DEV_ID 0x00 /* Device ID */ +#define CFG 0x01 /* Configuration Register1 */ +#define INT_STAT 0x02 /* Interrupt Status Register */ +#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */ +#define Key_EVENTA 0x04 /* Key Event Register A */ +#define Key_EVENTB 0x05 /* Key Event Register B */ +#define Key_EVENTC 0x06 /* Key Event Register C */ +#define Key_EVENTD 0x07 /* Key Event Register D */ +#define Key_EVENTE 0x08 /* Key Event Register E */ +#define Key_EVENTF 0x09 /* Key Event Register F */ +#define Key_EVENTG 0x0A /* Key Event Register G */ +#define Key_EVENTH 0x0B /* Key Event Register H */ +#define Key_EVENTI 0x0C /* Key Event Register I */ +#define Key_EVENTJ 0x0D /* Key Event Register J */ +#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */ +#define UNLOCK1 0x0F /* Unlock Key1 */ +#define UNLOCK2 0x10 /* Unlock Key2 */ +#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */ +#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */ +#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */ +#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */ +#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */ +#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */ +#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */ +#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */ +#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */ +#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */ +#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */ +#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */ +#define GPI_EM1 0x20 /* GPI Event Mode 1 */ +#define GPI_EM2 0x21 /* GPI Event Mode 2 */ +#define GPI_EM3 0x22 /* GPI Event Mode 3 */ +#define GPIO_DIR1 0x23 /* GPIO Data Direction */ +#define GPIO_DIR2 0x24 /* GPIO Data Direction */ +#define GPIO_DIR3 0x25 /* GPIO Data Direction */ +#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */ +#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */ +#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */ +#define Debounce_DIS1 0x29 /* Debounce Disable */ +#define Debounce_DIS2 0x2A /* Debounce Disable */ +#define Debounce_DIS3 0x2B /* Debounce Disable */ +#define GPIO_PULL1 0x2C /* GPIO Pull Disable */ +#define GPIO_PULL2 0x2D /* GPIO Pull Disable */ +#define GPIO_PULL3 0x2E /* GPIO Pull Disable */ +#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */ +#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */ +#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */ +#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */ +#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */ +#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */ +#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */ +#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */ +#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */ +#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */ +#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */ +#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */ +#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */ +#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */ +#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */ + +#define ADP5588_DEVICE_ID_MASK 0xF + +/* Put one of these structures in i2c_board_info platform_data */ + +#define ADP5588_KEYMAPSIZE 80 + +struct adp5588_kpad_platform_data { + int rows; /* Number of rows */ + int cols; /* Number of columns */ + const unsigned short *keymap; /* Pointer to keymap */ + unsigned short keymapsize; /* Keymap size */ + unsigned repeat:1; /* Enable key repeat */ + unsigned en_keylock:1; /* Enable Key Lock feature */ + unsigned short unlock_key1; /* Unlock Key 1 */ + unsigned short unlock_key2; /* Unlock Key 2 */ +}; + +#endif -- cgit v1.2.3 From ee2b805c8eb6459cf541ef141ff70dae17af59ca Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 15 Aug 2009 15:12:05 +0100 Subject: ARM: 5678/1: SSP/SPI PL022 polarity terminology fix The definition of the SPI clock phase for the Motorola mode of the PL022 driver was incorrect: the spec had been interpreted as data being recieved on rising or falling edge of the clocks while the correct interpretation is that data can be recieved on the first or second edge transition, falling or rising depending on the polarity setting. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/pl022.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index dcad0ffd175..e4836c6b3dd 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -136,12 +136,12 @@ enum ssp_tx_level_trig { /** * enum SPI Clock Phase - clock phase (Motorola SPI interface only) - * @SSP_CLK_RISING_EDGE: Receive data on rising edge - * @SSP_CLK_FALLING_EDGE: Receive data on falling edge + * @SSP_CLK_FIRST_EDGE: Receive data on first edge transition (actual direction depends on polarity) + * @SSP_CLK_SECOND_EDGE: Receive data on second edge transition (actual direction depends on polarity) */ enum ssp_spi_clk_phase { - SSP_CLK_RISING_EDGE, - SSP_CLK_FALLING_EDGE + SSP_CLK_FIRST_EDGE, + SSP_CLK_SECOND_EDGE }; /** -- cgit v1.2.3 From 42f29a25207dc7b3051d299cc028d4b395d1328d Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sun, 20 Sep 2009 18:14:12 -0400 Subject: kbuild: Don't define ALIGN and ENTRY when preprocessing linker scripts. Adding a reference to to x86's causes the x86 linker script to have syntax errors, because the ALIGN and ENTRY keywords get redefined to the assembly implementations of those. One could fix this by adjusting the include structure, but I think any solution based on that approach would be fragile. Currently, it is impossible when writing a header to do something different for assembly files and linker scripts, even though there are clearly cases where one wants them to define macros differently for the two (ENTRY being an excellent example). So I think the right solution here is to introduce a new preprocessor definition, called LINKER_SCRIPT that is set along with __ASSEMBLY__ for linker scripts, and to use that to not define ALIGN and ENTRY in linker scripts. I suspect we'll find other uses for this mechanism in the future. Signed-off-by: Tim Abbott Signed-off-by: Sam Ravnborg --- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 691f59171c6..5126cceb6ae 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -57,6 +57,7 @@ #ifdef __ASSEMBLY__ +#ifndef LINKER_SCRIPT #define ALIGN __ALIGN #define ALIGN_STR __ALIGN_STR @@ -66,6 +67,7 @@ ALIGN; \ name: #endif +#endif /* LINKER_SCRIPT */ #ifndef WEAK #define WEAK(name) \ -- cgit v1.2.3 From 325253a6b2de4bdfa9ef0e28b5df8a4a4fe2b677 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 14 Jul 2009 17:06:02 +0100 Subject: backlight: Allow drivers to update the core, and generate events on changes Certain hardware will send us events when the backlight brightness changes. Add a function to update the value in the core, and additionally send a uevent so that userspace can pop up appropriate UI. The uevents are flagged depending on whether the update originated in the kernel or from userspace, making it easier to only display UI at the appropriate time. Signed-off-by: Matthew Garrett Signed-off-by: Richard Purdie --- include/linux/backlight.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 79ca2da81c8..0f5f57858a2 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -27,6 +27,11 @@ * Any other use of the locks below is probably wrong. */ +enum backlight_update_reason { + BACKLIGHT_UPDATE_HOTKEY, + BACKLIGHT_UPDATE_SYSFS, +}; + struct backlight_device; struct fb_info; @@ -100,6 +105,8 @@ static inline void backlight_update_status(struct backlight_device *bd) extern struct backlight_device *backlight_device_register(const char *name, struct device *dev, void *devdata, struct backlight_ops *ops); extern void backlight_device_unregister(struct backlight_device *bd); +extern void backlight_force_update(struct backlight_device *bd, + enum backlight_update_reason reason); #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev) -- cgit v1.2.3 From a6f10a2f5d8c2738b3ac05974bdbea3b68a2aecd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 22 Sep 2009 22:34:24 +1000 Subject: perf_event: Update PERF_EVENT_FORK header definition PERF_EVENT_FORK always outputs the time field, so update the header to reflect this. Signed-off-by: Anton Blanchard Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090922123424.GD19453@kryten> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- include/linux/perf_event.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 368bd70f1d2..7b7fbf433cf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -361,7 +361,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid, ppid; * u32 tid, ptid; - * { u64 time; } && PERF_SAMPLE_TIME + * u64 time; * }; */ PERF_EVENT_FORK = 7, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index acefaf71e6d..3a9d36d1e92 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -357,7 +357,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid, ppid; * u32 tid, ptid; - * { u64 time; } && PERF_SAMPLE_TIME + * u64 time; * }; */ PERF_RECORD_FORK = 7, -- cgit v1.2.3 From 6ef297f86b62f187c59475784208f75c2ed8ccd8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 22 Sep 2009 14:29:36 +0100 Subject: ARM: 5720/1: Move MMCI header to amba include dir This moves the mmci platform data definition struct away from arch/arm/include/asm/mach/mmc.h into the more proper place among the other primecells in include/linux/amba/mmci.h and at the same time renames it to "mmci.h", and also the struct in this file confusingly named mmc_platform_data has been renamed mmci_platform_data for clarity. Cc: Catalin Marinas Signed-off-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/mmci.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/amba/mmci.h (limited to 'include/linux') diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h new file mode 100644 index 00000000000..6b4241748dd --- /dev/null +++ b/include/linux/amba/mmci.h @@ -0,0 +1,18 @@ +/* + * include/linux/amba/mmci.h + */ +#ifndef AMBA_MMCI_H +#define AMBA_MMCI_H + +#include + +struct mmci_platform_data { + unsigned int ocr_mask; /* available voltages */ + u32 (*translate_vdd)(struct device *, unsigned int); + unsigned int (*status)(struct device *); + int gpio_wp; + int gpio_cd; + unsigned long capabilities; +}; + +#endif -- cgit v1.2.3 From 8cd09a5984c08dafade74c9c1ab4311af2bf2d24 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Tue, 22 Sep 2009 18:00:44 +0800 Subject: tracing: Fix a comment and a trivial format issue in tracepoint.h Fix the tracepoint documentation path in tracepoints headers and a misaligned tabulation. Signed-off-by: Li Hong Cc: Steven Rostedt Cc: Ingo Molnar LKML-Reference: <3a3680030909220300h7cf18849q4d4702b9d4feaa67@mail.gmail.com> Signed-off-by: Frederic Weisbecker --- include/linux/tracepoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63a3f7a8058..2aac8a83e89 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -4,7 +4,7 @@ /* * Kernel Tracepoint API. * - * See Documentation/tracepoint.txt. + * See Documentation/trace/tracepoints.txt. * * (C) Copyright 2008 Mathieu Desnoyers * @@ -36,7 +36,7 @@ struct tracepoint { #ifndef DECLARE_TRACE #define TP_PROTO(args...) args -#define TP_ARGS(args...) args +#define TP_ARGS(args...) args #ifdef CONFIG_TRACEPOINTS -- cgit v1.2.3 From ec4756238239f1a331d9fb95bad8b281dad56855 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 22 Sep 2009 04:00:27 +0000 Subject: smsc95xx: fix transmission where ZLP is expected Usbnet framework assumes USB hardware doesn't handle zero length packets, but SMSC LAN95xx requires these to be sent for correct operation. This patch fixes an easily reproducible tx lockup when sending a frame that results in exactly 512 bytes in a USB transmission (e.g. a UDP frame with 458 data bytes, due to IP headers and our USB headers). It adds an extra flag to usbnet for the hardware driver to indicate that it can handle and requires the zero length packets. This patch should not affect other usbnet users, please also consider for -stable. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index bb69e256cd1..f8147305205 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -89,6 +89,7 @@ struct driver_info { #define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */ #define FLAG_WLAN 0x0080 /* use "wlan%d" names */ #define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */ +#define FLAG_SEND_ZLP 0x0200 /* hw requires ZLPs are sent */ /* init device ... can sleep, or cause probe() failure */ -- cgit v1.2.3 From fc2219d49ef1606e7fd2c88af2b423b01ff3d319 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Sep 2009 09:50:41 -0700 Subject: rcu: Clean up code based on review feedback from Josh Triplett These issues identified during an old-fashioned face-to-face code review extended over many hours. o Bury various forms of the "rsp->completed == rsp->gpnum" comparison into an rcu_gp_in_progress() function, which has the beneficial side-effect of forcing consistent use of ACCESS_ONCE(). o Replace hand-coded arithmetic with DIV_ROUND_UP(). o Bury several "!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x01])" instances into an rcu_preempted_readers() function, as this expression indicates that there are no readers blocked within RCU read-side critical sections blocking the current grace period. (Though there might well be similar readers blocking the next grace period.) o Remove a dangling rcu_restart_cpu() declaration that has been dangling for almost 20 minor releases of the kernel. Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12537246442687-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 37682770e9d..88109c87f29 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -85,7 +85,6 @@ static inline void synchronize_rcu_bh_expedited(void) extern void __rcu_init(void); extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); -- cgit v1.2.3 From 1eba8f84380bede3c602bd7758dea96925cead01 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Sep 2009 09:50:42 -0700 Subject: rcu: Clean up code based on review feedback from Josh Triplett, part 2 These issues identified during an old-fashioned face-to-face code review extending over many hours. o Add comments for tricky parts of code, and correct comments that have passed their sell-by date. o Get rid of the vestiges of rcu_init_sched(), which is no longer needed now that PREEMPT_RCU is gone. o Move the #include of rcutree_plugin.h to the end of rcutree.c, which means that, rather than having a random collection of forward declarations, the new set of forward declarations document the set of plugins. The new home for this #include also allows __rcu_init_preempt() to move into rcutree_plugin.h. o Fix rcu_preempt_check_callbacks() to be static. Suggested-by: Josh Triplett Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12537246443924-git-send-email-> Signed-off-by: Ingo Molnar Peter Zijlstra --- include/linux/rcupdate.h | 4 ++++ include/linux/rcutree.h | 6 +----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6fe0363724e..70331218e4b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -196,6 +196,8 @@ static inline void rcu_read_lock_sched(void) __acquire(RCU_SCHED); rcu_read_acquire(); } + +/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); @@ -213,6 +215,8 @@ static inline void rcu_read_unlock_sched(void) __release(RCU_SCHED); preempt_enable(); } + +/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_unlock_sched_notrace(void) { __release(RCU_SCHED); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 88109c87f29..19a3b06943e 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -90,10 +90,6 @@ extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); -static inline void rcu_init_sched(void) -{ -} - #ifdef CONFIG_NO_HZ void rcu_enter_nohz(void); void rcu_exit_nohz(void); @@ -106,7 +102,7 @@ static inline void rcu_exit_nohz(void) } #endif /* CONFIG_NO_HZ */ -/* A context switch is a grace period for rcutree. */ +/* A context switch is a grace period for RCU-sched and RCU-bh. */ static inline int rcu_blocking_is_gp(void) { return num_online_cpus() == 1; -- cgit v1.2.3 From 9b2619aff0332e95ea5eb7a0d75b0208818d871c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Sep 2009 09:50:43 -0700 Subject: rcu: Clean up code to address Ingo's checkpatch feedback Move declarations and update storage classes to make checkpatch happy. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12537246441701-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 19a3b06943e..46e9ab3ee6e 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,10 +30,14 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H +struct notifier_block; + extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); - +extern int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu); extern int rcu_needs_cpu(int cpu); +extern int rcu_expedited_torture_stats(char *page); #ifdef CONFIG_TREE_PREEMPT_RCU -- cgit v1.2.3 From 74908a0009eb36054190ab80deb9671014efed96 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 17 Sep 2009 17:47:12 -0700 Subject: include/linux/cred.h: fix build mips allmodconfig: include/linux/cred.h: In function `creds_are_invalid': include/linux/cred.h:187: error: `PAGE_SIZE' undeclared (first use in this function) include/linux/cred.h:187: error: (Each undeclared identifier is reported only once include/linux/cred.h:187: error: for each function it appears in.) Fixes commit b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Author: David Howells AuthorDate: Fri Nov 14 10:39:16 2008 +1100 Commit: James Morris CommitDate: Fri Nov 14 10:39:16 2008 +1100 CRED: Separate task security context from task_struct I think. It's way too large to be inlined anyway. Dunno if this needs an EXPORT_SYMBOL() yet. Cc: David Howells Cc: James Morris Cc: Serge Hallyn Signed-off-by: Andrew Morton Acked-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index fb371601a3b..4e3387a89cb 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -176,23 +176,7 @@ extern void __invalid_creds(const struct cred *, const char *, unsigned); extern void __validate_process_creds(struct task_struct *, const char *, unsigned); -static inline bool creds_are_invalid(const struct cred *cred) -{ - if (cred->magic != CRED_MAGIC) - return true; - if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers)) - return true; -#ifdef CONFIG_SECURITY_SELINUX - if (selinux_is_enabled()) { - if ((unsigned long) cred->security < PAGE_SIZE) - return true; - if ((*(u32 *)cred->security & 0xffffff00) == - (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)) - return true; - } -#endif - return false; -} +extern bool creds_are_invalid(const struct cred *cred); static inline void __validate_creds(const struct cred *cred, const char *file, unsigned line) -- cgit v1.2.3 From 97363c6a4f93a20380b4a9e11f35e27fed68a517 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 23 Sep 2009 14:36:38 -0400 Subject: sunrpc: xdr_xcode_hyper helpers cannot presume 64-bit alignment Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 7da466ba4b0..f5cc0898bc5 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -11,6 +11,7 @@ #include #include +#include #include /* @@ -117,14 +118,14 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le static inline __be32 * xdr_encode_hyper(__be32 *p, __u64 val) { - *(__be64 *)p = cpu_to_be64(val); + put_unaligned_be64(val, p); return p + 2; } static inline __be32 * xdr_decode_hyper(__be32 *p, __u64 *valp) { - *valp = be64_to_cpup((__be64 *)p); + *valp = get_unaligned_be64(p); return p + 2; } -- cgit v1.2.3 From a0219d948dd712561817b0d7c95fd2f10b698203 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:35 -0600 Subject: cpumask: remove dangerous CPU_MASK_ALL_PTR (Thanks to Al Viro for reminding me of this, via Ingo) CPU_MASK_ALL is the (deprecated) "all bits set" cpumask, defined as so: #define CPU_MASK_ALL (cpumask_t) { { ... } } Taking the address of such a temporary is questionable at best, unfortunately 321a8e9d (cpumask: add CPU_MASK_ALL_PTR macro) added CPU_MASK_ALL_PTR: #define CPU_MASK_ALL_PTR (&CPU_MASK_ALL) Which formalizes this practice. One day gcc could bite us over this usage (though we seem to have gotten away with it so far). Now all callers are removed, we kill it. Signed-off-by: Rusty Russell Acked-by: Ingo Molnar Reported-by: Al Viro Cc: Mike Travis --- include/linux/cpumask.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9b1d458aac6..c0ab3588129 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -324,8 +324,6 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } } -#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL) - #else #define CPU_MASK_ALL \ @@ -336,7 +334,6 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) /* cpu_mask_all is in init/main.c */ extern cpumask_t cpu_mask_all; -#define CPU_MASK_ALL_PTR (&cpu_mask_all) #endif -- cgit v1.2.3 From 72d78d05cbaa69f2a32f5f9d65a4551ba0da571f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:36 -0600 Subject: cpumask: remove unused cpu_mask_all It's only defined for NR_CPUS > BITS_PER_LONG; cpu_all_mask is always defined (and const). Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c0ab3588129..dbb8367ecf5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -332,9 +332,6 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } } -/* cpu_mask_all is in init/main.c */ -extern cpumask_t cpu_mask_all; - #endif #define CPU_MASK_NONE \ -- cgit v1.2.3 From ef79f8e191722dbc1fc33bdfc448f572266c37e9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:37 -0600 Subject: cpumask: remove unused mask field from struct irqaction. Up until 1.1.83, the primitive human tribes used struct sigaction for interrupts. The sa_mask field was overloaded to hold a pointer to the name. When someone created the new "struct irqaction" they carried across the "mask" field as a kind of ancestor worship: the fact that it was unused makes clear its spiritual significance. Signed-off-by: Rusty Russell --- include/linux/interrupt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 8e9e151f811..894ed7180bf 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -97,7 +97,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); struct irqaction { irq_handler_t handler; unsigned long flags; - cpumask_t mask; const char *name; void *dev_id; struct irqaction *next; -- cgit v1.2.3 From 144e2ce6115c0a1ee4cb5c935360ea4e2966b0ce Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Mon, 15 Jun 2009 12:16:54 +0900 Subject: cpumask: Remove mask field from comments By 7be23e278f, mask field was deleted by irqaction. However, it was not deleted from comment. Signed-off-by: Nobuhiro Iwamatsu CC: Rusty Russell Signed-off-by: Rusty Russell --- include/linux/interrupt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 894ed7180bf..b78cf819495 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -84,7 +84,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); * struct irqaction - per interrupt action descriptor * @handler: interrupt handler function * @flags: flags (see IRQF_* above) - * @mask: no comment as it is useless and about to be removed * @name: name of the device * @dev_id: cookie to identify the device * @next: pointer to the next irqaction for shared interrupts -- cgit v1.2.3 From e0ad955680878998ff7dc51ce06ddad12260423a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:38 -0600 Subject: cpumask: don't define set_cpus_allowed() if CONFIG_CPUMASK_OFFSTACK=y You're not supposed to pass cpumasks on the stack in that case. Signed-off-by: Rusty Russell --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cbf2a3b4628..848d1f20086 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1817,10 +1817,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, return 0; } #endif + +#ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { return set_cpus_allowed_ptr(p, &new_mask); } +#endif /* * Architectures can set this to 1 if they have specified -- cgit v1.2.3 From fe71a3c7dc8cfe0f239c04b4fc6501f4aa56aa0a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:40 -0600 Subject: cpumask: remove the deprecated smp_call_function_mask() Everyone is now using smp_call_function_many(). Signed-off-by: Rusty Russell --- include/linux/smp.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 9e3d8af0920..39c64bae776 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -73,15 +73,6 @@ int smp_call_function(void(*func)(void *info), void *info, int wait); void smp_call_function_many(const struct cpumask *mask, void (*func)(void *info), void *info, bool wait); -/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */ -static inline int -smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, - int wait) -{ - smp_call_function_many(&mask, func, info, wait); - return 0; -} - void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); @@ -144,8 +135,6 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) -#define smp_call_function_mask(mask, func, info, wait) \ - (up_smp_call_function(func, info)) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) static inline void init_call_single_data(void) -- cgit v1.2.3 From 6f401420e2822c24c36e6e1c657f6e7f7f777a93 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:40 -0600 Subject: cpumask: remove obsolete topology_core_siblings and topology_thread_siblings: core There were replaced by topology_core_cpumask and topology_thread_cpumask. Signed-off-by: Rusty Russell --- include/linux/topology.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 809b26c0709..fc0bf3edeb6 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -211,12 +211,6 @@ int arch_update_cpu_topology(void); #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif -#ifndef topology_thread_siblings -#define topology_thread_siblings(cpu) cpumask_of_cpu(cpu) -#endif -#ifndef topology_core_siblings -#define topology_core_siblings(cpu) cpumask_of_cpu(cpu) -#endif #ifndef topology_thread_cpumask #define topology_thread_cpumask(cpu) cpumask_of(cpu) #endif -- cgit v1.2.3 From 4b805b17382c11a8b1c9bb8053ce9d1dcde0701a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:52 -0600 Subject: cpumask: remove unused deprecated functions, avoid accusations of insanity We're not forcing removal of the old cpu_ functions, but we might as well delete the now-unused ones. Especially CPUMASK_ALLOC and friends. I actually got a phone call (!) from a hacker who thought I had introduced them as the new cpumask API. He seemed bewildered that I had lost all taste. Signed-off-by: Rusty Russell Cc: benh@kernel.crashing.org --- include/linux/cpumask.h | 112 +----------------------------------------------- 1 file changed, 1 insertion(+), 111 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index dbb8367ecf5..e162d13c65a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -15,10 +15,6 @@ * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. * For details of cpulist_scnprintf() and cpulist_parse(), see * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. - * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c - * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c. - * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. - * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. * * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . * Note: The alternate operations with the suffix "_nr" are used @@ -47,22 +43,17 @@ * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2 - * void cpus_complement(dst, src) dst = ~src * * int cpus_equal(mask1, mask2) Does mask1 == mask2? * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect? * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2? * int cpus_empty(mask) Is mask empty (no bits sets)? - * int cpus_full(mask) Is mask full (all bits sets)? * int cpus_weight(mask) Hamming weigh - number of set bits - * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS * - * void cpus_shift_right(dst, src, n) Shift right * void cpus_shift_left(dst, src, n) Shift left * * int first_cpu(mask) Number lowest set bit, or NR_CPUS * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS - * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set * (can be used as an lvalue) @@ -70,45 +61,10 @@ * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask * - * CPUMASK_ALLOC kmalloc's a structure that is a composite of many cpumask_t - * variables, and CPUMASK_PTR provides pointers to each field. - * - * The structure should be defined something like this: - * struct my_cpumasks { - * cpumask_t mask1; - * cpumask_t mask2; - * }; - * - * Usage is then: - * CPUMASK_ALLOC(my_cpumasks); - * CPUMASK_PTR(mask1, my_cpumasks); - * CPUMASK_PTR(mask2, my_cpumasks); - * - * --- DO NOT reference cpumask_t pointers until this check --- - * if (my_cpumasks == NULL) - * "kmalloc failed"... - * - * References are now pointers to the cpumask_t variables (*mask1, ...) - * - *if NR_CPUS > BITS_PER_LONG - * CPUMASK_ALLOC(m) Declares and allocates struct m *m = - * kmalloc(sizeof(*m), GFP_KERNEL) - * CPUMASK_FREE(m) Macro for kfree(m) - *else - * CPUMASK_ALLOC(m) Declares struct m _m, *m = &_m - * CPUMASK_FREE(m) Nop - *endif - * CPUMASK_PTR(v, m) Declares cpumask_t *v = &(m->v) - * ------------------------------------------------------------------------ - * * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing * int cpulist_parse(buf, map) Parse ascii string as cpulist - * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) - * void cpus_remap(dst, src, old, new) *dst = map(old, new)(src) - * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap - * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids @@ -142,7 +98,6 @@ #include typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; -extern cpumask_t _unused_cpumask_arg_; #ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) @@ -207,13 +162,6 @@ static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } -#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) -static inline void __cpus_complement(cpumask_t *dstp, - const cpumask_t *srcp, int nbits) -{ - bitmap_complement(dstp->bits, srcp->bits, nbits); -} - #define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) static inline int __cpus_equal(const cpumask_t *src1p, const cpumask_t *src2p, int nbits) @@ -241,26 +189,12 @@ static inline int __cpus_empty(const cpumask_t *srcp, int nbits) return bitmap_empty(srcp->bits, nbits); } -#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS) -static inline int __cpus_full(const cpumask_t *srcp, int nbits) -{ - return bitmap_full(srcp->bits, nbits); -} - #define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) static inline int __cpus_weight(const cpumask_t *srcp, int nbits) { return bitmap_weight(srcp->bits, nbits); } -#define cpus_shift_right(dst, src, n) \ - __cpus_shift_right(&(dst), &(src), (n), NR_CPUS) -static inline void __cpus_shift_right(cpumask_t *dstp, - const cpumask_t *srcp, int n, int nbits) -{ - bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); -} - #define cpus_shift_left(dst, src, n) \ __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) static inline void __cpus_shift_left(cpumask_t *dstp, @@ -346,46 +280,6 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) #define cpus_addr(src) ((src).bits) -#if NR_CPUS > BITS_PER_LONG -#define CPUMASK_ALLOC(m) struct m *m = kmalloc(sizeof(*m), GFP_KERNEL) -#define CPUMASK_FREE(m) kfree(m) -#else -#define CPUMASK_ALLOC(m) struct m _m, *m = &_m -#define CPUMASK_FREE(m) -#endif -#define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) - -#define cpu_remap(oldbit, old, new) \ - __cpu_remap((oldbit), &(old), &(new), NR_CPUS) -static inline int __cpu_remap(int oldbit, - const cpumask_t *oldp, const cpumask_t *newp, int nbits) -{ - return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); -} - -#define cpus_remap(dst, src, old, new) \ - __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS) -static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, - const cpumask_t *oldp, const cpumask_t *newp, int nbits) -{ - bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); -} - -#define cpus_onto(dst, orig, relmap) \ - __cpus_onto(&(dst), &(orig), &(relmap), NR_CPUS) -static inline void __cpus_onto(cpumask_t *dstp, const cpumask_t *origp, - const cpumask_t *relmapp, int nbits) -{ - bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); -} - -#define cpus_fold(dst, orig, sz) \ - __cpus_fold(&(dst), &(orig), sz, NR_CPUS) -static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, - int sz, int nbits) -{ - bitmap_fold(dstp->bits, origp->bits, sz, nbits); -} #endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ #if NR_CPUS == 1 @@ -419,18 +313,14 @@ int __any_online_cpu(const cpumask_t *mask); #ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS #if NR_CPUS <= 64 -#define next_cpu_nr(n, src) next_cpu(n, src) -#define cpus_weight_nr(cpumask) cpus_weight(cpumask) #define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) #else /* NR_CPUS > 64 */ int __next_cpu_nr(int n, const cpumask_t *srcp); -#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src)) -#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids) #define for_each_cpu_mask_nr(cpu, mask) \ for ((cpu) = -1; \ - (cpu) = next_cpu_nr((cpu), (mask)), \ + (cpu) = __next_cpu_nr((cpu), &(mask)), \ (cpu) < nr_cpu_ids; ) #endif /* NR_CPUS > 64 */ -- cgit v1.2.3 From 6ba2ef7baac23a5d9bb85e28b882d16b439a2293 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:53 -0600 Subject: cpumask: Move deprecated functions to end of header. The new ones have pretty kerneldoc. Move the old ones to the end to avoid confusing people. Signed-off-by: Rusty Russell Cc: benh@kernel.crashing.org --- include/linux/cpumask.h | 593 ++++++++++++++++++++---------------------------- 1 file changed, 252 insertions(+), 341 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index e162d13c65a..789cf5f920c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -3,328 +3,37 @@ /* * Cpumasks provide a bitmap suitable for representing the - * set of CPU's in a system, one bit position per CPU number. - * - * The new cpumask_ ops take a "struct cpumask *"; the old ones - * use cpumask_t. - * - * See detailed comments in the file linux/bitmap.h describing the - * data type on which these cpumasks are based. - * - * For details of cpumask_scnprintf() and cpumask_parse_user(), - * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. - * For details of cpulist_scnprintf() and cpulist_parse(), see - * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. - * - * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * Note: The alternate operations with the suffix "_nr" are used - * to limit the range of the loop to nr_cpu_ids instead of - * NR_CPUS when NR_CPUS > 64 for performance reasons. - * If NR_CPUS is <= 64 then most assembler bitmask - * operators execute faster with a constant range, so - * the operator will continue to use NR_CPUS. - * - * Another consideration is that nr_cpu_ids is initialized - * to NR_CPUS and isn't lowered until the possible cpus are - * discovered (including any disabled cpus). So early uses - * will span the entire range of NR_CPUS. - * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * - * The obsolescent cpumask operations are: - * - * void cpu_set(cpu, mask) turn on bit 'cpu' in mask - * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask - * void cpus_setall(mask) set all bits - * void cpus_clear(mask) clear all bits - * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask - * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask - * - * int cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] - * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] - * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 - * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2 - * - * int cpus_equal(mask1, mask2) Does mask1 == mask2? - * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect? - * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2? - * int cpus_empty(mask) Is mask empty (no bits sets)? - * int cpus_weight(mask) Hamming weigh - number of set bits - * - * void cpus_shift_left(dst, src, n) Shift left - * - * int first_cpu(mask) Number lowest set bit, or NR_CPUS - * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS - * - * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set - * (can be used as an lvalue) - * CPU_MASK_ALL Initializer - all bits set - * CPU_MASK_NONE Initializer - no bits set - * unsigned long *cpus_addr(mask) Array of unsigned long's in mask - * - * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing - * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask - * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing - * int cpulist_parse(buf, map) Parse ascii string as cpulist - * - * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS - * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids - * - * int num_online_cpus() Number of online CPUs - * int num_possible_cpus() Number of all possible CPUs - * int num_present_cpus() Number of present CPUs - * - * int cpu_online(cpu) Is some cpu online? - * int cpu_possible(cpu) Is some cpu possible? - * int cpu_present(cpu) Is some cpu present (can schedule)? - * - * int any_online_cpu(mask) First online cpu in mask - * - * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map - * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map - * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map - * - * Subtlety: - * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway) - * to generate slightly worse code. Note for example the additional - * 40 lines of assembly code compiling the "for each possible cpu" - * loops buried in the disk_stat_read() macros calls when compiling - * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple - * one-line #define for cpu_isset(), instead of wrapping an inline - * inside a macro, the way we do the other calls. + * set of CPU's in a system, one bit position per CPU number. In general, + * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ - #include #include #include typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) -static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) -{ - set_bit(cpu, dstp->bits); -} - -#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) -static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) -{ - clear_bit(cpu, dstp->bits); -} - -#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) -static inline void __cpus_setall(cpumask_t *dstp, int nbits) -{ - bitmap_fill(dstp->bits, nbits); -} - -#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) -static inline void __cpus_clear(cpumask_t *dstp, int nbits) -{ - bitmap_zero(dstp->bits, nbits); -} - -/* No static inline type checking - see Subtlety (1) above. */ -#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) - -#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) -static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) -{ - return test_and_set_bit(cpu, addr->bits); -} - -#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) -static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_andnot(dst, src1, src2) \ - __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) -static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) -static inline int __cpus_equal(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_equal(src1p->bits, src2p->bits, nbits); -} - -#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) -static inline int __cpus_intersects(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_intersects(src1p->bits, src2p->bits, nbits); -} - -#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) -static inline int __cpus_subset(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_subset(src1p->bits, src2p->bits, nbits); -} - -#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) -static inline int __cpus_empty(const cpumask_t *srcp, int nbits) -{ - return bitmap_empty(srcp->bits, nbits); -} - -#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) -static inline int __cpus_weight(const cpumask_t *srcp, int nbits) -{ - return bitmap_weight(srcp->bits, nbits); -} - -#define cpus_shift_left(dst, src, n) \ - __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) -static inline void __cpus_shift_left(cpumask_t *dstp, - const cpumask_t *srcp, int n, int nbits) -{ - bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); -} -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ - /** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * - * @bitmap: the bitmap + * cpumask_bits - get the bits in a cpumask + * @maskp: the struct cpumask * * - * There are a few places where cpumask_var_t isn't appropriate and - * static cpumasks must be used (eg. very early boot), yet we don't - * expose the definition of 'struct cpumask'. - * - * This does the conversion, and can be used as a constant initializer. + * You should only assume nr_cpu_ids bits of this mask are valid. This is + * a macro so it's const-correct. */ -#define to_cpumask(bitmap) \ - ((struct cpumask *)(1 ? (bitmap) \ - : (void *)sizeof(__check_is_bitmap(bitmap)))) - -static inline int __check_is_bitmap(const unsigned long *bitmap) -{ - return 1; -} - -/* - * Special-case data structure for "single bit set only" constant CPU masks. - * - * We pre-generate all the 64 (or 32) possible bit positions, with enough - * padding to the left and the right, and return the constant pointer - * appropriately offset. - */ -extern const unsigned long - cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; - -static inline const struct cpumask *get_cpu_mask(unsigned int cpu) -{ - const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; - p -= cpu / BITS_PER_LONG; - return to_cpumask(p); -} - -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -/* - * In cases where we take the address of the cpumask immediately, - * gcc optimizes it out (it's a constant) and there's no huge stack - * variable created: - */ -#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) - - -#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) - -#if NR_CPUS <= BITS_PER_LONG - -#define CPU_MASK_ALL \ -(cpumask_t) { { \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} } - -#else - -#define CPU_MASK_ALL \ -(cpumask_t) { { \ - [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} } - -#endif - -#define CPU_MASK_NONE \ -(cpumask_t) { { \ - [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ -} } - -#define CPU_MASK_CPU0 \ -(cpumask_t) { { \ - [0] = 1UL \ -} } - -#define cpus_addr(src) ((src).bits) - -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ +#define cpumask_bits(maskp) ((maskp)->bits) #if NR_CPUS == 1 - #define nr_cpu_ids 1 -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#define first_cpu(src) ({ (void)(src); 0; }) -#define next_cpu(n, src) ({ (void)(src); 1; }) -#define any_online_cpu(mask) 0 -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ -#else /* NR_CPUS > 1 */ - +#else extern int nr_cpu_ids; -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -int __first_cpu(const cpumask_t *srcp); -int __next_cpu(int n, const cpumask_t *srcp); -int __any_online_cpu(const cpumask_t *mask); - -#define first_cpu(src) __first_cpu(&(src)) -#define next_cpu(n, src) __next_cpu((n), &(src)) -#define any_online_cpu(mask) __any_online_cpu(&(mask)) -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = next_cpu((cpu), (mask)), \ - (cpu) < NR_CPUS; ) -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ #endif -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#if NR_CPUS <= 64 - -#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) - -#else /* NR_CPUS > 64 */ - -int __next_cpu_nr(int n, const cpumask_t *srcp); -#define for_each_cpu_mask_nr(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = __next_cpu_nr((cpu), &(mask)), \ - (cpu) < nr_cpu_ids; ) - -#endif /* NR_CPUS > 64 */ -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ +#ifdef CONFIG_CPUMASK_OFFSTACK +/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, + * not all bits may be allocated. */ +#define nr_cpumask_bits nr_cpu_ids +#else +#define nr_cpumask_bits NR_CPUS +#endif /* * The following particular system cpumasks and operations manage @@ -371,12 +80,6 @@ extern const struct cpumask *const cpu_online_mask; extern const struct cpumask *const cpu_present_mask; extern const struct cpumask *const cpu_active_mask; -/* These strip const, as traditionally they weren't const. */ -#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) -#define cpu_online_map (*(cpumask_t *)cpu_online_mask) -#define cpu_present_map (*(cpumask_t *)cpu_present_mask) -#define cpu_active_map (*(cpumask_t *)cpu_active_mask) - #if NR_CPUS > 1 #define num_online_cpus() cpumask_weight(cpu_online_mask) #define num_possible_cpus() cpumask_weight(cpu_possible_mask) @@ -395,35 +98,6 @@ extern const struct cpumask *const cpu_active_mask; #define cpu_active(cpu) ((cpu) == 0) #endif -#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) - -/* These are the new versions of the cpumask operators: passed by pointer. - * The older versions will be implemented in terms of these, then deleted. */ -#define cpumask_bits(maskp) ((maskp)->bits) - -#if NR_CPUS <= BITS_PER_LONG -#define CPU_BITS_ALL \ -{ \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} - -#else /* NR_CPUS > BITS_PER_LONG */ - -#define CPU_BITS_ALL \ -{ \ - [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} -#endif /* NR_CPUS > BITS_PER_LONG */ - -#ifdef CONFIG_CPUMASK_OFFSTACK -/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, - * not all bits may be allocated. */ -#define nr_cpumask_bits nr_cpu_ids -#else -#define nr_cpumask_bits NR_CPUS -#endif - /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) { @@ -984,4 +658,241 @@ void set_cpu_active(unsigned int cpu, bool active); void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); + +/** + * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * @bitmap: the bitmap + * + * There are a few places where cpumask_var_t isn't appropriate and + * static cpumasks must be used (eg. very early boot), yet we don't + * expose the definition of 'struct cpumask'. + * + * This does the conversion, and can be used as a constant initializer. + */ +#define to_cpumask(bitmap) \ + ((struct cpumask *)(1 ? (bitmap) \ + : (void *)sizeof(__check_is_bitmap(bitmap)))) + +static inline int __check_is_bitmap(const unsigned long *bitmap) +{ + return 1; +} + +/* + * Special-case data structure for "single bit set only" constant CPU masks. + * + * We pre-generate all the 64 (or 32) possible bit positions, with enough + * padding to the left and the right, and return the constant pointer + * appropriately offset. + */ +extern const unsigned long + cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; + +static inline const struct cpumask *get_cpu_mask(unsigned int cpu) +{ + const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; + p -= cpu / BITS_PER_LONG; + return to_cpumask(p); +} + +#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) + +#if NR_CPUS <= BITS_PER_LONG +#define CPU_BITS_ALL \ +{ \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +#else /* NR_CPUS > BITS_PER_LONG */ + +#define CPU_BITS_ALL \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} +#endif /* NR_CPUS > BITS_PER_LONG */ + +/* + * + * From here down, all obsolete. Use cpumask_ variants! + * + */ +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS +/* These strip const, as traditionally they weren't const. */ +#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) +#define cpu_online_map (*(cpumask_t *)cpu_online_mask) +#define cpu_present_map (*(cpumask_t *)cpu_present_mask) +#define cpu_active_map (*(cpumask_t *)cpu_active_mask) + +#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) + +#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) + +#if NR_CPUS <= BITS_PER_LONG + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#else + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#endif + +#define CPU_MASK_NONE \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} } + +#define CPU_MASK_CPU0 \ +(cpumask_t) { { \ + [0] = 1UL \ +} } + +#if NR_CPUS == 1 +#define first_cpu(src) ({ (void)(src); 0; }) +#define next_cpu(n, src) ({ (void)(src); 1; }) +#define any_online_cpu(mask) 0 +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#else /* NR_CPUS > 1 */ +int __first_cpu(const cpumask_t *srcp); +int __next_cpu(int n, const cpumask_t *srcp); +int __any_online_cpu(const cpumask_t *mask); + +#define first_cpu(src) __first_cpu(&(src)) +#define next_cpu(n, src) __next_cpu((n), &(src)) +#define any_online_cpu(mask) __any_online_cpu(&(mask)) +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = next_cpu((cpu), (mask)), \ + (cpu) < NR_CPUS; ) +#endif /* SMP */ + +#if NR_CPUS <= 64 + +#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) + +#else /* NR_CPUS > 64 */ + +int __next_cpu_nr(int n, const cpumask_t *srcp); +#define for_each_cpu_mask_nr(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = __next_cpu_nr((cpu), &(mask)), \ + (cpu) < nr_cpu_ids; ) + +#endif /* NR_CPUS > 64 */ + +#define cpus_addr(src) ((src).bits) + +#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) +static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) +{ + set_bit(cpu, dstp->bits); +} + +#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) +static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) +{ + clear_bit(cpu, dstp->bits); +} + +#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) +static inline void __cpus_setall(cpumask_t *dstp, int nbits) +{ + bitmap_fill(dstp->bits, nbits); +} + +#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) +static inline void __cpus_clear(cpumask_t *dstp, int nbits) +{ + bitmap_zero(dstp->bits, nbits); +} + +/* No static inline type checking - see Subtlety (1) above. */ +#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) + +#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) +static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) +{ + return test_and_set_bit(cpu, addr->bits); +} + +#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) +static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_andnot(dst, src1, src2) \ + __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) +static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) +static inline int __cpus_equal(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_equal(src1p->bits, src2p->bits, nbits); +} + +#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) +static inline int __cpus_intersects(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_intersects(src1p->bits, src2p->bits, nbits); +} + +#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) +static inline int __cpus_subset(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_subset(src1p->bits, src2p->bits, nbits); +} + +#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) +static inline int __cpus_empty(const cpumask_t *srcp, int nbits) +{ + return bitmap_empty(srcp->bits, nbits); +} + +#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) +static inline int __cpus_weight(const cpumask_t *srcp, int nbits) +{ + return bitmap_weight(srcp->bits, nbits); +} + +#define cpus_shift_left(dst, src, n) \ + __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) +static inline void __cpus_shift_left(cpumask_t *dstp, + const cpumask_t *srcp, int n, int nbits) +{ + bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); +} +#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ + #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From 2bcd57ab61e7cabed626226a3771617981c11ce1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 24 Sep 2009 04:22:25 +0400 Subject: headers: utsname.h redux * remove asm/atomic.h inclusion from linux/utsname.h -- not needed after kref conversion * remove linux/utsname.h inclusion from files which do not need it NOTE: it looks like fs/binfmt_elf.c do not need utsname.h, however due to some personality stuff it _is_ needed -- cowardly leave ELF-related headers and files alone. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/utsname.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 3656b300de3..69f39974c04 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -36,7 +36,6 @@ struct new_utsname { #include #include #include -#include struct uts_namespace { struct kref kref; -- cgit v1.2.3 From 22fe404218156328a27e66349b1175cd0baa4990 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 18 Sep 2009 13:05:44 -0700 Subject: vfs: split generic_forget_inode() so that hugetlbfs does not have to copy it Hugetlbfs needs to do special things instead of truncate_inode_pages(). Currently, it copied generic_forget_inode() except for truncate_inode_pages() call which is asking for trouble (the code there isn't trivial). So create a separate function generic_detach_inode() which does all the list magic done in generic_forget_inode() and call it from hugetlbfs_forget_inode(). Signed-off-by: Jan Kara Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51803528b09..955e34615cb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2156,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); extern void generic_drop_inode(struct inode *inode); +extern int generic_detach_inode(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), -- cgit v1.2.3 From 42cb56ae2ab67390da34906b27bedc3f2ff1393b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 18 Sep 2009 13:05:53 -0700 Subject: vfs: change sb->s_maxbytes to a loff_t sb->s_maxbytes is supposed to indicate the maximum size of a file that can exist on the filesystem. It's declared as an unsigned long long. Even if a filesystem has no inherent limit that prevents it from using every bit in that unsigned long long, it's still problematic to set it to anything larger than MAX_LFS_FILESIZE. There are places in the kernel that cast s_maxbytes to a signed value. If it's set too large then this cast makes it a negative number and generally breaks the comparison. Change s_maxbytes to be loff_t instead. That should help eliminate the temptation to set it too large by making it a signed value. Also, add a warning for couple of releases to help catch filesystems that set s_maxbytes too large. Eventually we can either convert this to a BUG() or just remove it and in the hope that no one will get it wrong now that it's a signed value. Signed-off-by: Jeff Layton Cc: Johannes Weiner Cc: Christoph Hellwig Cc: Al Viro Cc: Robert Love Cc: Mandeep Singh Baines Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 955e34615cb..cbb7724c11d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1315,7 +1315,7 @@ struct super_block { unsigned long s_blocksize; unsigned char s_blocksize_bits; unsigned char s_dirt; - unsigned long long s_maxbytes; /* Max file size */ + loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; -- cgit v1.2.3 From f84398068d9c2babe41500504ef247ae07081857 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Sep 2009 14:48:36 +0200 Subject: vfs: seq_file: add helpers for data filling Add two helpers that allow access to the seq_file's own buffer, but hide the internal details of seq_files. This allows easier implementation of special purpose filling functions. It also cleans up some existing functions which duplicated the seq_file logic. Make these inline functions in seq_file.h, as suggested by Al. Signed-off-by: Miklos Szeredi Acked-by: Hugh Dickins Signed-off-by: Al Viro --- include/linux/seq_file.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 0c6a86b7959..8366d8f12e5 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -35,6 +35,44 @@ struct seq_operations { #define SEQ_SKIP 1 +/** + * seq_get_buf - get buffer to write arbitrary data to + * @m: the seq_file handle + * @bufp: the beginning of the buffer is stored here + * + * Return the number of bytes available in the buffer, or zero if + * there's no space. + */ +static inline size_t seq_get_buf(struct seq_file *m, char **bufp) +{ + BUG_ON(m->count > m->size); + if (m->count < m->size) + *bufp = m->buf + m->count; + else + *bufp = NULL; + + return m->size - m->count; +} + +/** + * seq_commit - commit data to the buffer + * @m: the seq_file handle + * @num: the number of bytes to commit + * + * Commit @num bytes of data written to a buffer previously acquired + * by seq_buf_get. To signal an error condition, or that the data + * didn't fit in the available space, pass a negative @num value. + */ +static inline void seq_commit(struct seq_file *m, int num) +{ + if (num < 0) { + m->count = m->size; + } else { + BUG_ON(m->count + num > m->size); + m->count += num; + } +} + char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); -- cgit v1.2.3 From 4fadd7bb20a1e7c774ed88dc703d8fbcd00ff917 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Aug 2009 23:28:06 +0200 Subject: freeze_bdev: kill bd_mount_sem Now that we have the freeze count there is not much reason for bd_mount_sem anymore. The actual freeze/thaw operations are serialized using the bd_fsfreeze_mutex, and the only other place we take bd_mount_sem is get_sb_bdev which tries to prevent mounting a filesystem while the block device is frozen. Instead of add a check for bd_fsfreeze_count and return -EBUSY if a filesystem is frozen. While that is a change in user visible behaviour a failing mount is much better for this case rather than having the mount process stuck uninterruptible for a long time. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cbb7724c11d..72dfbd42397 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -640,7 +640,6 @@ struct block_device { struct super_block * bd_super; int bd_openers; struct mutex bd_mutex; /* open/close mutex */ - struct semaphore bd_mount_sem; struct list_head bd_inodes; void * bd_holder; int bd_holders; -- cgit v1.2.3 From 4504230a71566785a05d3e6b53fa1ee071b864eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Aug 2009 23:28:35 +0200 Subject: freeze_bdev: grab active reference to frozen superblocks Currently we held s_umount while a filesystem is frozen, despite that we might return to userspace and unlock it from a different process. Instead grab an active reference to keep the file system busy and add an explicit check for frozen filesystems in remount and reject the remount instead of blocking on s_umount. Add a new get_active_super helper to super.c for use by freeze_bdev that grabs an active reference to a superblock from a given block device. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 72dfbd42397..502d96ef345 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2334,6 +2334,7 @@ extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); +extern struct super_block *get_active_super(struct block_device *bdev); extern struct super_block *user_get_super(dev_t); extern void drop_super(struct super_block *sb); -- cgit v1.2.3 From 25d9e2d15286281ec834b829a4aaf8969011f1cd Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Fri, 21 Aug 2009 02:35:05 +1000 Subject: truncate: new helpers Introduce new truncate helpers truncate_pagecache and inode_newsize_ok. vmtruncate is also consolidated from mm/memory.c and mm/nommu.c and into mm/truncate.c. Reviewed-by: Christoph Hellwig Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/fs.h | 3 ++- include/linux/mm.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 502d96ef345..2b08b5ce09b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2382,7 +2382,8 @@ extern int buffer_migrate_page(struct address_space *, #define buffer_migrate_page NULL #endif -extern int inode_change_ok(struct inode *, struct iattr *); +extern int inode_change_ok(const struct inode *, struct iattr *); +extern int inode_newsize_ok(const struct inode *, loff_t offset); extern int __must_check inode_setattr(struct inode *, struct iattr *); extern void file_update_time(struct file *file); diff --git a/include/linux/mm.h b/include/linux/mm.h index b6eae5e3144..8347e938fb2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -791,8 +791,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, unmap_mapping_range(mapping, holebegin, holelen, 0); } -extern int vmtruncate(struct inode * inode, loff_t offset); -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); +extern int vmtruncate(struct inode *inode, loff_t offset); +extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, -- cgit v1.2.3 From 57f1f0874f426a9bdfc5cd3f886113dd5cd17834 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Wed, 23 Sep 2009 15:56:10 -0700 Subject: time: add function to convert between calendar time and broken-down time for universal use There are many similar code in kernel for one object: convert time between calendar time and broken-down time. Here is some source I found: fs/ncpfs/dir.c fs/smbfs/proc.c fs/fat/misc.c fs/udf/udftime.c fs/cifs/netmisc.c net/netfilter/xt_time.c drivers/scsi/ips.c drivers/input/misc/hp_sdc_rtc.c drivers/rtc/rtc-lib.c arch/ia64/hp/sim/boot/fw-emu.c arch/m68k/mac/misc.c arch/powerpc/kernel/time.c arch/parisc/include/asm/rtc.h ... We can make a common function for this type of conversion, At least we can get following benefit: 1: Make kernel simple and unify 2: Easy to fix bug in converting code 3: Reduce clone of code in future For example, I'm trying to make ftrace display walltime, this patch will make me easy. This code is based on code from glibc-2.6 Signed-off-by: Zhao Lei Cc: OGAWA Hirofumi Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Pavel Machek Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 56787c09334..fe04e5ef6a5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); +/* + * Similar to the struct tm in userspace , but it needs to be here so + * that the kernel source is self contained. + */ +struct tm { + /* + * the number of seconds after the minute, normally in the range + * 0 to 59, but can be up to 60 to allow for leap seconds + */ + int tm_sec; + /* the number of minutes after the hour, in the range 0 to 59*/ + int tm_min; + /* the number of hours past midnight, in the range 0 to 23 */ + int tm_hour; + /* the day of the month, in the range 1 to 31 */ + int tm_mday; + /* the number of months since January, in the range 0 to 11 */ + int tm_mon; + /* the number of years since 1900 */ + long tm_year; + /* the number of days since Sunday, in the range 0 to 6 */ + int tm_wday; + /* the number of days since January 1, in the range 0 to 365 */ + int tm_yday; +}; + +void time_to_tm(time_t totalsecs, int offset, struct tm *result); + /** * timespec_to_ns - Convert timespec to nanoseconds * @ts: pointer to the timespec variable to be converted -- cgit v1.2.3 From 55dff4954ebdeba2be59e19398a607d799c5fa9f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Sep 2009 15:56:17 -0700 Subject: docs: fix various Documentation/ paths in header files Fix various Documentation/ paths in include/linux/. Signed-off-by: Randy Dunlap Reviewed-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/configfs.h | 4 ++-- include/linux/debugfs.h | 2 +- include/linux/relay.h | 2 +- include/linux/tracepoint.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 7f627775c94..ddb7a97c78c 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -27,8 +27,8 @@ * * configfs Copyright (C) 2005 Oracle. All rights reserved. * - * Please read Documentation/filesystems/configfs.txt before using the - * configfs interface, ESPECIALLY the parts about reference counts and + * Please read Documentation/filesystems/configfs/configfs.txt before using + * the configfs interface, ESPECIALLY the parts about reference counts and * item destructors. */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index eb5c2ba2f81..fc1b930f246 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/kernel-api for more details. + * See Documentation/DocBook/filesystems for more details. */ #ifndef _DEBUGFS_H_ diff --git a/include/linux/relay.h b/include/linux/relay.h index 953fc055e87..14a86bc7102 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -140,7 +140,7 @@ struct rchan_callbacks * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * - * See Documentation/filesystems/relayfs.txt for more info. + * See Documentation/filesystems/relay.txt for more info. */ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63a3f7a8058..660a9de96f8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -4,7 +4,7 @@ /* * Kernel Tracepoint API. * - * See Documentation/tracepoint.txt. + * See Documentation/trace/tracepoints.txt. * * (C) Copyright 2008 Mathieu Desnoyers * -- cgit v1.2.3 From 8f3ff20862cfcb85500a2bb55ee64622bd59fd0c Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 23 Sep 2009 15:56:25 -0700 Subject: cgroups: revert "cgroups: fix pid namespace bug" The following series adds a "cgroup.procs" file to each cgroup that reports unique tgids rather than pids, and allows all threads in a threadgroup to be atomically moved to a new cgroup. The subsystem "attach" interface is modified to support attaching whole threadgroups at a time, which could introduce potential problems if any subsystem were to need to access the old cgroup of every thread being moved. The attach interface may need to be revised if this becomes the case. Also added is functionality for read/write locking all CLONE_THREAD fork()ing within a threadgroup, by means of an rwsem that lives in the sighand_struct, for per-threadgroup-ness and also for sharing a cacheline with the sighand's atomic count. This scheme should introduce no extra overhead in the fork path when there's no contention. The final patch reveals potential for a race when forking before a subsystem's attach function is called - one potential solution in case any subsystem has this problem is to hang on to the group's fork mutex through the attach() calls, though no subsystem yet demonstrates need for an extended critical section. This patch: Revert commit 096b7fe012d66ed55e98bc8022405ede0cc80e96 Author: Li Zefan AuthorDate: Wed Jul 29 15:04:04 2009 -0700 Commit: Linus Torvalds CommitDate: Wed Jul 29 19:10:35 2009 -0700 cgroups: fix pid namespace bug This is in preparation for some clashing cgroups changes that subsume the original commit's functionaliy. The original commit fixed a pid namespace bug which Ben Blum fixed independently (in the same way, but with different code) as part of a series of patches. I played around with trying to reconcile Ben's patch series with Li's patch, but concluded that it was simpler to just revert Li's, given that Ben's patch series contained essentially the same fix. Signed-off-by: Paul Menage Cc: Li Zefan Cc: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 90bba9e6228..c833d6f2367 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -179,11 +179,14 @@ struct cgroup { */ struct list_head release_list; - /* pids_mutex protects pids_list and cached pid arrays. */ + /* pids_mutex protects the fields below */ struct rw_semaphore pids_mutex; - - /* Linked list of struct cgroup_pids */ - struct list_head pids_list; + /* Array of process ids in the cgroup */ + pid_t *tasks_pids; + /* How many files are using the current tasks_pids array */ + int pids_use_count; + /* Length of the current tasks_pids array */ + int pids_length; /* For RCU-protected deletion */ struct rcu_head rcu_head; -- cgit v1.2.3 From 102a775e3647628727ae83a9a6abf0564c3ca7cb Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 23 Sep 2009 15:56:26 -0700 Subject: cgroups: add a read-only "procs" file similar to "tasks" that shows only unique tgids struct cgroup used to have a bunch of fields for keeping track of the pidlist for the tasks file. Those are now separated into a new struct cgroup_pidlist, of which two are had, one for procs and one for tasks. The way the seq_file operations are set up is changed so that just the pidlist struct gets passed around as the private data. Interface example: Suppose a multithreaded process has pid 1000 and other threads with ids 1001, 1002, 1003: $ cat tasks 1000 1001 1002 1003 $ cat cgroup.procs 1000 $ Signed-off-by: Ben Blum Signed-off-by: Paul Menage Acked-by: Li Zefan Cc: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c833d6f2367..2357733a0a8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -141,6 +141,17 @@ enum { CGRP_WAIT_ON_RMDIR, }; +struct cgroup_pidlist { + /* protects the other fields */ + struct rw_semaphore mutex; + /* array of xids */ + pid_t *list; + /* how many elements the above list has */ + int length; + /* how many files are using the current array */ + int use_count; +}; + struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -179,14 +190,9 @@ struct cgroup { */ struct list_head release_list; - /* pids_mutex protects the fields below */ - struct rw_semaphore pids_mutex; - /* Array of process ids in the cgroup */ - pid_t *tasks_pids; - /* How many files are using the current tasks_pids array */ - int pids_use_count; - /* Length of the current tasks_pids array */ - int pids_length; + /* we will have two separate pidlists, one for pids (the tasks file) + * and one for tgids (the procs file). */ + struct cgroup_pidlist tasks, procs; /* For RCU-protected deletion */ struct rcu_head rcu_head; -- cgit v1.2.3 From 72a8cb30d10d4041c455a7054607a7d519167c87 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 23 Sep 2009 15:56:27 -0700 Subject: cgroups: ensure correct concurrent opening/reading of pidlists across pid namespaces Previously there was the problem in which two processes from different pid namespaces reading the tasks or procs file could result in one process seeing results from the other's namespace. Rather than one pidlist for each file in a cgroup, we now keep a list of pidlists keyed by namespace and file type (tasks versus procs) in which entries are placed on demand. Each pidlist has its own lock, and that the pidlists themselves are passed around in the seq_file's private pointer means we don't have to touch the cgroup or its master list except when creating and destroying entries. Signed-off-by: Ben Blum Signed-off-by: Paul Menage Cc: Li Zefan Cc: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2357733a0a8..88e86346072 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -141,15 +141,36 @@ enum { CGRP_WAIT_ON_RMDIR, }; +/* which pidlist file are we talking about? */ +enum cgroup_filetype { + CGROUP_FILE_PROCS, + CGROUP_FILE_TASKS, +}; + +/* + * A pidlist is a list of pids that virtually represents the contents of one + * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, + * a pair (one each for procs, tasks) for each pid namespace that's relevant + * to the cgroup. + */ struct cgroup_pidlist { - /* protects the other fields */ - struct rw_semaphore mutex; + /* + * used to find which pidlist is wanted. doesn't change as long as + * this particular list stays in the list. + */ + struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; /* array of xids */ pid_t *list; /* how many elements the above list has */ int length; /* how many files are using the current array */ int use_count; + /* each of these stored in a list by its cgroup */ + struct list_head links; + /* pointer to the cgroup we belong to, for list removal purposes */ + struct cgroup *owner; + /* protects the other fields */ + struct rw_semaphore mutex; }; struct cgroup { @@ -190,9 +211,12 @@ struct cgroup { */ struct list_head release_list; - /* we will have two separate pidlists, one for pids (the tasks file) - * and one for tgids (the procs file). */ - struct cgroup_pidlist tasks, procs; + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; /* For RCU-protected deletion */ struct rcu_head rcu_head; -- cgit v1.2.3 From c378369d8b4fa516ff2b1e79c3eded4e0e955ebb Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 23 Sep 2009 15:56:29 -0700 Subject: cgroups: change css_set freeing mechanism to be under RCU Changes css_set freeing mechanism to be under RCU This is a prepatch for making the procs file writable. In order to free the old css_sets for each task to be moved as they're being moved, the freeing mechanism must be RCU-protected, or else we would have to have a call to synchronize_rcu() for each task before freeing its old css_set. Signed-off-by: Ben Blum Signed-off-by: Paul Menage Cc: "Paul E. McKenney" Acked-by: Li Zefan Cc: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 88e86346072..3ac78a2f4b5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -260,6 +260,9 @@ struct css_set { * during subsystem registration (at boot time). */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; }; /* -- cgit v1.2.3 From be367d09927023d081f9199665c8500f69f14d22 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 23 Sep 2009 15:56:31 -0700 Subject: cgroups: let ss->can_attach and ss->attach do whole threadgroups at a time Alter the ss->can_attach and ss->attach functions to be able to deal with a whole threadgroup at a time, for use in cgroup_attach_proc. (This is a pre-patch to cgroup-procs-writable.patch.) Currently, new mode of the attach function can only tell the subsystem about the old cgroup of the threadgroup leader. No subsystem currently needs that information for each thread that's being moved, but if one were to be added (for example, one that counts tasks within a group) this bit would need to be reworked a bit to tell the subsystem the right information. [hidave.darkstar@gmail.com: fix build] Signed-off-by: Ben Blum Signed-off-by: Paul Menage Acked-by: Li Zefan Reviewed-by: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3ac78a2f4b5..b62bb9294d0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -425,10 +425,11 @@ struct cgroup_subsys { struct cgroup *cgrp); int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - int (*can_attach)(struct cgroup_subsys *ss, - struct cgroup *cgrp, struct task_struct *tsk); + int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup); void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *tsk); + struct cgroup *old_cgrp, struct task_struct *tsk, + bool threadgroup); void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); int (*populate)(struct cgroup_subsys *ss, -- cgit v1.2.3 From 4b3bde4c983de36c59e6c1a24701f6fe816f9f55 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 23 Sep 2009 15:56:32 -0700 Subject: memcg: remove the overhead associated with the root cgroup Change the memory cgroup to remove the overhead associated with accounting all pages in the root cgroup. As a side-effect, we can no longer set a memory hard limit in the root cgroup. A new flag to track whether the page has been accounted or not has been added as well. Flags are now set atomically for page_cgroup, pcg_default_flags is now obsolete and removed. [akpm@linux-foundation.org: fix a few documentation glitches] Signed-off-by: Balbir Singh Signed-off-by: Daisuke Nishimura Reviewed-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Li Zefan Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index ada779f2417..4b938d4f3ac 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -38,6 +38,7 @@ enum { PCG_LOCK, /* page cgroup is locked */ PCG_CACHE, /* charged as cache */ PCG_USED, /* this object is in use. */ + PCG_ACCT_LRU, /* page has been accounted for */ }; #define TESTPCGFLAG(uname, lname) \ @@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ { clear_bit(PCG_##lname, &pc->flags); } +#define TESTCLEARPCGFLAG(uname, lname) \ +static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ + { return test_and_clear_bit(PCG_##lname, &pc->flags); } + /* Cache flag is set only once (at allocation) */ TESTPCGFLAG(Cache, CACHE) +CLEARPCGFLAG(Cache, CACHE) +SETPCGFLAG(Cache, CACHE) TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) +SETPCGFLAG(Used, USED) + +SETPCGFLAG(AcctLRU, ACCT_LRU) +CLEARPCGFLAG(AcctLRU, ACCT_LRU) +TESTPCGFLAG(AcctLRU, ACCT_LRU) +TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU) static inline int page_cgroup_nid(struct page_cgroup *pc) { -- cgit v1.2.3 From 296c81d89f4f14269f7346f81442910158c0a83a Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 23 Sep 2009 15:56:36 -0700 Subject: memory controller: soft limit interface Add an interface to allow get/set of soft limits. Soft limits for memory plus swap controller (memsw) is currently not supported. Resource counters have been enhanced to support soft limits and new type RES_SOFT_LIMIT has been added. Unlike hard limits, soft limits can be directly set and do not need any reclaim or checks before setting them to a newer value. Kamezawa-San raised a question as to whether soft limit should belong to res_counter. Since all resources understand the basic concepts of hard and soft limits, it is justified to add soft limits here. Soft limits are a generic resource usage feature, even file system quotas support soft limits. Signed-off-by: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 511f42fc681..fcb9884df61 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -34,6 +34,10 @@ struct res_counter { * the limit that usage cannot exceed */ unsigned long long limit; + /* + * the limit that usage can be exceed + */ + unsigned long long soft_limit; /* * the number of unsuccessful attempts to consume the resource */ @@ -87,6 +91,7 @@ enum { RES_MAX_USAGE, RES_LIMIT, RES_FAILCNT, + RES_SOFT_LIMIT, }; /* @@ -132,6 +137,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt) return false; } +static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt) +{ + if (cnt->usage < cnt->soft_limit) + return true; + + return false; +} + +/** + * Get the difference between the usage and the soft limit + * @cnt: The counter + * + * Returns 0 if usage is less than or equal to soft limit + * The difference between usage and soft limit, otherwise. + */ +static inline unsigned long long +res_counter_soft_limit_excess(struct res_counter *cnt) +{ + unsigned long long excess; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + if (cnt->usage <= cnt->soft_limit) + excess = 0; + else + excess = cnt->usage - cnt->soft_limit; + spin_unlock_irqrestore(&cnt->lock, flags); + return excess; +} + /* * Helper function to detect if the cgroup is within it's limit or * not. It's currently called from cgroup_rss_prepare() @@ -147,6 +182,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt) return ret; } +static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + ret = res_counter_soft_limit_check_locked(cnt); + spin_unlock_irqrestore(&cnt->lock, flags); + return ret; +} + static inline void res_counter_reset_max(struct res_counter *cnt) { unsigned long flags; @@ -180,4 +226,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt, return ret; } +static inline int +res_counter_set_soft_limit(struct res_counter *cnt, + unsigned long long soft_limit) +{ + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + cnt->soft_limit = soft_limit; + spin_unlock_irqrestore(&cnt->lock, flags); + return 0; +} + #endif -- cgit v1.2.3 From f64c3f54940d6929a2b6dcffaab942bd62be2e66 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 23 Sep 2009 15:56:37 -0700 Subject: memory controller: soft limit organize cgroups Organize cgroups over soft limit in a RB-Tree Introduce an RB-Tree for storing memory cgroups that are over their soft limit. The overall goal is to 1. Add a memory cgroup to the RB-Tree when the soft limit is exceeded. We are careful about updates, updates take place only after a particular time interval has passed 2. We remove the node from the RB-Tree when the usage goes below the soft limit The next set of patches will exploit the RB-Tree to get the group that is over its soft limit by the largest amount and reclaim from it, when we face memory contention. [hugh.dickins@tiscali.co.uk: CONFIG_CGROUP_MEM_RES_CTLR=y CONFIG_PREEMPT=y fails to boot] Signed-off-by: Balbir Singh Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: KOSAKI Motohiro Signed-off-by: Hugh Dickins Cc: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index fcb9884df61..731af71cddc 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -114,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val, struct res_counter **limit_fail_at); + unsigned long val, struct res_counter **limit_fail_at, + struct res_counter **soft_limit_at); /* * uncharge - tell that some portion of the resource is released @@ -127,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter, */ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); -void res_counter_uncharge(struct res_counter *counter, unsigned long val); +void res_counter_uncharge(struct res_counter *counter, unsigned long val, + bool *was_soft_limit_excess); static inline bool res_counter_limit_check_locked(struct res_counter *cnt) { -- cgit v1.2.3 From 4e41695356fb4e0b153be1440ad027e46e0a7ea2 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 23 Sep 2009 15:56:39 -0700 Subject: memory controller: soft limit reclaim on contention Implement reclaim from groups over their soft limit Permit reclaim from memory cgroups on contention (via the direct reclaim path). memory cgroup soft limit reclaim finds the group that exceeds its soft limit by the largest number of pages and reclaims pages from it and then reinserts the cgroup into its correct place in the rbtree. Add additional checks to mem_cgroup_hierarchical_reclaim() to detect long loops in case all swap is turned off. The code has been refactored and the loop check (loop < 2) has been enhanced for soft limits. For soft limits, we try to do more targetted reclaim. Instead of bailing out after two loops, the routine now reclaims memory proportional to the size by which the soft limit is exceeded. The proportion has been empirically determined. [akpm@linux-foundation.org: build fix] [kamezawa.hiroyu@jp.fujitsu.com: fix softlimit css refcnt handling] [nishimura@mxp.nes.nec.co.jp: refcount of the "victim" should be decremented before exiting the loop] Signed-off-by: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Li Zefan Acked-by: KOSAKI Motohiro Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++++ include/linux/swap.h | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e46a0734ab6..bf9213b2db8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void) extern bool mem_cgroup_oom_called(struct task_struct *task); void mem_cgroup_update_mapped_file_stat(struct page *page, int val); +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, + int zid); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page, { } +static inline +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, int zid) +{ + return 0; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 6c990e658f4..4c78fea989b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -217,6 +217,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness); +extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + unsigned int swappiness, + struct zone *zone, + int nid); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; -- cgit v1.2.3 From a7f0765edfd53aed09cb7b0e15863688b39447de Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Sep 2009 15:56:44 -0700 Subject: ptrace: __ptrace_detach: do __wake_up_parent() if we reap the tracee The bug is old, it wasn't cause by recent changes. Test case: static void *tfunc(void *arg) { int pid = (long)arg; assert(ptrace(PTRACE_ATTACH, pid, NULL, NULL) == 0); kill(pid, SIGKILL); sleep(1); return NULL; } int main(void) { pthread_t th; long pid = fork(); if (!pid) pause(); signal(SIGCHLD, SIG_IGN); assert(pthread_create(&th, NULL, tfunc, (void*)pid) == 0); int r = waitpid(-1, NULL, __WNOTHREAD); printf("waitpid: %d %m\n", r); return 0; } Before the patch this program hangs, after this patch waitpid() correctly fails with errno == -ECHILD. The problem is, __ptrace_detach() reaps the EXIT_ZOMBIE tracee if its ->real_parent is our sub-thread and we ignore SIGCHLD. But in this case we should wake up other threads which can sleep in do_wait(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Vitaly Mayatskikh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 848d1f20086..9e5a88afe6b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2059,6 +2059,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); extern int do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); -- cgit v1.2.3 From ae6d2ed7bb3877ff35b9569402025f40ea2e1803 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 23 Sep 2009 15:56:53 -0700 Subject: signals: tracehook_notify_jctl change This changes tracehook_notify_jctl() so it's called with the siglock held, and changes its argument and return value definition. These clean-ups make it a better fit for what new tracing hooks need to check. Tracing needs the siglock here, held from the time TASK_STOPPED was set, to avoid potential SIGCONT races if it wants to allow any blocking in its tracing hooks. This also folds the finish_stop() function into its caller do_signal_stop(). The function is short, called only once and only unconditionally. It aids readability to fold it in. [oleg@redhat.com: do not call tracehook_notify_jctl() in TASK_STOPPED state] [oleg@redhat.com: introduce tracehook_finish_jctl() helper] Signed-off-by: Roland McGrath Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 17ba82efa48..1eb44a924e5 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -1,7 +1,7 @@ /* * Tracing hooks * - * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task, /** * tracehook_notify_jctl - report about job control stop/continue - * @notify: nonzero if this is the last thread in the group to stop + * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED * @why: %CLD_STOPPED or %CLD_CONTINUED * * This is called when we might call do_notify_parent_cldstop(). - * It's called when about to stop for job control; we are already in - * %TASK_STOPPED state, about to call schedule(). It's also called when - * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made. * - * Return nonzero to generate a %SIGCHLD with @why, which is - * normal if @notify is nonzero. + * @notify is zero if we would not ordinarily send a %SIGCHLD, + * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD. * - * Called with no locks held. + * @why is %CLD_STOPPED when about to stop for job control; + * we are already in %TASK_STOPPED state, about to call schedule(). + * It might also be that we have just exited (check %PF_EXITING), + * but need to report that a group-wide stop is complete. + * + * @why is %CLD_CONTINUED when waking up after job control stop and + * ready to make a delayed @notify report. + * + * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal. + * + * Called with the siglock held. */ static inline int tracehook_notify_jctl(int notify, int why) { - return notify || (current->ptrace & PT_PTRACED); + return notify ?: (current->ptrace & PT_PTRACED) ? why : 0; +} + +/** + * tracehook_finish_jctl - report about return from job control stop + * + * This is called by do_signal_stop() after wakeup. + */ +static inline void tracehook_finish_jctl(void) +{ } #define DEATH_REAP -1 -- cgit v1.2.3 From 964ee7df90d799e38fb1556c57cd5c45fc736436 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Sep 2009 15:56:59 -0700 Subject: exec: fix set_binfmt() vs sys_delete_module() race sys_delete_module() can set MODULE_STATE_GOING after search_binary_handler() does try_module_get(). In this case set_binfmt()->try_module_get() fails but since none of the callers check the returned error, the task will run with the wrong old ->binfmt. The proper fix should change all ->load_binary() methods, but we can rely on fact that the caller must hold a reference to binfmt->module and use __module_get() which never fails. Signed-off-by: Oleg Nesterov Acked-by: Rusty Russell Cc: Hiroshi Shimamoto Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 2046b5b8af4..aece486ac73 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); -extern int set_binfmt(struct linux_binfmt *new); +extern void set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 4a30debfb778240a4b1767d4b0c5a5b25ab97160 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Sep 2009 15:57:00 -0700 Subject: signals: introduce do_send_sig_info() helper Introduce do_send_sig_info() and convert group_send_sig_info(), send_sig_info(), do_send_specific() to use this helper. Hopefully it will have more users soon, it allows to specify specific/group behaviour via "bool group" argument. Shaves 80 bytes from .text. Signed-off-by: Oleg Nesterov Cc: Peter Zijlstra Cc: stephane eranian Cc: Ingo Molnar Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index c7552836bd9..ab9272cc270 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig) } extern int next_signal(struct sigpending *pending, sigset_t *mask); +extern int do_send_sig_info(int sig, struct siginfo *info, + struct task_struct *p, bool group); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, -- cgit v1.2.3 From d9588725e52650e82989707f8fd2feb67ad2dc8e Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 23 Sep 2009 15:57:04 -0700 Subject: signals: inline __fatal_signal_pending __fatal_signal_pending inlines to one instruction on x86, probably two instructions on other machines. It takes two longer x86 instructions just to call it and test its return value, not to mention the function itself. On my random x86_64 config, this saved 70 bytes of text (59 of those being __fatal_signal_pending itself). Signed-off-by: Roland McGrath Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9e5a88afe6b..e951bd2eb9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2337,7 +2337,10 @@ static inline int signal_pending(struct task_struct *p) return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } -extern int __fatal_signal_pending(struct task_struct *p); +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} static inline int fatal_signal_pending(struct task_struct *p) { -- cgit v1.2.3 From 8d65af789f3e2cf4cfbdbf71a0f7a61ebcd41d38 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 23 Sep 2009 15:57:19 -0700 Subject: sysctl: remove "struct file *" argument of ->proc_handler It's unused. It isn't needed -- read or write flag is already passed and sysctl shouldn't care about the rest. It _was_ used in two places at arch/frv for some reason. Signed-off-by: Alexey Dobriyan Cc: David Howells Cc: "Eric W. Biederman" Cc: Al Viro Cc: Ralf Baechle Cc: Martin Schwidefsky Cc: Ingo Molnar Cc: "David S. Miller" Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/ftrace.h | 4 ++-- include/linux/hugetlb.h | 6 +++--- include/linux/mm.h | 2 +- include/linux/mmzone.h | 13 ++++++------- include/linux/sched.h | 8 ++++---- include/linux/security.h | 2 +- include/linux/swap.h | 2 +- include/linux/sysctl.h | 19 +++++++++---------- include/linux/writeback.h | 11 +++++------ 10 files changed, 33 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51803528b09..33ed6644abd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2467,7 +2467,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; -int proc_nr_files(struct ctl_table *table, int write, struct file *filp, +int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3c0924a18da..cd3d2abaf30 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -19,7 +19,7 @@ extern int ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); @@ -94,7 +94,7 @@ static inline void ftrace_start(void) { } extern int stack_tracer_enabled; int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 176e7ee73ef..11ab19ac6b3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) } void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, diff --git a/include/linux/mm.h b/include/linux/mm.h index b6eae5e3144..87218ae84e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1279,7 +1279,7 @@ int in_gate_area_no_task(unsigned long addr); #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ -int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, +int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 652ef01be58..6f7561730d8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -struct file; -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int numa_zonelist_order_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e951bd2eb9f..811cd96524d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -309,7 +309,7 @@ extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; @@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -1906,7 +1906,7 @@ extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, + void __user *buffer, size_t *length, loff_t *ppos); #endif #ifdef CONFIG_SCHED_DEBUG @@ -1924,7 +1924,7 @@ extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int sysctl_sched_compat_yield; diff --git a/include/linux/security.h b/include/linux/security.h index d050b66ab9e..239e40d0450 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) return PAGE_ALIGN(mmap_min_addr); return hint; } -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, +extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #ifdef CONFIG_SECURITY diff --git a/include/linux/swap.h b/include/linux/swap.h index 4c78fea989b..82232dbea3f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -245,7 +245,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma); extern void scan_mapping_unevictable_pages(struct address_space *); extern unsigned long scan_unevictable_pages; -extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, +extern int scan_unevictable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int scan_unevictable_register_node(struct node *node); extern void scan_unevictable_unregister_node(struct node *node); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e76d3b22a46..1e4743ee683 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -29,7 +29,6 @@ #include #include -struct file; struct completion; #define CTL_MAXNAME 10 /* how many path components do we allow in a @@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, +typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -extern int proc_dostring(struct ctl_table *, int, struct file *, +extern int proc_dostring(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, struct file *, +extern int proc_dointvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_doulongvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int do_sysctl (int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 75cf58666ff..66ebddcff66 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -110,21 +110,20 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); struct ctl_table; -struct file; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, +int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, -- cgit v1.2.3 From 9064a6787aa1d8ceaf5ba16fe1dfedb0755dc7eb Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 23 Sep 2009 15:57:23 -0700 Subject: linux/futex.h: place kernel types behind __KERNEL__ The forward decls for some kernel types are only needed by the code behind __KERNEL__, so don't bleed these types to userspace. Signed-off-by: Mike Frysinger Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 34956c8fdeb..8ec17997d94 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,6 @@ #include #include -struct inode; -struct mm_struct; -struct task_struct; -union ktime; - /* Second argument to futex syscall */ @@ -129,6 +124,11 @@ struct robust_list_head { #define FUTEX_BITSET_MATCH_ANY 0xffffffff #ifdef __KERNEL__ +struct inode; +struct mm_struct; +struct task_struct; +union ktime; + long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -- cgit v1.2.3 From 858f09930b32c11b40fd0c5c467982ba09b10894 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 23 Sep 2009 15:57:32 -0700 Subject: aio: ifdef fields in mm_struct ->ioctx_lock and ->ioctx_list are used only under CONFIG_AIO. Signed-off-by: Alexey Dobriyan Cc: Zach Brown Cc: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0042090a4d7..6b7029ab9c8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -259,11 +259,10 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ - - /* aio bits */ +#ifdef CONFIG_AIO spinlock_t ioctx_lock; struct hlist_head ioctx_list; - +#endif #ifdef CONFIG_MM_OWNER /* * "owner" points to a task that is regarded as the canonical -- cgit v1.2.3 From 2fa4341074cd02fb39aa23410740764948755635 Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Wed, 23 Sep 2009 15:57:38 -0700 Subject: include/linux/unaligned/{l,b}e_byteshift.h: fix usage for compressed kernels When unaligned accesses are required for uncompressing a kernel (such as for LZO decompression on ARM in a patch that follows), including causes issues as it brings in a lot of things that are not available in the decompression environment. linux/kernel.h brings at least: extern int console_printk[]; extern const char hex_asc[]; which causes errors at link-time as they are not available when compiling the pre-boot environement. There are also a few others: arch/arm/boot/compressed/misc.o: In function `valid_user_regs': arch/arm/include/asm/ptrace.h:158: undefined reference to `elf_hwcap' arch/arm/boot/compressed/misc.o: In function `console_silent': include/linux/kernel.h:292: undefined reference to `console_printk' arch/arm/boot/compressed/misc.o: In function `console_verbose': include/linux/kernel.h:297: undefined reference to `console_printk' arch/arm/boot/compressed/misc.o: In function `pack_hex_byte': include/linux/kernel.h:360: undefined reference to `hex_asc' arch/arm/boot/compressed/misc.o: In function `hweight_long': include/linux/bitops.h:45: undefined reference to `hweight32' arch/arm/boot/compressed/misc.o: In function `__cmpxchg_local_generic': include/asm-generic/cmpxchg-local.h:21: undefined reference to `wrong_size_cmpxchg' include/asm-generic/cmpxchg-local.h:42: undefined reference to `wrong_size_cmpxchg' arch/arm/boot/compressed/misc.o: In function `__xchg': arch/arm/include/asm/system.h:309: undefined reference to `__bad_xchg' However, those files apparently use nothing from , all they need is the declaration of types such as u32 or u64, so should be enough Signed-off-by: Albin Tonnerre Cc: Sam Ravnborg Cc: Russell King Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Phillip Lougher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/unaligned/be_byteshift.h | 2 +- include/linux/unaligned/le_byteshift.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h index 46dd12c5709..9356b24223a 100644 --- a/include/linux/unaligned/be_byteshift.h +++ b/include/linux/unaligned/be_byteshift.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H #define _LINUX_UNALIGNED_BE_BYTESHIFT_H -#include +#include static inline u16 __get_unaligned_be16(const u8 *p) { diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h index 59777e951ba..be376fb79b6 100644 --- a/include/linux/unaligned/le_byteshift.h +++ b/include/linux/unaligned/le_byteshift.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H #define _LINUX_UNALIGNED_LE_BYTESHIFT_H -#include +#include static inline u16 __get_unaligned_le16(const u8 *p) { -- cgit v1.2.3 From 801460d0cf5c5288153b722565773059b0f44348 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 23 Sep 2009 15:57:41 -0700 Subject: task_struct cleanup: move binfmt field to mm_struct Because the binfmt is not different between threads in the same process, it can be moved from task_struct to mm_struct. And binfmt moudle is handled per mm_struct instead of task_struct. Signed-off-by: Hiroshi Shimamoto Acked-by: Oleg Nesterov Cc: Rusty Russell Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b7029ab9c8..21d6aa45206 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -240,6 +240,8 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ + struct linux_binfmt *binfmt; + cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 811cd96524d..8a16f6d11dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1271,7 +1271,6 @@ struct task_struct { struct mm_struct *mm, *active_mm; /* task state */ - struct linux_binfmt *binfmt; int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ -- cgit v1.2.3 From 4a4962263f07d14660849ec134ee42b63e95ea9a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:50:42 +0100 Subject: module: reduce symbol table for loaded modules (v2) Discard all symbols not interesting for kallsyms use: absolute, section, and in the common case (!KALLSYMS_ALL) data ones. Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 1c755b2f937..1d3ccb173fd 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,9 +308,13 @@ struct module #endif #ifdef CONFIG_KALLSYMS - /* We keep the symbol and string tables for kallsyms. */ - Elf_Sym *symtab; - unsigned int num_symtab; + /* + * We keep the symbol and string tables for kallsyms. + * The core_* fields below are temporary, loader-only (they + * could really be discarded after module init). + */ + Elf_Sym *symtab, *core_symtab; + unsigned int num_symtab, core_num_syms; char *strtab; /* Section attributes */ -- cgit v1.2.3 From 554bdfe5acf3715e87c8d5e25a4f9a896ac9f014 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:51:44 +0100 Subject: module: reduce string table for loaded modules (v2) Also remove all parts of the string table (referenced by the symbol table) that are not needed for kallsyms use (i.e. which were only referenced by symbols discarded by the previous patch, or not referenced at all for whatever reason). Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 1d3ccb173fd..aca98036595 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -315,7 +315,7 @@ struct module */ Elf_Sym *symtab, *core_symtab; unsigned int num_symtab, core_num_syms; - char *strtab; + char *strtab, *core_strtab; /* Section attributes */ struct module_sect_attrs *sect_attrs; -- cgit v1.2.3 From 1d7015caa082d465faeae5d6fd1be077ee6dfa87 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Sep 2009 00:32:58 -0600 Subject: module: preferred way to use MODULE_AUTHOR For the longest time now we've been using multiple MODULE_AUTHOR() statements when a module has more than one author, but the comment here disagrees. Signed-off-by: Johannes Berg Cc: Jiri Kosina Cc: Luciano Coelho Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell --- include/linux/module.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index aca98036595..482efc865ac 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -128,7 +128,10 @@ extern struct module __this_module; */ #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) -/* Author, ideally of form NAME[, NAME]*[ and NAME] */ +/* + * Author(s), use "Name " or just "Name", for multiple + * authors use multiple MODULE_AUTHOR() statements/lines. + */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ -- cgit v1.2.3 From 18a1166de994685d770425086b2bcc1ba567f7ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 23 Sep 2009 03:17:11 +0000 Subject: Phonet: error on broadcast sending (unimplemented) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we ever implement this, then we can stop returning an error. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 1ef5a078183..e5126cff9b2 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -38,6 +38,7 @@ #define PNPIPE_IFINDEX 2 #define PNADDR_ANY 0 +#define PNADDR_BROADCAST 0xFC #define PNPORT_RESOURCE_ROUTING 0 /* Values for PNPIPE_ENCAP option */ -- cgit v1.2.3 From b8273570f802a7658827dcb077b0b517ba75a289 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 24 Sep 2009 15:44:05 -0700 Subject: genetlink: fix netns vs. netlink table locking (2) Similar to commit d136f1bd366fdb7e747ca7e0218171e7a00a98a5, there's a bug when unregistering a generic netlink family, which is caught by the might_sleep() added in that commit: BUG: sleeping function called from invalid context at net/netlink/af_netlink.c:183 in_atomic(): 1, irqs_disabled(): 0, pid: 1510, name: rmmod 2 locks held by rmmod/1510: #0: (genl_mutex){+.+.+.}, at: [] genl_unregister_family+0x2b/0x130 #1: (rcu_read_lock){.+.+..}, at: [] __genl_unregister_mc_group+0x1c/0x120 Pid: 1510, comm: rmmod Not tainted 2.6.31-wl #444 Call Trace: [] __might_sleep+0x119/0x150 [] netlink_table_grab+0x21/0x100 [] netlink_clear_multicast_users+0x23/0x60 [] __genl_unregister_mc_group+0x71/0x120 [] genl_unregister_family+0x56/0x130 [] nl80211_exit+0x15/0x20 [cfg80211] [] cfg80211_exit+0x1a/0x40 [cfg80211] Fix in the same way by grabbing the netlink table lock before doing rcu_read_lock(). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 080f6ba9e73..ab5d3126831 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -187,6 +187,7 @@ extern struct sock *netlink_kernel_create(struct net *net, extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); +extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); -- cgit v1.2.3 From e9ea0e2d1d00959b451dfc239df126d3c6a22043 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 24 Sep 2009 14:47:45 -0700 Subject: hugetlb_file_setup(): use C, not cpp Why macros are always wrong: mm/mmap.c: In function 'do_mmap_pgoff': mm/mmap.c:953: warning: unused variable 'user' also, move a couple of struct forward-decls outside `#ifdef CONFIG_HUGETLB_PAGE' - it's pointless and frequently harmful to make these conditional (eg, this patch needed `struct user_struct'). Cc: Lee Schermerhorn Cc: Mel Gorman Cc: Nishanth Aravamudan Cc: David Rientjes Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Eric B Munson Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 11ab19ac6b3..16937995abd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -3,15 +3,15 @@ #include +struct ctl_table; +struct user_struct; + #ifdef CONFIG_HUGETLB_PAGE #include #include #include -struct ctl_table; -struct user_struct; - int PageHuge(struct page *page); static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) @@ -187,7 +187,11 @@ static inline void set_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() -#define hugetlb_file_setup(name,size,acct,user,creat) ERR_PTR(-ENOSYS) +static inline struct file *hugetlb_file_setup(const char *name, size_t size, + int acctflag, struct user_struct **user, int creat_flags) +{ + return ERR_PTR(-ENOSYS); +} #endif /* !CONFIG_HUGETLBFS */ -- cgit v1.2.3 From 934831d060ccd5471ecbc562804a8d3ccd6e562c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 24 Sep 2009 12:33:32 +0100 Subject: NOMMU: Fallback for is_vmalloc_or_module_addr() should be inline The NOMMU fallback for is_vmalloc_or_module_addr() should be static inline, not just static, in linux/mm.h. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index df08551cb0a..24c395694f4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -288,7 +288,7 @@ static inline int is_vmalloc_addr(const void *x) #ifdef CONFIG_MMU extern int is_vmalloc_or_module_addr(const void *x); #else -static int is_vmalloc_or_module_addr(const void *x) +static inline int is_vmalloc_or_module_addr(const void *x) { return 0; } -- cgit v1.2.3 From a72bfd4dea053bb8e2233902c3f1893ef5485802 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 26 Sep 2009 00:07:46 +0200 Subject: writeback: pass in super_block to bdi_start_writeback() Sometimes we only want to write pages from a specific super_block, so allow that to be passed in. This fixes a problem with commit 56a131dcf7ed36c3c6e36bea448b674ea85ed5bb causing writeback on all super_blocks on a bdi, where we only really want to sync a specific sb from writeback_inodes_sb(). Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0ee33c2e612..b449e738533 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -101,7 +101,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages); +void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, + long nr_pages); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); -- cgit v1.2.3 From 1d1764c39815db55e10b2d78732db4d6dd9d6039 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 26 Sep 2009 19:37:22 +0400 Subject: headers: kref.h redux * remove asm/atomic.h inclusion from kref.h -- not needed, linux/types.h is enough for atomic_t * remove linux/kref.h inclusion from files which do not need it. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/ipc.h | 2 -- include/linux/kref.h | 1 - include/linux/nfs_fs.h | 1 - 3 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index b8826107b51..3b1594d662b 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -78,8 +78,6 @@ struct ipc_kludge { #define IPCCALL(version,op) ((version)<<16 | (op)) #ifdef __KERNEL__ - -#include #include #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ diff --git a/include/linux/kref.h b/include/linux/kref.h index 0cef6badd6f..b0cb0ebad9e 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -16,7 +16,6 @@ #define _KREF_H_ #include -#include struct kref { atomic_t refcount; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f6b90240dd4..d09db1bc908 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -40,7 +40,6 @@ #ifdef __KERNEL__ #include -#include #include #include #include -- cgit v1.2.3 From d1f8297a96b0d70f17704296a6666468f2087ce6 Mon Sep 17 00:00:00 2001 From: Sascha Hlusiak Date: Sat, 26 Sep 2009 20:28:07 -0700 Subject: Revert "sit: stateless autoconf for isatap" This reverts commit 645069299a1c7358cf7330afe293f07552f11a5d. While the code does not actually break anything, it does not completely follow RFC5214 yet. After talking back with Fred L. Templin, I agree that completing the ISATAP specific RS/RA code, would pollute the kernel a lot with code that is better implemented in userspace. The kernel should not send RS packages for ISATAP at all. Signed-off-by: Sascha Hlusiak Acked-by: Fred L. Templin Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 5eb9b0f857e..5a9aae4adb4 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -44,7 +44,7 @@ struct ip_tunnel_prl { __u16 flags; __u16 __reserved; __u32 datalen; - __u32 rs_delay; + __u32 __reserved2; /* data follows */ }; -- cgit v1.2.3 From f0f37e2f77731b3473fa6bd5ee53255d9a9cdb40 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 27 Sep 2009 22:29:37 +0400 Subject: const: mark struct vm_struct_operations * mark struct vm_area_struct::vm_ops as const * mark vm_ops in AGP code But leave TTM code alone, something is fishy there with global vm_ops being used. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/agp_backend.h | 2 +- include/linux/hugetlb.h | 2 +- include/linux/mm_types.h | 2 +- include/linux/ramfs.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 880130f7311..9101ed64f80 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -53,7 +53,7 @@ struct agp_kern_info { int current_memory; bool cant_use_aperture; unsigned long page_mask; - struct vm_operations_struct *vm_ops; + const struct vm_operations_struct *vm_ops; }; /* diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 16937995abd..41a59afc70f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -163,7 +163,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) } extern const struct file_operations hugetlbfs_file_operations; -extern struct vm_operations_struct hugetlb_vm_ops; +extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, int acct, struct user_struct **user, int creat_flags); int hugetlb_get_quota(struct address_space *mapping, long delta); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 21d6aa45206..84a524afb3d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -171,7 +171,7 @@ struct vm_area_struct { struct anon_vma *anon_vma; /* Serialized by page_table_lock */ /* Function pointers to deal with this struct. */ - struct vm_operations_struct * vm_ops; + const struct vm_operations_struct *vm_ops; /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 37aaf2b3986..4e768dda87b 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -17,7 +17,7 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); #endif extern const struct file_operations ramfs_file_operations; -extern struct vm_operations_struct generic_file_vm_ops; +extern const struct vm_operations_struct generic_file_vm_ops; extern int __init init_rootfs(void); #endif -- cgit v1.2.3 From f278a2f7bbc2239f479eaf63d0b3ae573b1d746c Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sun, 27 Sep 2009 16:00:42 +0000 Subject: tty: Fix regressions caused by commit b50989dc The following commit made console open fails while booting: commit b50989dc444599c8b21edc23536fc305f4e9b7d5 Author: Alan Cox Date: Sat Sep 19 13:13:22 2009 -0700 tty: make the kref destructor occur asynchronously Due to tty release routines run in a workqueue now, error like the following will be reported while booting: INIT open /dev/console Input/output error It also causes hibernation regression to appear as reported at http://bugzilla.kernel.org/show_bug.cgi?id=14229 The reason is that now there's latency issue with closing, but when we open a "closing not finished" tty, -EIO will be returned. Fix it as per the following Alan's suggestion: Fun but it's actually not a bug and the fix is wrong in itself as the port may be closing but not yet being destructed, in which case it seems to do the wrong thing. Opening a tty that is closing (and could be closing for long periods) is supposed to return -EIO. I suspect a better way to deal with this and keep the old console timing is to split tty->shutdown into two functions. tty->shutdown() - called synchronously just before we dump the tty onto the waitqueue for destruction tty->cleanup() - called when the destructor runs. We would then do the shutdown part which can occur in IRQ context fine, before queueing the rest of the release (from tty->magic = 0 ... the end) to occur asynchronously The USB update in -next would then need a call like if (tty->cleanup) tty->cleanup(tty); at the top of the async function and the USB shutdown to be split between shutdown and cleanup as the USB resource cleanup and final tidy cannot occur synchronously as it needs to sleep. In other words the logic becomes final kref put make object unfindable async clean it up Signed-off-by: Dave Young [ rjw: Rebased on top of 2.6.31-git, reworked the changelog. ] Signed-off-by: "Rafael J. Wysocki" [ Changed serial naming to match new rules, dropped tty_shutdown as per comments from Alan Stern - Linus ] Signed-off-by: Linus Torvalds --- include/linux/tty_driver.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 3566129384a..b0867798252 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -45,8 +45,16 @@ * * void (*shutdown)(struct tty_struct * tty); * - * This routine is called when a particular tty device is closed for - * the last time freeing up the resources. + * This routine is called synchronously when a particular tty device + * is closed for the last time freeing up the resources. + * + * + * void (*cleanup)(struct tty_struct * tty); + * + * This routine is called asynchronously when a particular tty device + * is closed for the last time freeing up the resources. This is + * actually the second part of shutdown for routines that might sleep. + * * * int (*write)(struct tty_struct * tty, * const unsigned char *buf, int count); @@ -233,6 +241,7 @@ struct tty_operations { int (*open)(struct tty_struct * tty, struct file * filp); void (*close)(struct tty_struct * tty, struct file * filp); void (*shutdown)(struct tty_struct *tty); + void (*cleanup)(struct tty_struct *tty); int (*write)(struct tty_struct * tty, const unsigned char *buf, int count); int (*put_char)(struct tty_struct *tty, unsigned char ch); -- cgit v1.2.3 From bf6993276f74d46776f35c45ddef29b981b1d1c6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 30 Sep 2009 00:32:06 -0400 Subject: jbd2: Use tracepoints for history file The /proc/fs/jbd2//history was maintained manually; by using tracepoints, we can get all of the existing functionality of the /proc file plus extra capabilities thanks to the ftrace infrastructure. We save memory as a bonus. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 52695d3dfd0..f1011f7f3d4 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -464,9 +464,9 @@ struct handle_s */ struct transaction_chp_stats_s { unsigned long cs_chp_time; - unsigned long cs_forced_to_close; - unsigned long cs_written; - unsigned long cs_dropped; + __u32 cs_forced_to_close; + __u32 cs_written; + __u32 cs_dropped; }; /* The transaction_t type is the guts of the journaling mechanism. It @@ -668,23 +668,16 @@ struct transaction_run_stats_s { unsigned long rs_flushing; unsigned long rs_logging; - unsigned long rs_handle_count; - unsigned long rs_blocks; - unsigned long rs_blocks_logged; + __u32 rs_handle_count; + __u32 rs_blocks; + __u32 rs_blocks_logged; }; struct transaction_stats_s { - int ts_type; unsigned long ts_tid; - union { - struct transaction_run_stats_s run; - struct transaction_chp_stats_s chp; - } u; + struct transaction_run_stats_s run; }; -#define JBD2_STATS_RUN 1 -#define JBD2_STATS_CHECKPOINT 2 - static inline unsigned long jbd2_time_diff(unsigned long start, unsigned long end) { @@ -988,12 +981,6 @@ struct journal_s /* * Journal statistics */ - struct transaction_stats_s *j_history; - int j_history_max; - int j_history_cur; - /* - * Protect the transactions statistics history - */ spinlock_t j_history_lock; struct proc_dir_entry *j_proc_entry; struct transaction_stats_s j_stats; -- cgit v1.2.3 From 9fcd66e572b94974365a9119b073e0a43d496eb7 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Fri, 18 Sep 2009 13:04:58 +0200 Subject: MIPS: BCM63xx: Add serial driver for bcm63xx integrated UART. Signed-off-by: Maxime Bizon Acked-by: Greg Kroah-Hartman Signed-off-by: Ralf Baechle --- include/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index fe661afe071..db532ce288b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -176,6 +176,9 @@ /* Qualcomm MSM SoCs */ #define PORT_MSM 88 +/* BCM63xx family SoCs */ +#define PORT_BCM63XX 89 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From b7058842c940ad2c08dd829b21e5c92ebe3b8758 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 30 Sep 2009 16:12:20 -0700 Subject: net: Make setsockopt() optlen be unsigned. This provides safety against negative optlen at the type level instead of depending upon (sometimes non-trivial) checks against this sprinkled all over the the place, in each and every implementation. Based upon work done by Arjan van de Ven and feedback from Linus Torvalds. Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- include/linux/mroute.h | 4 ++-- include/linux/mroute6.h | 4 ++-- include/linux/net.h | 8 ++++---- include/linux/netfilter.h | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 086e5c362d3..817b23705c9 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -397,7 +397,7 @@ struct atmdev_ops { /* only send is required */ int (*getsockopt)(struct atm_vcc *vcc,int level,int optname, void __user *optval,int optlen); int (*setsockopt)(struct atm_vcc *vcc,int level,int optname, - void __user *optval,int optlen); + void __user *optval,unsigned int optlen); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); void (*phy_put)(struct atm_dev *dev,unsigned char value, diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 0d45b4e8d36..08bc776d05e 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -145,14 +145,14 @@ static inline int ip_mroute_opt(int opt) #endif #ifdef CONFIG_IP_MROUTE -extern int ip_mroute_setsockopt(struct sock *, int, char __user *, int); +extern int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); extern int ip_mr_init(void); #else static inline int ip_mroute_setsockopt(struct sock *sock, - int optname, char __user *optval, int optlen) + int optname, char __user *optval, unsigned int optlen) { return -ENOPROTOOPT; } diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 43dc97e3218..b191865a6ca 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -134,7 +134,7 @@ static inline int ip6_mroute_opt(int opt) struct sock; #ifdef CONFIG_IPV6_MROUTE -extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, int); +extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *); extern int ip6_mr_input(struct sk_buff *skb); extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg); @@ -143,7 +143,7 @@ extern void ip6_mr_cleanup(void); #else static inline int ip6_mroute_setsockopt(struct sock *sock, - int optname, char __user *optval, int optlen) + int optname, char __user *optval, unsigned int optlen) { return -ENOPROTOOPT; } diff --git a/include/linux/net.h b/include/linux/net.h index 9040a10584f..529a0931711 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -178,11 +178,11 @@ struct proto_ops { int (*listen) (struct socket *sock, int len); int (*shutdown) (struct socket *sock, int flags); int (*setsockopt)(struct socket *sock, int level, - int optname, char __user *optval, int optlen); + int optname, char __user *optval, unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); int (*compat_setsockopt)(struct socket *sock, int level, - int optname, char __user *optval, int optlen); + int optname, char __user *optval, unsigned int optlen); int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); int (*sendmsg) (struct kiocb *iocb, struct socket *sock, @@ -256,7 +256,7 @@ extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, extern int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen); extern int kernel_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen); + char *optval, unsigned int optlen); extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); @@ -313,7 +313,7 @@ SOCKCALL_WRAP(name, compat_ioctl, (struct socket *sock, unsigned int cmd, \ SOCKCALL_WRAP(name, listen, (struct socket *sock, int len), (sock, len)) \ SOCKCALL_WRAP(name, shutdown, (struct socket *sock, int flags), (sock, flags)) \ SOCKCALL_WRAP(name, setsockopt, (struct socket *sock, int level, int optname, \ - char __user *optval, int optlen), (sock, level, optname, optval, optlen)) \ + char __user *optval, unsigned int optlen), (sock, level, optname, optval, optlen)) \ SOCKCALL_WRAP(name, getsockopt, (struct socket *sock, int level, int optname, \ char __user *optval, int __user *optlen), (sock, level, optname, optval, optlen)) \ SOCKCALL_WRAP(name, sendmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len), \ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 48cfe51bfdd..6132b5e6d9d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -221,12 +221,12 @@ __ret;}) /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, - int len); + unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, - char __user *opt, int len); + char __user *opt, unsigned int len); int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); -- cgit v1.2.3 From 48c0d4d4c04dd520c55e0fd756fa4e7c83de3d13 Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Fri, 25 Sep 2009 06:19:26 +0200 Subject: Add missing blk_trace_remove_sysfs to be in pair with blk_trace_init_sysfs Add missing blk_trace_remove_sysfs to be in pair with blk_trace_init_sysfs introduced in commit 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82. Release kobject also in case the request_fn is NULL. Problem was noticed via kmemleak backtrace when some sysfs entries were note properly destroyed during device removal: unreferenced object 0xffff88001aa76640 (size 80): comm "lvcreate", pid 2120, jiffies 4294885144 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 f0 65 a7 1a 00 88 ff ff .........e...... 90 66 a7 1a 00 88 ff ff 86 1d 53 81 ff ff ff ff .f........S..... backtrace: [] kmemleak_alloc+0x26/0x60 [] kmem_cache_alloc+0x133/0x1c0 [] sysfs_new_dirent+0x41/0x120 [] sysfs_add_file_mode+0x3c/0xb0 [] internal_create_group+0xc1/0x1a0 [] sysfs_create_group+0x13/0x20 [] blk_trace_init_sysfs+0x14/0x20 [] blk_register_queue+0x3c/0xf0 [] add_disk+0x94/0x160 [] dm_create+0x598/0x6e0 [dm_mod] [] dev_create+0x51/0x350 [dm_mod] [] ctl_ioctl+0x1a3/0x240 [dm_mod] [] dm_compat_ctl_ioctl+0x12/0x20 [dm_mod] [] compat_sys_ioctl+0xcd/0x4f0 [] sysenter_dispatch+0x7/0x2c [] 0xffffffffffffffff Signed-off-by: Zdenek Kabelac Reviewed-by: Li Zefan Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7e4350ece0f..622939a2329 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern void blk_trace_remove_sysfs(struct device *dev); extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; @@ -211,6 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) +# define blk_trace_remove_sysfs(struct device *dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; -- cgit v1.2.3 From c15227de132f1295f3db6b7df9079956b1020fd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:52:12 +0200 Subject: block: use normal I/O path for discard requests prepare_discard_fn() was being called in a place where memory allocation was effectively impossible. This makes it inappropriate for all but the most trivial translations of Linux's DISCARD operation to the block command set. Additionally adding a payload there makes the ownership of the bio backing unclear as it's now allocated by the device driver and not the submitter as usual. It is replaced with QUEUE_FLAG_DISCARD which is used to indicate whether the queue supports discard operations or not. blkdev_issue_discard now allocates a one-page, sector-length payload which is the right thing for the common ATA and SCSI implementations. The mtd implementation of prepare_discard_fn() is replaced with simply checking for the request being a discard. Largely based on a previous patch from Matthew Wilcox which did the prepare_discard_fn but not the different payload allocation yet. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e23a86cae5a..f62d45e8761 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -82,7 +82,6 @@ enum rq_cmd_type_bits { enum { REQ_LB_OP_EJECT = 0x40, /* eject request */ REQ_LB_OP_FLUSH = 0x41, /* flush request */ - REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* @@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); -typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -340,7 +338,6 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; - prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -460,6 +457,7 @@ struct request_queue #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ +#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -591,6 +589,7 @@ enum { #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) +#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) @@ -955,7 +954,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); -extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3 From 67efc9258010da35b27b3854d0880c7e193004ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:54:20 +0200 Subject: block: allow large discard requests Currently we set the bio size to the byte equivalent of the blocks to be trimmed when submitting the initial DISCARD ioctl. That means it is subject to the max_hw_sectors limitation of the HBA which is much lower than the size of a DISCARD request we can support. Add a separate max_discard_sectors tunable to limit the size for discard requests. We limit the max discard request size in bytes to 32bit as that is the limit for bio->bi_size. This could be much larger if we had a way to pass that information through the block layer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f62d45e8761..1a03b715dfa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -311,6 +311,7 @@ struct queue_limits { unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; + unsigned int max_discard_sectors; unsigned short logical_block_size; unsigned short max_hw_segments; @@ -928,6 +929,8 @@ extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); +extern void blk_queue_max_discard_sectors(struct request_queue *q, + unsigned int max_discard_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); extern void blk_queue_alignment_offset(struct request_queue *q, -- cgit v1.2.3 From b0da3f0dada78832c9da03ad2152ae76bd9a2496 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Thu, 1 Oct 2009 21:16:13 +0200 Subject: Add a tracepoint for block request remapping Since 2.6.31 now has request-based device-mapper, it's useful to have a tracepoint for request-remapping as well as bio-remapping. This patch adds a tracepoint for request-remapping, trace_block_rq_remap(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Cc: Li Zefan Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 622939a2329..3b73b9992b2 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -212,7 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) -# define blk_trace_remove_sysfs(struct device *dev) do { } while (0) +# define blk_trace_remove_sysfs(dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; -- cgit v1.2.3 From 828c09509b9695271bcbdc53e9fc9a6a737148d2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 1 Oct 2009 15:43:56 -0700 Subject: const: constify remaining file_operations [akpm@linux-foundation.org: fix KVM] Signed-off-by: Alexey Dobriyan Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b62bb9294d0..0008dee6651 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -37,7 +37,7 @@ extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); -extern struct file_operations proc_cgroup_operations; +extern const struct file_operations proc_cgroup_operations; /* Define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _subsys_id, diff --git a/include/linux/fs.h b/include/linux/fs.h index 2adaa2529f1..a1e6899d4b6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2446,7 +2446,7 @@ static int __fops ## _open(struct inode *inode, struct file *file) \ __simple_attr_check_format(__fmt, 0ull); \ return simple_attr_open(inode, file, __get, __set, __fmt); \ } \ -static struct file_operations __fops = { \ +static const struct file_operations __fops = { \ .owner = THIS_MODULE, \ .open = __fops ## _open, \ .release = simple_attr_release, \ -- cgit v1.2.3 From 4e649152cbaa1aedd01821d200ab9d597fe469e4 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 1 Oct 2009 15:44:11 -0700 Subject: memcg: some modification to softlimit under hierarchical memory reclaim. This patch clean up/fixes for memcg's uncharge soft limit path. Problems: Now, res_counter_charge()/uncharge() handles softlimit information at charge/uncharge and softlimit-check is done when event counter per memcg goes over limit. Now, event counter per memcg is updated only when memory usage is over soft limit. Here, considering hierarchical memcg management, ancesotors should be taken care of. Now, ancerstors(hierarchy) are handled in charge() but not in uncharge(). This is not good. Prolems: 1. memcg's event counter incremented only when softlimit hits. That's bad. It makes event counter hard to be reused for other purpose. 2. At uncharge, only the lowest level rescounter is handled. This is bug. Because ancesotor's event counter is not incremented, children should take care of them. 3. res_counter_uncharge()'s 3rd argument is NULL in most case. ops under res_counter->lock should be small. No "if" sentense is better. Fixes: * Removed soft_limit_xx poitner and checks in charge and uncharge. Do-check-only-when-necessary scheme works enough well without them. * make event-counter of memcg incremented at every charge/uncharge. (per-cpu area will be accessed soon anyway) * All ancestors are checked at soft-limit-check. This is necessary because ancesotor's event counter may never be modified. Then, they should be checked at the same time. Reviewed-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Paul Menage Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 731af71cddc..fcb9884df61 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -114,8 +114,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val, struct res_counter **limit_fail_at, - struct res_counter **soft_limit_at); + unsigned long val, struct res_counter **limit_fail_at); /* * uncharge - tell that some portion of the resource is released @@ -128,8 +127,7 @@ int __must_check res_counter_charge(struct res_counter *counter, */ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); -void res_counter_uncharge(struct res_counter *counter, unsigned long val, - bool *was_soft_limit_excess); +void res_counter_uncharge(struct res_counter *counter, unsigned long val); static inline bool res_counter_limit_check_locked(struct res_counter *cnt) { -- cgit v1.2.3 From 329bd4119c8a0afea95f9db6d6b402a2f2b40e84 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Oct 2009 15:23:21 +0200 Subject: initcalls: Add early_initcall() for modules Complete the early_initcall() API by making it available in modules too. To be used by the EDAC/MCE code. Signed-off-by: Borislav Petkov Acked-by: Linus Torvalds Cc: Andi Kleen LKML-Reference: <20091002132321.GC28682@aftab> Signed-off-by: Ingo Molnar --- include/linux/init.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 400adbb4541..ff8bde520d0 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -271,6 +271,7 @@ void __init parse_early_options(char *cmdline); #else /* MODULE */ /* Don't use these in modules, but some people do... */ +#define early_initcall(fn) module_init(fn) #define core_initcall(fn) module_init(fn) #define postcore_initcall(fn) module_init(fn) #define arch_initcall(fn) module_init(fn) -- cgit v1.2.3 From 293500a23f4b0698cb04abfecfc9a954d8ab2742 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 2 Oct 2009 02:40:04 +0000 Subject: connector: Keep the skb in cn_callback_data Signed-off-by: Philipp Reisner Acked-by: Lars Ellenberg Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- include/linux/connector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 47ebf416f51..05a7a14126d 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -134,8 +134,8 @@ struct cn_callback_id { struct cn_callback_data { void (*destruct_data) (void *); void *ddata; - - void *callback_priv; + + struct sk_buff *skb; void (*callback) (struct cn_msg *); void *free; -- cgit v1.2.3 From 7069331dbe7155f23966f5944109f909fea0c7e4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 2 Oct 2009 02:40:05 +0000 Subject: connector: Provide the sender's credentials to the callback Signed-off-by: Philipp Reisner Acked-by: Lars Ellenberg Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- include/linux/connector.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 05a7a14126d..545728e20b6 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -136,7 +136,7 @@ struct cn_callback_data { void *ddata; struct sk_buff *skb; - void (*callback) (struct cn_msg *); + void (*callback) (struct cn_msg *, struct netlink_skb_parms *); void *free; }; @@ -167,11 +167,11 @@ struct cn_dev { struct cn_queue_dev *cbdev; }; -int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *)); +int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *, struct netlink_skb_parms *)); void cn_del_callback(struct cb_id *); int cn_netlink_send(struct cn_msg *, u32, gfp_t); -int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *)); +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *, struct netlink_skb_parms *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); int queue_cn_work(struct cn_callback_entry *cbq, struct work_struct *work); -- cgit v1.2.3 From f1489cfb173509a3c13444b46b6c989bad4f5b16 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 2 Oct 2009 02:40:07 +0000 Subject: connector: Removed the destruct_data callback since it is always kfree_skb() Signed-off-by: Philipp Reisner Acked-by: Lars Ellenberg Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- include/linux/connector.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 545728e20b6..3a14615fd35 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -132,9 +132,6 @@ struct cn_callback_id { }; struct cn_callback_data { - void (*destruct_data) (void *); - void *ddata; - struct sk_buff *skb; void (*callback) (struct cn_msg *, struct netlink_skb_parms *); -- cgit v1.2.3 From 8e2967555571659d2c8a70dd120710110ed7bba4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 3 Oct 2009 16:26:03 +0200 Subject: cfq-iosched: implement slower async initiate and queue ramp up This slowly ramps up the async queue depth based on the time passed since the sync IO, and doesn't allow async at all until a sync slice period has passed. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1a03b715dfa..a7323930d2b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1147,7 +1147,11 @@ static inline void put_dev_sector(Sector p) } struct work_struct; +struct delayed_work; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); +int kblockd_schedule_delayed_work(struct request_queue *q, + struct delayed_work *work, + unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From ac481c20ef8f6c6f2be75d581863f40c43874ef7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sat, 3 Oct 2009 20:52:01 +0200 Subject: block: Topology ioctls Not all users of the topology information want to use libblkid. Provide the topology information through bdev ioctls. Also clarify sector size comments for existing BLK ioctls. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 35 ++++++++++++++++++++++++++++++----- include/linux/fs.h | 4 ++++ 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a7323930d2b..25119041e03 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1081,25 +1081,37 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q) return q->limits.physical_block_size; } +static inline int bdev_physical_block_size(struct block_device *bdev) +{ + return queue_physical_block_size(bdev_get_queue(bdev)); +} + static inline unsigned int queue_io_min(struct request_queue *q) { return q->limits.io_min; } +static inline int bdev_io_min(struct block_device *bdev) +{ + return queue_io_min(bdev_get_queue(bdev)); +} + static inline unsigned int queue_io_opt(struct request_queue *q) { return q->limits.io_opt; } +static inline int bdev_io_opt(struct block_device *bdev) +{ + return queue_io_opt(bdev_get_queue(bdev)); +} + static inline int queue_alignment_offset(struct request_queue *q) { - if (q && q->limits.misaligned) + if (q->limits.misaligned) return -1; - if (q && q->limits.alignment_offset) - return q->limits.alignment_offset; - - return 0; + return q->limits.alignment_offset; } static inline int queue_sector_alignment_offset(struct request_queue *q, @@ -1109,6 +1121,19 @@ static inline int queue_sector_alignment_offset(struct request_queue *q, & (q->limits.io_min - 1); } +static inline int bdev_alignment_offset(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q->limits.misaligned) + return -1; + + if (bdev != bdev->bd_contains) + return bdev->bd_part->alignment_offset; + + return q->limits.alignment_offset; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2adaa2529f1..883eaacfd92 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -300,6 +300,10 @@ struct inodes_stat_t { #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) #define BLKDISCARD _IO(0x12,119) +#define BLKIOMIN _IO(0x12,120) +#define BLKIOOPT _IO(0x12,121) +#define BLKALIGNOFF _IO(0x12,122) +#define BLKPBSZGET _IO(0x12,123) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3 From 0f78ab9899e9d6acb09d5465def618704255963b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 4 Oct 2009 21:04:38 +0200 Subject: Revert "Seperate read and write statistics of in_flight requests" This reverts commit a9327cac440be4d8333bba975cbbf76045096275. Corrado Zoccolo reports: "with 2.6.32-rc1 I started getting the following strange output from "iostat -kx 2": Linux 2.6.31bisect (et2) 04/10/2009 _i686_ (2 CPU) avg-cpu: %user %nice %system %iowait %steal %idle 10,70 0,00 3,16 15,75 0,00 70,38 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 18,22 0,00 0,67 0,01 14,77 0,02 43,94 0,01 10,53 39043915,03 2629219,87 sdb 60,89 9,68 50,79 3,04 1724,43 50,52 65,95 0,70 13,06 488437,47 2629219,87 avg-cpu: %user %nice %system %iowait %steal %idle 2,72 0,00 0,74 0,00 0,00 96,53 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 avg-cpu: %user %nice %system %iowait %steal %idle 6,68 0,00 0,99 0,00 0,00 92,33 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 avg-cpu: %user %nice %system %iowait %steal %idle 4,40 0,00 0,73 1,47 0,00 93,40 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 4,00 0,00 3,00 0,00 28,00 18,67 0,06 19,50 333,33 100,00 Global values for service time and utilization are garbage. For interval values, utilization is always 100%, and service time is higher than normal. I bisected it down to: [a9327cac440be4d8333bba975cbbf76045096275] Seperate read and write statistics of in_flight requests and verified that reverting just that commit indeed solves the issue on 2.6.32-rc1." So until this is debugged, revert the bad commit. Signed-off-by: Jens Axboe --- include/linux/genhd.h | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 297df45ffd0..7beaa21b388 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -98,7 +98,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - int in_flight[2]; + int in_flight; #ifdef CONFIG_SMP struct disk_stats *dkstats; #else @@ -322,23 +322,18 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part, int rw) +static inline void part_inc_in_flight(struct hd_struct *part) { - part->in_flight[rw]++; + part->in_flight++; if (part->partno) - part_to_disk(part)->part0.in_flight[rw]++; + part_to_disk(part)->part0.in_flight++; } -static inline void part_dec_in_flight(struct hd_struct *part, int rw) +static inline void part_dec_in_flight(struct hd_struct *part) { - part->in_flight[rw]--; + part->in_flight--; if (part->partno) - part_to_disk(part)->part0.in_flight[rw]--; -} - -static inline int part_in_flight(struct hd_struct *part) -{ - return part->in_flight[0] + part->in_flight[1]; + part_to_disk(part)->part0.in_flight--; } /* block/blk-core.c */ @@ -551,8 +546,6 @@ extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); -extern ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3 From a99bbaf5ee6bad1aca0c88ea65ec6e5373e86184 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 4 Oct 2009 16:11:37 +0400 Subject: headers: remove sched.h from poll.h Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/poll.h b/include/linux/poll.h index fa287f25138..6673743946f 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -6,10 +6,10 @@ #ifdef __KERNEL__ #include +#include #include #include #include -#include #include /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating -- cgit v1.2.3 From 9c501935a3cdcf6b1d35aaee3aa11c7a7051a305 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 5 Oct 2009 00:24:36 -0700 Subject: net: Support inclusion of before The following user-space program fails to compile: #include #include int main() { return 0; } The reason is that tests __GLIBC__ to decide whether it should define various structures and macros that are now defined for user-space by , but __GLIBC__ is not defined if no libc headers have yet been included. It seems safe to drop support for libc 5 now. Signed-off-by: Ben Hutchings Signed-off-by: Bastian Blank Signed-off-by: David S. Miller --- include/linux/socket.h | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 3b461dffe24..3273a0c5043 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -16,7 +16,7 @@ struct __kernel_sockaddr_storage { /* _SS_MAXSIZE value minus size of ss_family */ } __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ -#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) +#ifdef __KERNEL__ #include /* arch-dependent defines */ #include /* the SIOCxxx I/O controls */ @@ -100,21 +100,6 @@ struct cmsghdr { ((mhdr)->msg_controllen - \ ((char *)(cmsg) - (char *)(mhdr)->msg_control))) -/* - * This mess will go away with glibc - */ - -#ifdef __KERNEL__ -#define __KINLINE static inline -#elif defined(__GNUC__) -#define __KINLINE static __inline__ -#elif defined(__cplusplus) -#define __KINLINE static inline -#else -#define __KINLINE static -#endif - - /* * Get the next cmsg header * @@ -128,7 +113,7 @@ struct cmsghdr { * ancillary object DATA. --ANK (980731) */ -__KINLINE struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, +static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, struct cmsghdr *__cmsg) { struct cmsghdr * __ptr; @@ -140,7 +125,7 @@ __KINLINE struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, return __ptr; } -__KINLINE struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg) +static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg) { return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -- cgit v1.2.3 From 23e018a1b083ecb4b8bb2fb43d58e7c19b5d7959 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Oct 2009 08:52:35 +0200 Subject: block: get rid of kblock_schedule_delayed_work() It was briefly introduced to allow CFQ to to delayed scheduling, but we ended up removing that feature again. So lets kill the function and export, and just switch CFQ back to the normal work schedule since it is now passing in a '0' delay from all call sites. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25119041e03..221cecd86bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1172,11 +1172,7 @@ static inline void put_dev_sector(Sector p) } struct work_struct; -struct delayed_work; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); -int kblockd_schedule_delayed_work(struct request_queue *q, - struct delayed_work *work, - unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From 3d76c082907e8f83c5d5c4572f38d53ad8f00c4b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Sep 2009 07:46:32 -0700 Subject: rcu: Clean up code based on review feedback from Josh Triplett, part 3 Whitespace fixes, updated comments, and trivial code movement. o Fix whitespace error in RCU_HEAD_INIT() o Move "So where is rcu_write_lock()" comment so that it does not come between the rcu_read_unlock() header comment and the rcu_read_unlock() definition. o Move the module_param statements for blimit, qhimark, and qlowmark to immediately follow the corresponding definitions. o In __rcu_offline_cpu(), move the assignment to rdp_me inside the "if" statement, given that rdp_me is not used outside of that "if" statement. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12541491931164-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 70331218e4b..3ebd0b7bcb0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -77,7 +77,7 @@ extern int rcu_scheduler_active; #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } +#define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT #define INIT_RCU_HEAD(ptr) do { \ (ptr)->next = NULL; (ptr)->func = NULL; \ @@ -129,12 +129,6 @@ static inline void rcu_read_lock(void) rcu_read_acquire(); } -/** - * rcu_read_unlock - marks the end of an RCU read-side critical section. - * - * See rcu_read_lock() for more information. - */ - /* * So where is rcu_write_lock()? It does not exist, as there is no * way for writers to lock out RCU readers. This is a feature, not @@ -144,6 +138,12 @@ static inline void rcu_read_lock(void) * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ + +/** + * rcu_read_unlock - marks the end of an RCU read-side critical section. + * + * See rcu_read_lock() for more information. + */ static inline void rcu_read_unlock(void) { rcu_read_release(); -- cgit v1.2.3 From f1bce7f80e3b400cf29787b0afa9c3042b959017 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2009 04:16:04 +0900 Subject: libata: cosmetic updates We're about to add more SATA_* and ATA_ACPI_FILTER_* constants. Reformat them in preparation. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/ata.h | 6 +++--- include/linux/libata.h | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 6299a259ed1..7c5beafa972 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -334,9 +334,9 @@ enum { SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ /* SETFEATURE Sector counts for SATA features */ - SATA_AN = 0x05, /* Asynchronous Notification */ - SATA_DIPM = 0x03, /* Device Initiated Power Management */ - SATA_FPDMA_AA = 0x02, /* DMA Setup FIS Auto-Activate */ + SATA_FPDMA_AA = 0x02, /* FPDMA Setup FIS Auto-Activate */ + SATA_DIPM = 0x03, /* Device Initiated Power Management */ + SATA_AN = 0x05, /* Asynchronous Notification */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index 76319bf03e3..5b2f7491fb2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -418,6 +418,15 @@ enum { ATA_TIMING_ACTIVE | ATA_TIMING_RECOVER | ATA_TIMING_DMACK_HOLD | ATA_TIMING_CYCLE | ATA_TIMING_UDMA, + + /* ACPI constants */ + ATA_ACPI_FILTER_SETXFER = 1 << 0, + ATA_ACPI_FILTER_LOCK = 1 << 1, + ATA_ACPI_FILTER_DIPM = 1 << 2, + + ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER | + ATA_ACPI_FILTER_LOCK | + ATA_ACPI_FILTER_DIPM, }; enum ata_xfer_mask { -- cgit v1.2.3 From fa5b561c4ea170caf9759109acc2e961a7e83bea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2009 04:17:02 +0900 Subject: libata: implement more acpi filtering options Currently libata-acpi can only filter DIPM among SATA feature enables via _GTF. This patch adds the capability to filter out FPDMA non-zero offset, in-order guarantee and auto-activation. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/ata.h | 3 +++ include/linux/libata.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 7c5beafa972..4fb357312b3 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -334,9 +334,12 @@ enum { SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ /* SETFEATURE Sector counts for SATA features */ + SATA_FPDMA_OFFSET = 0x01, /* FPDMA non-zero buffer offsets */ SATA_FPDMA_AA = 0x02, /* FPDMA Setup FIS Auto-Activate */ SATA_DIPM = 0x03, /* Device Initiated Power Management */ + SATA_FPDMA_IN_ORDER = 0x04, /* FPDMA in-order data delivery */ SATA_AN = 0x05, /* Asynchronous Notification */ + SATA_SSP = 0x06, /* Software Settings Preservation */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index 5b2f7491fb2..aa52794d2a0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -423,6 +423,8 @@ enum { ATA_ACPI_FILTER_SETXFER = 1 << 0, ATA_ACPI_FILTER_LOCK = 1 << 1, ATA_ACPI_FILTER_DIPM = 1 << 2, + ATA_ACPI_FILTER_FPDMA_OFFSET = 1 << 3, /* FPDMA non-zero offset */ + ATA_ACPI_FILTER_FPDMA_AA = 1 << 4, /* FPDMA auto activate */ ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER | ATA_ACPI_FILTER_LOCK | -- cgit v1.2.3 From 110f66d25c33c2259b1125255fa7063ab07b8340 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2009 04:17:28 +0900 Subject: libata: make gtf_filter per-dev Add ->gtf_filter to ata_device and set it to ata_acpi_gtf_filter when initializing ata_link. This is to allow quirks which apply different gtf filters. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index aa52794d2a0..87698640c09 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -598,6 +598,7 @@ struct ata_device { #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; union acpi_object *gtf_cache; + unsigned int gtf_filter; #endif /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ u64 n_sectors; /* size of device, if ATA */ -- cgit v1.2.3 From acf442dc560437858e6a4c904678052616f8226e Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Mon, 5 Oct 2009 21:43:44 -0700 Subject: Input: fix rx51 board keymap The original driver was written with the KEY() macro defined as (col, row) instead of (row, col) as defined by the matrix keypad infrastructure. So the keymap was defined accordingly. Since the driver that was merged upstream uses the matrix keypad infrastructure, modify the keymap accordingly. While we are at it, fix the comments in twl4030.h and define PERSISTENT_KEY as (r,c) instead of (c, r) Tested on a RX51 (N900) device. Signed-off-by: Amit Kucheria Acked-by: Tony Lindgren Signed-off-by: Dmitry Torokhov --- include/linux/i2c/twl4030.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index 3fd21d7cb6b..007572536ea 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -305,11 +305,11 @@ struct twl4030_madc_platform_data { int irq_line; }; -/* Boards have uniqe mappings of {col, row} --> keycode. - * Column and row are 4 bits, but range only from 0..7. +/* Boards have uniqe mappings of {row, col} --> keycode. + * Column and row are 8 bits each, but range only from 0..7. * a PERSISTENT_KEY is "always on" and never reported. */ -#define PERSISTENT_KEY(c, r) KEY((c), (r), KEY_RESERVED) +#define PERSISTENT_KEY(r, c) KEY((r), (c), KEY_RESERVED) struct twl4030_keypad_data { const struct matrix_keymap_data *keymap_data; -- cgit v1.2.3 From ea2a4d3a3a929ef494952bba57a0ef1a8a877881 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Oct 2009 10:34:13 +0200 Subject: [S390] 64-bit register support for 31-bit processes From: Heiko Carstens From: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 45a937be6d3..90a4ed0ea0e 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -361,6 +361,7 @@ typedef struct elf64_shdr { #define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ +#define NT_PRXSTATUS 0x300 /* s390 upper register halves */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3 From e13dbd7d75d1ecc315c6e3071b3c4e8fba4f6bec Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Tue, 6 Oct 2009 07:38:51 -0400 Subject: perf_events: Make ABI definitions available to userspace Signed-off-by: Chuck Ebbert LKML-Reference: <200910061138.n96BcqkJ004709@int-mx03.intmail.prod.int.phx2.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index cff4a101f26..3f384d4b163 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -126,6 +126,7 @@ header-y += nfs_mount.h header-y += nl80211.h header-y += param.h header-y += pci_regs.h +header-y += perf_event.h header-y += pfkeyv2.h header-y += pg.h header-y += phantom.h -- cgit v1.2.3 From 906010b2134e14a2e377decbadd357b3d0ab9c6a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 21 Sep 2009 16:08:49 +0200 Subject: perf_event: Provide vmalloc() based mmap() backing Some architectures such as Sparc, ARM and MIPS (basically everything with flush_dcache_page()) need to deal with dcache aliases by carefully placing pages in both kernel and user maps. These architectures typically have to use vmalloc_user() for this. However, on other architectures, vmalloc() is not needed and has the downsides of being more restricted and slower than regular allocations. Signed-off-by: Peter Zijlstra Acked-by: David Miller Cc: Andrew Morton Cc: Jens Axboe Cc: Paul Mackerras LKML-Reference: <1254830228.21044.272.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3a9d36d1e92..2e6d95f9741 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -442,6 +442,7 @@ enum perf_callchain_context { #include #include #include +#include #include #define PERF_MAX_STACK_DEPTH 255 @@ -513,6 +514,10 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; +#ifdef CONFIG_PERF_USE_VMALLOC + struct work_struct work; +#endif + int data_order; int nr_pages; /* nr of data pages */ int writable; /* are we writable */ int nr_locked; /* nr pages mlocked */ -- cgit v1.2.3 From 316d315bffa4026f28085f6b24ebcebede370ac7 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 6 Oct 2009 20:16:55 +0200 Subject: block: Seperate read and write statistics of in_flight requests v2 Commit a9327cac440be4d8333bba975cbbf76045096275 added seperate read and write statistics of in_flight requests. And exported the number of read and write requests in progress seperately through sysfs. But Corrado Zoccolo reported getting strange output from "iostat -kx 2". Global values for service time and utilization were garbage. For interval values, utilization was always 100%, and service time is higher than normal. So this was reverted by commit 0f78ab9899e9d6acb09d5465def618704255963b The problem was in part_round_stats_single(), I missed the following: if (now == part->stamp) return; - if (part->in_flight) { + if (part_in_flight(part)) { __part_stat_add(cpu, part, time_in_queue, part_in_flight(part) * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); With this chunk included, the reported regression gets fixed. Signed-off-by: Nikanth Karthikesan -- Signed-off-by: Jens Axboe --- include/linux/genhd.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7beaa21b388..297df45ffd0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -98,7 +98,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - int in_flight; + int in_flight[2]; #ifdef CONFIG_SMP struct disk_stats *dkstats; #else @@ -322,18 +322,23 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part) +static inline void part_inc_in_flight(struct hd_struct *part, int rw) { - part->in_flight++; + part->in_flight[rw]++; if (part->partno) - part_to_disk(part)->part0.in_flight++; + part_to_disk(part)->part0.in_flight[rw]++; } -static inline void part_dec_in_flight(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part, int rw) { - part->in_flight--; + part->in_flight[rw]--; if (part->partno) - part_to_disk(part)->part0.in_flight--; + part_to_disk(part)->part0.in_flight[rw]--; +} + +static inline int part_in_flight(struct hd_struct *part) +{ + return part->in_flight[0] + part->in_flight[1]; } /* block/blk-core.c */ @@ -546,6 +551,8 @@ extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_inflight_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3 From f8d1e548931cfa5ea9a082e020c2a47d27e5d793 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 7 Oct 2009 11:13:58 +1100 Subject: futex: Fix typo in FUTEX_WAIT/WAKE_BITSET_PRIVATE definitions Looks like a typo, FUTEX_WAKE_BITS should be FUTEX_WAIT_BITSET. Signed-off-by: Anton Blanchard LKML-Reference: <20091007001358.GE16073@kryten> Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 34956c8fdeb..78b92ec9edb 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -38,8 +38,8 @@ union ktime; #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) -#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) -#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ -- cgit v1.2.3 From 1f56f4a2b4d12c1c348cab23024024396ec7cddc Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 6 Oct 2009 09:19:45 -0500 Subject: PCI quirk: TI XIO200a erroneously reports support for fast b2b transfers This quirk will disable fast back to back transfer on the secondary bus segment of the TI Bridge. Signed-off-by: Gabe Black Signed-off-by: Jesse Barnes --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index da1fda8623e..f490e7a7307 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -776,6 +776,7 @@ #define PCI_DEVICE_ID_TI_X515 0x8036 #define PCI_DEVICE_ID_TI_XX12 0x8039 #define PCI_DEVICE_ID_TI_XX12_FM 0x803b +#define PCI_DEVICE_ID_TI_XIO2000A 0x8231 #define PCI_DEVICE_ID_TI_1130 0xac12 #define PCI_DEVICE_ID_TI_1031 0xac13 #define PCI_DEVICE_ID_TI_1131 0xac15 -- cgit v1.2.3 From e7247a15ff3bbdab0a8b402dffa1171e5c05a8e0 Mon Sep 17 00:00:00 2001 From: "jolsa@redhat.com" Date: Wed, 7 Oct 2009 19:00:35 +0200 Subject: tracing: correct module boundaries for ftrace_release When the module is about the unload we release its call records. The ftrace_release function was given wrong values representing the module core boundaries, thus not releasing its call records. Plus making ftrace_release function module specific. Signed-off-by: Jiri Olsa LKML-Reference: <1254934835-363-3-git-send-email-jolsa@redhat.com> Cc: stable@kernel.org Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index cd3d2abaf30..0b4f97d24d7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -241,7 +241,7 @@ extern void ftrace_enable_daemon(void); # define ftrace_set_filter(buf, len, reset) do { } while (0) # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) -static inline void ftrace_release(void *start, unsigned long size) { } +static inline void ftrace_release_mod(struct module *mod) {} static inline int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; -- cgit v1.2.3 From d308e38fa5467fbb523fc13e4b984375c2198c3d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 7 Oct 2009 13:53:11 -0700 Subject: include/linux/netdevice.h: fix nanodoc mismatch nanodoc was missing an ndo_-prefix. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94958c10976..812a5f3c2ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -557,7 +557,7 @@ struct netdev_queue { * Callback uses when the transmitter has not made any progress * for dev->watchdog ticks. * - * struct net_device_stats* (*get_stats)(struct net_device *dev); + * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); * Called when a user wants to get the network device usage * statistics. If not defined, the counters in dev->stats will * be used. -- cgit v1.2.3 From a4720c650b68a5fe7faed2edeb0ad12645f7ae63 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 9 Oct 2009 12:43:12 -0400 Subject: USB: serial: don't call release without attach This patch (as1295) fixes a recently-added bug in the USB serial core. If certain kinds of errors occur during probing, the core may call a serial driver's release method without previously calling the attach method. This causes some drivers (io_ti in particular) to perform an invalid memory access. The patch adds a new flag to keep track of whether or not attach has been called. Signed-off-by: Alan Stern Tested-by: Jean-Denis Girard CC: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/serial.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index c17eb64d721..ce911ebf91e 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -150,6 +150,7 @@ struct usb_serial { struct usb_interface *interface; unsigned char disconnected:1; unsigned char suspending:1; + unsigned char attached:1; unsigned char minor; unsigned char num_ports; unsigned char num_port_pointers; -- cgit v1.2.3 From d43c36dc6b357fa1806800f18aa30123c747a6d1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 7 Oct 2009 17:09:06 +0400 Subject: headers: remove sched.h from interrupt.h After m68k's task_thread_info() doesn't refer to current, it's possible to remove sched.h from interrupt.h and not break m68k! Many thanks to Heiko Carstens for allowing this. Signed-off-by: Alexey Dobriyan --- include/linux/interrupt.h | 2 +- include/linux/mmc/host.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b78cf819495..7ca72b74eec 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -610,6 +609,7 @@ extern void debug_poll_all_shared_irqs(void); static inline void debug_poll_all_shared_irqs(void) { } #endif +struct seq_file; int show_interrupts(struct seq_file *p, void *v); struct irq_desc; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 81bb4235859..eaf36364b7d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -11,6 +11,7 @@ #define LINUX_MMC_HOST_H #include +#include #include -- cgit v1.2.3 From c01226c3145d173a0d38f9d5b4f229cc23d99ae2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 21 Sep 2009 16:37:12 +0200 Subject: warn about use of uninstalled kernel headers User applications frequently hit problems when they try to use the kernel headers directly, rather than the exported headers. This adds an explicit warning for this case, and points to a URL holding an explanation of why this is wrong and what to do about it. Signed-off-by: Arnd Bergmann Signed-off-by: Sam Ravnborg --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d3cd23f3003..f4e3184fa05 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -659,6 +659,12 @@ extern int do_sysinfo(struct sysinfo *info); #endif /* __KERNEL__ */ +#ifndef __EXPORTED_HEADERS__ +#ifndef __KERNEL__ +#warning Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders +#endif /* __KERNEL__ */ +#endif /* __EXPORTED_HEADERS__ */ + #define SI_LOAD_SHIFT 16 struct sysinfo { long uptime; /* Seconds since boot */ -- cgit v1.2.3 From 43046b606673c9c991919ff75b980b72541e9ede Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 14 Oct 2009 09:16:42 -0700 Subject: workqueue: add 'flush_delayed_work()' to run and wait for delayed work It basically turns a delayed work into an immediate work, and then waits for it to finish. --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7ef0c7b94f3..cf24c20de9e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -207,6 +207,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); +extern void flush_delayed_work(struct delayed_work *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); -- cgit v1.2.3