From 8d00a6c8f6b08e7167bc03bf955cdc7e47c5132e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 22 Jul 2008 08:39:57 +0200 Subject: genirq: remove last NO_IDLE_HZ leftovers Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 8ccb462ea42..f3047df2d23 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -197,10 +197,6 @@ extern int setup_irq(unsigned int irq, struct irqaction *new); #ifdef CONFIG_GENERIC_HARDIRQS -#ifndef handle_dynamic_tick -# define handle_dynamic_tick(a) do { } while (0) -#endif - #ifdef CONFIG_SMP #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) -- cgit v1.2.3 From 6902c0bead4ce266226fc0c5b3828b850bdc884a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 6 Jun 2008 01:33:22 -0400 Subject: Input: gameport - make gameport_register_driver() return errors Perform actual driver registration right in gameport_register_driver() instead of offloading it to kgameportd and return proper error code to callers if driver registration fails. Note that driver <-> port matching is still done by kgameportd. Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index f64e29c0ef3..5126125afd4 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -146,10 +146,11 @@ static inline void gameport_unpin_driver(struct gameport *gameport) mutex_unlock(&gameport->drv_mutex); } -void __gameport_register_driver(struct gameport_driver *drv, struct module *owner); -static inline void gameport_register_driver(struct gameport_driver *drv) +int __gameport_register_driver(struct gameport_driver *drv, + struct module *owner, const char *mod_name); +static inline int gameport_register_driver(struct gameport_driver *drv) { - __gameport_register_driver(drv, THIS_MODULE); + return __gameport_register_driver(drv, THIS_MODULE, KBUILD_MODNAME); } void gameport_unregister_driver(struct gameport_driver *drv); -- cgit v1.2.3 From 8c4b3c29329eb7ffded3023e6d65bc415cb4e215 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 6 Jun 2008 01:33:51 -0400 Subject: Input: gameport - mark gameport_register_driver() __must_check Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 5126125afd4..0cd825f7363 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -148,7 +148,7 @@ static inline void gameport_unpin_driver(struct gameport *gameport) int __gameport_register_driver(struct gameport_driver *drv, struct module *owner, const char *mod_name); -static inline int gameport_register_driver(struct gameport_driver *drv) +static inline int __must_check gameport_register_driver(struct gameport_driver *drv) { return __gameport_register_driver(drv, THIS_MODULE, KBUILD_MODNAME); } -- cgit v1.2.3 From 03bac96fae0efdb25e2059e5accbe4f3ee6328dd Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 23 Jun 2008 10:47:34 -0400 Subject: Input: expand keycode space Expand the number of potential key codes from 512 to 768 since people are coming up with more and more keys. Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 +- include/linux/mod_devicetable.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index a5802c9c81a..7fae1dee356 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -579,7 +579,7 @@ struct input_absinfo { /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE -#define KEY_MAX 0x1ff +#define KEY_MAX 0x2ff #define KEY_CNT (KEY_MAX+1) /* diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index c4db5827963..0dddfa44ec1 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -274,7 +274,7 @@ struct pcmcia_device_id { /* Input */ #define INPUT_DEVICE_ID_EV_MAX 0x1f #define INPUT_DEVICE_ID_KEY_MIN_INTERESTING 0x71 -#define INPUT_DEVICE_ID_KEY_MAX 0x1ff +#define INPUT_DEVICE_ID_KEY_MAX 0x2ff #define INPUT_DEVICE_ID_REL_MAX 0x0f #define INPUT_DEVICE_ID_ABS_MAX 0x3f #define INPUT_DEVICE_ID_MSC_MAX 0x07 -- cgit v1.2.3 From 5a599a15182ed48e5bf54111feb3b21e425e194d Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 23 Jun 2008 10:47:53 -0400 Subject: Input: add keycodes for remote controls/phone keypads The new keys are separate from normal numeric keys and standard numeric keypads. The userspace should not attempt to apply modifiers like shift and NumLock to these so tey work properly regardless of the language mapping used. Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 7fae1dee356..b86fb5581ce 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -577,6 +577,19 @@ struct input_absinfo { #define KEY_BRL_DOT9 0x1f9 #define KEY_BRL_DOT10 0x1fa +#define KEY_NUMERIC_0 0x200 /* used by phones, remote controls, */ +#define KEY_NUMERIC_1 0x201 /* and other keypads */ +#define KEY_NUMERIC_2 0x202 +#define KEY_NUMERIC_3 0x203 +#define KEY_NUMERIC_4 0x204 +#define KEY_NUMERIC_5 0x205 +#define KEY_NUMERIC_6 0x206 +#define KEY_NUMERIC_7 0x207 +#define KEY_NUMERIC_8 0x208 +#define KEY_NUMERIC_9 0x209 +#define KEY_NUMERIC_STAR 0x20a +#define KEY_NUMERIC_POUND 0x20b + /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE #define KEY_MAX 0x2ff -- cgit v1.2.3 From feb2f55db45919aa80731f8877b60cab454b7b94 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Aug 2008 11:53:29 +0300 Subject: [MTD] [OneNAND] Add defines for HF and sync write Signed-off-by: Adrian Hunter Signed-off-by: David Woodhouse --- include/linux/mtd/onenand_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h index d1b310c92eb..0c6bbe28f38 100644 --- a/include/linux/mtd/onenand_regs.h +++ b/include/linux/mtd/onenand_regs.h @@ -152,6 +152,8 @@ #define ONENAND_SYS_CFG1_INT (1 << 6) #define ONENAND_SYS_CFG1_IOBE (1 << 5) #define ONENAND_SYS_CFG1_RDY_CONF (1 << 4) +#define ONENAND_SYS_CFG1_HF (1 << 2) +#define ONENAND_SYS_CFG1_SYNC_WRITE (1 << 1) /* * Controller Status Register F240h (R) -- cgit v1.2.3 From 2e489e077a6ad118c4f247faedf330117b107cce Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 5 Aug 2008 16:39:42 +0100 Subject: [MTD] [NOR] Add qry_mode_on()/qry_omde_off() to deal with odd chips There are some CFI chips which require non standard procedures to get into QRY mode. The possible way to support them would be trying different modes till QRY will be read. This patch introduce two new functions qry_mode_on qry_mode_off. qry_mode_on tries different commands in order switch chip into QRY mode. So if we have one more "odd" chip - we just could add several lines to qry_mode_on. Also using these functions remove unnecessary code duplicaton in porbe procedure. Currently there are two "odd" cases 1. Some old intel chips which require 0xFF before 0x98 2. ST M29DW chip which requires 0x98 to be sent at 0x555 (according to CFI should be 0x55) This patch is partialy based on the patch from Uwe (see "[PATCH 2/4] [RFC][MTD] cfi_probe: remove Intel chip workaround" thread ) Signed-off-by: Alexey Korolev Signed-off-by: Alexander Belyakov Signed-off-by: David Woodhouse --- include/linux/mtd/cfi.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index d6fb115f5a0..3058917d7b9 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_MTD_CFI_I1 #define cfi_interleave(cfi) 1 @@ -430,7 +431,6 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t { map_word val; uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type); - val = cfi_build_cmd(cmd, map, cfi); if (prev_val) @@ -483,6 +483,13 @@ static inline void cfi_udelay(int us) } } +int __xipram qry_present(struct map_info *map, __u32 base, + struct cfi_private *cfi); +int __xipram qry_mode_on(uint32_t base, struct map_info *map, + struct cfi_private *cfi); +void __xipram qry_mode_off(uint32_t base, struct map_info *map, + struct cfi_private *cfi); + struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size, const char* name); struct cfi_fixup { -- cgit v1.2.3 From e93cafe45fd74935e0aca2b79e533f0e3ed9640f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Grafstr=C3=B6m?= Date: Tue, 5 Aug 2008 18:37:41 +0200 Subject: [MTD] [NOR] cfi_cmdset_0001: Timeouts for erase, write and unlock operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Timeouts are currently given by the typical operation time times 8. It works in the general well-behaved case but not when an erase block is failing. For erase operations, it seems that a failing erase block will keep the device state machine in erasing state until the vendor specified maximum timeout period has passed. By this time the driver would have long since timed out, left erasing state and attempted further operations which all fail. This patch implements timeouts using values from the CFI Query structure when available. The patch also sets a longer timeout for locking operations. The current value used for locking/unlocking given by 1000000/HZ microseconds is too short for devices like J3 and J5 Strataflash which have a typical clear lock-bits time of 0.5 seconds. Signed-off-by: Anders Grafström Signed-off-by: David Woodhouse --- include/linux/mtd/flashchip.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index 08dd131301c..d4f38c5fd44 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -73,6 +73,10 @@ struct flchip { int buffer_write_time; int erase_time; + int word_write_time_max; + int buffer_write_time_max; + int erase_time_max; + void *priv; }; -- cgit v1.2.3 From c314dfdc358847eef0fc07ec8682e1acc8cadd00 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 7 Aug 2008 11:55:07 +0100 Subject: [MTD] [NOR] Rename and export new cfi_qry_*() functions They need to be exported, so let's give them less generic-sounding names while we're at it. Original export patch, along with the suggestion about the nomenclature, from Stephen Rothwell. Signed-off-by: David Woodhouse --- include/linux/mtd/cfi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 3058917d7b9..ee5124ec319 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -483,12 +483,12 @@ static inline void cfi_udelay(int us) } } -int __xipram qry_present(struct map_info *map, __u32 base, - struct cfi_private *cfi); -int __xipram qry_mode_on(uint32_t base, struct map_info *map, - struct cfi_private *cfi); -void __xipram qry_mode_off(uint32_t base, struct map_info *map, - struct cfi_private *cfi); +int __xipram cfi_qry_present(struct map_info *map, __u32 base, + struct cfi_private *cfi); +int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map, + struct cfi_private *cfi); +void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map, + struct cfi_private *cfi); struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size, const char* name); -- cgit v1.2.3 From b845b517b5e3706a3729f6ea83b88ab85f0725b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Aug 2008 21:47:09 +0200 Subject: printk: robustify printk Avoid deadlocks against rq->lock and xtime_lock by deferring the klogd wakeup by polling from the timer tick. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index aaa998f65c7..113ac8d0425 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -200,6 +200,8 @@ extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); +extern void printk_tick(void); +extern int printk_needs_cpu(int); #else static inline int vprintk(const char *s, va_list args) __attribute__ ((format (printf, 1, 0))); @@ -211,6 +213,8 @@ static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } +static inline void printk_tick(void) { } +static inline int printk_needs_cpu(int) { return 0; } #endif extern void asmlinkage __attribute__((format(printf, 1, 2))) -- cgit v1.2.3 From ced9cd40ac14111befd6b0c73ec90106c22a3fd7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 11 Aug 2008 14:38:12 +0200 Subject: printk: robustify printk, fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: include/linux/kernel.h: In function ‘printk_needs_cpu': include/linux/kernel.h:217: error: parameter name omitted Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 113ac8d0425..3652a456412 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -200,8 +200,6 @@ extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); -extern void printk_tick(void); -extern int printk_needs_cpu(int); #else static inline int vprintk(const char *s, va_list args) __attribute__ ((format (printf, 1, 0))); @@ -213,10 +211,11 @@ static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } -static inline void printk_tick(void) { } -static inline int printk_needs_cpu(int) { return 0; } #endif +extern int printk_needs_cpu(int cpu); +extern void printk_tick(void); + extern void asmlinkage __attribute__((format(printf, 1, 2))) early_printk(const char *fmt, ...); -- cgit v1.2.3 From bb0eb217c980d50c45f3e793b4dcc70ab9ee820d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 12 Aug 2008 12:40:50 +0300 Subject: [MTD] Define and use MTD_FAIL_ADDR_UNKNOWN instead of 0xffffffff Signed-off-by: Adrian Hunter Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 92263654855..eae26bb6430 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -25,8 +25,10 @@ #define MTD_ERASE_DONE 0x08 #define MTD_ERASE_FAILED 0x10 +#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff + /* If the erase fails, fail_addr might indicate exactly which block failed. If - fail_addr = 0xffffffff, the failure was not at the device level or was not + fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not specific to any particular block. */ struct erase_info { struct mtd_info *mtd; -- cgit v1.2.3 From 17c1d2be28e485c0c8b09661db39d5bf2605069d Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Wed, 20 Aug 2008 22:32:08 +0100 Subject: [MTD] [NAND] Fix missing kernel-doc [Reported by Randy Dunlap] Signed-off-by: Alexey Korolev Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 81774e5facf..733d3f3b4eb 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -248,6 +248,7 @@ struct nand_hw_control { * @read_page_raw: function to read a raw page without ECC * @write_page_raw: function to write a raw page without ECC * @read_page: function to read a page according to the ecc generator requirements + * @read_subpage: function to read parts of the page covered by ECC. * @write_page: function to write a page according to the ecc generator requirements * @read_oob: function to read chip OOB data * @write_oob: function to write chip OOB data -- cgit v1.2.3 From 1aa5dfb751d275ae7117d3b73ac423b4a46f2a73 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 20 Aug 2008 16:37:28 -0700 Subject: clocksource: keep track of original clocksource frequency The clocksource frequency is represented by clocksource->mult/2^(clocksource->shift). Currently, when NTP makes adjustments to the clock frequency, they are made directly to the mult value. This has the drawback that once changed, we cannot know what the orignal mult value was, or how much adjustment has been applied. This property causes problems in calculating proper ntp intervals when switching back and forth between clocksources. This patch separates the current mult value into a mult and mult_orig pair. The mult_orig value stays constant, while the ntp clocksource adjustments are done only to the mult value. This allows for correct ntp interval calculation and additionally lays the groundwork for a new notion of time, what I'm calling the monotonic-raw time, which is introduced in a following patch. Signed-off-by: John Stultz Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 55e434feec9..f0a7fb98441 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -45,7 +45,8 @@ struct clocksource; * @read: returns a cycle value * @mask: bitmask for two's complement * subtraction of non 64 bit counters - * @mult: cycle to nanosecond multiplier + * @mult: cycle to nanosecond multiplier (adjusted by NTP) + * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP) * @shift: cycle to nanosecond divisor (power of two) * @flags: flags describing special properties * @vread: vsyscall based read @@ -63,6 +64,7 @@ struct clocksource { cycle_t (*read)(void); cycle_t mask; u32 mult; + u32 mult_orig; u32 shift; unsigned long flags; cycle_t (*vread)(void); @@ -201,16 +203,17 @@ static inline void clocksource_calculate_interval(struct clocksource *c, { u64 tmp; - /* XXX - All of this could use a whole lot of optimization */ + /* Do the ns -> cycle conversion first, using original mult */ tmp = length_nsec; tmp <<= c->shift; - tmp += c->mult/2; - do_div(tmp, c->mult); + tmp += c->mult_orig/2; + do_div(tmp, c->mult_orig); c->cycle_interval = (cycle_t)tmp; if (c->cycle_interval == 0) c->cycle_interval = 1; + /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ c->xtime_interval = (u64)c->cycle_interval * c->mult; } -- cgit v1.2.3 From 2d42244ae71d6c7b0884b5664cf2eda30fb2ae68 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 20 Aug 2008 16:37:30 -0700 Subject: clocksource: introduce CLOCK_MONOTONIC_RAW In talking with Josip Loncaric, and his work on clock synchronization (see btime.sf.net), he mentioned that for really close synchronization, it is useful to have access to "hardware time", that is a notion of time that is not in any way adjusted by the clock slewing done to keep close time sync. Part of the issue is if we are using the kernel's ntp adjusted representation of time in order to measure how we should correct time, we can run into what Paul McKenney aptly described as "Painting a road using the lines we're painting as the guide". I had been thinking of a similar problem, and was trying to come up with a way to give users access to a purely hardware based time representation that avoided users having to know the underlying frequency and mask values needed to deal with the wide variety of possible underlying hardware counters. My solution is to introduce CLOCK_MONOTONIC_RAW. This exposes a nanosecond based time value, that increments starting at bootup and has no frequency adjustments made to it what so ever. The time is accessed from userspace via the posix_clock_gettime() syscall, passing CLOCK_MONOTONIC_RAW as the clock_id. Signed-off-by: John Stultz Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 3 +++ include/linux/time.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f0a7fb98441..f88d32f8ff7 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -79,6 +79,7 @@ struct clocksource { /* timekeeping specific data, ignore */ cycle_t cycle_interval; u64 xtime_interval; + u32 raw_interval; /* * Second part is written at each timer interrupt * Keep it in a different cache line to dirty no @@ -87,6 +88,7 @@ struct clocksource { cycle_t cycle_last ____cacheline_aligned_in_smp; u64 xtime_nsec; s64 error; + struct timespec raw_time; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ @@ -215,6 +217,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c, /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ c->xtime_interval = (u64)c->cycle_interval * c->mult; + c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; } diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82..205f974b9eb 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -117,6 +117,7 @@ extern int do_setitimer(int which, struct itimerval *value, extern unsigned int alarm_setitimer(unsigned int seconds); extern int do_getitimer(int which, struct itimerval *value); extern void getnstimeofday(struct timespec *tv); +extern void getrawmonotonic(struct timespec *ts); extern void getboottime(struct timespec *ts); extern void monotonic_to_bootbased(struct timespec *ts); @@ -214,6 +215,7 @@ struct itimerval { #define CLOCK_MONOTONIC 1 #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 /* * The IDs of various hardware clocks: -- cgit v1.2.3 From 916c7a855174e3b53d182b97a26b2e27a29726a1 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Wed, 20 Aug 2008 16:46:08 -0700 Subject: ntp: fix ADJ_OFFSET_SS_READ bug and do_adjtimex() cleanup Thanks to the review by Michael Kerrisk a bug in the recent ADJ_OFFSET_SS_READ option was discovered, where the ntp time_offset was inadvertently set by it. This fixes this by making the adjtime code more separate from the ntp_adjtime code (both of which really want to be separate syscalls). Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Acked-by: John Stultz Signed-off-by: Ingo Molnar --- include/linux/timex.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index fc6035d29d5..c00bcdd3ae4 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -141,8 +141,15 @@ struct timex { #define ADJ_MICRO 0x1000 /* select microsecond resolution */ #define ADJ_NANO 0x2000 /* select nanosecond resolution */ #define ADJ_TICK 0x4000 /* tick value */ + +#ifdef __KERNEL__ +#define ADJ_ADJTIME 0x8000 /* switch between adjtime/adjtimex modes */ +#define ADJ_OFFSET_SINGLESHOT 0x0001 /* old-fashioned adjtime */ +#define ADJ_OFFSET_READONLY 0x2000 /* read-only adjtime */ +#else #define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */ -#define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */ +#define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */ +#endif /* xntp 3.4 compatibility names */ #define MOD_OFFSET ADJ_OFFSET -- cgit v1.2.3 From 942ed161944b3476639916cf544e6975b29c985a Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 26 Aug 2008 21:09:59 +0100 Subject: power_supply: Add function to return system-wide power state Certain drivers benefit from knowing whether the system is on ac or battery, for instance when determining which backlight registers to read. This adds a simple call to determine whether there's an online power supply other than any batteries. Signed-off-by: Matthew Garrett Signed-off-by: Anton Vorontsov --- include/linux/power_supply.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index ea96ead1d39..f9348cba6dc 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -165,6 +165,12 @@ struct power_supply_info { extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); +#if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE) +extern int power_supply_is_system_supplied(void); +#else +static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } +#endif + extern int power_supply_register(struct device *parent, struct power_supply *psy); extern void power_supply_unregister(struct power_supply *psy); -- cgit v1.2.3 From 978b0116cd225682a29e3d1d5010319bf2de32c2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 6 Sep 2008 20:04:36 +0200 Subject: softirq: allocate less vectors We don't need whole 32 of them, only NR_SOFTIRQS. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 58ff4e74b2f..54b3623434e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -252,6 +252,8 @@ enum HRTIMER_SOFTIRQ, #endif RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ + + NR_SOFTIRQS }; /* softirq mask and active fields moved to irq_cpustat_t in -- cgit v1.2.3 From 4d5975e5016a9025814b92981de21eaf9203caa6 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 10 Sep 2008 12:06:15 -0400 Subject: Input: ads7846 - introduce .gpio_pendown to get pendown state The GPIO connected to ADS7846 nPENIRQ signal is usually used to get the pendown state as well. Introduce a .gpio_pendown, and use this to decide the pendown state if .get_pendown_state is NULL. Signed-off-by: Eric Miao Signed-off-by: Dmitry Torokhov --- include/linux/spi/ads7846.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index daf744017a3..05eab2f11e6 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -43,6 +43,9 @@ struct ads7846_platform_data { u16 debounce_tol; /* tolerance used for filtering */ u16 debounce_rep; /* additional consecutive good readings * required after the first two */ + int gpio_pendown; /* the GPIO used to decide the pendown + * state if get_pendown_state == NULL + */ int (*get_pendown_state)(void); int (*filter_init) (struct ads7846_platform_data *pdata, void **filter_data); -- cgit v1.2.3 From f06febc96ba8e0af80bcc3eaec0a109e88275fac Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang Overview This patch reworks the handling of POSIX CPU timers, including the ITIMER_PROF, ITIMER_VIRT timers and rlimit handling. It was put together with the help of Roland McGrath, the owner and original writer of this code. The problem we ran into, and the reason for this rework, has to do with using a profiling timer in a process with a large number of threads. It appears that the performance of the old implementation of run_posix_cpu_timers() was at least O(n*3) (where "n" is the number of threads in a process) or worse. Everything is fine with an increasing number of threads until the time taken for that routine to run becomes the same as or greater than the tick time, at which point things degrade rather quickly. This patch fixes bug 9906, "Weird hang with NPTL and SIGPROF." Code Changes This rework corrects the implementation of run_posix_cpu_timers() to make it run in constant time for a particular machine. (Performance may vary between one machine and another depending upon whether the kernel is built as single- or multiprocessor and, in the latter case, depending upon the number of running processors.) To do this, at each tick we now update fields in signal_struct as well as task_struct. The run_posix_cpu_timers() function uses those fields to make its decisions. We define a new structure, "task_cputime," to contain user, system and scheduler times and use these in appropriate places: struct task_cputime { cputime_t utime; cputime_t stime; unsigned long long sum_exec_runtime; }; This is included in the structure "thread_group_cputime," which is a new substructure of signal_struct and which varies for uniprocessor versus multiprocessor kernels. For uniprocessor kernels, it uses "task_cputime" as a simple substructure, while for multiprocessor kernels it is a pointer: struct thread_group_cputime { struct task_cputime totals; }; struct thread_group_cputime { struct task_cputime *totals; }; We also add a new task_cputime substructure directly to signal_struct, to cache the earliest expiration of process-wide timers, and task_cputime also replaces the it_*_expires fields of task_struct (used for earliest expiration of thread timers). The "thread_group_cputime" structure contains process-wide timers that are updated via account_user_time() and friends. In the non-SMP case the structure is a simple aggregator; unfortunately in the SMP case that simplicity was not achievable due to cache-line contention between CPUs (in one measured case performance was actually _worse_ on a 16-cpu system than the same test on a 4-cpu system, due to this contention). For SMP, the thread_group_cputime counters are maintained as a per-cpu structure allocated using alloc_percpu(). The timer functions update only the timer field in the structure corresponding to the running CPU, obtained using per_cpu_ptr(). We define a set of inline functions in sched.h that we use to maintain the thread_group_cputime structure and hide the differences between UP and SMP implementations from the rest of the kernel. The thread_group_cputime_init() function initializes the thread_group_cputime structure for the given task. The thread_group_cputime_alloc() is a no-op for UP; for SMP it calls the out-of-line function thread_group_cputime_alloc_smp() to allocate and fill in the per-cpu structures and fields. The thread_group_cputime_free() function, also a no-op for UP, in SMP frees the per-cpu structures. The thread_group_cputime_clone_thread() function (also a UP no-op) for SMP calls thread_group_cputime_alloc() if the per-cpu structures haven't yet been allocated. The thread_group_cputime() function fills the task_cputime structure it is passed with the contents of the thread_group_cputime fields; in UP it's that simple but in SMP it must also safely check that tsk->signal is non-NULL (if it is it just uses the appropriate fields of task_struct) and, if so, sums the per-cpu values for each online CPU. Finally, the three functions account_group_user_time(), account_group_system_time() and account_group_exec_runtime() are used by timer functions to update the respective fields of the thread_group_cputime structure. Non-SMP operation is trivial and will not be mentioned further. The per-cpu structure is always allocated when a task creates its first new thread, via a call to thread_group_cputime_clone_thread() from copy_signal(). It is freed at process exit via a call to thread_group_cputime_free() from cleanup_signal(). All functions that formerly summed utime/stime/sum_sched_runtime values from from all threads in the thread group now use thread_group_cputime() to snapshot the values in the thread_group_cputime structure or the values in the task structure itself if the per-cpu structure hasn't been allocated. Finally, the code in kernel/posix-cpu-timers.c has changed quite a bit. The run_posix_cpu_timers() function has been split into a fast path and a slow path; the former safely checks whether there are any expired thread timers and, if not, just returns, while the slow path does the heavy lifting. With the dedicated thread group fields, timers are no longer "rebalanced" and the process_timer_rebalance() function and related code has gone away. All summing loops are gone and all code that used them now uses the thread_group_cputime() inline. When process-wide timers are set, the new task_cputime structure in signal_struct is used to cache the earliest expiration; this is checked in the fast path. Performance The fix appears not to add significant overhead to existing operations. It generally performs the same as the current code except in two cases, one in which it performs slightly worse (Case 5 below) and one in which it performs very significantly better (Case 2 below). Overall it's a wash except in those two cases. I've since done somewhat more involved testing on a dual-core Opteron system. Case 1: With no itimer running, for a test with 100,000 threads, the fixed kernel took 1428.5 seconds, 513 seconds more than the unfixed system, all of which was spent in the system. There were twice as many voluntary context switches with the fix as without it. Case 2: With an itimer running at .01 second ticks and 4000 threads (the most an unmodified kernel can handle), the fixed kernel ran the test in eight percent of the time (5.8 seconds as opposed to 70 seconds) and had better tick accuracy (.012 seconds per tick as opposed to .023 seconds per tick). Case 3: A 4000-thread test with an initial timer tick of .01 second and an interval of 10,000 seconds (i.e. a timer that ticks only once) had very nearly the same performance in both cases: 6.3 seconds elapsed for the fixed kernel versus 5.5 seconds for the unfixed kernel. With fewer threads (eight in these tests), the Case 1 test ran in essentially the same time on both the modified and unmodified kernels (5.2 seconds versus 5.8 seconds). The Case 2 test ran in about the same time as well, 5.9 seconds versus 5.4 seconds but again with much better tick accuracy, .013 seconds per tick versus .025 seconds per tick for the unmodified kernel. Since the fix affected the rlimit code, I also tested soft and hard CPU limits. Case 4: With a hard CPU limit of 20 seconds and eight threads (and an itimer running), the modified kernel was very slightly favored in that while it killed the process in 19.997 seconds of CPU time (5.002 seconds of wall time), only .003 seconds of that was system time, the rest was user time. The unmodified kernel killed the process in 20.001 seconds of CPU (5.014 seconds of wall time) of which .016 seconds was system time. Really, though, the results were too close to call. The results were essentially the same with no itimer running. Case 5: With a soft limit of 20 seconds and a hard limit of 2000 seconds (where the hard limit would never be reached) and an itimer running, the modified kernel exhibited worse tick accuracy than the unmodified kernel: .050 seconds/tick versus .028 seconds/tick. Otherwise, performance was almost indistinguishable. With no itimer running this test exhibited virtually identical behavior and times in both cases. In times past I did some limited performance testing. those results are below. On a four-cpu Opteron system without this fix, a sixteen-thread test executed in 3569.991 seconds, of which user was 3568.435s and system was 1.556s. On the same system with the fix, user and elapsed time were about the same, but system time dropped to 0.007 seconds. Performance with eight, four and one thread were comparable. Interestingly, the timer ticks with the fix seemed more accurate: The sixteen-thread test with the fix received 149543 ticks for 0.024 seconds per tick, while the same test without the fix received 58720 for 0.061 seconds per tick. Both cases were configured for an interval of 0.01 seconds. Again, the other tests were comparable. Each thread in this test computed the primes up to 25,000,000. I also did a test with a large number of threads, 100,000 threads, which is impossible without the fix. In this case each thread computed the primes only up to 10,000 (to make the runtime manageable). System time dominated, at 1546.968 seconds out of a total 2176.906 seconds (giving a user time of 629.938s). It received 147651 ticks for 0.015 seconds per tick, still quite accurate. There is obviously no comparable test without the fix. Signed-off-by: Frank Mayhar Cc: Roland McGrath Cc: Alexey Dobriyan Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/posix-timers.h | 2 + include/linux/sched.h | 257 ++++++++++++++++++++++++++++++++++++++++--- include/linux/time.h | 3 + 3 files changed, 249 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a7dd38f30ad..f9d8e9e94e9 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -115,4 +115,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, long clock_nanosleep_restart(struct restart_block *restart_block); +void update_rlimit_cpu(unsigned long rlim_new); + #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d9120c5ad1..26d7a5f2d0b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -425,6 +425,45 @@ struct pacct_struct { unsigned long ac_minflt, ac_majflt; }; +/** + * struct task_cputime - collected CPU time counts + * @utime: time spent in user mode, in &cputime_t units + * @stime: time spent in kernel mode, in &cputime_t units + * @sum_exec_runtime: total time spent on the CPU, in nanoseconds + * + * This structure groups together three kinds of CPU time that are + * tracked for threads and thread groups. Most things considering + * CPU time want to group these counts together and treat all three + * of them in parallel. + */ +struct task_cputime { + cputime_t utime; + cputime_t stime; + unsigned long long sum_exec_runtime; +}; +/* Alternate field names when used to cache expirations. */ +#define prof_exp stime +#define virt_exp utime +#define sched_exp sum_exec_runtime + +/** + * struct thread_group_cputime - thread group interval timer counts + * @totals: thread group interval timers; substructure for + * uniprocessor kernel, per-cpu for SMP kernel. + * + * This structure contains the version of task_cputime, above, that is + * used for thread group CPU clock calculations. + */ +#ifdef CONFIG_SMP +struct thread_group_cputime { + struct task_cputime *totals; +}; +#else +struct thread_group_cputime { + struct task_cputime totals; +}; +#endif + /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -470,6 +509,17 @@ struct signal_struct { cputime_t it_prof_expires, it_virt_expires; cputime_t it_prof_incr, it_virt_incr; + /* + * Thread group totals for process CPU clocks. + * See thread_group_cputime(), et al, for details. + */ + struct thread_group_cputime cputime; + + /* Earliest-expiration cache. */ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + /* job control IDs */ /* @@ -500,7 +550,7 @@ struct signal_struct { * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ - cputime_t utime, stime, cutime, cstime; + cputime_t cutime, cstime; cputime_t gtime; cputime_t cgtime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; @@ -508,14 +558,6 @@ struct signal_struct { unsigned long inblock, oublock, cinblock, coublock; struct task_io_accounting ioac; - /* - * Cumulative ns of scheduled CPU time for dead threads in the - * group, not including a zombie group leader. (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs @@ -527,8 +569,6 @@ struct signal_struct { */ struct rlimit rlim[RLIM_NLIMITS]; - struct list_head cpu_timers[3]; - /* keep the process-shared keyrings here so that they do the right * thing in threads created with CLONE_THREAD */ #ifdef CONFIG_KEYS @@ -1134,8 +1174,7 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; - cputime_t it_prof_expires, it_virt_expires; - unsigned long long it_sched_expires; + struct task_cputime cputime_expires; struct list_head cpu_timers[3]; /* process credentials */ @@ -1585,6 +1624,7 @@ extern unsigned long long cpu_clock(int cpu); extern unsigned long long task_sched_runtime(struct task_struct *task); +extern unsigned long long thread_group_sched_runtime(struct task_struct *task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP @@ -2081,6 +2121,197 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } +/* + * Thread group CPU time accounting. + */ +#ifdef CONFIG_SMP + +extern int thread_group_cputime_alloc_smp(struct task_struct *); +extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *); + +static inline void thread_group_cputime_init(struct signal_struct *sig) +{ + sig->cputime.totals = NULL; +} + +static inline int thread_group_cputime_clone_thread(struct task_struct *curr, + struct task_struct *new) +{ + if (curr->signal->cputime.totals) + return 0; + return thread_group_cputime_alloc_smp(curr); +} + +static inline void thread_group_cputime_free(struct signal_struct *sig) +{ + free_percpu(sig->cputime.totals); +} + +/** + * thread_group_cputime - Sum the thread group time fields across all CPUs. + * + * This is a wrapper for the real routine, thread_group_cputime_smp(). See + * that routine for details. + */ +static inline void thread_group_cputime( + struct task_struct *tsk, + struct task_cputime *times) +{ + thread_group_cputime_smp(tsk, times); +} + +/** + * thread_group_cputime_account_user - Maintain utime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the utime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. + */ +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->utime = cputime_add(times->utime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_system - Maintain stime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the stime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->stime = cputime_add(times->stime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a + * thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of that structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->sum_exec_runtime += ns; + put_cpu_no_resched(); + } +} + +#else /* CONFIG_SMP */ + +static inline void thread_group_cputime_init(struct signal_struct *sig) +{ + sig->cputime.totals.utime = cputime_zero; + sig->cputime.totals.stime = cputime_zero; + sig->cputime.totals.sum_exec_runtime = 0; +} + +static inline int thread_group_cputime_alloc(struct task_struct *tsk) +{ + return 0; +} + +static inline void thread_group_cputime_free(struct signal_struct *sig) +{ +} + +static inline int thread_group_cputime_clone_thread(struct task_struct *curr, + struct task_struct *tsk) +{ +} + +static inline void thread_group_cputime(struct task_struct *tsk, + struct task_cputime *cputime) +{ + *cputime = tsk->signal->cputime.totals; +} + +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); +} + +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); +} + +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + tgtimes->totals->sum_exec_runtime += ns; +} + +#endif /* CONFIG_SMP */ + +static inline void account_group_user_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_user(&sig->cputime, cputime); +} + +static inline void account_group_system_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_system(&sig->cputime, cputime); +} + +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_exec_runtime(&sig->cputime, ns); +} + /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82..1b70b3c293e 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -125,6 +125,9 @@ extern int timekeeping_valid_for_hres(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); +struct tms; +extern void do_sys_times(struct tms *); + /** * timespec_to_ns - Convert timespec to nanoseconds * @ts: pointer to the timespec variable to be converted -- cgit v1.2.3 From 0a8eaa4f9b58759595a1bfe13a1295fdc25ba026 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 17:03:52 +0200 Subject: timers: fix itimer/many thread hang, fix #2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix the UP build: In file included from arch/x86/kernel/asm-offsets_32.c:9, from arch/x86/kernel/asm-offsets.c:3: include/linux/sched.h: In function ‘thread_group_cputime_clone_thread’: include/linux/sched.h:2272: warning: no return statement in function returning non-void include/linux/sched.h: In function ‘thread_group_cputime_account_user’: include/linux/sched.h:2284: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) include/linux/sched.h:2284: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) include/linux/sched.h: In function ‘thread_group_cputime_account_system’: include/linux/sched.h:2291: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) include/linux/sched.h:2291: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) include/linux/sched.h: In function ‘thread_group_cputime_account_exec_runtime’: include/linux/sched.h:2298: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) distcc[14501] ERROR: compile arch/x86/kernel/asm-offsets.c on a/30 failed make[1]: *** [arch/x86/kernel/asm-offsets.s] Error 1 Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 26d7a5f2d0b..ed355f02d32 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2251,6 +2251,7 @@ static inline void thread_group_cputime_free(struct signal_struct *sig) static inline int thread_group_cputime_clone_thread(struct task_struct *curr, struct task_struct *tsk) { + return 0; } static inline void thread_group_cputime(struct task_struct *tsk, @@ -2263,21 +2264,21 @@ static inline void thread_group_cputime_account_user( struct thread_group_cputime *tgtimes, cputime_t cputime) { - tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); + tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime); } static inline void thread_group_cputime_account_system( struct thread_group_cputime *tgtimes, cputime_t cputime) { - tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); + tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime); } static inline void thread_group_cputime_account_exec_runtime( struct thread_group_cputime *tgtimes, unsigned long long ns) { - tgtimes->totals->sum_exec_runtime += ns; + tgtimes->totals.sum_exec_runtime += ns; } #endif /* CONFIG_SMP */ -- cgit v1.2.3 From 5ce73a4a5a4893a1aa4cdeed1b1a5a6de42c43b6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 17:11:46 +0200 Subject: timers: fix itimer/many thread hang, cleanups Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ed355f02d32..7ce8d4e5356 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -430,7 +430,7 @@ struct pacct_struct { * @utime: time spent in user mode, in &cputime_t units * @stime: time spent in kernel mode, in &cputime_t units * @sum_exec_runtime: total time spent on the CPU, in nanoseconds - * + * * This structure groups together three kinds of CPU time that are * tracked for threads and thread groups. Most things considering * CPU time want to group these counts together and treat all three -- cgit v1.2.3 From 600715dcdf567c86f8b2c6173fcfb4b873e25a19 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Sep 2008 01:31:45 -0700 Subject: generic: add phys_addr_t for holding physical addresses Add a kernel-wide "phys_addr_t" which is guaranteed to be able to hold any physical address. By default it equals the word size of the architecture, but a 32-bit architecture can set ARCH_PHYS_ADDR_T_64BIT if it needs a 64-bit phys_addr_t. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index d4a9ce6e276..022c668496d 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -197,6 +197,12 @@ typedef u64 resource_size_t; typedef u32 resource_size_t; #endif +#ifdef CONFIG_PHYS_ADDR_T_64BIT +typedef u64 phys_addr_t; +#else +typedef u32 phys_addr_t; +#endif + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; -- cgit v1.2.3 From 947d0496cf3e12ebfa70b3eaf561c25403247ce9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Sep 2008 01:31:48 -0700 Subject: generic: make PFN_PHYS explicitly return phys_addr_t PFN_PHYS, as its name suggests, turns a pfn into a physical address. However, it is a macro which just operates on its argument without modifying its type. pfns are typed unsigned long, but an unsigned long may not be long enough to hold a physical address (32-bit systems with more than 32 bits of physcial address). Make sure we cast to phys_addr_t to return a complete result. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/pfn.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pfn.h b/include/linux/pfn.h index bb01f8b92b5..7646637221f 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h @@ -1,9 +1,13 @@ #ifndef _LINUX_PFN_H_ #define _LINUX_PFN_H_ +#ifndef __ASSEMBLY__ +#include +#endif + #define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK) #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) +#define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT) #endif -- cgit v1.2.3 From 8308c54d7e312f7a03e2ce2057d0837e6fe3843f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Sep 2008 01:31:50 -0700 Subject: generic: redefine resource_size_t as phys_addr_t There's no good reason why a resource_size_t shouldn't just be a physical address, so simply redefine it in terms of phys_addr_t. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/types.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 022c668496d..f24f7beb47d 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -191,18 +191,14 @@ typedef __u32 __bitwise __wsum; #ifdef __KERNEL__ typedef unsigned __bitwise__ gfp_t; -#ifdef CONFIG_RESOURCES_64BIT -typedef u64 resource_size_t; -#else -typedef u32 resource_size_t; -#endif - #ifdef CONFIG_PHYS_ADDR_T_64BIT typedef u64 phys_addr_t; #else typedef u32 phys_addr_t; #endif +typedef phys_addr_t resource_size_t; + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; -- cgit v1.2.3 From d7cfb60c5cf904ecf1e0ae23ec178175b86f0d4a Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 19 Sep 2008 13:13:44 +0100 Subject: hrtimer: remove hrtimer_clock_base::get_softirq_time() Peter Zijlstra noticed this 8 months ago and I just noticed it again. hrtimer_clock_base::get_softirq_time() is currently unused in the entire tree. In fact, looking at the logs, it appears as if it was never used. Remove it. Signed-off-by: Mark McLoughlin Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6d93dce61cb..1b079bd29c3 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -145,7 +145,6 @@ struct hrtimer_sleeper { * @first: pointer to the timer node which expires first * @resolution: the resolution of the clock, in nanoseconds * @get_time: function to retrieve the current time of the clock - * @get_softirq_time: function to retrieve the current time from the softirq * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base * @reprogram: function to reprogram the timer event @@ -157,7 +156,6 @@ struct hrtimer_clock_base { struct rb_node *first; ktime_t resolution; ktime_t (*get_time)(void); - ktime_t (*get_softirq_time)(void); ktime_t softirq_time; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t offset; -- cgit v1.2.3 From b91c4996df56fcd201f85c392a1de7bc3f6641f5 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 19 Sep 2008 13:13:48 +0100 Subject: hrtimer: remove hrtimer_clock_base::reprogram() hrtimer_clock_base::reprogram() also appears to never have been used, so remove it. Signed-off-by: Mark McLoughlin Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1b079bd29c3..68b0196d869 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -147,7 +147,6 @@ struct hrtimer_sleeper { * @get_time: function to retrieve the current time of the clock * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base - * @reprogram: function to reprogram the timer event */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; @@ -159,9 +158,6 @@ struct hrtimer_clock_base { ktime_t softirq_time; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t offset; - int (*reprogram)(struct hrtimer *t, - struct hrtimer_clock_base *b, - ktime_t n); #endif }; -- cgit v1.2.3 From bb34d92f643086d546b49cef680f6f305ed84414 Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang, v2 This is the second resubmission of the posix timer rework patch, posted a few days ago. This includes the changes from the previous resubmittion, which addressed Oleg Nesterov's comments, removing the RCU stuff from the patch and un-inlining the thread_group_cputime() function for SMP. In addition, per Ingo Molnar it simplifies the UP code, consolidating much of it with the SMP version and depending on lower-level SMP/UP handling to take care of the differences. It also cleans up some UP compile errors, moves the scheduler stats-related macros into kernel/sched_stats.h, cleans up a merge error in kernel/fork.c and has a few other minor fixes and cleanups as suggested by Oleg and Ingo. Thanks for the review, guys. Signed-off-by: Frank Mayhar Cc: Roland McGrath Cc: Alexey Dobriyan Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 1 + include/linux/sched.h | 183 ++------------------------------------------ 2 files changed, 6 insertions(+), 178 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index cf9f40a91c9..cac3750cd65 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -52,6 +52,7 @@ static inline int kstat_irqs(int irq) return sum; } +extern unsigned long long task_delta_exec(struct task_struct *); extern void account_user_time(struct task_struct *, cputime_t); extern void account_user_time_scaled(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); diff --git a/include/linux/sched.h b/include/linux/sched.h index 7ce8d4e5356..b982fb48c8f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -454,15 +454,9 @@ struct task_cputime { * This structure contains the version of task_cputime, above, that is * used for thread group CPU clock calculations. */ -#ifdef CONFIG_SMP struct thread_group_cputime { struct task_cputime *totals; }; -#else -struct thread_group_cputime { - struct task_cputime totals; -}; -#endif /* * NOTE! "signal_struct" does not have it's own @@ -2124,193 +2118,26 @@ static inline int spin_needbreak(spinlock_t *lock) /* * Thread group CPU time accounting. */ -#ifdef CONFIG_SMP -extern int thread_group_cputime_alloc_smp(struct task_struct *); -extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *); +extern int thread_group_cputime_alloc(struct task_struct *); +extern void thread_group_cputime(struct task_struct *, struct task_cputime *); static inline void thread_group_cputime_init(struct signal_struct *sig) { sig->cputime.totals = NULL; } -static inline int thread_group_cputime_clone_thread(struct task_struct *curr, - struct task_struct *new) +static inline int thread_group_cputime_clone_thread(struct task_struct *curr) { if (curr->signal->cputime.totals) return 0; - return thread_group_cputime_alloc_smp(curr); + return thread_group_cputime_alloc(curr); } -static inline void thread_group_cputime_free(struct signal_struct *sig) -{ - free_percpu(sig->cputime.totals); -} - -/** - * thread_group_cputime - Sum the thread group time fields across all CPUs. - * - * This is a wrapper for the real routine, thread_group_cputime_smp(). See - * that routine for details. - */ -static inline void thread_group_cputime( - struct task_struct *tsk, - struct task_cputime *times) -{ - thread_group_cputime_smp(tsk, times); -} - -/** - * thread_group_cputime_account_user - Maintain utime for a thread group. - * - * @tgtimes: Pointer to thread_group_cputime structure. - * @cputime: Time value by which to increment the utime field of that - * structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the utime field there. - */ -static inline void thread_group_cputime_account_user( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - if (tgtimes->totals) { - struct task_cputime *times; - - times = per_cpu_ptr(tgtimes->totals, get_cpu()); - times->utime = cputime_add(times->utime, cputime); - put_cpu_no_resched(); - } -} - -/** - * thread_group_cputime_account_system - Maintain stime for a thread group. - * - * @tgtimes: Pointer to thread_group_cputime structure. - * @cputime: Time value by which to increment the stime field of that - * structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the stime field there. - */ -static inline void thread_group_cputime_account_system( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - if (tgtimes->totals) { - struct task_cputime *times; - - times = per_cpu_ptr(tgtimes->totals, get_cpu()); - times->stime = cputime_add(times->stime, cputime); - put_cpu_no_resched(); - } -} - -/** - * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a - * thread group. - * - * @tgtimes: Pointer to thread_group_cputime structure. - * @ns: Time value by which to increment the sum_exec_runtime field - * of that structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the sum_exec_runtime field there. - */ -static inline void thread_group_cputime_account_exec_runtime( - struct thread_group_cputime *tgtimes, - unsigned long long ns) -{ - if (tgtimes->totals) { - struct task_cputime *times; - - times = per_cpu_ptr(tgtimes->totals, get_cpu()); - times->sum_exec_runtime += ns; - put_cpu_no_resched(); - } -} - -#else /* CONFIG_SMP */ - -static inline void thread_group_cputime_init(struct signal_struct *sig) -{ - sig->cputime.totals.utime = cputime_zero; - sig->cputime.totals.stime = cputime_zero; - sig->cputime.totals.sum_exec_runtime = 0; -} - -static inline int thread_group_cputime_alloc(struct task_struct *tsk) -{ - return 0; -} static inline void thread_group_cputime_free(struct signal_struct *sig) { -} - -static inline int thread_group_cputime_clone_thread(struct task_struct *curr, - struct task_struct *tsk) -{ - return 0; -} - -static inline void thread_group_cputime(struct task_struct *tsk, - struct task_cputime *cputime) -{ - *cputime = tsk->signal->cputime.totals; -} - -static inline void thread_group_cputime_account_user( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime); -} - -static inline void thread_group_cputime_account_system( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime); -} - -static inline void thread_group_cputime_account_exec_runtime( - struct thread_group_cputime *tgtimes, - unsigned long long ns) -{ - tgtimes->totals.sum_exec_runtime += ns; -} - -#endif /* CONFIG_SMP */ - -static inline void account_group_user_time(struct task_struct *tsk, - cputime_t cputime) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_user(&sig->cputime, cputime); -} - -static inline void account_group_system_time(struct task_struct *tsk, - cputime_t cputime) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_system(&sig->cputime, cputime); -} - -static inline void account_group_exec_runtime(struct task_struct *tsk, - unsigned long long ns) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_exec_runtime(&sig->cputime, ns); + free_percpu(sig->cputime.totals); } /* -- cgit v1.2.3 From 5a9fa73072854981a5c05eb7ba18a96d49c2804f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Sep 2008 14:42:50 -0700 Subject: posix-timers: kill ->it_sigev_signo and ->it_sigev_value With the recent changes ->it_sigev_signo and ->it_sigev_value are only used in sys_timer_create(), kill them. Signed-off-by: Oleg Nesterov Cc: mingo@elte.hu Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index f9d8e9e94e9..a7c72135554 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -45,8 +45,6 @@ struct k_itimer { int it_requeue_pending; /* waiting to requeue this timer */ #define REQUEUE_PENDING 1 int it_sigev_notify; /* notify word of sigevent struct */ - int it_sigev_signo; /* signo word of sigevent struct */ - sigval_t it_sigev_value; /* value word of sigevent struct */ struct task_struct *it_process; /* process to send signal to */ struct sigqueue *sigq; /* signal queue entry. */ union { -- cgit v1.2.3 From 1b02469088ac7a13d7e622b618b7410d0f1ce5ec Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 22 Sep 2008 14:42:43 -0700 Subject: hrtimer: reorder struct hrtimer to save 8 bytes on 64bit builds reorder struct hrtimer to save 8 bytes on 64 bit builds when CONFIG_TIMER_STATS selected. (also removes 8 bytes from signal_struct) Signed-off-by: Richard Kennedy Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 68b0196d869..8730b60c943 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -115,12 +115,12 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; - enum hrtimer_cb_mode cb_mode; struct list_head cb_entry; + enum hrtimer_cb_mode cb_mode; #ifdef CONFIG_TIMER_STATS + int start_pid; void *start_site; char start_comm[16]; - int start_pid; #endif }; -- cgit v1.2.3 From d40e944c25fb4642adb2a4c580a48218a9f3f824 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Mon, 22 Sep 2008 14:42:44 -0700 Subject: ntp: improve adjtimex frequency rounding Change PPM_SCALE_INV_SHIFT so that it doesn't throw away any input bits (19 is the amount of the factor 2 in PPM_SCALE), the output frequency can then be calculated back to its input value, as the inverse divide produce a slightly larger value, which is then correctly rounded by the final shift. Reported-by: Martin Ziegler Signed-off-by: Roman Zippel Cc: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index c00bcdd3ae4..9007313b5b7 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -82,7 +82,7 @@ */ #define SHIFT_USEC 16 /* frequency offset scale (shift) */ #define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) -#define PPM_SCALE_INV_SHIFT 20 +#define PPM_SCALE_INV_SHIFT 19 #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ PPM_SCALE + 1) -- cgit v1.2.3 From 379daf6290814e41f14880094b7b773640df2461 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 25 Sep 2008 18:43:34 -0700 Subject: IO resources, x86: ioremap sanity check to catch mapping requests exceeding the BAR sizes Go through the iomem resource tree to check if any of the ioremap() requests span more than any slot in the iomem resource tree and do a WARN_ON() if we hit this check. This will raise a red-flag, if some driver is mapping more than what is needed. And hopefully identify possible corruptions much earlier. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- include/linux/ioport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index fded376b94e..01712cf1a38 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -169,6 +169,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); +extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ -- cgit v1.2.3 From e416de5e61e1a9b7f987804cbb67230b5f5293c6 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 23 Sep 2008 17:25:10 +0100 Subject: Export the ROM enable/disable helpers .... so that they can be used by MTD map drivers. Lets us close #9420 Signed-off-by: Alan Cox Signed-off-by: David Woodhouse --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c0e14008a3c..7a4cee00c1d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -631,6 +631,8 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); /* ROM control related routines */ +int pci_enable_rom(struct pci_dev *pdev); +void pci_disable_rom(struct pci_dev *pdev); void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); size_t pci_get_rom_size(void __iomem *rom, size_t size); -- cgit v1.2.3 From 7086efe1c1536f6bc160e7d60a9bfd645b91f279 Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang, v3 - fix UP lockup - another set of UP/SMP cleanups and simplifications Signed-off-by: Frank Mayhar Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b982fb48c8f..23d9d546454 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2134,7 +2134,6 @@ static inline int thread_group_cputime_clone_thread(struct task_struct *curr) return thread_group_cputime_alloc(curr); } - static inline void thread_group_cputime_free(struct signal_struct *sig) { free_percpu(sig->cputime.totals); -- cgit v1.2.3 From bbfbd8b151fe35c9a1180a7f5254c5d6b8387cc0 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 1 Oct 2008 16:13:54 +0900 Subject: sh: Move the shared INTC code out to drivers/sh/ The INTC code will be re-used across different architectures, so move this out to drivers/sh/ and include/linux/sh_intc.h respectively. Signed-off-by: Paul Mundt --- include/linux/sh_intc.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 include/linux/sh_intc.h (limited to 'include/linux') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h new file mode 100644 index 00000000000..68e212ff9dd --- /dev/null +++ b/include/linux/sh_intc.h @@ -0,0 +1,91 @@ +#ifndef __SH_INTC_H +#define __SH_INTC_H + +typedef unsigned char intc_enum; + +struct intc_vect { + intc_enum enum_id; + unsigned short vect; +}; + +#define INTC_VECT(enum_id, vect) { enum_id, vect } +#define INTC_IRQ(enum_id, irq) INTC_VECT(enum_id, irq2evt(irq)) + +struct intc_group { + intc_enum enum_id; + intc_enum enum_ids[32]; +}; + +#define INTC_GROUP(enum_id, ids...) { enum_id, { ids } } + +struct intc_mask_reg { + unsigned long set_reg, clr_reg, reg_width; + intc_enum enum_ids[32]; +#ifdef CONFIG_SMP + unsigned long smp; +#endif +}; + +struct intc_prio_reg { + unsigned long set_reg, clr_reg, reg_width, field_width; + intc_enum enum_ids[16]; +#ifdef CONFIG_SMP + unsigned long smp; +#endif +}; + +struct intc_sense_reg { + unsigned long reg, reg_width, field_width; + intc_enum enum_ids[16]; +}; + +#ifdef CONFIG_SMP +#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8) +#else +#define INTC_SMP(stride, nr) +#endif + +struct intc_desc { + struct intc_vect *vectors; + unsigned int nr_vectors; + struct intc_group *groups; + unsigned int nr_groups; + struct intc_mask_reg *mask_regs; + unsigned int nr_mask_regs; + struct intc_prio_reg *prio_regs; + unsigned int nr_prio_regs; + struct intc_sense_reg *sense_regs; + unsigned int nr_sense_regs; + char *name; +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) + struct intc_mask_reg *ack_regs; + unsigned int nr_ack_regs; +#endif +}; + +#define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a) +#define DECLARE_INTC_DESC(symbol, chipname, vectors, groups, \ + mask_regs, prio_regs, sense_regs) \ +struct intc_desc symbol __initdata = { \ + _INTC_ARRAY(vectors), _INTC_ARRAY(groups), \ + _INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs), \ + _INTC_ARRAY(sense_regs), \ + chipname, \ +} + +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) +#define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups, \ + mask_regs, prio_regs, sense_regs, ack_regs) \ +struct intc_desc symbol __initdata = { \ + _INTC_ARRAY(vectors), _INTC_ARRAY(groups), \ + _INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs), \ + _INTC_ARRAY(sense_regs), \ + chipname, \ + _INTC_ARRAY(ack_regs), \ +} +#endif + +void __init register_intc_controller(struct intc_desc *desc); +int intc_set_priority(unsigned int irq, unsigned int prio); + +#endif /* __SH_INTC_H */ -- cgit v1.2.3 From 1daef0a868370c5a96d031b9202e3354bea060e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Jul 2008 18:19:01 -0400 Subject: NFS: Clean up nfs_sb_active/nfs_sb_deactive Instead of causing umount requests to block on server->active_wq while the asynchronous sillyrename deletes are executing, we can use the sb->s_active counter to obtain a reference to the super_block, and then release that reference in nfs_async_unlink_release(). Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c9beacd16c0..4e477ae5869 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -119,7 +119,6 @@ struct nfs_server { void (*destroy)(struct nfs_server *); atomic_t active; /* Keep trace of any activity to this server */ - wait_queue_head_t active_wq; /* Wait for any activity to stop */ /* mountd-related mount options */ struct sockaddr_storage mountd_address; -- cgit v1.2.3 From 4eec952e42314b53e48fef1f54dd89cbf9789734 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2008 17:58:13 -0400 Subject: NFS: Add options for finer control of the lookup cache Add the flag NFS_MOUNT_LOOKUP_CACHE_NONEG to turn off the caching of negative dentries. In reality what we do is to force nfs_lookup_revalidate() to always discard negative dentries. Add the flag NFS_MOUNT_LOOKUP_CACHE_NONE for enforcing stricter revalidation of dentries. It forces the revalidate code to always do a lookup instead of just checking the cached mtime of the parent directory. Signed-off-by: Trond Myklebust --- include/linux/nfs_mount.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index df7c6b7a7eb..6549a06ac16 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -65,4 +65,8 @@ struct nfs_mount_data { #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF +/* The following are for internal use only */ +#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 +#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000 + #endif -- cgit v1.2.3 From 691beb13cdc88358334ef0ba867c080a247a760f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Oct 2008 14:48:22 -0400 Subject: NFS: Allow concurrent inode revalidation Currently, if two processes are both trying to revalidate metadata for the same inode, they will find themselves being serialised. There is no good justification for this now that we have improved our ability to detect stale attribute data, so we should remove that serialisation. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 78a5922a2f1..ca563ee13e3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -200,11 +200,10 @@ struct nfs_inode { /* * Bit offsets in flags field */ -#define NFS_INO_REVALIDATING (0) /* revalidating attrs */ -#define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ -#define NFS_INO_STALE (2) /* possible stale inode */ -#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ -#define NFS_INO_MOUNTPOINT (4) /* inode is remote mountpoint */ +#define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus */ +#define NFS_INO_STALE (1) /* possible stale inode */ +#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ +#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3 From 9fa8d66f1e55bf197568c8c689043c2aad1ffc97 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Tue, 26 Aug 2008 16:23:20 +0100 Subject: NFS: remove 8 bytes of padding from struct nfs_fattr on 64 bit builds remove 8 bytes of padding from struct nfs_fattr on 64 bit builds This also removes padding from several nfs structures, including 16 bytes from nfs4_opendata, nfs4_createdata,nfs3_createdata & 8 bytes from nfs_read_data,nfs_write_data,nfs_removeres,nfs4_closedata This also reduces the reported stack usage of many nfs functions (30+). Signed-off-by: Richard Kennedy ---- This patch is against the latest git 2.6.27-rc4. I've built & run this on my AMD64 desktop, & successfully run _simple_ tests with a 64 bit client => 32 bit server & 32 bit client to 64 bit server. On fedora with gcc (GCC) 4.3.0 20080428 (Red Hat 4.3.0-8) checkpatch reports 33 functions with reduced stack usage. e.g. __nfs_revalidate_inode [nfs] 216 => 200 _nfs4_proc_access [nfs] 304 => 288 _nfs4_proc_link [nfs] 536 => 504 _nfs4_proc_remove [nfs] 304 => 288 _nfs4_proc_rename [nfs] 584 => 552 nfs3_proc_access [nfs] 272 => 256 nfs3_proc_getacl [nfs] 384 => 368 nfs3_proc_link [nfs] 496 => 464 etc I can supply the complete list if anyone is interested. regards Richard Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8c77c11224d..9cabbb3a9e6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -36,6 +36,7 @@ struct nfs_fattr { __u32 nlink; __u32 uid; __u32 gid; + dev_t rdev; __u64 size; union { struct { @@ -46,7 +47,6 @@ struct nfs_fattr { __u64 used; } nfs3; } du; - dev_t rdev; struct nfs_fsid fsid; __u64 fileid; struct timespec atime; -- cgit v1.2.3 From d1ce02e1689dff9d413138f60a79b4e3affb4708 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Sep 2008 11:57:12 -0400 Subject: NFS: SETCLIENTID truncates client ID and netid The sc_name field is currently 56 bytes long. This is not large enough to hold a pair of IPv6 addresses, the authentication type, the protocol name, and a uniquifier number. The maximum possible size of the name string using IPv6 addresses is just under 110 bytes, so I increased the size of the sc_name field to accomodate this maximum. In addition, the strings in the nfs4_setclientid structure are constructed with scnprintf(), which wants to terminate its output with '\0'. The sc_netid field was large enough only for a three byte netid string and a '\0' so inet6 netids were being truncated. Perhaps we don't need the overhead of scnprintf() to do a simple string copy, but I fixed this by increasing the size of the buffer by one byte. Since all three of the string buffers in nfs4_setclientid are constructed with scnprintf(), I increased the size of all three by one byte to document the requirement, although I don't think either the universal address field or the name field will be so small that these strings get truncated in this way. The size of the Linux client's client ID on the wire will be larger than before. RFC 3530 suggests the size limit for client IDs is 1024, and we are still well below that. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9cabbb3a9e6..f6e95bfad5d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -672,16 +672,16 @@ struct nfs4_rename_res { struct nfs_fattr * new_fattr; }; -#define NFS4_SETCLIENTID_NAMELEN (56) +#define NFS4_SETCLIENTID_NAMELEN (128) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; unsigned int sc_name_len; - char sc_name[NFS4_SETCLIENTID_NAMELEN]; + char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; u32 sc_prog; unsigned int sc_netid_len; - char sc_netid[RPCBIND_MAXNETIDLEN]; + char sc_netid[RPCBIND_MAXNETIDLEN + 1]; unsigned int sc_uaddr_len; - char sc_uaddr[RPCBIND_MAXUADDRLEN]; + char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; u32 sc_cb_ident; }; -- cgit v1.2.3 From 19d771f3caccaf66ce2fb539319222139e5b4e88 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Oct 2008 13:54:52 -0400 Subject: NFS: Save padding bytes in struct nfs4_setclientid Peter Staubach suggested reducing NFS4_SETCLIENTID_NAMELEN by one byte so as to avoid 7 bytes of unnecessary padding. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f6e95bfad5d..6ee6ae3f095 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -672,7 +672,7 @@ struct nfs4_rename_res { struct nfs_fattr * new_fattr; }; -#define NFS4_SETCLIENTID_NAMELEN (128) +#define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; unsigned int sc_name_len; -- cgit v1.2.3 From fe9053b30bb48b99f7b45541249f5cfe96bdf7f7 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 14:59:59 -0400 Subject: RPC/RDMA: add data types and new FRMR memory registration enum. Internal RPC/RDMA structure updates in preparation for FRMR support. Signed-off-by: Tom Talpey Acked-by: Tom Tucker Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtrdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 4de56b1d372..55a5d92ca1e 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -78,6 +78,7 @@ enum rpcrdma_memreg { RPCRDMA_MEMWINDOWS, RPCRDMA_MEMWINDOWS_ASYNC, RPCRDMA_MTHCAFMR, + RPCRDMA_FRMR, RPCRDMA_ALLPHYSICAL, RPCRDMA_LAST }; -- cgit v1.2.3 From 5675add36e76b9487e7f9e689f854cb8d6afd9b4 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 15:01:41 -0400 Subject: RPC/RDMA: harden connection logic against missing/late rdma_cm upcalls. Add defensive timeouts to wait_for_completion() calls in RDMA address resolution, and make them interruptible. Fix the timeout units to milliseconds (formerly jiffies) and move to private header. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtrdma.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 55a5d92ca1e..54a379c9e8e 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -66,9 +66,6 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ -#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */ -#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ - /* memory registration strategies */ #define RPCRDMA_PERSISTENT_REGISTRATION (1) -- cgit v1.2.3 From 061b1bd394ca8628b7c24eb4658ba3535da4249a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 24 Sep 2008 14:46:44 -0700 Subject: Staging: add TAINT_CRAP for all drivers/staging code We need to add a flag for all code that is in the drivers/staging/ directory to prevent all other kernel developers from worrying about issues here, and to notify users that the drivers might not be as good as they are normally used to. Based on code from Andreas Gruenbacher and Jeff Mahoney to provide a TAINT flag for the support level of a kernel module in the Novell enterprise kernel release. This is the kernel portion of this feature, the ability for the flag to be set needs to be done in the build process and will happen in a follow-up patch. Cc: Andreas Gruenbacher Cc: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6..b36805cb95f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -260,6 +260,7 @@ extern enum system_states { #define TAINT_DIE (1<<7) #define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8) #define TAINT_WARN (1<<9) +#define TAINT_CRAP (1<<10) extern void dump_stack(void) __cold; -- cgit v1.2.3 From f9da8d157b60d8c5bfc5a21fc50538fdb754a65b Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Fri, 10 Oct 2008 23:14:14 -0400 Subject: Input: move map_to_7segment.h to include/linux The map_to_7segment.h provides generic 7segment LED mappings and is designed to be used by other drivers. Moving it to common area will make it more usable. Also exporting it to userspace will help users of sysfs interface. Signed-off-by: Atsushi Nemoto Acked-by: Henk Vergonet Signed-off-by: Dmitry Torokhov --- include/linux/Kbuild | 1 + include/linux/map_to_7segment.h | 187 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 include/linux/map_to_7segment.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 4c4142c5aa6..0b136c5990c 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -106,6 +106,7 @@ header-y += keyctl.h header-y += limits.h header-y += magic.h header-y += major.h +header-y += map_to_7segment.h header-y += matroxfb.h header-y += meye.h header-y += minix_fs.h diff --git a/include/linux/map_to_7segment.h b/include/linux/map_to_7segment.h new file mode 100644 index 00000000000..7df8432c440 --- /dev/null +++ b/include/linux/map_to_7segment.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2005 Henk Vergonet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MAP_TO_7SEGMENT_H +#define MAP_TO_7SEGMENT_H + +/* This file provides translation primitives and tables for the conversion + * of (ASCII) characters to a 7-segments notation. + * + * The 7 segment's wikipedia notation below is used as standard. + * See: http://en.wikipedia.org/wiki/Seven_segment_display + * + * Notation: +-a-+ + * f b + * +-g-+ + * e c + * +-d-+ + * + * Usage: + * + * Register a map variable, and fill it with a character set: + * static SEG7_DEFAULT_MAP(map_seg7); + * + * + * Then use for conversion: + * seg7 = map_to_seg7(&map_seg7, some_char); + * ... + * + * In device drivers it is recommended, if required, to make the char map + * accessible via the sysfs interface using the following scheme: + * + * static ssize_t show_map(struct device *dev, char *buf) { + * memcpy(buf, &map_seg7, sizeof(map_seg7)); + * return sizeof(map_seg7); + * } + * static ssize_t store_map(struct device *dev, const char *buf, size_t cnt) { + * if(cnt != sizeof(map_seg7)) + * return -EINVAL; + * memcpy(&map_seg7, buf, cnt); + * return cnt; + * } + * static DEVICE_ATTR(map_seg7, PERMS_RW, show_map, store_map); + * + * History: + * 2005-05-31 RFC linux-kernel@vger.kernel.org + */ +#include + + +#define BIT_SEG7_A 0 +#define BIT_SEG7_B 1 +#define BIT_SEG7_C 2 +#define BIT_SEG7_D 3 +#define BIT_SEG7_E 4 +#define BIT_SEG7_F 5 +#define BIT_SEG7_G 6 +#define BIT_SEG7_RESERVED 7 + +struct seg7_conversion_map { + unsigned char table[128]; +}; + +static inline int map_to_seg7(struct seg7_conversion_map *map, int c) +{ + return c >= 0 && c < sizeof(map->table) ? map->table[c] : -EINVAL; +} + +#define SEG7_CONVERSION_MAP(_name, _map) \ + struct seg7_conversion_map _name = { .table = { _map } } + +/* + * It is recommended to use a facility that allows user space to redefine + * custom character sets for LCD devices. Please use a sysfs interface + * as described above. + */ +#define MAP_TO_SEG7_SYSFS_FILE "map_seg7" + +/******************************************************************************* + * ASCII conversion table + ******************************************************************************/ + +#define _SEG7(l,a,b,c,d,e,f,g) \ + ( a<',1,1,0,0,0,0,1), _SEG7('?',1,1,1,0,0,1,0),\ + _SEG7('@',1,1,0,1,1,1,1), + +#define _MAP_65_90_ASCII_SEG7_ALPHA_UPPR \ + _SEG7('A',1,1,1,0,1,1,1), _SEG7('B',1,1,1,1,1,1,1), _SEG7('C',1,0,0,1,1,1,0),\ + _SEG7('D',1,1,1,1,1,1,0), _SEG7('E',1,0,0,1,1,1,1), _SEG7('F',1,0,0,0,1,1,1),\ + _SEG7('G',1,1,1,1,0,1,1), _SEG7('H',0,1,1,0,1,1,1), _SEG7('I',0,1,1,0,0,0,0),\ + _SEG7('J',0,1,1,1,0,0,0), _SEG7('K',0,1,1,0,1,1,1), _SEG7('L',0,0,0,1,1,1,0),\ + _SEG7('M',1,1,1,0,1,1,0), _SEG7('N',1,1,1,0,1,1,0), _SEG7('O',1,1,1,1,1,1,0),\ + _SEG7('P',1,1,0,0,1,1,1), _SEG7('Q',1,1,1,1,1,1,0), _SEG7('R',1,1,1,0,1,1,1),\ + _SEG7('S',1,0,1,1,0,1,1), _SEG7('T',0,0,0,1,1,1,1), _SEG7('U',0,1,1,1,1,1,0),\ + _SEG7('V',0,1,1,1,1,1,0), _SEG7('W',0,1,1,1,1,1,1), _SEG7('X',0,1,1,0,1,1,1),\ + _SEG7('Y',0,1,1,0,0,1,1), _SEG7('Z',1,1,0,1,1,0,1), + +#define _MAP_91_96_ASCII_SEG7_SYMBOL \ + _SEG7('[',1,0,0,1,1,1,0), _SEG7('\\',0,0,1,0,0,1,1),_SEG7(']',1,1,1,1,0,0,0),\ + _SEG7('^',1,1,0,0,0,1,0), _SEG7('_',0,0,0,1,0,0,0), _SEG7('`',0,1,0,0,0,0,0), + +#define _MAP_97_122_ASCII_SEG7_ALPHA_LOWER \ + _SEG7('A',1,1,1,0,1,1,1), _SEG7('b',0,0,1,1,1,1,1), _SEG7('c',0,0,0,1,1,0,1),\ + _SEG7('d',0,1,1,1,1,0,1), _SEG7('E',1,0,0,1,1,1,1), _SEG7('F',1,0,0,0,1,1,1),\ + _SEG7('G',1,1,1,1,0,1,1), _SEG7('h',0,0,1,0,1,1,1), _SEG7('i',0,0,1,0,0,0,0),\ + _SEG7('j',0,0,1,1,0,0,0), _SEG7('k',0,0,1,0,1,1,1), _SEG7('L',0,0,0,1,1,1,0),\ + _SEG7('M',1,1,1,0,1,1,0), _SEG7('n',0,0,1,0,1,0,1), _SEG7('o',0,0,1,1,1,0,1),\ + _SEG7('P',1,1,0,0,1,1,1), _SEG7('q',1,1,1,0,0,1,1), _SEG7('r',0,0,0,0,1,0,1),\ + _SEG7('S',1,0,1,1,0,1,1), _SEG7('T',0,0,0,1,1,1,1), _SEG7('u',0,0,1,1,1,0,0),\ + _SEG7('v',0,0,1,1,1,0,0), _SEG7('W',0,1,1,1,1,1,1), _SEG7('X',0,1,1,0,1,1,1),\ + _SEG7('y',0,1,1,1,0,1,1), _SEG7('Z',1,1,0,1,1,0,1), + +#define _MAP_123_126_ASCII_SEG7_SYMBOL \ + _SEG7('{',1,0,0,1,1,1,0), _SEG7('|',0,0,0,0,1,1,0), _SEG7('}',1,1,1,1,0,0,0),\ + _SEG7('~',1,0,0,0,0,0,0), + +/* Maps */ + +/* This set tries to map as close as possible to the visible characteristics + * of the ASCII symbol, lowercase and uppercase letters may differ in + * presentation on the display. + */ +#define MAP_ASCII7SEG_ALPHANUM \ + _MAP_0_32_ASCII_SEG7_NON_PRINTABLE \ + _MAP_33_47_ASCII_SEG7_SYMBOL \ + _MAP_48_57_ASCII_SEG7_NUMERIC \ + _MAP_58_64_ASCII_SEG7_SYMBOL \ + _MAP_65_90_ASCII_SEG7_ALPHA_UPPR \ + _MAP_91_96_ASCII_SEG7_SYMBOL \ + _MAP_97_122_ASCII_SEG7_ALPHA_LOWER \ + _MAP_123_126_ASCII_SEG7_SYMBOL + +/* This set tries to map as close as possible to the symbolic characteristics + * of the ASCII character for maximum discrimination. + * For now this means all alpha chars are in lower case representations. + * (This for example facilitates the use of hex numbers with uppercase input.) + */ +#define MAP_ASCII7SEG_ALPHANUM_LC \ + _MAP_0_32_ASCII_SEG7_NON_PRINTABLE \ + _MAP_33_47_ASCII_SEG7_SYMBOL \ + _MAP_48_57_ASCII_SEG7_NUMERIC \ + _MAP_58_64_ASCII_SEG7_SYMBOL \ + _MAP_97_122_ASCII_SEG7_ALPHA_LOWER \ + _MAP_91_96_ASCII_SEG7_SYMBOL \ + _MAP_97_122_ASCII_SEG7_ALPHA_LOWER \ + _MAP_123_126_ASCII_SEG7_SYMBOL + +#define SEG7_DEFAULT_MAP(_name) \ + SEG7_CONVERSION_MAP(_name,MAP_ASCII7SEG_ALPHANUM) + +#endif /* MAP_TO_7SEGMENT_H */ + -- cgit v1.2.3 From 6283815d1853b7daf31dc4adb83e5c1dc9568251 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: linear: Represent dev_info->size and dev_info->offset in sectors. Rename them to num_sectors and start_sector which is more descriptive. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/linear.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h index 7e375111d00..87090e98529 100644 --- a/include/linux/raid/linear.h +++ b/include/linux/raid/linear.h @@ -5,8 +5,8 @@ struct dev_info { mdk_rdev_t *rdev; - sector_t size; - sector_t offset; + sector_t num_sectors; + sector_t start_sector; }; typedef struct dev_info dev_info_t; -- cgit v1.2.3 From ab5bd5cbc8d4b868378d062eed3d4240930fbb86 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: Convert remaining 1k representations in linear.c to sectors. This patch renames hash_spacing and preshift to spacing and sector_shift respectively with the following change of semantics: Case 1: (sizeof(sector_t) <= sizeof(u32)). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In this case, we have sector_shift = preshift = 0 and spacing = 2 * hash_spacing. Hence, the index for the hash table which is computed by the new code in which_dev() as sector / spacing equals the old value which was (sector/2) / hash_spacing. Note also that the value of nb_zone stays the same because both sz and base double. Case 2: (sizeof(sector_t) > sizeof(u32)). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (aka the shifting dance case). Here we have sector_shift = preshift + 1 and spacing = 2 * hash_spacing during the computation of nb_zone and curr_sector, but spacing = hash_spacing in which_dev() because in the last hunk of the patch for linear.c we shift down conf->spacing (= 2 * hash_spacing) by one more bit than in the old code. Hence in the computation of nb_zone, sz and base have the same value as before, so nb_zone is not affected. Also curr_sector in the next hunk stays the same. In which_dev() the hash table index is computed as (sector >> sector_shift) / spacing In view of sector_shift = preshift + 1 and spacing = hash_spacing, this equals ((sector/2) >> preshift) / hash_spacing which is the value computed by the old code. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/linear.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h index 87090e98529..f38b9c586af 100644 --- a/include/linux/raid/linear.h +++ b/include/linux/raid/linear.h @@ -15,9 +15,11 @@ struct linear_private_data { struct linear_private_data *prev; /* earlier version */ dev_info_t **hash_table; - sector_t hash_spacing; + sector_t spacing; sector_t array_sectors; - int preshift; /* shift before dividing by hash_spacing */ + int sector_shift; /* shift before dividing + * by spacing + */ dev_info_t disks[0]; }; -- cgit v1.2.3 From fb4d8c76e56a887b9eee99fbc55fe82b18625d30 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: Remove unnecessary #includes, #defines, and function declarations. A lot of cruft has gathered over the years. Time to remove it. Signed-off-by: NeilBrown --- include/linux/raid/md.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index dc0e3fcb9f2..bb727fa1ce7 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -19,27 +19,7 @@ #define _MD_H #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* * 'md_p.h' holds the 'physical' layout of RAID devices @@ -83,10 +63,8 @@ extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); extern void md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_end(mddev_t *mddev); -extern void md_handle_safemode(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev); -extern void md_unplug_mddev(mddev_t *mddev); extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page); -- cgit v1.2.3 From d710e13812600037a723a673dc5c96a071de98d3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 13 Oct 2008 11:55:12 +1100 Subject: md: remove space after function name in declaration and call. Having function (args) instead of function(args) make is harder to search for calls of particular functions. So remove all those spaces. Signed-off-by: NeilBrown --- include/linux/raid/md.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index bb727fa1ce7..82bea14cae1 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -54,17 +54,17 @@ extern int mdp_major; -extern int register_md_personality (struct mdk_personality *p); -extern int unregister_md_personality (struct mdk_personality *p); -extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev), +extern int register_md_personality(struct mdk_personality *p); +extern int unregister_md_personality(struct mdk_personality *p); +extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), mddev_t *mddev, const char *name); -extern void md_unregister_thread (mdk_thread_t *thread); +extern void md_unregister_thread(mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); extern void md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_end(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); -extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev); +extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page); -- cgit v1.2.3 From 6000a368cd8e6da1caf101411bdb494cd6fb8b09 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 19 Aug 2008 18:45:30 -0500 Subject: [SCSI] block: separate failfast into multiple bits. Multipath is best at handling transport errors. If it gets a device error then there is not much the multipath layer can do. It will just access the same device but from a different path. This patch breaks up failfast into device, transport and driver errors. The multipath layers (md and dm mutlipath) only ask the lower levels to fast fail transport errors. The user of failfast, read ahead, will ask to fast fail on all errors. Note that blk_noretry_request will return true if any failfast bit is set. This allows drivers that do not support the multipath failfast bits to continue to fail on any failfast error like before. Drivers like scsi that are able to fail fast specific errors can check for the specific fail fast type. In the next patch I will convert scsi. Signed-off-by: Mike Christie Cc: Jens Axboe Signed-off-by: James Bottomley --- include/linux/bio.h | 26 +++++++++++++++++--------- include/linux/blkdev.h | 15 ++++++++++++--- 2 files changed, 29 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index ff5b4cf9e2d..1beda208cbf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -129,25 +129,30 @@ struct bio { * bit 2 -- barrier * Insert a serialization point in the IO queue, forcing previously * submitted IO to be completed before this oen is issued. - * bit 3 -- fail fast, don't want low level driver retries - * bit 4 -- synchronous I/O hint: the block layer will unplug immediately + * bit 3 -- synchronous I/O hint: the block layer will unplug immediately * Note that this does NOT indicate that the IO itself is sync, just * that the block layer will not postpone issue of this IO by plugging. - * bit 5 -- metadata request + * bit 4 -- metadata request * Used for tracing to differentiate metadata and data IO. May also * get some preferential treatment in the IO scheduler - * bit 6 -- discard sectors + * bit 5 -- discard sectors * Informs the lower level device that this range of sectors is no longer * used by the file system and may thus be freed by the device. Used * for flash based storage. + * bit 6 -- fail fast device errors + * bit 7 -- fail fast transport errors + * bit 8 -- fail fast driver errors + * Don't want driver retries for any fast fail whatever the reason. */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ #define BIO_RW_BARRIER 2 -#define BIO_RW_FAILFAST 3 -#define BIO_RW_SYNC 4 -#define BIO_RW_META 5 -#define BIO_RW_DISCARD 6 +#define BIO_RW_SYNC 3 +#define BIO_RW_META 4 +#define BIO_RW_DISCARD 5 +#define BIO_RW_FAILFAST_DEV 6 +#define BIO_RW_FAILFAST_TRANSPORT 7 +#define BIO_RW_FAILFAST_DRIVER 8 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -174,7 +179,10 @@ struct bio { #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) -#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) +#define bio_failfast_dev(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DEV)) +#define bio_failfast_transport(bio) \ + ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT)) +#define bio_failfast_driver(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DRIVER)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) #define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a92d9e4ea96..f3491d22526 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -87,7 +87,9 @@ enum { */ enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ - __REQ_FAILFAST, /* no low level driver retries */ + __REQ_FAILFAST_DEV, /* no driver retries of device errors */ + __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ + __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ @@ -111,8 +113,10 @@ enum rq_flag_bits { }; #define REQ_RW (1 << __REQ_RW) +#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) +#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) #define REQ_DISCARD (1 << __REQ_DISCARD) -#define REQ_FAILFAST (1 << __REQ_FAILFAST) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) @@ -560,7 +564,12 @@ enum { #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) -#define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) +#define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV) +#define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT) +#define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER) +#define blk_noretry_request(rq) (blk_failfast_dev(rq) || \ + blk_failfast_transport(rq) || \ + blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.2.3 From 69fd3a8d098faf41a04930afa83757c0555ee360 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 12 Oct 2008 16:18:36 +0200 Subject: [MTD] remove unused mtd parameter in of_mtd_parse_partitions() Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David Woodhouse --- include/linux/mtd/partitions.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 5014f7a9f5d..c92b4d43960 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -73,7 +73,6 @@ struct device; struct device_node; int __devinit of_mtd_parse_partitions(struct device *dev, - struct mtd_info *mtd, struct device_node *node, struct mtd_partition **pparts); -- cgit v1.2.3 From 97e1c18e8d17bd87e1e383b2e9d9fc740332c8e2 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 18 Jul 2008 12:16:16 -0400 Subject: tracing: Kernel Tracepoints Implementation of kernel tracepoints. Inspired from the Linux Kernel Markers. Allows complete typing verification by declaring both tracing statement inline functions and probe registration/unregistration static inline functions within the same macro "DEFINE_TRACE". No format string is required. See the tracepoint Documentation and Samples patches for usage examples. Taken from the documentation patch : "A tracepoint placed in code provides a hook to call a function (probe) that you can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or "off" (no probe is attached). When a tracepoint is "off" it has no effect, except for adding a tiny time penalty (checking a condition for a branch) and space penalty (adding a few bytes for the function call at the end of the instrumented function and adds a data structure in a separate section). When a tracepoint is "on", the function you provide is called each time the tracepoint is executed, in the execution context of the caller. When the function provided ends its execution, it returns to the caller (continuing from the tracepoint site). You can put tracepoints at important locations in the code. They are lightweight hooks that can pass an arbitrary number of parameters, which prototypes are described in a tracepoint declaration placed in a header file." Addition and removal of tracepoints is synchronized by RCU using the scheduler (and preempt_disable) as guarantees to find a quiescent state (this is really RCU "classic"). The update side uses rcu_barrier_sched() with call_rcu_sched() and the read/execute side uses "preempt_disable()/preempt_enable()". We make sure the previous array containing probes, which has been scheduled for deletion by the rcu callback, is indeed freed before we proceed to the next update. It therefore limits the rate of modification of a single tracepoint to one update per RCU period. The objective here is to permit fast batch add/removal of probes on _different_ tracepoints. Changelog : - Use #name ":" #proto as string to identify the tracepoint in the tracepoint table. This will make sure not type mismatch happens due to connexion of a probe with the wrong type to a tracepoint declared with the same name in a different header. - Add tracepoint_entry_free_old. - Change __TO_TRACE to get rid of the 'i' iterator. Masami Hiramatsu : Tested on x86-64. Performance impact of a tracepoint : same as markers, except that it adds about 70 bytes of instructions in an unlikely branch of each instrumented function (the for loop, the stack setup and the function call). It currently adds a memory read, a test and a conditional branch at the instrumentation site (in the hot path). Immediate values will eventually change this into a load immediate, test and branch, which removes the memory read which will make the i-cache impact smaller (changing the memory read for a load immediate removes 3-4 bytes per site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it also saves the d-cache hit). About the performance impact of tracepoints (which is comparable to markers), even without immediate values optimizations, tests done by Hideo Aoki on ia64 show no regression. His test case was using hackbench on a kernel where scheduler instrumentation (about 5 events in code scheduler code) was added. Quoting Hideo Aoki about Markers : I evaluated overhead of kernel marker using linux-2.6-sched-fixes git tree, which includes several markers for LTTng, using an ia64 server. While the immediate trace mark feature isn't implemented on ia64, there is no major performance regression. So, I think that we don't have any issues to propose merging marker point patches into Linus's tree from the viewpoint of performance impact. I prepared two kernels to evaluate. The first one was compiled without CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS. I downloaded the original hackbench from the following URL: http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c I ran hackbench 5 times in each condition and calculated the average and difference between the kernels. The parameter of hackbench: every 50 from 50 to 800 The number of CPUs of the server: 2, 4, and 8 Below is the results. As you can see, major performance regression wasn't found in any case. Even if number of processes increases, differences between marker-enabled kernel and marker- disabled kernel doesn't increase. Moreover, if number of CPUs increases, the differences doesn't increase either. Curiously, marker-enabled kernel is better than marker-disabled kernel in more than half cases, although I guess it comes from the difference of memory access pattern. * 2 CPUs Number of | without | with | diff | diff | processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] | -------------------------------------------------------------- 50 | 4.811 | 4.872 | +0.061 | +1.27 | 100 | 9.854 | 10.309 | +0.454 | +4.61 | 150 | 15.602 | 15.040 | -0.562 | -3.6 | 200 | 20.489 | 20.380 | -0.109 | -0.53 | 250 | 25.798 | 25.652 | -0.146 | -0.56 | 300 | 31.260 | 30.797 | -0.463 | -1.48 | 350 | 36.121 | 35.770 | -0.351 | -0.97 | 400 | 42.288 | 42.102 | -0.186 | -0.44 | 450 | 47.778 | 47.253 | -0.526 | -1.1 | 500 | 51.953 | 52.278 | +0.325 | +0.63 | 550 | 58.401 | 57.700 | -0.701 | -1.2 | 600 | 63.334 | 63.222 | -0.112 | -0.18 | 650 | 68.816 | 68.511 | -0.306 | -0.44 | 700 | 74.667 | 74.088 | -0.579 | -0.78 | 750 | 78.612 | 79.582 | +0.970 | +1.23 | 800 | 85.431 | 85.263 | -0.168 | -0.2 | -------------------------------------------------------------- * 4 CPUs Number of | without | with | diff | diff | processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] | -------------------------------------------------------------- 50 | 2.586 | 2.584 | -0.003 | -0.1 | 100 | 5.254 | 5.283 | +0.030 | +0.56 | 150 | 8.012 | 8.074 | +0.061 | +0.76 | 200 | 11.172 | 11.000 | -0.172 | -1.54 | 250 | 13.917 | 14.036 | +0.119 | +0.86 | 300 | 16.905 | 16.543 | -0.362 | -2.14 | 350 | 19.901 | 20.036 | +0.135 | +0.68 | 400 | 22.908 | 23.094 | +0.186 | +0.81 | 450 | 26.273 | 26.101 | -0.172 | -0.66 | 500 | 29.554 | 29.092 | -0.461 | -1.56 | 550 | 32.377 | 32.274 | -0.103 | -0.32 | 600 | 35.855 | 35.322 | -0.533 | -1.49 | 650 | 39.192 | 38.388 | -0.804 | -2.05 | 700 | 41.744 | 41.719 | -0.025 | -0.06 | 750 | 45.016 | 44.496 | -0.520 | -1.16 | 800 | 48.212 | 47.603 | -0.609 | -1.26 | -------------------------------------------------------------- * 8 CPUs Number of | without | with | diff | diff | processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] | -------------------------------------------------------------- 50 | 2.094 | 2.072 | -0.022 | -1.07 | 100 | 4.162 | 4.273 | +0.111 | +2.66 | 150 | 6.485 | 6.540 | +0.055 | +0.84 | 200 | 8.556 | 8.478 | -0.078 | -0.91 | 250 | 10.458 | 10.258 | -0.200 | -1.91 | 300 | 12.425 | 12.750 | +0.325 | +2.62 | 350 | 14.807 | 14.839 | +0.032 | +0.22 | 400 | 16.801 | 16.959 | +0.158 | +0.94 | 450 | 19.478 | 19.009 | -0.470 | -2.41 | 500 | 21.296 | 21.504 | +0.208 | +0.98 | 550 | 23.842 | 23.979 | +0.137 | +0.57 | 600 | 26.309 | 26.111 | -0.198 | -0.75 | 650 | 28.705 | 28.446 | -0.259 | -0.9 | 700 | 31.233 | 31.394 | +0.161 | +0.52 | 750 | 34.064 | 33.720 | -0.344 | -1.01 | 800 | 36.320 | 36.114 | -0.206 | -0.57 | -------------------------------------------------------------- Signed-off-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: 'Peter Zijlstra' Signed-off-by: Ingo Molnar --- include/linux/module.h | 17 ++++++ include/linux/tracepoint.h | 127 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 include/linux/tracepoint.h (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 68e09557c95..8b611350386 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -331,6 +332,10 @@ struct module struct marker *markers; unsigned int num_markers; #endif +#ifdef CONFIG_TRACEPOINTS + struct tracepoint *tracepoints; + unsigned int num_tracepoints; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ @@ -454,6 +459,9 @@ extern void print_modules(void); extern void module_update_markers(void); +extern void module_update_tracepoints(void); +extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); + #else /* !CONFIG_MODULES... */ #define EXPORT_SYMBOL(sym) #define EXPORT_SYMBOL_GPL(sym) @@ -558,6 +566,15 @@ static inline void module_update_markers(void) { } +static inline void module_update_tracepoints(void) +{ +} + +static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) +{ + return 0; +} + #endif /* CONFIG_MODULES */ struct device_driver; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h new file mode 100644 index 00000000000..e623a6fca5c --- /dev/null +++ b/include/linux/tracepoint.h @@ -0,0 +1,127 @@ +#ifndef _LINUX_TRACEPOINT_H +#define _LINUX_TRACEPOINT_H + +/* + * Kernel Tracepoint API. + * + * See Documentation/tracepoint.txt. + * + * (C) Copyright 2008 Mathieu Desnoyers + * + * Heavily inspired from the Linux Kernel Markers. + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#include +#include + +struct module; +struct tracepoint; + +struct tracepoint { + const char *name; /* Tracepoint name */ + int state; /* State. */ + void **funcs; +} __attribute__((aligned(8))); + + +#define TPPROTO(args...) args +#define TPARGS(args...) args + +#ifdef CONFIG_TRACEPOINTS + +/* + * it_func[0] is never NULL because there is at least one element in the array + * when the array itself is non NULL. + */ +#define __DO_TRACE(tp, proto, args) \ + do { \ + void **it_func; \ + \ + rcu_read_lock_sched(); \ + it_func = rcu_dereference((tp)->funcs); \ + if (it_func) { \ + do { \ + ((void(*)(proto))(*it_func))(args); \ + } while (*(++it_func)); \ + } \ + rcu_read_unlock_sched(); \ + } while (0) + +/* + * Make sure the alignment of the structure in the __tracepoints section will + * not add unwanted padding between the beginning of the section and the + * structure. Force alignment to the same alignment as the section start. + */ +#define DEFINE_TRACE(name, proto, args) \ + static inline void trace_##name(proto) \ + { \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) \ + = #name ":" #proto; \ + static struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"), aligned(8))) = \ + { __tpstrtab_##name, 0, NULL }; \ + if (unlikely(__tracepoint_##name.state)) \ + __DO_TRACE(&__tracepoint_##name, \ + TPPROTO(proto), TPARGS(args)); \ + } \ + static inline int register_trace_##name(void (*probe)(proto)) \ + { \ + return tracepoint_probe_register(#name ":" #proto, \ + (void *)probe); \ + } \ + static inline void unregister_trace_##name(void (*probe)(proto))\ + { \ + tracepoint_probe_unregister(#name ":" #proto, \ + (void *)probe); \ + } + +extern void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end); + +#else /* !CONFIG_TRACEPOINTS */ +#define DEFINE_TRACE(name, proto, args) \ + static inline void _do_trace_##name(struct tracepoint *tp, proto) \ + { } \ + static inline void trace_##name(proto) \ + { } \ + static inline int register_trace_##name(void (*probe)(proto)) \ + { \ + return -ENOSYS; \ + } \ + static inline void unregister_trace_##name(void (*probe)(proto))\ + { } + +static inline void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end) +{ } +#endif /* CONFIG_TRACEPOINTS */ + +/* + * Connect a probe to a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_register(const char *name, void *probe); + +/* + * Disconnect a probe from a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_unregister(const char *name, void *probe); + +struct tracepoint_iter { + struct module *module; + struct tracepoint *tracepoint; +}; + +extern void tracepoint_iter_start(struct tracepoint_iter *iter); +extern void tracepoint_iter_next(struct tracepoint_iter *iter); +extern void tracepoint_iter_stop(struct tracepoint_iter *iter); +extern void tracepoint_iter_reset(struct tracepoint_iter *iter); +extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, + struct tracepoint *begin, struct tracepoint *end); + +#endif -- cgit v1.2.3 From 36dcd67ae994fece615b7c700958d215e884b9ae Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 12:00:59 +0200 Subject: ftrace: ignore functions that cannot be kprobe-ed kprobes already has an extensive list of annotations for functions that should not be instrumented. Add notrace annotations to these functions as well. This is particularly useful for functions called by the NMI path. Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 0be7795655f..497b1d1f7a0 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -29,6 +29,7 @@ * and Prasanna S Panchamukhi * added function-return probes. */ +#include #include #include #include @@ -47,7 +48,7 @@ #define KPROBE_HIT_SSDONE 0x00000008 /* Attach to insert probes on any functions which should be ignored*/ -#define __kprobes __attribute__((__section__(".kprobes.text"))) +#define __kprobes __attribute__((__section__(".kprobes.text"))) notrace struct kprobe; struct pt_regs; @@ -256,7 +257,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); #else /* CONFIG_KPROBES */ -#define __kprobes /**/ +#define __kprobes notrace struct jprobe; struct kretprobe; -- cgit v1.2.3 From 68bf21aa15c85d2e9b623dcda2b1ed8893275fa1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 15:45:08 -0400 Subject: ftrace: mcount call site on boot nops core This is the infrastructure to the converting the mcount call sites recorded by the __mcount_loc section into nops on boot. It also allows for using these sites to enable tracing as normal. When the __mcount_loc section is used, the "ftraced" kernel thread is disabled. This uses the current infrastructure to record the mcount call sites as well as convert them to nops. The mcount function is kept as a stub on boot up and not converted to the ftrace_record_ip function. We use the ftrace_record_ip to only record from the table. This patch does not handle modules. That comes with a later patch. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index bb384068272..d4d6ab453b7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -162,4 +162,10 @@ static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD +extern void ftrace_init(void); +#else +static inline void ftrace_init(void) { } +#endif + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 90d595fe5ca4b685465c068907e6e554760abea8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 15:45:09 -0400 Subject: ftrace: enable mcount recording for modules This patch enables the loading of the __mcount_section of modules and changing all the callers of mcount into nops. The modification is done before the init_module function is called, so again, we do not need to use kstop_machine to make these changes. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d4d6ab453b7..4936489f9ed 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -164,8 +164,11 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); +extern void ftrace_init_module(unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } +static inline void +ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 29e71abf56cebc5c5a4e184a6eb4360cc58554ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 15:45:10 -0400 Subject: ftrace: rebuild everything on change to FTRACE_MCOUNT_RECORD When enabling or disabling CONFIG_FTRACE_MCOUNT_RECORD, we want a full kernel compile to handle the adding of the __mcount_loc sections. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 75d81f157d2..ecce4a4ccd5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -486,4 +486,9 @@ struct sysinfo { #define NUMA_BUILD 0 #endif +/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ +#ifdef CONFIG_FTRACE_MCOUNT_RECORD +# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD +#endif + #endif -- cgit v1.2.3 From 28614889bcb2558a47d02d52394b7fd9795a9547 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 22:47:18 -0400 Subject: ftrace: move notrace to compiler.h The notrace define belongs in compiler.h so that it can be used in init.h Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 ++ include/linux/linkage.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8322141ee48..98115d9d04d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -44,6 +44,8 @@ extern void __chk_io_ptr(const volatile void __iomem *); # error Sorry, your compiler is too old/not recognized. #endif +#define notrace __attribute__((no_instrument_function)) + /* Intel compiler defines __GNUC__. So we will overwrite implementations * coming from above header files here */ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 56ba3739465..9fd1f859021 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -4,8 +4,6 @@ #include #include -#define notrace __attribute__((no_instrument_function)) - #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else -- cgit v1.2.3 From fed1939c64d2288938fdc1c367d49082da65e195 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 22:47:19 -0400 Subject: ftrace: remove old pointers to mcount When a mcount pointer is recorded into a table, it is used to add or remove calls to mcount (replacing them with nops). If the code is removed via removing a module, the pointers still exist. At modifying the code a check is always made to make sure the code being replaced is the code expected. In-other-words, the code being replaced is compared to what it is expected to be before being replaced. There is a very small chance that the code being replaced just happens to look like code that calls mcount (very small since the call to mcount is relative). To remove this chance, this patch adds ftrace_release to allow module unloading to remove the pointers to mcount within the module. Another change for init calls is made to not trace calls marked with __init. The tracing can not be started until after init is done anyway. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ include/linux/init.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4936489f9ed..6b232a2460c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -165,10 +165,12 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); extern void ftrace_init_module(unsigned long *start, unsigned long *end); +extern void ftrace_release(void *start, unsigned long size); #else static inline void ftrace_init(void) { } static inline void ftrace_init_module(unsigned long *start, unsigned long *end) { } +static inline void ftrace_release(void *start, unsigned long size) { } #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/init.h b/include/linux/init.h index 93538b696e3..27f61f6b3cb 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -40,7 +40,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold +#define __init __section(.init.text) __cold notrace #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) -- cgit v1.2.3 From dd0e545f061f90099a3dcc13aa77e29c6295cf23 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2008 12:26:41 -0400 Subject: ftrace: printk formatting infrastructure This patch adds a feature that can help kernel developers debug their code using ftrace. int ftrace_printk(const char *fmt, ...); This records into the ftrace buffer using printf formatting. The entry size in the buffers are still a fixed length. A new type has been added that allows for more entries to be used for a single recording. The start of the print is still the same as the other entries. It returns the number of characters written to the ftrace buffer. For example: Having a module with the following code: static int __init ftrace_print_test(void) { ftrace_printk("jiffies are %ld\n", jiffies); return 0; } Gives me: insmod-5441 3...1 7569us : ftrace_print_test: jiffies are 4296626666 for the latency_trace file and: insmod-5441 [03] 1959.370498: ftrace_print_test jiffies are 4296626666 for the trace file. Note: Only the infrastructure should go into the kernel. It is to help facilitate debugging for other kernel developers. Calls to ftrace_printk is not intended to be left in the kernel, and should be frowned upon just like scattering printks around in the code. But having this easily at your fingertips helps the debugging go faster and bugs be solved quicker. Maybe later on, we can hook this with markers and have their printf format be sucked into ftrace output. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6b232a2460c..f53b975e32f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -157,9 +157,18 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_TRACING extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +# define ftrace_printk(x...) __ftrace_printk(_THIS_IP_, x) +extern int +__ftrace_printk(unsigned long ip, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); #else static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +static inline int +ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))) +{ + return 0; +} #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD @@ -173,4 +182,5 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } static inline void ftrace_release(void *start, unsigned long size) { } #endif + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 2f2c99dba2398ef7d9c21f7c793180a50e68b1f0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2008 16:45:49 -0400 Subject: ftrace: ftrace_printk doc moved Based on Randy Dunlap's suggestion, the ftrace_printk kernel-doc belongs with the ftrace_printk macro that should be used. Not with the __ftrace_printk internal function. Signed-off-by: Steven Rostedt Acked-by: Randy Dunlap Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f53b975e32f..018af16bce5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -157,7 +157,24 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_TRACING extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -# define ftrace_printk(x...) __ftrace_printk(_THIS_IP_, x) + +/** + * ftrace_printk - printf formatting in the ftrace buffer + * @fmt: the printf format for printing + * + * Note: __ftrace_printk is an internal function for ftrace_printk and + * the @ip is passed in via the ftrace_printk macro. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving ftrace_printks scattered around in + * your code. + */ +# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt) extern int __ftrace_printk(unsigned long ip, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -- cgit v1.2.3 From 3f5a54e371ca20b119b73704f6c01b71295c1714 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 30 Jul 2008 22:36:46 -0400 Subject: ftrace: dump out ftrace buffers to console on panic At OLS I had a lot of interest to be able to have the ftrace buffers dumped on panic. Usually one would expect to uses kexec and examine the buffers after a new kernel is loaded. But sometimes the resources do not permit kdump and kexec, so having an option to still see the sequence of events up to the crash is very advantageous. This patch adds the option to have the ftrace buffers dumped to the console in the latency_trace format on a panic. When the option is set, the default entries per CPU buffer are lowered to 16384, since the writing to the serial (if that is the console) may take an awful long time otherwise. [ Changes since -v1: Got alpine to send correctly (as well as spell check working). Removed config option. Moved the static variables into ftrace_dump itself. Gave printk a log level. ] Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 018af16bce5..f7fb92045bf 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -178,6 +178,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); extern int __ftrace_printk(unsigned long ip, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); +extern void ftrace_dump(void); #else static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } @@ -186,6 +187,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))) { return 0; } +static inline void ftrace_dump(void) { } #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD -- cgit v1.2.3 From 7b928c23fa3e9fa37d1d4ba52ba963f41ee5aae0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Aug 2008 17:48:02 +0200 Subject: ftrace: build fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: In file included from init/main.c:65: include/linux/ftrace.h:166: error: expected ‘,' or ‘;' before ‘{' token make[1]: *** [init/main.o] Error 1 make: *** [init/main.o] Error 2 Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7fb92045bf..ce929cb5543 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -183,7 +183,10 @@ extern void ftrace_dump(void); static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int -ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))) +ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); + +static inline int +ftrace_printk(const char *fmt, ...) { return 0; } -- cgit v1.2.3 From c5131ad6c3cbe8f6674993e29a76cecf8deb4384 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Aug 2008 18:22:09 +0200 Subject: ftrace: ftrace_kill_atomic() build fix fix: kernel/built-in.o: In function `ftrace_dump': (.text+0x2e2ea): undefined reference to `ftrace_kill_atomic' Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ce929cb5543..36c439927ff 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -36,6 +36,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define register_ftrace_function(ops) do { } while (0) # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) +static inline void ftrace_kill_atomic(void) { } #endif /* CONFIG_FTRACE */ #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3 From 3700273586ee6a58b95dd07d9f8a02db4a9b476f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 18 Aug 2008 16:24:56 +0800 Subject: ftrace: fix incorrect comment style of __ftrace_enabled_save() This patch fixes incorrect comment style of __ftrace_enabled_save(). Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 36c439927ff..8b4cf38c80d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -99,9 +99,11 @@ static inline void tracer_disable(void) #endif } -/* Ftrace disable/restore without lock. Some synchronization mechanism +/* + * Ftrace disable/restore without lock. Some synchronization mechanism * must be used to prevent ftrace_enabled to be changed between - * disable/restore. */ + * disable/restore. + */ static inline int __ftrace_enabled_save(void) { #ifdef CONFIG_FTRACE -- cgit v1.2.3 From c0719e5a4b1ccc04180b7a7b71095c9fb7131919 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 6 Sep 2008 01:06:03 -0400 Subject: ftrace: use ftrace_release for all dynamic ftrace functions ftrace_release is necessary for all uses of dynamic ftrace and not just the archs that have CONFIG_FTRACE_MCOUNT_RECORD defined. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8b4cf38c80d..5de9903645d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -77,8 +77,10 @@ extern void mcount_call(void); extern int skip_trace(unsigned long ip); -void ftrace_disable_daemon(void); -void ftrace_enable_daemon(void); +extern void ftrace_release(void *start, unsigned long size); + +extern void ftrace_disable_daemon(void); +extern void ftrace_enable_daemon(void); #else # define skip_trace(ip) ({ 0; }) @@ -86,6 +88,7 @@ void ftrace_enable_daemon(void); # define ftrace_set_filter(buf, len, reset) do { } while (0) # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) +static inline void ftrace_release(void *start, unsigned long size) { } #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ @@ -199,12 +202,10 @@ static inline void ftrace_dump(void) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); extern void ftrace_init_module(unsigned long *start, unsigned long *end); -extern void ftrace_release(void *start, unsigned long size); #else static inline void ftrace_init(void) { } static inline void ftrace_init_module(unsigned long *start, unsigned long *end) { } -static inline void ftrace_release(void *start, unsigned long size) { } #endif -- cgit v1.2.3 From 9e57fb35d711331a9b1410c5c56ebeb3733428a0 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 22:00:34 +0300 Subject: x86 mmiotrace: implement mmiotrace_printk() Offer mmiotrace users a function to inject markers from inside the kernel. This depends on the trace_vprintk() patch. Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 61d19e1b7a0..60cc3bf5c53 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -34,11 +34,15 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p); /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); -/* Called from ioremap.c */ #ifdef CONFIG_MMIOTRACE +/* Called from ioremap.c */ extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, void __iomem *addr); extern void mmiotrace_iounmap(volatile void __iomem *addr); + +/* For anyone to insert markers. Remember trailing newline. */ +extern int mmiotrace_printk(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); #else static inline void mmiotrace_ioremap(resource_size_t offset, unsigned long size, void __iomem *addr) @@ -48,7 +52,15 @@ static inline void mmiotrace_ioremap(resource_size_t offset, static inline void mmiotrace_iounmap(volatile void __iomem *addr) { } -#endif /* CONFIG_MMIOTRACE_HOOKS */ + +static inline int mmiotrace_printk(const char *fmt, ...) + __attribute__ ((format (printf, 1, 0))); + +static inline int mmiotrace_printk(const char *fmt, ...) +{ + return 0; +} +#endif /* CONFIG_MMIOTRACE */ enum mm_io_opcode { MMIO_READ = 0x1, /* struct mmiotrace_rw */ @@ -81,5 +93,6 @@ extern void enable_mmiotrace(void); extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); +extern int mmio_trace_printk(const char *fmt, va_list args); #endif /* MMIOTRACE_H */ -- cgit v1.2.3 From 4427414170a63331a9cc36b9598502c5cdfe453b Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 22:03:56 +0300 Subject: mmiotrace: remove left-over marker cruft Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 60cc3bf5c53..139d7c88d9c 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -67,8 +67,7 @@ enum mm_io_opcode { MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ MMIO_PROBE = 0x3, /* struct mmiotrace_map */ MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ - MMIO_MARKER = 0x5, /* raw char data */ - MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */ + MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ }; struct mmiotrace_rw { -- cgit v1.2.3 From e98d0eabef2748d88fa58760d104e8e68517406b Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 29 Sep 2008 11:05:13 -0400 Subject: markers: marker_synchronize_unregister() Create marker_synchronize_unregister() which must be called before the end of exit() to make sure every probe callers have exited the non preemptible section and thus are not executing the probe code anymore. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 1290653f924..889196c7fbb 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -160,4 +160,11 @@ extern int marker_probe_unregister_private_data(marker_probe_func *probe, extern void *marker_get_private_data(const char *name, marker_probe_func *probe, int num); +/* + * marker_synchronize_unregister must be called between the last marker probe + * unregistration and the end of module exit to make sure there is no caller + * executing a probe when it is freed. + */ +#define marker_synchronize_unregister() synchronize_sched() + #endif -- cgit v1.2.3 From 53c8c8fdfd2d2d515bdcb3d0f2a11d1f3f42ece1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 3 Oct 2008 11:52:54 -0400 Subject: markers: turn marker_synchronize_unregister() into an inline Turn marker synchronize unregister into a static inline. There is no reason to keep it as a macro over a static inline. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c7fbb..38e32e781ed 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -13,6 +13,7 @@ */ #include +#include struct module; struct marker; @@ -165,6 +166,9 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe, * unregistration and the end of module exit to make sure there is no caller * executing a probe when it is freed. */ -#define marker_synchronize_unregister() synchronize_sched() +static inline void marker_synchronize_unregister(void) +{ + synchronize_sched(); +} #endif -- cgit v1.2.3 From d13744cd6e3fef373a3fe656ac349b4e7c49ff79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= Date: Tue, 23 Sep 2008 11:32:08 +0100 Subject: tracing/ftrace: add the boot tracer Add the boot/initcall tracer. It's primary purpose is to be able to trace the initcalls. It is intended to be used with scripts/bootgraph.pl after some small improvements. Note that it is not active after its init. To avoid tracing (and so crashing) before the whole tracing engine init, you have to explicitly call start_boot_trace() after do_pre_smp_initcalls() to enable it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5de9903645d..91954eb6460 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,6 +5,8 @@ #include #include +#include +#include extern int ftrace_enabled; extern int @@ -209,4 +211,21 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +struct boot_trace { + pid_t caller; + initcall_t func; + int result; + unsigned long long duration; +}; + +#ifdef CONFIG_BOOT_TRACER +extern void trace_boot(struct boot_trace *it); +extern void start_boot_trace(void); +#else +static inline void trace_boot(struct boot_trace *it) { } +static inline void start_boot_trace(void) { } +#endif + + + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 7a8e76a3829f1067b70f715771ff88baf2fbf3c3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Sep 2008 23:02:38 -0400 Subject: tracing: unified trace buffer This is a unified tracing buffer that implements a ring buffer that hopefully everyone will eventually be able to use. The events recorded into the buffer have the following structure: struct ring_buffer_event { u32 type:2, len:3, time_delta:27; u32 array[]; }; The minimum size of an event is 8 bytes. All events are 4 byte aligned inside the buffer. There are 4 types (all internal use for the ring buffer, only the data type is exported to the interface users). RINGBUF_TYPE_PADDING: this type is used to note extra space at the end of a buffer page. RINGBUF_TYPE_TIME_EXTENT: This type is used when the time between events is greater than the 27 bit delta can hold. We add another 32 bits, and record that in its own event (8 byte size). RINGBUF_TYPE_TIME_STAMP: (Not implemented yet). This will hold data to help keep the buffer timestamps in sync. RINGBUF_TYPE_DATA: The event actually holds user data. The "len" field is only three bits. Since the data must be 4 byte aligned, this field is shifted left by 2, giving a max length of 28 bytes. If the data load is greater than 28 bytes, the first array field holds the full length of the data load and the len field is set to zero. Example, data size of 7 bytes: type = RINGBUF_TYPE_DATA len = 2 time_delta: - array[0..1]: <7 bytes of data> <1 byte empty> This event is saved in 12 bytes of the buffer. An event with 82 bytes of data: type = RINGBUF_TYPE_DATA len = 0 time_delta: - array[0]: 84 (Note the alignment) array[1..14]: <82 bytes of data> <2 bytes empty> The above event is saved in 92 bytes (if my math is correct). 82 bytes of data, 2 bytes empty, 4 byte header, 4 byte length. Do not reference the above event struct directly. Use the following functions to gain access to the event table, since the ring_buffer_event structure may change in the future. ring_buffer_event_length(event): get the length of the event. This is the size of the memory used to record this event, and not the size of the data pay load. ring_buffer_time_delta(event): get the time delta of the event This returns the delta time stamp since the last event. Note: Even though this is in the header, there should be no reason to access this directly, accept for debugging. ring_buffer_event_data(event): get the data from the event This is the function to use to get the actual data from the event. Note, it is only a pointer to the data inside the buffer. This data must be copied to another location otherwise you risk it being written over in the buffer. ring_buffer_lock: A way to lock the entire buffer. ring_buffer_unlock: unlock the buffer. ring_buffer_alloc: create a new ring buffer. Can choose between overwrite or consumer/producer mode. Overwrite will overwrite old data, where as consumer producer will throw away new data if the consumer catches up with the producer. The consumer/producer is the default. ring_buffer_free: free the ring buffer. ring_buffer_resize: resize the buffer. Changes the size of each cpu buffer. Note, it is up to the caller to provide that the buffer is not being used while this is happening. This requirement may go away but do not count on it. ring_buffer_lock_reserve: locks the ring buffer and allocates an entry on the buffer to write to. ring_buffer_unlock_commit: unlocks the ring buffer and commits it to the buffer. ring_buffer_write: writes some data into the ring buffer. ring_buffer_peek: Look at a next item in the cpu buffer. ring_buffer_consume: get the next item in the cpu buffer and consume it. That is, this function increments the head pointer. ring_buffer_read_start: Start an iterator of a cpu buffer. For now, this disables the cpu buffer, until you issue a finish. This is just because we do not want the iterator to be overwritten. This restriction may change in the future. But note, this is used for static reading of a buffer which is usually done "after" a trace. Live readings would want to use the ring_buffer_consume above, which will not disable the ring buffer. ring_buffer_read_finish: Finishes the read iterator and reenables the ring buffer. ring_buffer_iter_peek: Look at the next item in the cpu iterator. ring_buffer_read: Read the iterator and increment it. ring_buffer_iter_reset: Reset the iterator to point to the beginning of the cpu buffer. ring_buffer_iter_empty: Returns true if the iterator is at the end of the cpu buffer. ring_buffer_size: returns the size in bytes of each cpu buffer. Note, the real size is this times the number of CPUs. ring_buffer_reset_cpu: Sets the cpu buffer to empty ring_buffer_reset: sets all cpu buffers to empty ring_buffer_swap_cpu: swaps a cpu buffer from one buffer with a cpu buffer of another buffer. This is handy when you want to take a snap shot of a running trace on just one cpu. Having a backup buffer, to swap with facilitates this. Ftrace max latencies use this. ring_buffer_empty: Returns true if the ring buffer is empty. ring_buffer_empty_cpu: Returns true if the cpu buffer is empty. ring_buffer_record_disable: disable all cpu buffers (read only) ring_buffer_record_disable_cpu: disable a single cpu buffer (read only) ring_buffer_record_enable: enable all cpu buffers. ring_buffer_record_enabl_cpu: enable a single cpu buffer. ring_buffer_entries: The number of entries in a ring buffer. ring_buffer_overruns: The number of entries removed due to writing wrap. ring_buffer_time_stamp: Get the time stamp used by the ring buffer ring_buffer_normalize_time_stamp: normalize the ring buffer time stamp into nanosecs. I still need to implement the GTOD feature. But we need support from the cpu frequency infrastructure. But this can be done at a later time without affecting the ring buffer interface. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 130 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 include/linux/ring_buffer.h (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h new file mode 100644 index 00000000000..c52375b8330 --- /dev/null +++ b/include/linux/ring_buffer.h @@ -0,0 +1,130 @@ +#ifndef _LINUX_RING_BUFFER_H +#define _LINUX_RING_BUFFER_H + +#include +#include + +struct ring_buffer; +struct ring_buffer_iter; + +/* + * Don't reference this struct directly, use functions below. + */ +struct ring_buffer_event { + u32 type:2, len:3, time_delta:27; + u32 array[]; +}; + +/** + * enum ring_buffer_type - internal ring buffer types + * + * @RINGBUF_TYPE_PADDING: Left over page padding + * array is ignored + * size is variable depending on how much + * padding is needed + * + * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta + * array[0] = time delta (28 .. 59) + * size = 8 bytes + * + * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock + * array[0] = tv_nsec + * array[1] = tv_sec + * size = 16 bytes + * + * @RINGBUF_TYPE_DATA: Data record + * If len is zero: + * array[0] holds the actual length + * array[1..(length+3)/4-1] holds data + * else + * length = len << 2 + * array[0..(length+3)/4] holds data + */ +enum ring_buffer_type { + RINGBUF_TYPE_PADDING, + RINGBUF_TYPE_TIME_EXTEND, + /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ + RINGBUF_TYPE_TIME_STAMP, + RINGBUF_TYPE_DATA, +}; + +unsigned ring_buffer_event_length(struct ring_buffer_event *event); +void *ring_buffer_event_data(struct ring_buffer_event *event); + +/** + * ring_buffer_event_time_delta - return the delta timestamp of the event + * @event: the event to get the delta timestamp of + * + * The delta timestamp is the 27 bit timestamp since the last event. + */ +static inline unsigned +ring_buffer_event_time_delta(struct ring_buffer_event *event) +{ + return event->time_delta; +} + +void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags); +void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags); + +/* + * size is in bytes for each per CPU buffer. + */ +struct ring_buffer * +ring_buffer_alloc(unsigned long size, unsigned flags); +void ring_buffer_free(struct ring_buffer *buffer); + +int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); + +struct ring_buffer_event * +ring_buffer_lock_reserve(struct ring_buffer *buffer, + unsigned long length, + unsigned long *flags); +int ring_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags); +int ring_buffer_write(struct ring_buffer *buffer, + unsigned long length, void *data); + +struct ring_buffer_event * +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts); +struct ring_buffer_event * +ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts); + +struct ring_buffer_iter * +ring_buffer_read_start(struct ring_buffer *buffer, int cpu); +void ring_buffer_read_finish(struct ring_buffer_iter *iter); + +struct ring_buffer_event * +ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); +struct ring_buffer_event * +ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); +void ring_buffer_iter_reset(struct ring_buffer_iter *iter); +int ring_buffer_iter_empty(struct ring_buffer_iter *iter); + +unsigned long ring_buffer_size(struct ring_buffer *buffer); + +void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); +void ring_buffer_reset(struct ring_buffer *buffer); + +int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, + struct ring_buffer *buffer_b, int cpu); + +int ring_buffer_empty(struct ring_buffer *buffer); +int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); + +void ring_buffer_record_disable(struct ring_buffer *buffer); +void ring_buffer_record_enable(struct ring_buffer *buffer); +void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); +void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); + +unsigned long ring_buffer_entries(struct ring_buffer *buffer); +unsigned long ring_buffer_overruns(struct ring_buffer *buffer); + +u64 ring_buffer_time_stamp(int cpu); +void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); + +enum ring_buffer_flags { + RB_FL_OVERWRITE = 1 << 0, +}; + +#endif /* _LINUX_RING_BUFFER_H */ -- cgit v1.2.3 From d769041f865330034131525ee6a7f72eb4af2a24 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Oct 2008 00:29:53 -0400 Subject: ring_buffer: implement new locking The old "lock always" scheme had issues with lockdep, and was not very efficient anyways. This patch does a new design to be partially lockless on writes. Writes will add new entries to the per cpu pages by simply disabling interrupts. When a write needs to go to another page than it will grab the lock. A new "read page" has been added so that the reader can pull out a page from the ring buffer to read without worrying about the writer writing over it. This allows us to not take the lock for all reads. The lock is now only taken when a read needs to go to a new page. This is far from lockless, and interrupts still need to be disabled, but it is a step towards a more lockless solution, and it also solves a lot of the issues that were noticed by the first conversion of ftrace to the ring buffers. Note: the ring_buffer_{un}lock API has been removed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index c52375b8330..536b0ca46a0 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -63,9 +63,6 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } -void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags); -void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags); - /* * size is in bytes for each per CPU buffer. */ -- cgit v1.2.3 From cb5ab74204a6e2579d1119bf1348eb806526b12b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Oct 2008 12:59:20 +0200 Subject: tracing/fastboot: change the printing of boot tracer according to bootgraph.pl Change the boot tracer printing to make it parsable for the scripts/bootgraph.pl script. We have now to output two lines for each initcall, according to the printk in do_one_initcall() in init/main.c We need now the call's time and the return's time. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 91954eb6460..4455490d91b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -216,6 +216,8 @@ struct boot_trace { initcall_t func; int result; unsigned long long duration; + ktime_t calltime; + ktime_t rettime; }; #ifdef CONFIG_BOOT_TRACER -- cgit v1.2.3 From 5601020feb0c3010e9e3e0131e9697ac6a06777b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Oct 2008 13:26:05 +0200 Subject: tracing/fastboot: get the initcall name before it disappears After some initcall traces, some initcall names may be inconsistent. That's because these functions will disappear from the .init section and also their name from the symbols table. So we have to copy the name of the function in a buffer large enough during the trace appending. It is not costly for the ring_buffer because the number of initcall entries is commonly not really large. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4455490d91b..e672e51c40a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -7,6 +7,7 @@ #include #include #include +#include extern int ftrace_enabled; extern int @@ -213,7 +214,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } struct boot_trace { pid_t caller; - initcall_t func; + char func[KSYM_NAME_LEN]; int result; unsigned long long duration; ktime_t calltime; @@ -221,10 +222,10 @@ struct boot_trace { }; #ifdef CONFIG_BOOT_TRACER -extern void trace_boot(struct boot_trace *it); +extern void trace_boot(struct boot_trace *it, initcall_t fn); extern void start_boot_trace(void); #else -static inline void trace_boot(struct boot_trace *it) { } +static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } static inline void start_boot_trace(void) { } #endif -- cgit v1.2.3 From 3e1932ad59726d794a865cc159c0593d54bf0cb6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Oct 2008 17:45:47 +0200 Subject: tracing/fastboot: build fix fix: In file included from kernel/sysctl.c:52: include/linux/ftrace.h:217: error: 'KSYM_NAME_LEN' undeclared here (not in a function) Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e672e51c40a..deded114dff 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1,14 +1,14 @@ #ifndef _LINUX_FTRACE_H #define _LINUX_FTRACE_H -#ifdef CONFIG_FTRACE - #include #include #include #include #include +#ifdef CONFIG_FTRACE + extern int ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, -- cgit v1.2.3 From eb7fa935274bb233686fdf7a53f40c5d9ee76ed6 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Thu, 2 Oct 2008 12:00:07 -0700 Subject: ftrace: ktime.h not included in ftrace.h Including eliminates the following error: include/linux/ftrace.h:220: error: expected specifier-qualifier-list before 'ktime_t' Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index deded114dff..ed53265d1f6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 097d036a2f25eecc42435c57e010aaf4a2eed2d9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 Oct 2008 15:39:21 +0200 Subject: tracing/fastboot: only trace non-module initcalls At this time, only built-in initcalls interest us. We can't really produce a relevant graph if we include the modules initcall too. I had good results after this patch (see svg in attachment). Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ed53265d1f6..5812dba4ee2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -225,9 +225,11 @@ struct boot_trace { #ifdef CONFIG_BOOT_TRACER extern void trace_boot(struct boot_trace *it, initcall_t fn); extern void start_boot_trace(void); +extern void stop_boot_trace(void); #else static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } static inline void start_boot_trace(void) { } +static inline void stop_boot_trace(void) { } #endif -- cgit v1.2.3 From ca538f6bbe583406f941f3041d40c41f9a13d1de Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Thu, 9 Oct 2008 15:23:05 -0700 Subject: tracing/fastboot: add better resolution to initcall debug/tracing Change the time resolution for initcall_debug to microseconds, from milliseconds. This is handy to determine which initcalls you want to work on for faster booting. One one of my test machines, over 90% of the initcalls are less than a millisecond and (without this patch) these are all reported as 0 msecs. Working on the 900 us ones is more important than the 4 us ones. With 'quiet' on the kernel command line, this adds no significant overhead to kernel boot time. Signed-off-by: Tim Bird Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5812dba4ee2..a3d46151be1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -215,9 +215,9 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } struct boot_trace { pid_t caller; - char func[KSYM_NAME_LEN]; + char func[KSYM_NAME_LEN]; int result; - unsigned long long duration; + unsigned long long duration; /* usecs */ ktime_t calltime; ktime_t rettime; }; -- cgit v1.2.3 From bfadadfccc19e36f7d600c5ce7b3e5ba5197fbf0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 10 Oct 2008 03:48:25 -0400 Subject: markers: fix synchronize marker unregister static inline Use a #define for synchronize marker unregister to fix include dependencies. Fixes the slab circular inclusion which triggers when slab.git is combined with tracing.git, where rcupdate includes slab, which includes markers which includes rcupdate. Signed-off-by: Mathieu Desnoyers Acked-by: Pekka Enberg Signed-off-by: Ingo Molnar --- include/linux/marker.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 38e32e781ed..889196c7fbb 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -13,7 +13,6 @@ */ #include -#include struct module; struct marker; @@ -166,9 +165,6 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe, * unregistration and the end of module exit to make sure there is no caller * executing a probe when it is freed. */ -static inline void marker_synchronize_unregister(void) -{ - synchronize_sched(); -} +#define marker_synchronize_unregister() synchronize_sched() #endif -- cgit v1.2.3 From f2461fc82a083dd60062e05e704c5fcc1c658ba1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 6 Oct 2008 10:33:00 -0400 Subject: tracepoints: tracepoint_synchronize_unregister() Create tracepoint_synchronize_unregister() which must be called before the end of exit() to make sure every probe callers have exited the non preemptible section and thus are not executing the probe code anymore. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index e623a6fca5c..199f4c207c1 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -124,4 +124,11 @@ extern void tracepoint_iter_reset(struct tracepoint_iter *iter); extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, struct tracepoint *begin, struct tracepoint *end); +/* + * tracepoint_synchronize_unregister must be called between the last tracepoint + * probe unregistration and the end of module exit to make sure there is no + * caller executing a probe when it is freed. + */ +#define tracepoint_synchronize_unregister() synchronize_sched() + #endif -- cgit v1.2.3 From 231375cc5cc3549bb413f94a164bdcbd5f9ce943 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 3 Oct 2008 15:01:33 -0400 Subject: tracepoints: synchronize unregister static inline Turn tracepoint synchronize unregister into a static inline. There is no reason to keep it as a macro over a static inline. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 199f4c207c1..c5bb39c7a77 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -129,6 +129,9 @@ extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, * probe unregistration and the end of module exit to make sure there is no * caller executing a probe when it is freed. */ -#define tracepoint_synchronize_unregister() synchronize_sched() +static inline void tracepoint_synchronize_unregister(void) +{ + synchronize_sched(); +} #endif -- cgit v1.2.3 From 6028aa01f759a1dae11e5d0e495b3dc9d2b0a47b Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 14 Oct 2008 21:23:26 +0900 Subject: [MTD] [NAND] sh_flctl: add support for Renesas SuperH FLCTL Several Renesas SuperH CPU has FLCTL. The FLCTL support NAND Flash. This driver support SH7723. Signed-off-by: Yoshihiro Shimoda Acked-by: Paul Mundt Signed-off-by: David Woodhouse --- include/linux/mtd/sh_flctl.h | 125 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 include/linux/mtd/sh_flctl.h (limited to 'include/linux') diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h new file mode 100644 index 00000000000..e77c1cea404 --- /dev/null +++ b/include/linux/mtd/sh_flctl.h @@ -0,0 +1,125 @@ +/* + * SuperH FLCTL nand controller + * + * Copyright © 2008 Renesas Solutions Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __SH_FLCTL_H__ +#define __SH_FLCTL_H__ + +#include +#include +#include + +/* FLCTL registers */ +#define FLCMNCR(f) (f->reg + 0x0) +#define FLCMDCR(f) (f->reg + 0x4) +#define FLCMCDR(f) (f->reg + 0x8) +#define FLADR(f) (f->reg + 0xC) +#define FLADR2(f) (f->reg + 0x3C) +#define FLDATAR(f) (f->reg + 0x10) +#define FLDTCNTR(f) (f->reg + 0x14) +#define FLINTDMACR(f) (f->reg + 0x18) +#define FLBSYTMR(f) (f->reg + 0x1C) +#define FLBSYCNT(f) (f->reg + 0x20) +#define FLDTFIFO(f) (f->reg + 0x24) +#define FLECFIFO(f) (f->reg + 0x28) +#define FLTRCR(f) (f->reg + 0x2C) +#define FL4ECCRESULT0(f) (f->reg + 0x80) +#define FL4ECCRESULT1(f) (f->reg + 0x84) +#define FL4ECCRESULT2(f) (f->reg + 0x88) +#define FL4ECCRESULT3(f) (f->reg + 0x8C) +#define FL4ECCCR(f) (f->reg + 0x90) +#define FL4ECCCNT(f) (f->reg + 0x94) +#define FLERRADR(f) (f->reg + 0x98) + +/* FLCMNCR control bits */ +#define ECCPOS2 (0x1 << 25) +#define _4ECCCNTEN (0x1 << 24) +#define _4ECCEN (0x1 << 23) +#define _4ECCCORRECT (0x1 << 22) +#define SNAND_E (0x1 << 18) /* SNAND (0=512 1=2048)*/ +#define QTSEL_E (0x1 << 17) +#define ENDIAN (0x1 << 16) /* 1 = little endian */ +#define FCKSEL_E (0x1 << 15) +#define ECCPOS_00 (0x00 << 12) +#define ECCPOS_01 (0x01 << 12) +#define ECCPOS_02 (0x02 << 12) +#define ACM_SACCES_MODE (0x01 << 10) +#define NANWF_E (0x1 << 9) +#define SE_D (0x1 << 8) /* Spare area disable */ +#define CE1_ENABLE (0x1 << 4) /* Chip Enable 1 */ +#define CE0_ENABLE (0x1 << 3) /* Chip Enable 0 */ +#define TYPESEL_SET (0x1 << 0) + +/* FLCMDCR control bits */ +#define ADRCNT2_E (0x1 << 31) /* 5byte address enable */ +#define ADRMD_E (0x1 << 26) /* Sector address access */ +#define CDSRC_E (0x1 << 25) /* Data buffer selection */ +#define DOSR_E (0x1 << 24) /* Status read check */ +#define SELRW (0x1 << 21) /* 0:read 1:write */ +#define DOADR_E (0x1 << 20) /* Address stage execute */ +#define ADRCNT_1 (0x00 << 18) /* Address data bytes: 1byte */ +#define ADRCNT_2 (0x01 << 18) /* Address data bytes: 2byte */ +#define ADRCNT_3 (0x02 << 18) /* Address data bytes: 3byte */ +#define ADRCNT_4 (0x03 << 18) /* Address data bytes: 4byte */ +#define DOCMD2_E (0x1 << 17) /* 2nd cmd stage execute */ +#define DOCMD1_E (0x1 << 16) /* 1st cmd stage execute */ + +/* FLTRCR control bits */ +#define TRSTRT (0x1 << 0) /* translation start */ +#define TREND (0x1 << 1) /* translation end */ + +/* FL4ECCCR control bits */ +#define _4ECCFA (0x1 << 2) /* 4 symbols correct fault */ +#define _4ECCEND (0x1 << 1) /* 4 symbols end */ +#define _4ECCEXST (0x1 << 0) /* 4 symbols exist */ + +#define INIT_FL4ECCRESULT_VAL 0x03FF03FF +#define LOOP_TIMEOUT_MAX 0x00010000 + +#define mtd_to_flctl(mtd) container_of(mtd, struct sh_flctl, mtd) + +struct sh_flctl { + struct mtd_info mtd; + struct nand_chip chip; + void __iomem *reg; + + uint8_t done_buff[2048 + 64]; /* max size 2048 + 64 */ + int read_bytes; + int index; + int seqin_column; /* column in SEQIN cmd */ + int seqin_page_addr; /* page_addr in SEQIN cmd */ + uint32_t seqin_read_cmd; /* read cmd in SEQIN cmd */ + int erase1_page_addr; /* page_addr in ERASE1 cmd */ + uint32_t erase_ADRCNT; /* bits of FLCMDCR in ERASE1 cmd */ + uint32_t rw_ADRCNT; /* bits of FLCMDCR in READ WRITE cmd */ + + int hwecc_cant_correct[4]; + + unsigned page_size:1; /* NAND page size (0 = 512, 1 = 2048) */ + unsigned hwecc:1; /* Hardware ECC (0 = disabled, 1 = enabled) */ +}; + +struct sh_flctl_platform_data { + struct mtd_partition *parts; + int nr_parts; + unsigned long flcmncr_val; + + unsigned has_hwecc:1; +}; + +#endif /* __SH_FLCTL_H__ */ -- cgit v1.2.3 From 74baaaaec8b4f22e1ae279f5ecca4ff705b28912 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 14 Oct 2008 09:21:02 -0400 Subject: vfs: Remove the range_cont writeback mode. Ext4 was the only user of range_cont writeback mode and ext4 switched to a different method. So remove the range_cont mode which is not used in the kernel. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" CC: linux-fsdevel@vger.kernel.org --- include/linux/writeback.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 12b15c561a1..bd91987c065 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -63,7 +63,6 @@ struct writeback_control { unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ - unsigned range_cont:1; }; /* -- cgit v1.2.3 From e6a7d3c04f8fe49099521e6dc9a46b0272381f2f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Oct 2008 11:58:31 -0700 Subject: netfilter: ctnetlink: remove bogus module dependency between ctnetlink and nf_nat This patch removes the module dependency between ctnetlink and nf_nat by means of an indirect call that is initialized when nf_nat is loaded. Now, nf_conntrack_netlink only requires nf_conntrack and nfnetlink. This patch puts nfnetlink_parse_nat_setup_hook into the nf_conntrack_core to avoid dependencies between ctnetlink, nf_conntrack_ipv4 and nf_conntrack_ipv6. This patch also introduces the function ctnetlink_change_nat that is only invoked from the creation path. Actually, the nat handling cannot be invoked from the update path since this is not allowed. By introducing this function, we remove the useless nat handling in the update path and we avoid deadlock-prone code. This patch also adds the required EAGAIN logic for nfnetlink. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 0d8424f7689..7d8e0455cca 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -78,6 +78,9 @@ extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +extern void nfnl_lock(void); +extern void nfnl_unlock(void); + #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) -- cgit v1.2.3 From 4704f0e274829e3af00737d2d9adace2d71a9605 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Oct 2008 19:16:07 -0400 Subject: NFS: Fix the resolution problem with nfs_inode_attrs_need_update() It appears that 'jiffies' timestamps do not have high enough resolution for nfs_inode_attrs_need_update(). One problem is that a GETATTR can be launched within < 1 jiffy of the last operation that updated the attribute. Another problem is that RPC calls can take < 1 jiffy to execute. We can fix this by switching the variables to use a simple global counter that gets incremented every time we start another GETATTR call. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 10 +++------- include/linux/nfs_xdr.h | 1 + 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ca563ee13e3..ac8d0233b05 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -137,7 +137,7 @@ struct nfs_inode { unsigned long attrtimeo_timestamp; __u64 change_attr; /* v4 only */ - unsigned long last_updated; + unsigned long attr_gencount; /* "Generation counter" for the attribute cache. This is * bumped whenever we update the metadata on the * server. @@ -344,15 +344,11 @@ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ct extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); extern u64 nfs_compat_user_ino64(u64 fileid); +extern void nfs_fattr_init(struct nfs_fattr *fattr); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ - -static inline void nfs_fattr_init(struct nfs_fattr *fattr) -{ - fattr->valid = 0; - fattr->time_start = jiffies; -} +extern unsigned long nfs_inc_attr_generation_counter(void); /* * linux/fs/nfs/file.c diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6ee6ae3f095..c1c31acb8a2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -56,6 +56,7 @@ struct nfs_fattr { __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ unsigned long time_start; + unsigned long gencount; }; #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ -- cgit v1.2.3 From d98e6346350ac909f095768beb28b82368bd126f Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Tue, 1 Jul 2008 16:23:49 -0500 Subject: KVM: Move KVM TRACE DEFINITIONS to common header Move KVM trace definitions from x86 specific kvm headers to common kvm headers to create a cross-architecture numbering scheme for trace events. This means the kvmtrace_format userspace tool won't need to know which architecture produced the log file being processed. Signed-off-by: Jerone Young Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- include/linux/kvm.h | 21 +++++++++++++++++++++ include/linux/kvm_host.h | 19 +++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 70a30651cd1..8a3ceadb136 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -440,4 +440,25 @@ struct kvm_trace_rec { #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) +#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) +#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) +#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) +#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) +#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) +#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) +#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) +#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) +#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) +#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) +#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) +#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) +#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) +#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) +#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) +#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) +#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) +#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) +#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) +#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8525afc5310..a18aaad2ab7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -326,6 +326,25 @@ struct kvm_stats_debugfs_item { extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; +#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 5, d1, d2, d3, d4, d5) +#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 4, d1, d2, d3, d4, 0) +#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 3, d1, d2, d3, 0, 0) +#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 2, d1, d2, 0, 0, 0) +#define KVMTRACE_1D(evt, vcpu, d1, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 1, d1, 0, 0, 0, 0) +#define KVMTRACE_0D(evt, vcpu, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 0, 0, 0, 0, 0, 0) + #ifdef CONFIG_KVM_TRACE int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); void kvm_trace_cleanup(void); -- cgit v1.2.3 From e32c8f2c0720fb21c6f4a5f6ccbebdadc878f707 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Mon, 14 Jul 2008 14:00:00 +0200 Subject: KVM: kvmtrace: Remove use of bit fields in kvm trace structure This patch fixes kvmtrace use on big endian systems. When using bit fields the compiler will lay data out in the wrong order expected when laid down into a file. This fixes it by using one variable instead of using bit fields. Signed-off-by: Jerone Young Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- include/linux/kvm.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8a3ceadb136..8a16b083df2 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -311,9 +311,13 @@ struct kvm_s390_interrupt { /* This structure represents a single trace buffer record. */ struct kvm_trace_rec { - __u32 event:28; - __u32 extra_u32:3; - __u32 cycle_in:1; + /* variable rec_val + * is split into: + * bits 0 - 27 -> event id + * bits 28 -30 -> number of extra data args of size u32 + * bits 31 -> binary indicator for if tsc is in record + */ + __u32 rec_val; __u32 pid; __u32 vcpu_id; union { @@ -327,6 +331,13 @@ struct kvm_trace_rec { } u; }; +#define TRACE_REC_EVENT_ID(val) \ + (0x0fffffff & (val)) +#define TRACE_REC_NUM_DATA_ARGS(val) \ + (0x70000000 & ((val) << 28)) +#define TRACE_REC_TCS(val) \ + (0x80000000 & ((val) << 31)) + #define KVMIO 0xAE /* -- cgit v1.2.3 From 3f7f95c65ef6a89472a28da1b9436eaeee288831 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Mon, 14 Jul 2008 14:00:01 +0200 Subject: KVM: kvmtrace: replace get_cycles with ktime_get v3 The current kvmtrace code uses get_cycles() while the interpretation would be easier using using nanoseconds. ktime_get() should give at least the same accuracy as get_cycles on all architectures (even better on 32bit archs) but at a better unit (e.g. comparable between hosts with different frequencies. [avi: avoid ktime_t in public header] Signed-off-by: Christian Ehrhardt Acked-by: Christian Borntraeger Signed-off-by: Avi Kivity --- include/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8a16b083df2..5d08f11bb27 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -322,12 +322,12 @@ struct kvm_trace_rec { __u32 vcpu_id; union { struct { - __u64 cycle_u64; + __u64 timestamp; __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } __attribute__((packed)) cycle; + } __attribute__((packed)) timestamp; struct { __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } nocycle; + } notimestamp; } u; }; -- cgit v1.2.3 From 31711f2294b38d8334efaf7dbac6da4781fd151e Mon Sep 17 00:00:00 2001 From: Jerone Young Date: Mon, 14 Jul 2008 14:00:03 +0200 Subject: KVM: ppc: adds trace points for ppc tlb activity This patch adds trace points to track powerpc TLB activities using the KVM_TRACE infrastructure. Signed-off-by: Jerone Young Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- include/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 5d08f11bb27..e21a5050d4d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -471,5 +471,8 @@ struct kvm_trace_rec { #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) +#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) +#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) +#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) #endif -- cgit v1.2.3 From 3b4bd7969f7b61a1ab455bff084ee4f0a2411055 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Mon, 14 Jul 2008 14:00:04 +0200 Subject: KVM: ppc: trace powerpc instruction emulation This patch adds a trace point for the instruction emulation on embedded powerpc utilizing the KVM_TRACE interface. Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- include/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index e21a5050d4d..d29b6488144 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -474,5 +474,6 @@ struct kvm_trace_rec { #define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) #define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) +#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) #endif -- cgit v1.2.3 From 4d5c5d0fe89c921336b95f5e7e4f529a9df92f53 Mon Sep 17 00:00:00 2001 From: Ben-Ami Yassour Date: Mon, 28 Jul 2008 19:26:26 +0300 Subject: KVM: pci device assignment Based on a patch from: Amit Shah This patch adds support for handling PCI devices that are assigned to the guest. The device to be assigned to the guest is registered in the host kernel and interrupt delivery is handled. If a device is already assigned, or the device driver for it is still loaded on the host, the device assignment is failed by conveying a -EBUSY reply to the userspace. Devices that share their interrupt line are not supported at the moment. By itself, this patch will not make devices work within the guest. The VT-d extension is required to enable the device to perform DMA. Another alternative is PVDMA. Signed-off-by: Amit Shah Signed-off-by: Ben-Ami Yassour Signed-off-by: Weidong Han Signed-off-by: Avi Kivity --- include/linux/kvm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index d29b6488144..ef4bc6f8977 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -383,6 +383,7 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ +#define KVM_CAP_DEVICE_ASSIGNMENT 17 /* * ioctls for VM fds @@ -412,6 +413,10 @@ struct kvm_trace_rec { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) +#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ + struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ + struct kvm_assigned_irq) /* * ioctls for vcpu fds @@ -476,4 +481,18 @@ struct kvm_trace_rec { #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; +}; + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; + __u32 guest_irq; + __u32 flags; +}; + #endif -- cgit v1.2.3 From d76901750ab9f71091d33ef3d2b5909d8a9a4ad4 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 8 Sep 2008 15:23:48 -0300 Subject: KVM: x86: do not execute halted vcpus Offline or uninitialized vcpu's can be executed if requested to perform userspace work. Follow Avi's suggestion to handle halted vcpu's in the main loop, simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to indicate events that promote state from halted to running. Also standardize vcpu wake sites. Signed-off-by: Marcelo Tosatti redhat.com> Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a18aaad2ab7..4b036430ea2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -34,6 +34,7 @@ #define KVM_REQ_MMU_RELOAD 3 #define KVM_REQ_TRIPLE_FAULT 4 #define KVM_REQ_PENDING_TIMER 5 +#define KVM_REQ_UNHALT 6 struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -- cgit v1.2.3 From 387179464257921eb9aa3d15cc3ff194f6945a7c Mon Sep 17 00:00:00 2001 From: "Kay, Allen M" Date: Tue, 9 Sep 2008 18:37:29 +0300 Subject: VT-d: Changes to support KVM This patch extends the VT-d driver to support KVM [Ben: fixed memory pinning] [avi: move dma_remapping.h as well] Signed-off-by: Kay, Allen M Signed-off-by: Weidong Han Signed-off-by: Ben-Ami Yassour Signed-off-by: Amit Shah Acked-by: Mark Gross Signed-off-by: Avi Kivity --- include/linux/dma_remapping.h | 157 ++++++++++++++++++++ include/linux/intel-iommu.h | 327 ++++++++++++++++++++++++++++++++++++++++++ include/linux/iova.h | 52 +++++++ 3 files changed, 536 insertions(+) create mode 100644 include/linux/dma_remapping.h create mode 100644 include/linux/intel-iommu.h create mode 100644 include/linux/iova.h (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h new file mode 100644 index 00000000000..bff5c65f81d --- /dev/null +++ b/include/linux/dma_remapping.h @@ -0,0 +1,157 @@ +#ifndef _DMA_REMAPPING_H +#define _DMA_REMAPPING_H + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) + + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; +} + +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + +struct intel_iommu; + +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); +extern void free_dmar_iommu(struct intel_iommu *iommu); + +extern int dmar_disabled; + +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} +#endif /* !CONFIG_DMAR_GFX_WA */ + +#endif diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h new file mode 100644 index 00000000000..2e117f30a76 --- /dev/null +++ b/include/linux/intel-iommu.h @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Ashok Raj + * Author: Anil S Keshavamurthy + */ + +#ifndef _INTEL_IOMMU_H_ +#define _INTEL_IOMMU_H_ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Intel IOMMU register specification per version 1.0 public spec. + */ + +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_GCMD_REG 0x18 /* Global command register */ +#define DMAR_GSTS_REG 0x1c /* Global status register */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_CCMD_REG 0x28 /* Context command reg */ +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ +#define DMAR_FECTL_REG 0x38 /* Fault control register */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ + +#define OFFSET_STRIDE (9) +/* +#define dmar_readl(dmar, reg) readl(dmar + reg) +#define dmar_readq(dmar, reg) ({ \ + u32 lo, hi; \ + lo = readl(dmar + reg); \ + hi = readl(dmar + reg + 4); \ + (((u64) hi) << 32) + lo; }) +*/ +static inline u64 dmar_readq(void __iomem *addr) +{ + u32 lo, hi; + lo = readl(addr); + hi = readl(addr + 4); + return (((u64) hi) << 32) + lo; +} + +static inline void dmar_writeq(void __iomem *addr, u64 val) +{ + writel((u32)val, addr); + writel((u32)(val >> 32), addr + 4); +} + +#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) +#define DMAR_VER_MINOR(v) ((v) & 0x0f) + +/* + * Decoding Capability Register + */ +#define cap_read_drain(c) (((c) >> 55) & 1) +#define cap_write_drain(c) (((c) >> 54) & 1) +#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) +#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) +#define cap_pgsel_inv(c) (((c) >> 39) & 1) + +#define cap_super_page_val(c) (((c) >> 34) & 0xf) +#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ + * OFFSET_STRIDE) + 21) + +#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) +#define cap_max_fault_reg_offset(c) \ + (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) + +#define cap_zlr(c) (((c) >> 22) & 1) +#define cap_isoch(c) (((c) >> 23) & 1) +#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) +#define cap_sagaw(c) (((c) >> 8) & 0x1f) +#define cap_caching_mode(c) (((c) >> 7) & 1) +#define cap_phmr(c) (((c) >> 6) & 1) +#define cap_plmr(c) (((c) >> 5) & 1) +#define cap_rwbf(c) (((c) >> 4) & 1) +#define cap_afl(c) (((c) >> 3) & 1) +#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) +/* + * Extended Capability Register + */ + +#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) +#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) +#define ecap_max_iotlb_offset(e) \ + (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) +#define ecap_coherent(e) ((e) & 0x1) +#define ecap_qis(e) ((e) & 0x2) +#define ecap_eim_support(e) ((e >> 4) & 0x1) +#define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) + + +/* IOTLB_REG */ +#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) +#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) +#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) +#define DMA_TLB_IIRG(type) ((type >> 60) & 7) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_READ_DRAIN (((u64)1) << 49) +#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) +#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) +#define DMA_TLB_IVT (((u64)1) << 63) +#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_TLB_MAX_SIZE (0x3f) + +/* INVALID_DESC */ +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_ID_TLB_ADDR(addr) (addr) +#define DMA_ID_TLB_ADDR_MASK(mask) (mask) + +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)<<31) +#define DMA_PMEN_PRS (((u32)1)<<0) + +/* GCMD_REG */ +#define DMA_GCMD_TE (((u32)1) << 31) +#define DMA_GCMD_SRTP (((u32)1) << 30) +#define DMA_GCMD_SFL (((u32)1) << 29) +#define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_WBF (((u32)1) << 27) +#define DMA_GCMD_QIE (((u32)1) << 26) +#define DMA_GCMD_SIRTP (((u32)1) << 24) +#define DMA_GCMD_IRE (((u32) 1) << 25) + +/* GSTS_REG */ +#define DMA_GSTS_TES (((u32)1) << 31) +#define DMA_GSTS_RTPS (((u32)1) << 30) +#define DMA_GSTS_FLS (((u32)1) << 29) +#define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_WBFS (((u32)1) << 27) +#define DMA_GSTS_QIES (((u32)1) << 26) +#define DMA_GSTS_IRTPS (((u32)1) << 24) +#define DMA_GSTS_IRES (((u32)1) << 25) + +/* CCMD_REG */ +#define DMA_CCMD_ICC (((u64)1) << 63) +#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) +#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) +#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) +#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) +#define DMA_CCMD_MASK_NOBIT 0 +#define DMA_CCMD_MASK_1BIT 1 +#define DMA_CCMD_MASK_2BIT 2 +#define DMA_CCMD_MASK_3BIT 3 +#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) +#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) + +/* FECTL_REG */ +#define DMA_FECTL_IM (((u32)1) << 31) + +/* FSTS_REG */ +#define DMA_FSTS_PPF ((u32)2) +#define DMA_FSTS_PFO ((u32)1) +#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) + +/* FRCD_REG, 32 bits access */ +#define DMA_FRCD_F (((u32)1) << 31) +#define dma_frcd_type(d) ((d >> 30) & 1) +#define dma_frcd_fault_reason(c) (c & 0xff) +#define dma_frcd_source_id(c) (c & 0xffff) +#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ + +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +{\ + cycles_t start_time = get_cycles();\ + while (1) {\ + sts = op (iommu->reg + offset);\ + if (cond)\ + break;\ + if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ + panic("DMAR hardware is malfunctioning\n");\ + cpu_relax();\ + }\ +} + +#define QI_LENGTH 256 /* queue length */ + +enum { + QI_FREE, + QI_IN_USE, + QI_DONE +}; + +#define QI_CC_TYPE 0x1 +#define QI_IOTLB_TYPE 0x2 +#define QI_DIOTLB_TYPE 0x3 +#define QI_IEC_TYPE 0x4 +#define QI_IWD_TYPE 0x5 + +#define QI_IEC_SELECTIVE (((u64)1) << 4) +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) + +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) +#define QI_IWD_STATUS_WRITE (((u64)1) << 5) + +struct qi_desc { + u64 low, high; +}; + +struct q_inval { + spinlock_t q_lock; + struct qi_desc *desc; /* invalidation queue */ + int *desc_status; /* desc status */ + int free_head; /* first free entry */ + int free_tail; /* last free entry */ + int free_cnt; +}; + +#ifdef CONFIG_INTR_REMAP +/* 1MB - maximum possible interrupt remapping table size */ +#define INTR_REMAP_PAGE_ORDER 8 +#define INTR_REMAP_TABLE_REG_SIZE 0xf + +#define INTR_REMAP_TABLE_ENTRIES 65536 + +struct ir_table { + struct irte *base; +}; +#endif + +struct intel_iommu { + void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u64 cap; + u64 ecap; + int seg; + u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ + spinlock_t register_lock; /* protect register handling */ + int seq_id; /* sequence id of the iommu */ + +#ifdef CONFIG_DMAR + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain **domains; /* ptr to domains */ + spinlock_t lock; /* protect context, domain ids */ + struct root_entry *root_entry; /* virtual address */ + + unsigned int irq; + unsigned char name[7]; /* Device Name */ + struct msi_msg saved_msg; + struct sys_device sysdev; +#endif + struct q_inval *qi; /* Queued invalidation info */ +#ifdef CONFIG_INTR_REMAP + struct ir_table *ir_table; /* Interrupt remapping info */ +#endif +}; + +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + +extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + +extern int alloc_iommu(struct dmar_drhd_unit *drhd); +extern void free_iommu(struct intel_iommu *iommu); +extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void qi_global_iec(struct intel_iommu *iommu); + +extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); + +void intel_iommu_domain_exit(struct dmar_domain *domain); +struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); +int intel_iommu_context_mapping(struct dmar_domain *domain, + struct pci_dev *pdev); +int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot); +void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); +struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); +u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); + +#ifdef CONFIG_DMAR +int intel_iommu_found(void); +#else /* CONFIG_DMAR */ +static inline int intel_iommu_found(void) +{ + return 0; +} +#endif /* CONFIG_DMAR */ + +#endif diff --git a/include/linux/iova.h b/include/linux/iova.h new file mode 100644 index 00000000000..228f6c94b69 --- /dev/null +++ b/include/linux/iova.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This file is released under the GPLv2. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Anil S Keshavamurthy + * + */ + +#ifndef _IOVA_H_ +#define _IOVA_H_ + +#include +#include +#include +#include + +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) + +/* iova structure */ +struct iova { + struct rb_node node; + unsigned long pfn_hi; /* IOMMU dish out addr hi */ + unsigned long pfn_lo; /* IOMMU dish out addr lo */ +}; + +/* holds all the iova translations for a domain */ +struct iova_domain { + spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ + spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ + struct rb_root rbroot; /* iova domain rbtree root */ + struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long dma_32bit_pfn; +}; + +struct iova *alloc_iova_mem(void); +void free_iova_mem(struct iova *iova); +void free_iova(struct iova_domain *iovad, unsigned long pfn); +void __free_iova(struct iova_domain *iovad, struct iova *iova); +struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, + bool size_aligned); +struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, + unsigned long pfn_hi); +void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); +void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); +void put_iova_domain(struct iova_domain *iovad); + +#endif -- cgit v1.2.3 From 62c476c7c7f25a5b245b9902a935636e6316e58c Mon Sep 17 00:00:00 2001 From: Ben-Ami Yassour Date: Sun, 14 Sep 2008 03:48:28 +0300 Subject: KVM: Device Assignment with VT-d Based on a patch by: Kay, Allen M This patch enables PCI device assignment based on VT-d support. When a device is assigned to the guest, the guest memory is pinned and the mapping is updated in the VT-d IOMMU. [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present and also control enable/disable from userspace] Signed-off-by: Kay, Allen M Signed-off-by: Weidong Han Signed-off-by: Ben-Ami Yassour Signed-off-by: Amit Shah Acked-by: Mark Gross Signed-off-by: Avi Kivity --- include/linux/kvm.h | 3 +++ include/linux/kvm_host.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ef4bc6f8977..4269be171fa 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -384,6 +384,7 @@ struct kvm_trace_rec { #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17 +#define KVM_CAP_IOMMU 18 /* * ioctls for VM fds @@ -495,4 +496,6 @@ struct kvm_assigned_irq { __u32 flags; }; +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4b036430ea2..6252802c3cc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -286,6 +286,53 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +struct kvm_irq_ack_notifier { + struct hlist_node link; + unsigned gsi; + void (*irq_acked)(struct kvm_irq_ack_notifier *kian); +}; + +struct kvm_assigned_dev_kernel { + struct kvm_irq_ack_notifier ack_notifier; + struct work_struct interrupt_work; + struct list_head list; + int assigned_dev_id; + int host_busnr; + int host_devfn; + int host_irq; + int guest_irq; + int irq_requested; + struct pci_dev *dev; + struct kvm *kvm; +}; + +#ifdef CONFIG_DMAR +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, + unsigned long npages); +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_iommu_unmap_guest(struct kvm *kvm); +#else /* CONFIG_DMAR */ +static inline int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, + unsigned long npages) +{ + return 0; +} + +static inline int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + return -ENODEV; +} + +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + return 0; +} +#endif /* CONFIG_DMAR */ + static inline void kvm_guest_enter(void) { account_system_vtime(current); @@ -308,6 +355,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn) return (gpa_t)gfn << PAGE_SHIFT; } +static inline hpa_t pfn_to_hpa(pfn_t pfn) +{ + return (hpa_t)pfn << PAGE_SHIFT; +} + static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) { set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); -- cgit v1.2.3 From 4731d4c7a07769cf2926c327177b97bb8c68cafc Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 23 Sep 2008 13:18:39 -0300 Subject: KVM: MMU: out of sync shadow core Allow guest pagetables to go out of sync. Instead of emulating write accesses to guest pagetables, or unshadowing them, we un-write-protect the page table and allow the guest to modify it at will. We rely on invlpg executions to synchronize individual ptes, and will synchronize the entire pagetable on tlb flushes. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6252802c3cc..73b7c52b949 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -35,6 +35,7 @@ #define KVM_REQ_TRIPLE_FAULT 4 #define KVM_REQ_PENDING_TIMER 5 #define KVM_REQ_UNHALT 6 +#define KVM_REQ_MMU_SYNC 7 struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -- cgit v1.2.3 From 8a98f6648a2b0756d8f26d6c13332f5526355fec Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Mon, 6 Oct 2008 13:47:38 +0800 Subject: KVM: Move device assignment logic to common code To share with other archs, this patch moves device assignment logic to common parts. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 ++ include/linux/kvm_host.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4269be171fa..9acf34a6dfb 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -383,7 +383,9 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ +#ifdef CONFIG_X86 #define KVM_CAP_DEVICE_ASSIGNMENT 17 +#endif #define KVM_CAP_IOMMU 18 /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 73b7c52b949..10c1146cd00 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -281,6 +281,7 @@ void kvm_free_physmem(struct kvm *kvm); struct kvm *kvm_arch_create_vm(void); void kvm_arch_destroy_vm(struct kvm *kvm); +void kvm_free_all_assigned_devices(struct kvm *kvm); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); -- cgit v1.2.3 From c77fb9dc7a0383c86eabef30272a763a482403e1 Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Sat, 27 Sep 2008 10:55:40 +0800 Subject: KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for all archs Add a kvm_ prefix to avoid polluting kernel's name space. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 10c1146cd00..b3b7598b4d9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -288,6 +288,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +int kvm_is_mmio_pfn(pfn_t pfn); + struct kvm_irq_ack_notifier { struct hlist_node link; unsigned gsi; -- cgit v1.2.3 From 3de42dc094ecd313dc7d551e007a134b52f8663d Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Mon, 6 Oct 2008 13:48:45 +0800 Subject: KVM: Separate irq ack notification out of arch/x86/kvm/irq.c Moving irq ack notification logic as common, and make it shared with ia64 side. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b3b7598b4d9..3833c48fae3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -309,6 +309,12 @@ struct kvm_assigned_dev_kernel { struct pci_dev *dev; struct kvm *kvm; }; +void kvm_set_irq(struct kvm *kvm, int irq, int level); +void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, -- cgit v1.2.3 From 2381ad241d0bea1253a37f314b270848067640bb Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Wed, 8 Oct 2008 08:29:33 +0800 Subject: KVM: ia64: Add intel iommu support for guests. With intel iommu hardware, we can assign devices to kvm/ia64 guests. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 9acf34a6dfb..797fcd78124 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -383,7 +383,7 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#ifdef CONFIG_X86 +#if defined(CONFIG_X86)||defined(CONFIG_IA64) #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 -- cgit v1.2.3 From be585c07dd577faac26014db4246e6d7c7a131e7 Mon Sep 17 00:00:00 2001 From: Jay Fenlason Date: Wed, 1 Oct 2008 18:13:20 -0400 Subject: firewire: Add more documentation to firewire-cdev.h Signed-off-by: Jay Fenlason Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 0f0e271f97f..4d078e99c01 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -154,8 +154,13 @@ struct fw_cdev_event_iso_interrupt { * @request: Valid if @common.type == %FW_CDEV_EVENT_REQUEST * @iso_interrupt: Valid if @common.type == %FW_CDEV_EVENT_ISO_INTERRUPT * - * Convenience union for userspace use. Events could be read(2) into a char - * buffer and then cast to this union for further processing. + * Convenience union for userspace use. Events could be read(2) into an + * appropriately aligned char buffer and then cast to this union for further + * processing. Note that for a request, response or iso_interrupt event, + * the data[] or header[] may make the size of the full event larger than + * sizeof(union fw_cdev_event). Also note that if you attempt to read(2) + * an event into a buffer that is not large enough for it, the data that does + * not fit will be discarded so that the next read(2) will return a new event. */ union fw_cdev_event { struct fw_cdev_event_common common; -- cgit v1.2.3 From 22441cfa0c70dcd457f3c081fcf285c3bd155824 Mon Sep 17 00:00:00 2001 From: Pedro Ribeiro Date: Wed, 15 Oct 2008 15:47:49 -0700 Subject: IPV6: Fix default gateway criteria wrt. HIGH/LOW preference radv option Problem observed: In IPv6, in the presence of multiple routers candidates to default gateway in one segment, each sending a different value of preference, the Linux hosts connected to the segment weren't selecting the right one in all the combinations possible of LOW/MEDIUM/HIGH preference. This patch changes two files: include/linux/icmpv6.h Get the "router_pref" bitfield in the right place (as RFC4191 says), named the bit left with this fix as "home_agent" (RFC3775 say that's his function) net/ipv6/ndisc.c Corrects the binary logic behind the updating of the router preference in the flags of the routing table Result: With this two fixes applied, the default route used by the system was to consistent with the rules mentioned in RFC4191 in case of changes in the value of preference in router advertisements Signed-off-by: Pedro Ribeiro Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 03067443198..a93a8dd3311 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -40,16 +40,18 @@ struct icmp6hdr { struct icmpv6_nd_ra { __u8 hop_limit; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved:4, + __u8 reserved:3, router_pref:2, + home_agent:1, other:1, managed:1; #elif defined(__BIG_ENDIAN_BITFIELD) __u8 managed:1, other:1, + home_agent:1, router_pref:2, - reserved:4; + reserved:3; #else #error "Please fix " #endif -- cgit v1.2.3 From 29d434b39c807320fbe4bcdce0ab98a0b9fcb285 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Oct 2008 16:08:57 +0200 Subject: fuse: add include protectors Add include protectors to include/linux/fuse.h and fs/fuse/fuse_i.h. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 265635dc990..8bc1101e9b3 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -19,6 +19,9 @@ * - add file flags field to fuse_read_in and fuse_write_in */ +#ifndef _LINUX_FUSE_H +#define _LINUX_FUSE_H + #include #include @@ -409,3 +412,5 @@ struct fuse_dirent { #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1)) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) + +#endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From a7c1b990f71574e077b94ce4582e2cf11cb891fe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Oct 2008 16:08:57 +0200 Subject: fuse: implement nonseekable open Let the client request nonseekable open using FOPEN_NONSEEKABLE and call nonseekable_open() on the file if requested. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 8bc1101e9b3..350fe9767bb 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -17,6 +17,9 @@ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in * - add blksize field to fuse_attr * - add file flags field to fuse_read_in and fuse_write_in + * + * 7.10 + * - add nonseekable open flag */ #ifndef _LINUX_FUSE_H @@ -29,7 +32,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 9 +#define FUSE_KERNEL_MINOR_VERSION 10 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -101,9 +104,11 @@ struct fuse_file_lock { * * FOPEN_DIRECT_IO: bypass page cache for this open file * FOPEN_KEEP_CACHE: don't invalidate the data cache on open + * FOPEN_NONSEEKABLE: the file is not seekable */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) +#define FOPEN_NONSEEKABLE (1 << 2) /** * INIT request/reply flags -- cgit v1.2.3 From 17bc6c30cf6bfffd816bdc53682dd46fc34a2cf4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 16 Oct 2008 10:09:17 -0400 Subject: vfs: Add no_nrwrite_index_update writeback control flag If no_nrwrite_index_update is set we don't update nr_to_write and address space writeback_index in write_cache_pages. This change enables a file system to skip these updates in write_cache_pages and do them in the writepages() callback. This patch will be followed by an ext4 patch that make use of these new flags. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" CC: linux-fsdevel@vger.kernel.org --- include/linux/writeback.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index bd91987c065..e585657e983 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -63,6 +63,15 @@ struct writeback_control { unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ + /* + * write_cache_pages() won't update wbc->nr_to_write and + * mapping->writeback_index if no_nrwrite_index_update + * is set. write_cache_pages() may write more than we + * requested and we want to make sure nr_to_write and + * writeback_index are updated in a consistent manner + * so we use a single control to update them + */ + unsigned no_nrwrite_index_update:1; }; /* -- cgit v1.2.3 From 3ddfda11861d305b02ed810b522dcf48b74ca808 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:43 -0700 Subject: generic: add dyn_array support Allow crazy big arrays via bootmem at init stage. Architectures use CONFIG_HAVE_DYN_ARRAY to enable it. usage: | static struct irq_desc irq_desc_init __initdata = { | .status = IRQ_DISABLED, | .chip = &no_irq_chip, | .handle_irq = handle_bad_irq, | .depth = 1, | .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), | #ifdef CONFIG_SMP | .affinity = CPU_MASK_ALL | #endif | }; | | static void __init init_work(void *data) | { | struct dyn_array *da = data; | struct irq_desc *desc; | int i; | | desc = *da->name; | | for (i = 0; i < *da->nr; i++) | memcpy(&desc[i], &irq_desc_init, sizeof(struct irq_desc)); | } | | struct irq_desc *irq_desc; | DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work); after pre_alloc_dyn_array() after setup_arch(), the array is ready to be used. Via this facility we can replace irq_desc[NR_IRQS] array with dyn_array irq_desc[nr_irqs]. v2: remove _nopanic in pre_alloc_dyn_array() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/init.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 93538b696e3..cf9fa7f174a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -246,6 +246,29 @@ struct obs_kernel_param { /* Relies on boot_command_line being set */ void __init parse_early_param(void); + +struct dyn_array { + void **name; + unsigned long size; + unsigned int *nr; + unsigned long align; + void (*init_work)(void *); +}; +extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[]; + +#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \ + static struct dyn_array __dyn_array_##nameX __initdata = \ + { .name = (void **)&nameX,\ + .size = sizeX,\ + .nr = &nrX,\ + .align = alignX,\ + .init_work = init_workX,\ + }; \ + static struct dyn_array *__dyn_array_ptr_##nameX __used \ + __attribute__((__section__(".dyn_array.init"))) = \ + &__dyn_array_##nameX + +extern void pre_alloc_dyn_array(void); #endif /* __ASSEMBLY__ */ /** -- cgit v1.2.3 From 1f3fcd4b1adc972d5c6a34cfed98931c46575b49 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:44 -0700 Subject: add per_cpu_dyn_array support allow dyn-array in per_cpu area, allocated dynamically. usage: | /* in .h */ | struct kernel_stat { | struct cpu_usage_stat cpustat; | unsigned int *irqs; | }; | | /* in .c */ | DEFINE_PER_CPU(struct kernel_stat, kstat); | | DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL); after setup_percpu()/per_cpu_alloc_dyn_array(), the dyn_array in per_cpu area is ready to use. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/init.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index cf9fa7f174a..332806826b8 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -255,12 +255,13 @@ struct dyn_array { void (*init_work)(void *); }; extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[]; +extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[]; -#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \ +#define DEFINE_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \ static struct dyn_array __dyn_array_##nameX __initdata = \ - { .name = (void **)&nameX,\ + { .name = (void **)&(nameX),\ .size = sizeX,\ - .nr = &nrX,\ + .nr = &(nrX),\ .align = alignX,\ .init_work = init_workX,\ }; \ @@ -268,7 +269,27 @@ extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[]; __attribute__((__section__(".dyn_array.init"))) = \ &__dyn_array_##nameX +#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \ + DEFINE_DYN_ARRAY_ADDR(nameX, nameX, sizeX, nrX, alignX, init_workX) + +#define DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \ + static struct dyn_array __per_cpu_dyn_array_##nameX __initdata = \ + { .name = (void **)&(addrX),\ + .size = sizeX,\ + .nr = &(nrX),\ + .align = alignX,\ + .init_work = init_workX,\ + }; \ + static struct dyn_array *__per_cpu_dyn_array_ptr_##nameX __used \ + __attribute__((__section__(".per_cpu_dyn_array.init"))) = \ + &__per_cpu_dyn_array_##nameX + +#define DEFINE_PER_CPU_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \ + DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX) + extern void pre_alloc_dyn_array(void); +extern unsigned long per_cpu_dyn_array_size(void); +extern void per_cpu_alloc_dyn_array(int cpu, char *ptr); #endif /* __ASSEMBLY__ */ /** -- cgit v1.2.3 From 1f8ff037a871690c762d267d8a052529d3102fc9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:45 -0700 Subject: x86: alloc dyn_array all together so could spare some memory with small alignment in bootmem also tighten the alignment checking, and make print out less debug info. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/init.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 332806826b8..59fbb4aaba6 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -288,7 +288,7 @@ extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[] DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX) extern void pre_alloc_dyn_array(void); -extern unsigned long per_cpu_dyn_array_size(void); +extern unsigned long per_cpu_dyn_array_size(unsigned long *align); extern void per_cpu_alloc_dyn_array(int cpu, char *ptr); #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 85c0f90978bf50596dbd23854648020f1f9b5bfd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:47 -0700 Subject: irq: introduce nr_irqs at this point nr_irqs is equal NR_IRQS convert a few easy users from NR_IRQS to dynamic nr_irqs. v2: according to Eric, we need to take care of arch without generic_hardirqs Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 58ff4e74b2f..511803853a5 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -15,6 +15,8 @@ #include #include +extern int nr_irqs; + /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When -- cgit v1.2.3 From d60458b224d6b997a582a05cb8c4b9bed9e17a1d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:00 -0700 Subject: irq: make irq_desc to use dyn_array Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1d73d1abb83..5f4b013624d 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -179,7 +179,11 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; +#ifdef CONFIG_HAVE_DYN_ARRAY +extern struct irq_desc *irq_desc; +#else extern struct irq_desc irq_desc[NR_IRQS]; +#endif /* * Migration helpers for obsolete names, they will go away: -- cgit v1.2.3 From d17a55ded3393ad3878010bb3a8243a15a8d8df5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:01 -0700 Subject: irq: make irqs in kernel stat use per_cpu_dyn_array Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index cf9f40a91c9..fe1f7fe534b 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,7 +28,11 @@ struct cpu_usage_stat { struct kernel_stat { struct cpu_usage_stat cpustat; +#ifdef CONFIG_HAVE_DYN_ARRAY + unsigned int *irqs; +#else unsigned int irqs[NR_IRQS]; +#endif }; DECLARE_PER_CPU(struct kernel_stat, kstat); -- cgit v1.2.3 From 08678b0841267c1d00d771fe01548d86043d065e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:05 -0700 Subject: generic: sparse irqs: use irq_desc() together with dyn_array, instead of irq_desc[] add CONFIG_HAVE_SPARSE_IRQ to for use condensed array. Get rid of irq_desc[] array assumptions. Preallocate 32 irq_desc, and irq_desc() will try to get more. ( No change in functionality is expected anywhere, except the odd build failure where we missed a code site or where a crossing commit itroduces new irq_desc[] usage. ) v2: according to Eric, change get_irq_desc() to irq_desc() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 5f4b013624d..80b8200f2ad 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -152,6 +152,10 @@ struct irq_chip { * @name: flow handler name for /proc/interrupts output */ struct irq_desc { + unsigned int irq; +#ifdef CONFIG_HAVE_SPARSE_IRQ + struct irq_desc *next; +#endif irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; @@ -179,9 +183,9 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -#ifdef CONFIG_HAVE_DYN_ARRAY -extern struct irq_desc *irq_desc; -#else +extern struct irq_desc *irq_to_desc(unsigned int irq); +#ifndef CONFIG_HAVE_DYN_ARRAY +/* could be removed if we get rid of all irq_desc reference */ extern struct irq_desc irq_desc[NR_IRQS]; #endif @@ -249,7 +253,10 @@ extern int no_irq_affinity; static inline int irq_balancing_disabled(unsigned int irq) { - return irq_desc[irq].status & IRQ_NO_BALANCING_MASK; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + return desc->status & IRQ_NO_BALANCING_MASK; } /* Handle irq action chains: */ @@ -281,7 +288,7 @@ extern unsigned int __do_IRQ(unsigned int irq); */ static inline void generic_handle_irq(unsigned int irq) { - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ desc->handle_irq(irq, desc); @@ -325,7 +332,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, static inline void __set_irq_handler_unlocked(int irq, irq_flow_handler_t handler) { - irq_desc[irq].handle_irq = handler; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->handle_irq = handler; } /* @@ -359,7 +369,7 @@ extern void destroy_irq(unsigned int irq); /* Test to see if a driver has successfully requested an irq */ static inline int irq_has_action(unsigned int irq) { - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); return desc->action != NULL; } @@ -374,10 +384,10 @@ extern int set_irq_chip_data(unsigned int irq, void *data); extern int set_irq_type(unsigned int irq, unsigned int type); extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); -#define get_irq_chip(irq) (irq_desc[irq].chip) -#define get_irq_chip_data(irq) (irq_desc[irq].chip_data) -#define get_irq_data(irq) (irq_desc[irq].handler_data) -#define get_irq_msi(irq) (irq_desc[irq].msi_desc) +#define get_irq_chip(irq) (irq_to_desc(irq)->chip) +#define get_irq_chip_data(irq) (irq_to_desc(irq)->chip_data) +#define get_irq_data(irq) (irq_to_desc(irq)->handler_data) +#define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) #endif /* CONFIG_GENERIC_HARDIRQS */ -- cgit v1.2.3 From 3060d6fe28570640c2d7d66d38b9eaa848c3b9e3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:08 -0700 Subject: x86: put timer_rand_state pointer into irq_desc irq_timer_state[] is a NR_IRQS sized array that is a side-by array to the real irq_desc[] array. Integrate that field into the (now dynamic) irq_desc dynamic array and save some RAM. v2: keep the old way to support arch not support irq_desc Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 80b8200f2ad..60c856aaac0 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -127,6 +127,7 @@ struct irq_chip { const char *typename; }; +struct timer_rand_state; /** * struct irq_desc - interrupt descriptor * @@ -155,6 +156,7 @@ struct irq_desc { unsigned int irq; #ifdef CONFIG_HAVE_SPARSE_IRQ struct irq_desc *next; + struct timer_rand_state *timer_rand_state; #endif irq_flow_handler_t handle_irq; struct irq_chip *chip; -- cgit v1.2.3 From 7f95ec9e4c12fd067febfd57532da1166d75d858 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:09 -0700 Subject: x86: move kstat_irqs from kstat to irq_desc based on Eric's patch ... together mold it with dyn_array for irq_desc, will allcate kstat_irqs for nr_irq_desc alltogether if needed. -- at that point nr_cpus is known already. v2: make sure system without generic_hardirqs works they don't have irq_desc v3: fix merging v4: [mingo@elte.hu] fix typo [ mingo@elte.hu ] irq: build fix fix: arch/x86/xen/spinlock.c: In function 'xen_spin_lock_slow': arch/x86/xen/spinlock.c:90: error: 'struct kernel_stat' has no member named 'irqs' Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 7 +++++++ include/linux/kernel_stat.h | 22 +++++++++++++++------- 2 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 60c856aaac0..cbf471aee1c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -157,6 +157,11 @@ struct irq_desc { #ifdef CONFIG_HAVE_SPARSE_IRQ struct irq_desc *next; struct timer_rand_state *timer_rand_state; +#endif +#ifdef CONFIG_HAVE_DYN_ARRAY + unsigned int *kstat_irqs; +#else + unsigned int kstat_irqs[NR_CPUS]; #endif irq_flow_handler_t handle_irq; struct irq_chip *chip; @@ -190,6 +195,8 @@ extern struct irq_desc *irq_to_desc(unsigned int irq); /* could be removed if we get rid of all irq_desc reference */ extern struct irq_desc irq_desc[NR_IRQS]; #endif +#define kstat_irqs_this_cpu(DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]) /* * Migration helpers for obsolete names, they will go away: diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index fe1f7fe534b..f10616712de 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,10 +28,8 @@ struct cpu_usage_stat { struct kernel_stat { struct cpu_usage_stat cpustat; -#ifdef CONFIG_HAVE_DYN_ARRAY - unsigned int *irqs; -#else - unsigned int irqs[NR_IRQS]; +#ifndef CONFIG_GENERIC_HARDIRQS + unsigned int irqs[NR_IRQS]; #endif }; @@ -43,15 +41,25 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); extern unsigned long long nr_context_switches(void); +#ifndef CONFIG_GENERIC_HARDIRQS +static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) +{ + return kstat_cpu(cpu).irqs[irq]; +} +#else +extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); +#endif + /* * Number of interrupts per specific IRQ source, since bootup */ -static inline int kstat_irqs(int irq) +static inline unsigned int kstat_irqs(unsigned int irq) { - int cpu, sum = 0; + unsigned int sum = 0; + int cpu; for_each_possible_cpu(cpu) - sum += kstat_cpu(cpu).irqs[irq]; + sum += kstat_irqs_cpu(irq, cpu); return sum; } -- cgit v1.2.3 From 9059d8fa4a3a9153da53da890039f7f956cc9d19 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:10 -0700 Subject: irq: add irq_desc_without_new add an irq_desc accessor that will not allocate any sparse entry but returns failure if there's no entry present. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index cbf471aee1c..c9ffef7c3b4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -191,10 +191,23 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; extern struct irq_desc *irq_to_desc(unsigned int irq); +extern struct irq_desc *__irq_to_desc(unsigned int irq); + +#ifndef CONFIG_HAVE_SPARSE_IRQ + #ifndef CONFIG_HAVE_DYN_ARRAY /* could be removed if we get rid of all irq_desc reference */ extern struct irq_desc irq_desc[NR_IRQS]; +#else +extern struct irq_desc *irq_desc; #endif + +#else + +extern struct irq_desc *sparse_irqs; + +#endif + #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) -- cgit v1.2.3 From 2c6927a38f65b53b62f86158fba29a068c4e8b6a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:11 -0700 Subject: irq: replace loop with nr_irqs with for_each_irq_desc There are a handful of loops that go from 0 to nr_irqs and use get_irq_desc() on them. These would allocate all the irq_desc entries, regardless of the need for them. Use the smarter for_each_irq_desc() iterator that will only iterate over the present ones. v2: make sure arch without GENERIC_HARDIRQS work too Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c9ffef7c3b4..9de16ca8b8e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -202,9 +202,16 @@ extern struct irq_desc irq_desc[NR_IRQS]; extern struct irq_desc *irq_desc; #endif +#ifdef CONFIG_GENERIC_HARDIRQS +#define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc = &irq_desc[irq]) +#endif + #else extern struct irq_desc *sparse_irqs; +#define for_each_irq_desc(irqX, desc) \ + for (desc = sparse_irqs, irqX = desc->irq; desc && irqX != -1U; desc = desc->next, irqX = desc ? desc->irq : -1U) #endif -- cgit v1.2.3 From c7fb03a475bd80c642c1345d85c7c550f63514b8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:12 -0700 Subject: irq, fs/proc: replace loop with nr_irqs for proc/stat Replace another nr_irqs loop to avoid the allocation of all sparse irq entries - use for_each_irq_desc instead. v2: make sure arch without GENERIC_HARDIRQS works too Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 511803853a5..d4039a0b23f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -17,6 +17,11 @@ extern int nr_irqs; +#ifndef CONFIG_GENERIC_HARDIRQS +#define for_each_irq_desc(irq, desc) \ + for (irq = 0; irq < nr_irqs; irq++) +#endif + /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When -- cgit v1.2.3 From 46926b67fc663d357a1a8174328998a9e49da0b8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:15 -0700 Subject: generic: add irq_desc in function in parameter So we could remove some duplicated calling to irq_desc v2: make sure irq_desc in init/main.c is not used without generic_hardirqs Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 9de16ca8b8e..7b59e193a11 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -315,10 +315,8 @@ extern unsigned int __do_IRQ(unsigned int irq); * irqchip-style controller then we call the ->handle_irq() handler, * and it calls __do_IRQ() if it's attached to an irqtype-style controller. */ -static inline void generic_handle_irq(unsigned int irq) +static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) { - struct irq_desc *desc = irq_to_desc(irq); - #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ desc->handle_irq(irq, desc); #else @@ -329,6 +327,11 @@ static inline void generic_handle_irq(unsigned int irq) #endif } +static inline void generic_handle_irq(unsigned int irq) +{ + generic_handle_irq_desc(irq, irq_to_desc(irq)); +} + /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(unsigned int irq, struct irq_desc *desc, int action_ret); -- cgit v1.2.3 From cb5bc83225a86ca53bbb889ed8439e4fd6cf44ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:17 -0700 Subject: x86_64: rename irq_desc/irq_desc_alloc change names: irq_desc() ==> irq_desc_alloc __irq_desc() ==> irq_desc Also split a few of the uses in lowlevel x86 code. v2: need to check if desc is null in smp_irq_move_cleanup Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 7b59e193a11..5fe1b01c11f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -191,7 +191,7 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; extern struct irq_desc *irq_to_desc(unsigned int irq); -extern struct irq_desc *__irq_to_desc(unsigned int irq); +extern struct irq_desc *irq_to_desc_alloc(unsigned int irq); #ifndef CONFIG_HAVE_SPARSE_IRQ -- cgit v1.2.3 From 67fb283e148e9bd761f73691d3173b6eab9ba8db Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:18 -0700 Subject: irq: separate sparse_irqs from sparse_irqs_free so later don't need compare with -1U Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 5fe1b01c11f..d5749852ee6 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -211,7 +211,7 @@ extern struct irq_desc *irq_desc; extern struct irq_desc *sparse_irqs; #define for_each_irq_desc(irqX, desc) \ - for (desc = sparse_irqs, irqX = desc->irq; desc && irqX != -1U; desc = desc->next, irqX = desc ? desc->irq : -1U) + for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U) #endif -- cgit v1.2.3 From e420dfb40c453a9760b86c7f338052bdb4dfa755 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:21 -0700 Subject: x86: put irq_2_iommu pointer into irq_desc when CONFIG_HAVE_SPARSE_IRQ preallocate some irq_2_iommu entries, and use get_one_free_irq_2_iomm to get new one and link to irq_desc if needed. else will use dyn_array or static array. v2: <= nr_irqs fix Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index d5749852ee6..788d5a35a58 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -128,6 +128,7 @@ struct irq_chip { }; struct timer_rand_state; +struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @@ -162,6 +163,9 @@ struct irq_desc { unsigned int *kstat_irqs; #else unsigned int kstat_irqs[NR_CPUS]; +#endif +#if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ) + struct irq_2_iommu *irq_2_iommu; #endif irq_flow_handler_t handle_irq; struct irq_chip *chip; -- cgit v1.2.3 From 6d50bc26836e16a9589e0b128d527c29e30d722a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:22 -0700 Subject: x86: use 28 bits irq NR for pci msi/msix and ht also print out irq no in /proc/interrups and /proc/stat in hex, so could tell bus/dev/func. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 788d5a35a58..704136138dc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -399,6 +399,7 @@ extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); /* Handle dynamic irq creation and destruction */ +extern unsigned int create_irq_nr(unsigned int irq_want); extern int create_irq(void); extern void destroy_irq(unsigned int irq); -- cgit v1.2.3 From 8b8e8c1bf7275eca859fe551dfa484134eaf013b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:23 -0700 Subject: x86: remove irqbalance in kernel for 32 bit This has been deprecated for years, the user space irqbalanced utility works better with numa, has configurable policies, etc... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 704136138dc..2445d2b3d5d 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -185,7 +185,7 @@ struct irq_desc { cpumask_t affinity; unsigned int cpu; #endif -#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) +#ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_t pending_mask; #endif #ifdef CONFIG_PROC_FS @@ -241,13 +241,13 @@ extern int setup_irq(unsigned int irq, struct irqaction *new); #ifdef CONFIG_SMP -#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) +#ifdef CONFIG_GENERIC_PENDING_IRQ void set_pending_irq(unsigned int irq, cpumask_t mask); void move_native_irq(int irq); void move_masked_irq(int irq); -#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */ +#else /* CONFIG_GENERIC_PENDING_IRQ */ static inline void move_irq(int irq) { @@ -274,14 +274,6 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask) #endif /* CONFIG_SMP */ -#ifdef CONFIG_IRQBALANCE -extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask); -#else -static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) -{ -} -#endif - extern int no_irq_affinity; static inline int irq_balancing_disabled(unsigned int irq) -- cgit v1.2.3 From 42379b1122bab7f9aefdbd4b7004a6fa89dfbae5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:45 -0700 Subject: pci: change msi-x vector to 32bit we are using 28bit pci (bus/dev/fn + 12 bits) as irq number, so the cache for irq number should be 32 bit too. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 98dc6243a70..1f8db240ca4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -723,7 +723,7 @@ enum pci_dma_burst_strategy { }; struct msix_entry { - u16 vector; /* kernel uses to write allocated vector */ + u32 vector; /* kernel uses to write allocated vector */ u16 entry; /* driver uses to specify entry, OS writes */ }; -- cgit v1.2.3 From 8c464a4b23ca283b414022ebc77787f3c7040fa7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 Aug 2008 12:41:19 -0700 Subject: sparseirq: move kstat_irqs from kstat to irq_desc - fix fix non-sparseirq architectures. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/irq.h | 4 ++-- include/linux/kernel_stat.h | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2445d2b3d5d..93fe9a943e7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -161,8 +161,6 @@ struct irq_desc { #endif #ifdef CONFIG_HAVE_DYN_ARRAY unsigned int *kstat_irqs; -#else - unsigned int kstat_irqs[NR_CPUS]; #endif #if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ) struct irq_2_iommu *irq_2_iommu; @@ -219,8 +217,10 @@ extern struct irq_desc *sparse_irqs; #endif +#ifdef CONFIG_HAVE_DYN_ARRAY #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) +#endif /* * Migration helpers for obsolete names, they will go away: diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index f10616712de..21249d8c129 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,7 +28,7 @@ struct cpu_usage_stat { struct kernel_stat { struct cpu_usage_stat cpustat; -#ifndef CONFIG_GENERIC_HARDIRQS +#ifndef CONFIG_HAVE_DYN_ARRAY unsigned int irqs[NR_IRQS]; #endif }; @@ -41,7 +41,13 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); extern unsigned long long nr_context_switches(void); -#ifndef CONFIG_GENERIC_HARDIRQS +#ifndef CONFIG_HAVE_DYN_ARRAY +#define kstat_irqs_this_cpu(irq) \ + (kstat_this_cpu.irqs[irq]) +#endif + + +#ifndef CONFIG_HAVE_DYN_ARRAY static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { return kstat_cpu(cpu).irqs[irq]; -- cgit v1.2.3 From f6dd5c3106fb283e37d915eeb33019ef40510f85 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 3 Sep 2008 16:58:32 -0700 Subject: dmar: fix using early fixmap mapping for DMAR table parsing Very early detection of the DMAR tables will setup fixmap mapping. For parsing these tables later (while enabling dma and/or interrupt remapping), early fixmap mapping shouldn't be used. Fix it by calling table detection routines again, which will call generic apci_get_table() for setting up the correct mapping. Signed-off-by: Yinghai Lu Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c360c558e59..f1984fc3e06 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -45,7 +45,6 @@ extern struct list_head dmar_drhd_units; list_for_each_entry(drhd, &dmar_drhd_units, list) extern int dmar_table_init(void); -extern int early_dmar_detect(void); extern int dmar_dev_scope_init(void); /* Intel IOMMU detection */ -- cgit v1.2.3 From a50f70b17541c0060967c6df61133e968bad3652 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:58:54 -0500 Subject: x86: Add UV EFI table entry v4 Look for a UV entry in the EFI tables. Signed-off-by: Russ Anderson Signed-off-by: Paul Jackson Acked-by: Huang Ying Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 807373d467f..bb66feb164b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -208,6 +208,9 @@ typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_siz #define EFI_GLOBAL_VARIABLE_GUID \ EFI_GUID( 0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c ) +#define UV_SYSTEM_TABLE_GUID \ + EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) + typedef struct { efi_guid_t guid; unsigned long table; @@ -255,6 +258,7 @@ extern struct efi { unsigned long boot_info; /* boot info table */ unsigned long hcdp; /* HCDP table */ unsigned long uga; /* UGA table */ + unsigned long uv_systab; /* UV system table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; -- cgit v1.2.3 From 7ef0c30dbf96a8d9a234e90c248eb19df3c031be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 13:07:35 +0200 Subject: genirq: define nr_irqs for architectures with GENERIC_HARDIRQS=n Revert the sparse irq changes in m68k/s390/sparc and just define nr_irqs as NR_IRQS for those architectures. Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d4039a0b23f..5a57df2ee92 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -15,11 +15,13 @@ #include #include -extern int nr_irqs; - #ifndef CONFIG_GENERIC_HARDIRQS -#define for_each_irq_desc(irq, desc) \ +# define for_each_irq_desc(irq, desc) \ for (irq = 0; irq < nr_irqs; irq++) + +# define nr_irqs NR_IRQS +#else +extern int nr_irqs; #endif /* -- cgit v1.2.3 From 70dd4d992ab324a59cdcd6bedc3f4e729863d514 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 15:39:27 +0200 Subject: genirq: consolidate nr_irqs and for_each_irq_desc() Move all of those to linux/irq.h where they belong. Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 9 --------- include/linux/irq.h | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5a57df2ee92..58ff4e74b2f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -15,15 +15,6 @@ #include #include -#ifndef CONFIG_GENERIC_HARDIRQS -# define for_each_irq_desc(irq, desc) \ - for (irq = 0; irq < nr_irqs; irq++) - -# define nr_irqs NR_IRQS -#else -extern int nr_irqs; -#endif - /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When diff --git a/include/linux/irq.h b/include/linux/irq.h index 93fe9a943e7..dbe8734ae86 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -11,6 +11,18 @@ #include +#ifndef CONFIG_GENERIC_HARDIRQS +# define nr_irqs NR_IRQS + +# define for_each_irq_desc(irq, desc) \ + for (irq = 0; irq < nr_irqs; irq++) +#else +extern int nr_irqs; + +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) +#endif + #ifndef CONFIG_S390 #include @@ -204,11 +216,6 @@ extern struct irq_desc irq_desc[NR_IRQS]; extern struct irq_desc *irq_desc; #endif -#ifdef CONFIG_GENERIC_HARDIRQS -#define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc = &irq_desc[irq]) -#endif - #else extern struct irq_desc *sparse_irqs; -- cgit v1.2.3 From c6b7674f323622d86316bf7951ad9cae1ce24642 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 14:31:29 +0200 Subject: genirq: use inline function for irq_to_desc For the non sparse irq case an inline function is perfectly fine. Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index dbe8734ae86..7d1adacaadb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -204,8 +204,6 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -extern struct irq_desc *irq_to_desc(unsigned int irq); -extern struct irq_desc *irq_to_desc_alloc(unsigned int irq); #ifndef CONFIG_HAVE_SPARSE_IRQ @@ -216,8 +214,21 @@ extern struct irq_desc irq_desc[NR_IRQS]; extern struct irq_desc *irq_desc; #endif +static inline struct irq_desc *irq_to_desc(unsigned int irq) +{ + return (irq < nr_irqs) ? irq_desc + irq : NULL; +} + +static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq) +{ + return irq_to_desc(irq); +} + #else +extern struct irq_desc *irq_to_desc(unsigned int irq); +extern struct irq_desc *irq_to_desc_alloc(unsigned int irq); + extern struct irq_desc *sparse_irqs; #define for_each_irq_desc(irqX, desc) \ for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U) -- cgit v1.2.3 From 2cc21ef843d4fb7da122239b644a1f6f0aca60a6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 14:16:55 +0200 Subject: genirq: remove sparse irq code This code is not ready, but we need to rip it out instead of rebasing as we would lose the APIC/IO_APIC unification otherwise. Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 7d1adacaadb..68e0f3f9df3 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -167,15 +167,8 @@ struct irq_2_iommu; */ struct irq_desc { unsigned int irq; -#ifdef CONFIG_HAVE_SPARSE_IRQ - struct irq_desc *next; - struct timer_rand_state *timer_rand_state; -#endif #ifdef CONFIG_HAVE_DYN_ARRAY unsigned int *kstat_irqs; -#endif -#if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ) - struct irq_2_iommu *irq_2_iommu; #endif irq_flow_handler_t handle_irq; struct irq_chip *chip; @@ -205,8 +198,6 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; -#ifndef CONFIG_HAVE_SPARSE_IRQ - #ifndef CONFIG_HAVE_DYN_ARRAY /* could be removed if we get rid of all irq_desc reference */ extern struct irq_desc irq_desc[NR_IRQS]; @@ -224,17 +215,6 @@ static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq) return irq_to_desc(irq); } -#else - -extern struct irq_desc *irq_to_desc(unsigned int irq); -extern struct irq_desc *irq_to_desc_alloc(unsigned int irq); - -extern struct irq_desc *sparse_irqs; -#define for_each_irq_desc(irqX, desc) \ - for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U) - -#endif - #ifdef CONFIG_HAVE_DYN_ARRAY #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) -- cgit v1.2.3 From ee32c9732244bde4b9b59eeac2814c23e2b71f8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 14:34:09 +0200 Subject: genirq: remove irq_to_desc_alloc Remove the leftover of sparseirqs. Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 68e0f3f9df3..3f33c779030 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -210,11 +210,6 @@ static inline struct irq_desc *irq_to_desc(unsigned int irq) return (irq < nr_irqs) ? irq_desc + irq : NULL; } -static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq) -{ - return irq_to_desc(irq); -} - #ifdef CONFIG_HAVE_DYN_ARRAY #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) -- cgit v1.2.3 From d6c88a507ef0b6afdb013cba4e7804ba7324d99a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 15:27:23 +0200 Subject: genirq: revert dynarray Revert the dynarray changes. They need more thought and polishing. Signed-off-by: Thomas Gleixner --- include/linux/init.h | 43 ------------------------------------------- include/linux/irq.h | 15 --------------- include/linux/kernel_stat.h | 16 ++++++---------- 3 files changed, 6 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 59fbb4aaba6..70ad53e1eab 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -247,49 +247,6 @@ struct obs_kernel_param { /* Relies on boot_command_line being set */ void __init parse_early_param(void); -struct dyn_array { - void **name; - unsigned long size; - unsigned int *nr; - unsigned long align; - void (*init_work)(void *); -}; -extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[]; -extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[]; - -#define DEFINE_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \ - static struct dyn_array __dyn_array_##nameX __initdata = \ - { .name = (void **)&(nameX),\ - .size = sizeX,\ - .nr = &(nrX),\ - .align = alignX,\ - .init_work = init_workX,\ - }; \ - static struct dyn_array *__dyn_array_ptr_##nameX __used \ - __attribute__((__section__(".dyn_array.init"))) = \ - &__dyn_array_##nameX - -#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \ - DEFINE_DYN_ARRAY_ADDR(nameX, nameX, sizeX, nrX, alignX, init_workX) - -#define DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \ - static struct dyn_array __per_cpu_dyn_array_##nameX __initdata = \ - { .name = (void **)&(addrX),\ - .size = sizeX,\ - .nr = &(nrX),\ - .align = alignX,\ - .init_work = init_workX,\ - }; \ - static struct dyn_array *__per_cpu_dyn_array_ptr_##nameX __used \ - __attribute__((__section__(".per_cpu_dyn_array.init"))) = \ - &__per_cpu_dyn_array_##nameX - -#define DEFINE_PER_CPU_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \ - DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX) - -extern void pre_alloc_dyn_array(void); -extern unsigned long per_cpu_dyn_array_size(unsigned long *align); -extern void per_cpu_alloc_dyn_array(int cpu, char *ptr); #endif /* __ASSEMBLY__ */ /** diff --git a/include/linux/irq.h b/include/linux/irq.h index 3f33c779030..38bf89f2ade 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -139,8 +139,6 @@ struct irq_chip { const char *typename; }; -struct timer_rand_state; -struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @@ -167,9 +165,6 @@ struct irq_2_iommu; */ struct irq_desc { unsigned int irq; -#ifdef CONFIG_HAVE_DYN_ARRAY - unsigned int *kstat_irqs; -#endif irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; @@ -198,23 +193,13 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; -#ifndef CONFIG_HAVE_DYN_ARRAY -/* could be removed if we get rid of all irq_desc reference */ extern struct irq_desc irq_desc[NR_IRQS]; -#else -extern struct irq_desc *irq_desc; -#endif static inline struct irq_desc *irq_to_desc(unsigned int irq) { return (irq < nr_irqs) ? irq_desc + irq : NULL; } -#ifdef CONFIG_HAVE_DYN_ARRAY -#define kstat_irqs_this_cpu(DESC) \ - ((DESC)->kstat_irqs[smp_processor_id()]) -#endif - /* * Migration helpers for obsolete names, they will go away: */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 21249d8c129..a9d0d360b77 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,9 +28,7 @@ struct cpu_usage_stat { struct kernel_stat { struct cpu_usage_stat cpustat; -#ifndef CONFIG_HAVE_DYN_ARRAY unsigned int irqs[NR_IRQS]; -#endif }; DECLARE_PER_CPU(struct kernel_stat, kstat); @@ -41,20 +39,18 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); extern unsigned long long nr_context_switches(void); -#ifndef CONFIG_HAVE_DYN_ARRAY -#define kstat_irqs_this_cpu(irq) \ - (kstat_this_cpu.irqs[irq]) -#endif +struct irq_desc; +static inline void kstat_incr_irqs_this_cpu(unsigned int irq, + struct irq_desc *desc) +{ + kstat_this_cpu.irqs[irq]++; +} -#ifndef CONFIG_HAVE_DYN_ARRAY static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { return kstat_cpu(cpu).irqs[irq]; } -#else -extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); -#endif /* * Number of interrupts per specific IRQ source, since bootup -- cgit v1.2.3 From a1aca5de08a0cb840a90fb3f729a5940f8d21185 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 15 Oct 2008 19:29:15 +0200 Subject: genirq: remove artifacts from sparseirq removal Signed-off-by: Ingo Molnar --- include/linux/init.h | 1 - include/linux/kernel_stat.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 70ad53e1eab..93538b696e3 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -246,7 +246,6 @@ struct obs_kernel_param { /* Relies on boot_command_line being set */ void __init parse_early_param(void); - #endif /* __ASSEMBLY__ */ /** diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index a9d0d360b77..89b6ecd4147 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,7 +28,7 @@ struct cpu_usage_stat { struct kernel_stat { struct cpu_usage_stat cpustat; - unsigned int irqs[NR_IRQS]; + unsigned int irqs[NR_IRQS]; }; DECLARE_PER_CPU(struct kernel_stat, kstat); -- cgit v1.2.3 From 811410fdb6b9d82a518542289efe9b2a51e3cbfb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 14:16:11 +0200 Subject: genirq: add reverse iterator for irq_desc Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 38bf89f2ade..31632aa65d1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -21,6 +21,10 @@ extern int nr_irqs; # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) + +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 ); \ + irq > 0; irq--, desc--) #endif #ifndef CONFIG_S390 -- cgit v1.2.3 From 2be3b52a5785a6a5c5349fbd315f57595f7074be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 14:50:27 +0200 Subject: proc: fixup irq iterator There is no need for irq_desc here. Even for sparse_irq we can handle this clever in for_each_irq_nr(). Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 31632aa65d1..0618fb362cb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -27,6 +27,9 @@ extern int nr_irqs; irq > 0; irq--, desc--) #endif +#define for_each_irq_nr(irq) \ + for (irq = 0; irq < nr_irqs; irq++) + #ifndef CONFIG_S390 #include -- cgit v1.2.3 From ae87221d3ce49d9de1e43756da834fd0bf05a2ad Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 24 Aug 2007 16:11:54 -0700 Subject: sysfs: crash debugging Print the name of the last-accessed sysfs file when we oops, to help track down oopses which occur in sysfs store/read handlers. Because these oopses tend to not leave any trace of the offending code in the stack traces. Cc: Kay Sievers Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 37fa24152bd..8ec406afb3e 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -119,6 +119,8 @@ void sysfs_remove_file_from_group(struct kobject *kobj, void sysfs_notify(struct kobject *kobj, char *dir, char *attr); +void sysfs_printk_last_file(void); + extern int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ @@ -231,6 +233,10 @@ static inline int __must_check sysfs_init(void) return 0; } +static inline void sysfs_printk_last_file(void) +{ +} + #endif /* CONFIG_SYSFS */ #endif /* _SYSFS_H_ */ -- cgit v1.2.3 From 7fb6b5d51daf3613045258ee8add07022d8c39d3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Jul 2008 20:03:34 -0700 Subject: device create: remove device_create_drvdata Now that the tree is cleaned up, device_create_drvdata can be safely removed. Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 246937c9cbc..60f6456691a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -502,7 +502,6 @@ extern struct device *device_create(struct class *cls, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) __attribute__((format(printf, 5, 6))); -#define device_create_drvdata device_create extern void device_destroy(struct class *cls, dev_t devt); /* -- cgit v1.2.3 From 346e15beb5343c2eb8216d820f2ed8f150822b08 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 12 Aug 2008 16:46:19 -0400 Subject: driver core: basic infrastructure for per-module dynamic debug messages Base infrastructure to enable per-module debug messages. I've introduced CONFIG_DYNAMIC_PRINTK_DEBUG, which when enabled centralizes control of debugging statements on a per-module basis in one /proc file, currently, /dynamic_printk/modules. When, CONFIG_DYNAMIC_PRINTK_DEBUG, is not set, debugging statements can still be enabled as before, often by defining 'DEBUG' for the proper compilation unit. Thus, this patch set has no affect when CONFIG_DYNAMIC_PRINTK_DEBUG is not set. The infrastructure currently ties into all pr_debug() and dev_dbg() calls. That is, if CONFIG_DYNAMIC_PRINTK_DEBUG is set, all pr_debug() and dev_dbg() calls can be dynamically enabled/disabled on a per-module basis. Future plans include extending this functionality to subsystems, that define their own debug levels and flags. Usage: Dynamic debugging is controlled by the debugfs file, /dynamic_printk/modules. This file contains a list of the modules that can be enabled. The format of the file is as follows: . . . : Name of the module in which the debug call resides : whether the messages are enabled or not For example: snd_hda_intel enabled=0 fixup enabled=1 driver enabled=0 Enable a module: $echo "set enabled=1 " > dynamic_printk/modules Disable a module: $echo "set enabled=0 " > dynamic_printk/modules Enable all modules: $echo "set enabled=1 all" > dynamic_printk/modules Disable all modules: $echo "set enabled=0 all" > dynamic_printk/modules Finally, passing "dynamic_printk" at the command line enables debugging for all modules. This mode can be turned off via the above disable command. [gkh: minor cleanups and tweaks to make the build work quietly] Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 6 ++- include/linux/dynamic_printk.h | 93 ++++++++++++++++++++++++++++++++++++++++++ include/linux/kernel.h | 7 +++- include/linux/module.h | 1 - 4 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 include/linux/dynamic_printk.h (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 60f6456691a..fb034461b39 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -550,7 +550,11 @@ extern const char *dev_driver_string(const struct device *dev); #define dev_info(dev, format, arg...) \ dev_printk(KERN_INFO , dev , format , ## arg) -#ifdef DEBUG +#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#define dev_dbg(dev, format, ...) do { \ + dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \ + } while (0) +#elif defined(DEBUG) #define dev_dbg(dev, format, arg...) \ dev_printk(KERN_DEBUG , dev , format , ## arg) #else diff --git a/include/linux/dynamic_printk.h b/include/linux/dynamic_printk.h new file mode 100644 index 00000000000..2d528d00907 --- /dev/null +++ b/include/linux/dynamic_printk.h @@ -0,0 +1,93 @@ +#ifndef _DYNAMIC_PRINTK_H +#define _DYNAMIC_PRINTK_H + +#define DYNAMIC_DEBUG_HASH_BITS 6 +#define DEBUG_HASH_TABLE_SIZE (1 << DYNAMIC_DEBUG_HASH_BITS) + +#define TYPE_BOOLEAN 1 + +#define DYNAMIC_ENABLED_ALL 0 +#define DYNAMIC_ENABLED_NONE 1 +#define DYNAMIC_ENABLED_SOME 2 + +extern int dynamic_enabled; + +/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which + * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They + * use independent hash functions, to reduce the chance of false positives. + */ +extern long long dynamic_printk_enabled; +extern long long dynamic_printk_enabled2; + +struct mod_debug { + char *modname; + char *logical_modname; + char *flag_names; + int type; + int hash; + int hash2; +} __attribute__((aligned(8))); + +int register_dynamic_debug_module(char *mod_name, int type, char *share_name, + char *flags, int hash, int hash2); + +#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +extern int unregister_dynamic_debug_module(char *mod_name); +extern int __dynamic_dbg_enabled_helper(char *modname, int type, + int value, int hash); + +#define __dynamic_dbg_enabled(module, type, value, level, hash) ({ \ + int __ret = 0; \ + if (unlikely((dynamic_printk_enabled & (1LL << DEBUG_HASH)) && \ + (dynamic_printk_enabled2 & (1LL << DEBUG_HASH2)))) \ + __ret = __dynamic_dbg_enabled_helper(module, type, \ + value, hash);\ + __ret; }) + +#define dynamic_pr_debug(fmt, ...) do { \ + static char mod_name[] \ + __attribute__((section("__verbose_strings"))) \ + = KBUILD_MODNAME; \ + static struct mod_debug descriptor \ + __used \ + __attribute__((section("__verbose"), aligned(8))) = \ + { mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\ + if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN, \ + 0, 0, DEBUG_HASH)) \ + printk(KERN_DEBUG KBUILD_MODNAME ":" fmt, \ + ##__VA_ARGS__); \ + } while (0) + +#define dynamic_dev_dbg(dev, format, ...) do { \ + static char mod_name[] \ + __attribute__((section("__verbose_strings"))) \ + = KBUILD_MODNAME; \ + static struct mod_debug descriptor \ + __used \ + __attribute__((section("__verbose"), aligned(8))) = \ + { mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\ + if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN, \ + 0, 0, DEBUG_HASH)) \ + dev_printk(KERN_DEBUG, dev, \ + KBUILD_MODNAME ": " format, \ + ##__VA_ARGS__); \ + } while (0) + +#else + +static inline int unregister_dynamic_debug_module(const char *mod_name) +{ + return 0; +} +static inline int __dynamic_dbg_enabled_helper(char *modname, int type, + int value, int hash) +{ + return 0; +} + +#define __dynamic_dbg_enabled(module, type, value, level, hash) ({ 0; }) +#define dynamic_pr_debug(fmt, ...) do { } while (0) +#define dynamic_dev_dbg(dev, format, ...) do { } while (0) +#endif + +#endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 75d81f157d2..ededb6e83b4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -303,8 +304,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte) #define pr_info(fmt, arg...) \ printk(KERN_INFO fmt, ##arg) -#ifdef DEBUG /* If you are writing a driver, please use dev_dbg instead */ +#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#define pr_debug(fmt, ...) do { \ + dynamic_pr_debug(fmt, ##__VA_ARGS__); \ + } while (0) +#elif defined(DEBUG) #define pr_debug(fmt, arg...) \ printk(KERN_DEBUG fmt, ##arg) #else diff --git a/include/linux/module.h b/include/linux/module.h index 68e09557c95..a41555cbe00 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -345,7 +345,6 @@ struct module /* Reference counts */ struct module_ref ref[NR_CPUS]; #endif - }; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} -- cgit v1.2.3 From f1282c844e86db5a041afa41335b5f9eea6cec0c Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Wed, 16 Jul 2008 08:58:04 +1000 Subject: sysfs: Support sysfs_notify from atomic context with new sysfs_notify_dirent Support sysfs_notify from atomic context with new sysfs_notify_dirent sysfs_notify currently takes sysfs_mutex. This means that it cannot be called in atomic context. sysfs_mutex is sometimes held over a malloc (sysfs_rename_dir) so it can block on low memory. In md I want to be able to notify on a sysfs attribute from atomic context, and I don't want to block on low memory because I could be in the writeout path for freeing memory. So: - export the "sysfs_dirent" structure along with sysfs_get, sysfs_put and sysfs_get_dirent so I can get the sysfs_dirent that I want to notify on and hold it in an md structure. - split sysfs_notify_dirent out of sysfs_notify so the sysfs_dirent can be notified on with no blocking (just a spinlock). Signed-off-by: Neil Brown Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 8ec406afb3e..d8e0230f1e6 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -78,6 +78,8 @@ struct sysfs_ops { ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); }; +struct sysfs_dirent; + #ifdef CONFIG_SYSFS int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), @@ -118,10 +120,13 @@ void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group); void sysfs_notify(struct kobject *kobj, char *dir, char *attr); - +void sysfs_notify_dirent(struct sysfs_dirent *sd); +struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name); +struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); +void sysfs_put(struct sysfs_dirent *sd); void sysfs_printk_last_file(void); - -extern int __must_check sysfs_init(void); +int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ @@ -227,6 +232,22 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj, static inline void sysfs_notify(struct kobject *kobj, char *dir, char *attr) { } +static inline void sysfs_notify_dirent(struct sysfs_dirent *sd) +{ +} +static inline +struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name) +{ + return NULL; +} +static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) +{ + return NULL; +} +static inline void sysfs_put(struct sysfs_dirent *sd) +{ +} static inline int __must_check sysfs_init(void) { -- cgit v1.2.3 From e61396627f91abb855ddd8925be9172fb5871944 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 20 Sep 2008 19:08:39 -0700 Subject: debug: Introduce a dev_WARN() function in the line of dev_printk(), this patch introduces a dev_WARN() function, that takes a struct device and then a printk format/args set of arguments. Unlike dev_printk(), the effect is that of WARN() in that a full warning message (including filename/line, module list, versions and a backtrace) is printed in addition to the device name and the arguments. Signed-off-by: Arjan van de Ven Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index fb034461b39..ec90e79f6a0 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -570,6 +570,14 @@ extern const char *dev_driver_string(const struct device *dev); ({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; }) #endif +/* + * dev_WARN() acts like dev_printk(), but with the key difference + * of using a WARN/WARN_ON to get the message out, including the + * file/line information and a backtrace. + */ +#define dev_WARN(dev, format, arg...) \ + WARN(1, "Device: %s\n" format, dev_driver_string(dev), ## arg); + /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From d8bf254089a6c31d7d01a4d1d2f1861662900855 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 22 Sep 2008 14:41:40 -0700 Subject: platform: add new device registration helper Add a helper that registers simple platform_device w/o resources but with parent and device data. This is usefull to cleanup platform code from code that registers such simple devices as leds-gpio, generic-bl, etc. Signed-off-by: Dmitry Baryshkov Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 95ac21ab3a0..4b8cc6a3247 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -37,6 +37,8 @@ extern int platform_add_devices(struct platform_device **, int); extern struct platform_device *platform_device_register_simple(const char *, int id, struct resource *, unsigned int); +extern struct platform_device *platform_device_register_data(struct device *, + const char *, int, const void *, size_t); extern struct platform_device *platform_device_alloc(const char *name, int id); extern int platform_device_add_resources(struct platform_device *pdev, struct resource *res, unsigned int num); -- cgit v1.2.3 From 8c0e3998f5b71e68fe6b6e489a92e052715e563c Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Thu, 25 Sep 2008 16:45:13 -0700 Subject: sysfs: Make dir and name args to sysfs_notify() const Because they can be, and because code like this produces a warning if they're not: struct device_attribute dev_attr; sysfs_notify(&kobj, NULL, dev_attr.attr.name); Signed-off-by: Trent Piepho CC: Neil Brown Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d8e0230f1e6..b330e289d71 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -119,7 +119,7 @@ int sysfs_add_file_to_group(struct kobject *kobj, void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group); -void sysfs_notify(struct kobject *kobj, char *dir, char *attr); +void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); void sysfs_notify_dirent(struct sysfs_dirent *sd); struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name); @@ -229,7 +229,8 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj, { } -static inline void sysfs_notify(struct kobject *kobj, char *dir, char *attr) +static inline void sysfs_notify(struct kobject *kobj, const char *dir, + const char *attr) { } static inline void sysfs_notify_dirent(struct sysfs_dirent *sd) -- cgit v1.2.3 From 030c1d2bfcc2187650fb975456ca0b61a5bb77f4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 8 May 2008 14:41:00 -0700 Subject: kobject: Fix kobject_rename and !CONFIG_SYSFS When looking at kobject_rename I found two bugs with that exist when sysfs support is disabled in the kernel. kobject_rename does not change the name on the kobject when sysfs support is not compiled in. kobject_rename without locking attempts to check the validity of a rename operation, which the kobject layer simply does not have the infrastructure to do. This patch documents the previously unstated requirement of kobject_rename that is the responsibility of the caller to provide mutual exclusion and to be certain that the new_name for the kobject is valid. This patch modifies sysfs_rename_dir in !CONFIG_SYSFS case to call kobject_set_name to actually change the kobject_name. This patch removes the bogus and misleading check in kobject_rename that attempts to see if a rename is valid. The check is bogus because we do not have the proper locking. The check is misleading because it looks like we can and do perform checking at the kobject level that we don't. Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index b330e289d71..39924a96220 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -20,6 +20,8 @@ struct kobject; struct module; +extern int kobject_set_name(struct kobject *kobj, const char *name, ...) + __attribute__((format(printf, 2, 3))); /* FIXME * The *owner field is no longer used, but leave around * until the tree gets cleaned up fully. @@ -147,7 +149,7 @@ static inline void sysfs_remove_dir(struct kobject *kobj) static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name) { - return 0; + return kobject_set_name(kobj, "%s", new_name); } static inline int sysfs_move_dir(struct kobject *kobj, -- cgit v1.2.3 From 0b4a4fea253e1296222603ccc55430ed7cd9413a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Jul 2008 18:05:28 -0700 Subject: kobject: Cleanup kobject_rename and !CONFIG_SYSFS It finally dawned on me what the clean fix to sysfs_rename_dir calling kobject_set_name is. Move the work into kobject_rename where it belongs. The callers serialize us anyway so this is safe. Signed-off-by: Eric W. Biederman Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 39924a96220..b330e289d71 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -20,8 +20,6 @@ struct kobject; struct module; -extern int kobject_set_name(struct kobject *kobj, const char *name, ...) - __attribute__((format(printf, 2, 3))); /* FIXME * The *owner field is no longer used, but leave around * until the tree gets cleaned up fully. @@ -149,7 +147,7 @@ static inline void sysfs_remove_dir(struct kobject *kobj) static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name) { - return kobject_set_name(kobj, "%s", new_name); + return 0; } static inline int sysfs_move_dir(struct kobject *kobj, -- cgit v1.2.3 From 99178b036c97293a65004ff5ec5cff9f833aaecd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 26 Aug 2008 11:00:57 -0500 Subject: Driver core: add bus_sort_breadthfirst() function The PCI core wants to reorder the devices in the bus list. So move this functionality out of the pci core and into the driver core so that anyone else can also do this if needed. This also lets us change how struct device is attached to drivers in the future without messing with the PCI core. Acked-by: Jesse Barnes Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ec90e79f6a0..987f5912720 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -90,6 +90,9 @@ int __must_check bus_for_each_drv(struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); +void bus_sort_breadthfirst(struct bus_type *bus, + int (*compare)(const struct device *a, + const struct device *b)); /* * Bus notifiers: Get notified of addition/removal of devices * and binding/unbinding of drivers to devices. -- cgit v1.2.3 From 1648993fb05c487947c1cec6307aca29d8002abe Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 15 Oct 2008 22:01:03 -0700 Subject: introduce generic header file for the software IO/TLB A series of patches introduce a generic header file for the software IO/TLB implementation in lib/swiotlb.c. Currently each architecture using this code defines the prototypes itself. The prototypes are moved to include/linux/swiotlb.h and this file is included in architecture specific code for X86 and IA64. This patch: Create include/linux/swiotlb.h file which contains all function prototypes for the lib/swiotlb.c file. (akpm: the dependent patches will be trickled through arch trees) Signed-off-by: Joerg Roedel Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swiotlb.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 include/linux/swiotlb.h (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h new file mode 100644 index 00000000000..b18ec5533e8 --- /dev/null +++ b/include/linux/swiotlb.h @@ -0,0 +1,83 @@ +#ifndef __LINUX_SWIOTLB_H +#define __LINUX_SWIOTLB_H + +#include + +struct device; +struct dma_attrs; +struct scatterlist; + +extern void +swiotlb_init(void); + +extern void +*swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags); + +extern void +swiotlb_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle); + +extern dma_addr_t +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir); + +extern void +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir); + +extern dma_addr_t +swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, + int dir, struct dma_attrs *attrs); + +extern void +swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir, struct dma_attrs *attrs); + +extern int +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, + int direction); + +extern void +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, + int direction); + +extern int +swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, + int dir, struct dma_attrs *attrs); + +extern void +swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, + int nelems, int dir, struct dma_attrs *attrs); + +extern void +swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir); + +extern void +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir); + +extern void +swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir); + +extern void +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir); + +extern void +swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, int dir); + +extern void +swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, + int dir); + +extern int +swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); + +extern int +swiotlb_dma_supported(struct device *hwdev, u64 mask); + +#endif /* __LINUX_SWIOTLB_H */ -- cgit v1.2.3 From 9363b9f23c9cc36cc8ef6c05fdf879ee4a96ae92 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 15 Oct 2008 22:01:05 -0700 Subject: memrlimit: cgroup mm owner callback changes to add task info This patch adds an additional field to the mm_owner callbacks. This field is required to get to the mm that changed. Hold mmap_sem in write mode before calling the mm_owner_changed callback [hugh@veritas.com: fix mmap_sem deadlock] Signed-off-by: Balbir Singh Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: Li Zefan Cc: Pavel Emelianov Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: David Rientjes Cc: Vivek Goyal Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c98dd7cb707..30934e4bfaa 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -326,7 +326,8 @@ struct cgroup_subsys { */ void (*mm_owner_changed)(struct cgroup_subsys *ss, struct cgroup *old, - struct cgroup *new); + struct cgroup *new, + struct task_struct *p); int subsys_id; int active; int disabled; -- cgit v1.2.3 From 1bfcf1304ea79c46efc3724e548b13b4b442b418 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Oct 2008 22:01:21 -0700 Subject: pm: rework disabling of user mode helpers during suspend/hibernation We currently use a PM notifier to disable user mode helpers before suspend and hibernation and to re-enable them during resume. However, this is not an ideal solution, because if any drivers want to upload firmware into memory before suspend, they have to use a PM notifier for this purpose and there is no guarantee that the ordering of PM notifiers will be as expected (ie. the notifier that disables user mode helpers has to be run after the driver's notifier used for uploading the firmware). For this reason, it seems better to move the disabling and enabling of user mode helpers to separate functions that will be called by the PM core as necessary. [akpm@linux-foundation.org: remove unneeded ifdefs] Signed-off-by: Rafael J. Wysocki Cc: Alan Stern Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index a1a91577813..92213a9194e 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -99,4 +99,7 @@ struct file; extern int call_usermodehelper_pipe(char *path, char *argv[], char *envp[], struct file **filp); +extern int usermodehelper_disable(void); +extern void usermodehelper_enable(void); + #endif /* __LINUX_KMOD_H__ */ -- cgit v1.2.3 From d5c003b4d1690e666dbab02bc8e705947baa848c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 15 Oct 2008 22:01:24 -0700 Subject: include: replace __FUNCTION__ with __func__ __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 2 +- include/linux/ext3_fs.h | 4 ++-- include/linux/ext3_jbd.h | 14 +++++++------- include/linux/jbd.h | 4 ++-- include/linux/jbd2.h | 4 ++-- include/linux/pm.h | 2 +- include/linux/reiserfs_fs.h | 2 +- include/linux/rtmutex.h | 2 +- 8 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 2efe7b863cf..78c775a83f7 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -47,7 +47,7 @@ #ifdef EXT2FS_DEBUG # define ext2_debug(f, a...) { \ printk ("EXT2-fs DEBUG (%s, %d): %s:", \ - __FILE__, __LINE__, __FUNCTION__); \ + __FILE__, __LINE__, __func__); \ printk (f, ## a); \ } #else diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 8120fa1bc23..159d9b476cd 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -43,7 +43,7 @@ #define ext3_debug(f, a...) \ do { \ printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \ - __FILE__, __LINE__, __FUNCTION__); \ + __FILE__, __LINE__, __func__); \ printk (KERN_DEBUG f, ## a); \ } while (0) #else @@ -871,7 +871,7 @@ extern void ext3_update_dynamic_rev (struct super_block *sb); #define ext3_std_error(sb, errno) \ do { \ if ((errno)) \ - __ext3_std_error((sb), __FUNCTION__, (errno)); \ + __ext3_std_error((sb), __func__, (errno)); \ } while (0) /* diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h index 8c43b13a02f..cf82d519be4 100644 --- a/include/linux/ext3_jbd.h +++ b/include/linux/ext3_jbd.h @@ -137,17 +137,17 @@ int __ext3_journal_dirty_metadata(const char *where, handle_t *handle, struct buffer_head *bh); #define ext3_journal_get_undo_access(handle, bh) \ - __ext3_journal_get_undo_access(__FUNCTION__, (handle), (bh)) + __ext3_journal_get_undo_access(__func__, (handle), (bh)) #define ext3_journal_get_write_access(handle, bh) \ - __ext3_journal_get_write_access(__FUNCTION__, (handle), (bh)) + __ext3_journal_get_write_access(__func__, (handle), (bh)) #define ext3_journal_revoke(handle, blocknr, bh) \ - __ext3_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) + __ext3_journal_revoke(__func__, (handle), (blocknr), (bh)) #define ext3_journal_get_create_access(handle, bh) \ - __ext3_journal_get_create_access(__FUNCTION__, (handle), (bh)) + __ext3_journal_get_create_access(__func__, (handle), (bh)) #define ext3_journal_dirty_metadata(handle, bh) \ - __ext3_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) + __ext3_journal_dirty_metadata(__func__, (handle), (bh)) #define ext3_journal_forget(handle, bh) \ - __ext3_journal_forget(__FUNCTION__, (handle), (bh)) + __ext3_journal_forget(__func__, (handle), (bh)) int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh); @@ -160,7 +160,7 @@ static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks) } #define ext3_journal_stop(handle) \ - __ext3_journal_stop(__FUNCTION__, (handle)) + __ext3_journal_stop(__func__, (handle)) static inline handle_t *ext3_journal_current_handle(void) { diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 07a9b52a265..7ebbcb1c9ba 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -61,7 +61,7 @@ extern u8 journal_enable_debug; do { \ if ((n) <= journal_enable_debug) { \ printk (KERN_DEBUG "(%s, %d): %s: ", \ - __FILE__, __LINE__, __FUNCTION__); \ + __FILE__, __LINE__, __func__); \ printk (f, ## a); \ } \ } while (0) @@ -984,7 +984,7 @@ extern int cleanup_journal_tail(journal_t *); #define jbd_ENOSYS() \ do { \ - printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \ + printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \ current->state = TASK_UNINTERRUPTIBLE; \ schedule(); \ } while (1) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d2e91ea998f..463d6f10b64 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -61,7 +61,7 @@ extern u8 jbd2_journal_enable_debug; do { \ if ((n) <= jbd2_journal_enable_debug) { \ printk (KERN_DEBUG "(%s, %d): %s: ", \ - __FILE__, __LINE__, __FUNCTION__); \ + __FILE__, __LINE__, __func__); \ printk (f, ## a); \ } \ } while (0) @@ -1143,7 +1143,7 @@ extern int jbd2_cleanup_journal_tail(journal_t *); #define jbd_ENOSYS() \ do { \ - printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \ + printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \ current->state = TASK_UNINTERRUPTIBLE; \ schedule(); \ } while (1) diff --git a/include/linux/pm.h b/include/linux/pm.h index 4dcce54b6d7..42de4003c4e 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -419,7 +419,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); #define suspend_report_result(fn, ret) \ do { \ - __suspend_report_result(__FUNCTION__, fn, ret); \ + __suspend_report_result(__func__, fn, ret); \ } while (0) #else /* !CONFIG_PM_SLEEP */ diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index e9963af16cd..bc5114d35e9 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -87,7 +87,7 @@ void reiserfs_warning(struct super_block *s, const char *fmt, ...); if( !( cond ) ) \ reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at " \ __FILE__ ":%i:%s: " format "\n", \ - in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __FUNCTION__ , ##args ) + in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __func__ , ##args ) #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args) diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 382bb795116..f19b00b7d53 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -54,7 +54,7 @@ struct hrtimer_sleeper; #ifdef CONFIG_DEBUG_RT_MUTEXES # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ , .name = #mutexname, .file = __FILE__, .line = __LINE__ -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __FUNCTION__) +# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) extern void rt_mutex_debug_task_free(struct task_struct *tsk); #else # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) -- cgit v1.2.3 From 693ac389326a87d608baa2902c45a6e78ed46681 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 15 Oct 2008 22:01:25 -0700 Subject: include/linux/mount.h: remove CVS keyword Remove a CVS keyword that wasn't updated for a long time from a comment. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mount.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 30a1d63b6fb..cab2a85e2ee 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -5,8 +5,6 @@ * * Author: Marco van Wieringen * - * Version: $Id: mount.h,v 2.0 1996/11/17 16:48:14 mvw Exp mvw $ - * */ #ifndef _LINUX_MOUNT_H #define _LINUX_MOUNT_H -- cgit v1.2.3 From 1ecfea06386c6b1344e83c8f909c87c88262ba1d Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 15 Oct 2008 22:01:27 -0700 Subject: init.h: remove long-dead __setup_null_param() macro This macro appears to have been unused for ages, and there are no invocations of it anywhere in the source tree. Signed-off-by: Robert P. J. Day Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 93538b696e3..ad63824460e 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -233,9 +233,6 @@ struct obs_kernel_param { __attribute__((aligned((sizeof(long))))) \ = { __setup_str_##unique_id, fn, early } -#define __setup_null_param(str, unique_id) \ - __setup_param(str, unique_id, NULL, 0) - #define __setup(str, fn) \ __setup_param(str, fn, fn, 0) @@ -296,7 +293,6 @@ void __init parse_early_param(void); void cleanup_module(void) __attribute__((alias(#exitfn))); #define __setup_param(str, unique_id, fn) /* nothing */ -#define __setup_null_param(str, unique_id) /* nothing */ #define __setup(str, func) /* nothing */ #endif -- cgit v1.2.3 From c80cfb0406c01bb5da91bfe30f5cb1fd96831138 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 15 Oct 2008 22:01:35 -0700 Subject: vsprintf: use new vsprintf symbolic function pointer format Use the '%pF' format to get rid of an "#ifdef DEBUG" and make some printks atomic. This removes the last in-tree uses of print_fn_descriptor_symbol(). I marked print_fn_descriptor_symbol() deprecated and scheduled it for removal next year to give time for out-of-tree modules to be updated. parisc's print_fn_descriptor_symbol() is currently broken there (it needs to dereference the function pointer similar to ia64 and power). This patch shouldn't make anything worse, but it means we need to fix dereference_function_descriptor() instead of print_fn_descriptor_symbol() to get meaningful initcall_debug output. Signed-off-by: Bjorn Helgaas Cc: Jesse Barnes Cc: Kyle McMartin Cc: "Rafael J. Wysocki" Cc: Kay Sievers Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index b9614488744..f3fe34391d8 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -93,12 +93,10 @@ static inline void print_symbol(const char *fmt, unsigned long addr) } /* - * Pretty-print a function pointer. - * - * ia64 and ppc64 function pointers are really function descriptors, - * which contain a pointer the real address. + * Pretty-print a function pointer. This function is deprecated. + * Please use the "%pF" vsprintf format instead. */ -static inline void print_fn_descriptor_symbol(const char *fmt, void *addr) +static inline void __deprecated print_fn_descriptor_symbol(const char *fmt, void *addr) { #if defined(CONFIG_IA64) || defined(CONFIG_PPC64) addr = *(void **)addr; -- cgit v1.2.3 From a25d644fc0e232f242d1f3baa63c149c42536ff0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Oct 2008 22:01:38 -0700 Subject: wait: kill is_sync_wait() is_sync_wait() is used to distinguish between sync and async waits. Basically sync waits are the ones initialized with init_waitqueue_entry() and async ones with init_waitqueue_func_entry(). The sync/async distinction is used only in prepare_to_wait[_exclusive]() and its only function is to skip setting the current task state if the wait is async. This has a few problems. * No one uses it. None of func_entry users use prepare_to_wait() functions, so the code path never gets executed. * The distinction is bogus. Maybe back when func_entry is used only by aio but it's now also used by epoll and in future possibly by 9p and poll/select. * Taking @state as argument and ignoring it silenly depending on how @wait is initialized is just a bad error-prone API. * It prevents func_entry waits from using wait->private for no good reason. This patch kills is_sync_wait() and the associated code paths from prepare_to_wait[_exclusive](). As there was no user of these code paths, this patch doesn't cause any behavior difference. Signed-off-by: Tejun Heo Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 0081147a9fe..ef609f842fa 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -108,15 +108,6 @@ static inline int waitqueue_active(wait_queue_head_t *q) return !list_empty(&q->task_list); } -/* - * Used to distinguish between sync and async io wait context: - * sync i/o typically specifies a NULL wait queue entry or a wait - * queue entry bound to a task (current task) to wake up. - * aio specifies a wait queue entry with an async notification - * callback routine, not associated with any task. - */ -#define is_sync_wait(wait) (!(wait) || ((wait)->private)) - extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); -- cgit v1.2.3 From 25ddbb18aae33ad255eb9f35aacebe3af01e1e9c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 15 Oct 2008 22:01:41 -0700 Subject: Make the taint flags reliable It's somewhat unlikely that it happens, but right now a race window between interrupts or machine checks or oopses could corrupt the tainted bitmap because it is modified in a non atomic fashion. Convert the taint variable to an unsigned long and use only atomic bit operations on it. Unfortunately this means the intvec sysctl functions cannot be used on it anymore. It turned out the taint sysctl handler could actually be simplified a bit (since it only increases capabilities) so this patch actually removes code. [akpm@linux-foundation.org: remove unneeded include] Signed-off-by: Andi Kleen Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 75d81f157d2..e971c55f45a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -235,9 +235,10 @@ extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in extern int panic_timeout; extern int panic_on_oops; extern int panic_on_unrecovered_nmi; -extern int tainted; extern const char *print_tainted(void); -extern void add_taint(unsigned); +extern void add_taint(unsigned flag); +extern int test_taint(unsigned flag); +extern unsigned long get_taint(void); extern int root_mountflags; /* Values used for system_state */ @@ -250,16 +251,16 @@ extern enum system_states { SYSTEM_SUSPEND_DISK, } system_state; -#define TAINT_PROPRIETARY_MODULE (1<<0) -#define TAINT_FORCED_MODULE (1<<1) -#define TAINT_UNSAFE_SMP (1<<2) -#define TAINT_FORCED_RMMOD (1<<3) -#define TAINT_MACHINE_CHECK (1<<4) -#define TAINT_BAD_PAGE (1<<5) -#define TAINT_USER (1<<6) -#define TAINT_DIE (1<<7) -#define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8) -#define TAINT_WARN (1<<9) +#define TAINT_PROPRIETARY_MODULE 0 +#define TAINT_FORCED_MODULE 1 +#define TAINT_UNSAFE_SMP 2 +#define TAINT_FORCED_RMMOD 3 +#define TAINT_MACHINE_CHECK 4 +#define TAINT_BAD_PAGE 5 +#define TAINT_USER 6 +#define TAINT_DIE 7 +#define TAINT_OVERRIDDEN_ACPI_TABLE 8 +#define TAINT_WARN 9 extern void dump_stack(void) __cold; -- cgit v1.2.3 From 22b8ce94708f7cdf0b04965c6f7443dfd374c35c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 15 Oct 2008 22:01:46 -0700 Subject: profiling: dynamically enable readprofile at runtime Way too often, I have a machine that exhibits some kind of crappy behavior. The CPU looks wedged in the kernel or it is spending way too much system time and I wonder what is responsible. I try to run readprofile. But, of course, Ubuntu doesn't enable it by default. Dang! The reason we boot-time enable it is that it takes a big bufffer that we generally can only bootmem alloc. But, does it hurt to at least try and runtime-alloc it? To use: echo 2 > /sys/kernel/profile Then run readprofile like normal. This should fix the compile issue with allmodconfig. I've compile-tested on a bunch more configs now including a few more architectures. Signed-off-by: Dave Hansen Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/profile.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/profile.h b/include/linux/profile.h index 7e7087239af..570045053ce 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -35,7 +35,9 @@ enum profile_type { extern int prof_on __read_mostly; /* init basic kernel profiler */ -void __init profile_init(void); +int profile_init(void); +int profile_setup(char *str); +int create_proc_profile(void); void profile_tick(int type); /* @@ -84,9 +86,9 @@ struct pt_regs; #define prof_on 0 -static inline void profile_init(void) +static inline int profile_init(void) { - return; + return 0; } static inline void profile_tick(int type) -- cgit v1.2.3 From e1f8e87449147ffe5ea3de64a46af7de450ce279 Mon Sep 17 00:00:00 2001 From: Francois Cami Date: Wed, 15 Oct 2008 22:01:59 -0700 Subject: Remove Andrew Morton's old email accounts People can use the real name an an index into MAINTAINERS to find the current email address. Signed-off-by: Francois Cami Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/journal-head.h | 2 +- include/linux/task_io_accounting.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h index 8a62d1e84b9..bb70ebb6a2d 100644 --- a/include/linux/journal-head.h +++ b/include/linux/journal-head.h @@ -3,7 +3,7 @@ * * buffer_head fields for JBD * - * 27 May 2001 Andrew Morton + * 27 May 2001 Andrew Morton * Created - pulled out of fs.h */ diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h index 5e88afc9a2f..bdf855c2856 100644 --- a/include/linux/task_io_accounting.h +++ b/include/linux/task_io_accounting.h @@ -5,7 +5,7 @@ * Don't include this header file directly - it is designed to be dragged in via * sched.h. * - * Blame akpm@osdl.org for all this. + * Blame Andrew Morton for all this. */ struct task_io_accounting { -- cgit v1.2.3 From f7ad160b49c49dc9cd383b9184c6fa4a9b4f7ebb Mon Sep 17 00:00:00 2001 From: Alex Raimondi Date: Wed, 15 Oct 2008 22:02:03 -0700 Subject: include/linux/clk.h: fix comment clk_get and clk_put may not be used from within interrupt context. Change comment to this function. Signed-off-by: Alex Raimondi Signed-off-by: Haavard Skinnemoen Acked-by: Russell King Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clk.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 5ca8c6fddb5..778777316ea 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -35,6 +35,8 @@ struct clk; * clk_get may return different clock producers depending on @dev.) * * Drivers must assume that the clock source is not enabled. + * + * clk_get should not be called from within interrupt context. */ struct clk *clk_get(struct device *dev, const char *id); @@ -76,6 +78,8 @@ unsigned long clk_get_rate(struct clk *clk); * Note: drivers must ensure that all clk_enable calls made on this * clock source are balanced by clk_disable calls prior to calling * this function. + * + * clk_put should not be called from within interrupt context. */ void clk_put(struct clk *clk); -- cgit v1.2.3 From f7a5000f7a8924e9c5fad1801616601d6dc65a17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Oct 2008 22:02:05 -0700 Subject: compat: move cp_compat_stat to common code struct stat / compat_stat is the same on all architectures, so cp_compat_stat should be, too. Turns out it is, except that various architectures have slightly and some high2lowuid/high2lowgid or the direct assignment instead of the SET_UID/SET_GID that expands to the correct one anyway. This patch replaces the arch-specific cp_compat_stat implementations with a common one based on the x86-64 one. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller [ sparc bits ] Acked-by: Kyle McMartin [ parisc bits ] Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index cf8d11cad5a..999dddd8d93 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -78,7 +78,6 @@ typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; -extern int cp_compat_stat(struct kstat *, struct compat_stat __user *); extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *); extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *); -- cgit v1.2.3 From b418da16dd44810e5d5a22bba377cca80512a524 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Oct 2008 22:02:06 -0700 Subject: compat: generic compat get/settimeofday Nothing arch specific in get/settimeofday. The details of the timeval conversion varied a little from arch to arch, but all with the same results. Also add an extern declaration for sys_tz to linux/time.h because externs in .c files are fowned upon. I'll kill the externs in various other files in a sparate patch. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Christoph Hellwig Acked-by: David S. Miller [ sparc bits ] Cc: "Luck, Tony" Cc: Ralf Baechle Acked-by: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 5 +++++ include/linux/time.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 999dddd8d93..f061a1ea1b7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -234,6 +234,11 @@ extern int get_compat_itimerspec(struct itimerspec *dst, extern int put_compat_itimerspec(struct compat_itimerspec __user *dst, const struct itimerspec *src); +asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); +asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); + asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); extern int compat_printk(const char *fmt, ...); diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82..51e883df0fa 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -29,6 +29,8 @@ struct timezone { #ifdef __KERNEL__ +extern struct timezone sys_tz; + /* Parameters used to convert the timespec values: */ #define MSEC_PER_SEC 1000L #define USEC_PER_MSEC 1000L -- cgit v1.2.3 From 56d936607408d71c4141b2ed501410b072f1e211 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 15 Oct 2008 22:02:10 -0700 Subject: introduce generic iommu_num_pages function This patch introduces the generic iommu_num_pages function. It can be used by a given memory area. Signed-off-by: Joerg Roedel Cc: "David S. Miller" Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Ingo Molnar Cc: Thomas Gleixner Cc: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/iommu-helper.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index a6d0586e2bf..3b068e5b567 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -23,4 +23,7 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, extern void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr); +extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len, + unsigned long io_page_size); + #endif -- cgit v1.2.3 From 53112488bebe25c0f5f8a002470046c0fe9a6c61 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Oct 2008 22:02:37 -0700 Subject: alpha: introduce field 'taso' into struct linux_binprm This change is Alpha-specific. It adds field 'taso' into struct linux_binprm to remember if the application is TASO. Previously, field sh_bang was used for this purpose. Signed-off-by: Kirill A. Shutemov Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Pavel Emelyanov Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 826f6235080..54980a3c760 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -36,6 +36,9 @@ struct linux_binprm{ unsigned long p; /* current top of mem */ unsigned int sh_bang:1, misc_bang:1; +#ifdef __alpha__ + unsigned int taso:1; +#endif struct file * file; int e_uid, e_gid; kernel_cap_t cap_post_exec_permitted; -- cgit v1.2.3 From bf2a9a39639b8b51377905397a5005f444e9a892 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Oct 2008 22:02:39 -0700 Subject: Allow recursion in binfmt_script and binfmt_misc binfmt_script and binfmt_misc disallow recursion to avoid stack overflow using sh_bang and misc_bang. It causes problem in some cases: $ echo '#!/bin/ls' > /tmp/t0 $ echo '#!/tmp/t0' > /tmp/t1 $ echo '#!/tmp/t1' > /tmp/t2 $ chmod +x /tmp/t* $ /tmp/t2 zsh: exec format error: /tmp/t2 Similar problem with binfmt_misc. This patch introduces field 'recursion_depth' into struct linux_binprm to track recursion level in binfmt_misc and binfmt_script. If recursion level more then BINPRM_MAX_RECURSION it generates -ENOEXEC. [akpm@linux-foundation.org: make linux_binprm.recursion_depth a uint] Signed-off-by: Kirill A. Shutemov Cc: Pavel Emelyanov Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 54980a3c760..7394b5b349f 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -39,6 +39,7 @@ struct linux_binprm{ #ifdef __alpha__ unsigned int taso:1; #endif + unsigned int recursion_depth; struct file * file; int e_uid, e_gid; kernel_cap_t cap_post_exec_permitted; @@ -61,6 +62,7 @@ struct linux_binprm{ #define BINPRM_FLAGS_EXECFD_BIT 1 #define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT) +#define BINPRM_MAX_RECURSION 4 /* * This structure defines the functions that are used to load the binary formats that -- cgit v1.2.3 From 2bec19feabd53cba75e9dab0e79afbe868a37113 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 15 Oct 2008 22:02:44 -0700 Subject: orion_spi: handle 88F6183 erratum Add support to orion_spi for the 88F6183 ARM SoC by adding code to work around a 6183-specific erratum. Signed-off-by: Lennert Buytenhek Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/orion_spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/orion_spi.h b/include/linux/spi/orion_spi.h index b4d9fa6f797..decf6d8c77b 100644 --- a/include/linux/spi/orion_spi.h +++ b/include/linux/spi/orion_spi.h @@ -11,6 +11,7 @@ struct orion_spi_info { u32 tclk; /* no support yet */ + u32 enable_clock_fix; }; -- cgit v1.2.3 From 9d793b0bcbbbc37d80241862dfa5257963d5415e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 15 Oct 2008 22:02:47 -0700 Subject: i2o: Fix 32/64bit DMA locking The I2O ioctls assume 32bits. In itself that is fine as they are old cards and nobody uses 64bit. However on LKML it was noted this assumption is also made for allocated memory and is unsafe on 64bit systems. Fixing this is a mess. It turns out there is tons of crap buried in a header file that does racy 32/64bit filtering on the masks. So we: - Verify all callers of the racy code can sleep (i2o_dma_[re]alloc) - Move the code into a new i2o/memory.c file - Remove the gfp_mask argument so nobody can try and misuse the function - Wrap a mutex around the problem area (a single mutex is easy to do and none of this is performance relevant) - Switch the remaining problem kmalloc holdout to use i2o_dma_alloc Cc: Markus Lidel Cc: Vasily Averin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2o.h | 292 +++------------------------------------------------- 1 file changed, 12 insertions(+), 280 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 75ae6d8aba4..4c4e57d1f19 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -570,7 +570,6 @@ struct i2o_controller { #endif spinlock_t lock; /* lock for controller configuration */ - void *driver_data[I2O_MAX_DRIVERS]; /* storage for drivers */ }; @@ -691,289 +690,22 @@ static inline u32 i2o_dma_high(dma_addr_t dma_addr) }; #endif -/** - * i2o_sg_tablesize - Calculate the maximum number of elements in a SGL - * @c: I2O controller for which the calculation should be done - * @body_size: maximum body size used for message in 32-bit words. - * - * Return the maximum number of SG elements in a SG list. - */ -static inline u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size) -{ - i2o_status_block *sb = c->status_block.virt; - u16 sg_count = - (sb->inbound_frame_size - sizeof(struct i2o_message) / 4) - - body_size; - - if (c->pae_support) { - /* - * for 64-bit a SG attribute element must be added and each - * SG element needs 12 bytes instead of 8. - */ - sg_count -= 2; - sg_count /= 3; - } else - sg_count /= 2; - - if (c->short_req && (sg_count > 8)) - sg_count = 8; - - return sg_count; -}; - -/** - * i2o_dma_map_single - Map pointer to controller and fill in I2O message. - * @c: I2O controller - * @ptr: pointer to the data which should be mapped - * @size: size of data in bytes - * @direction: DMA_TO_DEVICE / DMA_FROM_DEVICE - * @sg_ptr: pointer to the SG list inside the I2O message - * - * This function does all necessary DMA handling and also writes the I2O - * SGL elements into the I2O message. For details on DMA handling see also - * dma_map_single(). The pointer sg_ptr will only be set to the end of the - * SG list if the allocation was successful. - * - * Returns DMA address which must be checked for failures using - * dma_mapping_error(). - */ -static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr, +extern u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size); +extern dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr, size_t size, enum dma_data_direction direction, - u32 ** sg_ptr) -{ - u32 sg_flags; - u32 *mptr = *sg_ptr; - dma_addr_t dma_addr; - - switch (direction) { - case DMA_TO_DEVICE: - sg_flags = 0xd4000000; - break; - case DMA_FROM_DEVICE: - sg_flags = 0xd0000000; - break; - default: - return 0; - } - - dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction); - if (!dma_mapping_error(&c->pdev->dev, dma_addr)) { -#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 - if ((sizeof(dma_addr_t) > 4) && c->pae_support) { - *mptr++ = cpu_to_le32(0x7C020002); - *mptr++ = cpu_to_le32(PAGE_SIZE); - } -#endif - - *mptr++ = cpu_to_le32(sg_flags | size); - *mptr++ = cpu_to_le32(i2o_dma_low(dma_addr)); -#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 - if ((sizeof(dma_addr_t) > 4) && c->pae_support) - *mptr++ = cpu_to_le32(i2o_dma_high(dma_addr)); -#endif - *sg_ptr = mptr; - } - return dma_addr; -}; - -/** - * i2o_dma_map_sg - Map a SG List to controller and fill in I2O message. - * @c: I2O controller - * @sg: SG list to be mapped - * @sg_count: number of elements in the SG list - * @direction: DMA_TO_DEVICE / DMA_FROM_DEVICE - * @sg_ptr: pointer to the SG list inside the I2O message - * - * This function does all necessary DMA handling and also writes the I2O - * SGL elements into the I2O message. For details on DMA handling see also - * dma_map_sg(). The pointer sg_ptr will only be set to the end of the SG - * list if the allocation was successful. - * - * Returns 0 on failure or 1 on success. - */ -static inline int i2o_dma_map_sg(struct i2o_controller *c, + u32 ** sg_ptr); +extern int i2o_dma_map_sg(struct i2o_controller *c, struct scatterlist *sg, int sg_count, enum dma_data_direction direction, - u32 ** sg_ptr) -{ - u32 sg_flags; - u32 *mptr = *sg_ptr; - - switch (direction) { - case DMA_TO_DEVICE: - sg_flags = 0x14000000; - break; - case DMA_FROM_DEVICE: - sg_flags = 0x10000000; - break; - default: - return 0; - } - - sg_count = dma_map_sg(&c->pdev->dev, sg, sg_count, direction); - if (!sg_count) - return 0; - -#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 - if ((sizeof(dma_addr_t) > 4) && c->pae_support) { - *mptr++ = cpu_to_le32(0x7C020002); - *mptr++ = cpu_to_le32(PAGE_SIZE); - } -#endif - - while (sg_count-- > 0) { - if (!sg_count) - sg_flags |= 0xC0000000; - *mptr++ = cpu_to_le32(sg_flags | sg_dma_len(sg)); - *mptr++ = cpu_to_le32(i2o_dma_low(sg_dma_address(sg))); -#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 - if ((sizeof(dma_addr_t) > 4) && c->pae_support) - *mptr++ = cpu_to_le32(i2o_dma_high(sg_dma_address(sg))); -#endif - sg = sg_next(sg); - } - *sg_ptr = mptr; - - return 1; -}; - -/** - * i2o_dma_alloc - Allocate DMA memory - * @dev: struct device pointer to the PCI device of the I2O controller - * @addr: i2o_dma struct which should get the DMA buffer - * @len: length of the new DMA memory - * @gfp_mask: GFP mask - * - * Allocate a coherent DMA memory and write the pointers into addr. - * - * Returns 0 on success or -ENOMEM on failure. - */ -static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr, - size_t len, gfp_t gfp_mask) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int dma_64 = 0; - - if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) { - dma_64 = 1; - if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) - return -ENOMEM; - } - - addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask); - - if ((sizeof(dma_addr_t) > 4) && dma_64) - if (pci_set_dma_mask(pdev, DMA_64BIT_MASK)) - printk(KERN_WARNING "i2o: unable to set 64-bit DMA"); - - if (!addr->virt) - return -ENOMEM; - - memset(addr->virt, 0, len); - addr->len = len; - - return 0; -}; - -/** - * i2o_dma_free - Free DMA memory - * @dev: struct device pointer to the PCI device of the I2O controller - * @addr: i2o_dma struct which contains the DMA buffer - * - * Free a coherent DMA memory and set virtual address of addr to NULL. - */ -static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr) -{ - if (addr->virt) { - if (addr->phys) - dma_free_coherent(dev, addr->len, addr->virt, - addr->phys); - else - kfree(addr->virt); - addr->virt = NULL; - } -}; - -/** - * i2o_dma_realloc - Realloc DMA memory - * @dev: struct device pointer to the PCI device of the I2O controller - * @addr: pointer to a i2o_dma struct DMA buffer - * @len: new length of memory - * @gfp_mask: GFP mask - * - * If there was something allocated in the addr, free it first. If len > 0 - * than try to allocate it and write the addresses back to the addr - * structure. If len == 0 set the virtual address to NULL. - * - * Returns the 0 on success or negative error code on failure. - */ -static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr, - size_t len, gfp_t gfp_mask) -{ - i2o_dma_free(dev, addr); - - if (len) - return i2o_dma_alloc(dev, addr, len, gfp_mask); - - return 0; -}; - -/* - * i2o_pool_alloc - Allocate an slab cache and mempool - * @mempool: pointer to struct i2o_pool to write data into. - * @name: name which is used to identify cache - * @size: size of each object - * @min_nr: minimum number of objects - * - * First allocates a slab cache with name and size. Then allocates a - * mempool which uses the slab cache for allocation and freeing. - * - * Returns 0 on success or negative error code on failure. - */ -static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name, - size_t size, int min_nr) -{ - pool->name = kmalloc(strlen(name) + 1, GFP_KERNEL); - if (!pool->name) - goto exit; - strcpy(pool->name, name); - - pool->slab = - kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL); - if (!pool->slab) - goto free_name; - - pool->mempool = mempool_create_slab_pool(min_nr, pool->slab); - if (!pool->mempool) - goto free_slab; - - return 0; - - free_slab: - kmem_cache_destroy(pool->slab); - - free_name: - kfree(pool->name); - - exit: - return -ENOMEM; -}; - -/* - * i2o_pool_free - Free slab cache and mempool again - * @mempool: pointer to struct i2o_pool which should be freed - * - * Note that you have to return all objects to the mempool again before - * calling i2o_pool_free(). - */ -static inline void i2o_pool_free(struct i2o_pool *pool) -{ - mempool_destroy(pool->mempool); - kmem_cache_destroy(pool->slab); - kfree(pool->name); -}; - + u32 ** sg_ptr); +extern int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr, size_t len); +extern void i2o_dma_free(struct device *dev, struct i2o_dma *addr); +extern int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr, + size_t len); +extern int i2o_pool_alloc(struct i2o_pool *pool, const char *name, + size_t size, int min_nr); +extern void i2o_pool_free(struct i2o_pool *pool); /* I2O driver (OSM) functions */ extern int i2o_driver_register(struct i2o_driver *); extern void i2o_driver_unregister(struct i2o_driver *); -- cgit v1.2.3 From bb979d7fc360bc37cbaff43a6fafceb897cb5e47 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 15 Oct 2008 22:02:52 -0700 Subject: autofs4: cleanup autofs mount type usage Usage of the AUTOFS_TYPE_* defines is a little confusing and appears inconsistent. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_fs4.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index b785c6f8644..aa96a04ae02 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -29,6 +29,11 @@ #define AUTOFS_EXP_IMMEDIATE 1 #define AUTOFS_EXP_LEAVES 2 +#define AUTOFS_TYPE_ANY 0x0000 +#define AUTOFS_TYPE_INDIRECT 0x0001 +#define AUTOFS_TYPE_DIRECT 0x0002 +#define AUTOFS_TYPE_OFFSET 0x0004 + /* Daemon notification packet types */ enum autofs_notify { NFY_NONE, -- cgit v1.2.3 From 8d7b48e0bc5fa01a818eac713d4cb0763090cd0e Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 15 Oct 2008 22:02:54 -0700 Subject: autofs4: add miscellaneous device for ioctls Add a miscellaneous device to the autofs4 module for routing ioctls. This provides the ability to obtain an ioctl file handle for an autofs mount point that is possibly covered by another mount. The actual problem with autofs is that it can't reconnect to existing mounts. Immediately one things of just adding the ability to remount autofs file systems would solve it, but alas, that can't work. This is because autofs direct mounts and the implementation of "on demand mount and expire" of nested mount trees have the file system mounted on top of the mount trigger dentry. To resolve this a miscellaneous device node for routing ioctl commands to these mount points has been implemented in the autofs4 kernel module and a library added to autofs. This provides the ability to open a file descriptor for these over mounted autofs mount points. Please refer to Documentation/filesystems/autofs4-mount-control.txt for a discussion of the problem, implementation alternatives considered and a description of the interface. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: build fix] Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 157 +++++++++++++++++++++++++++++++++++++++++ include/linux/auto_fs4.h | 2 +- 2 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 include/linux/auto_dev-ioctl.h (limited to 'include/linux') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h new file mode 100644 index 00000000000..f4d05ccd731 --- /dev/null +++ b/include/linux/auto_dev-ioctl.h @@ -0,0 +1,157 @@ +/* + * Copyright 2008 Red Hat, Inc. All rights reserved. + * Copyright 2008 Ian Kent + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + */ + +#ifndef _LINUX_AUTO_DEV_IOCTL_H +#define _LINUX_AUTO_DEV_IOCTL_H + +#include + +#define AUTOFS_DEVICE_NAME "autofs" + +#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 +#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 + +#define AUTOFS_DEVID_LEN 16 + +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) + +/* + * An ioctl interface for autofs mount point control. + */ + +/* + * All the ioctls use this structure. + * When sending a path size must account for the total length + * of the chunk of memory otherwise is is the size of the + * structure. + */ + +struct autofs_dev_ioctl { + __u32 ver_major; + __u32 ver_minor; + __u32 size; /* total size of data passed in + * including this struct */ + __s32 ioctlfd; /* automount command fd */ + + __u32 arg1; /* Command parameters */ + __u32 arg2; + + char path[0]; +}; + +static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) +{ + in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; + in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; + in->size = sizeof(struct autofs_dev_ioctl); + in->ioctlfd = -1; + in->arg1 = 0; + in->arg2 = 0; + return; +} + +/* + * If you change this make sure you make the corresponding change + * to autofs-dev-ioctl.c:lookup_ioctl() + */ +enum { + /* Get various version info */ + AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71, + AUTOFS_DEV_IOCTL_PROTOVER_CMD, + AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, + + /* Open mount ioctl fd */ + AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, + + /* Close mount ioctl fd */ + AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, + + /* Mount/expire status returns */ + AUTOFS_DEV_IOCTL_READY_CMD, + AUTOFS_DEV_IOCTL_FAIL_CMD, + + /* Activate/deactivate autofs mount */ + AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, + AUTOFS_DEV_IOCTL_CATATONIC_CMD, + + /* Expiry timeout */ + AUTOFS_DEV_IOCTL_TIMEOUT_CMD, + + /* Get mount last requesting uid and gid */ + AUTOFS_DEV_IOCTL_REQUESTER_CMD, + + /* Check for eligible expire candidates */ + AUTOFS_DEV_IOCTL_EXPIRE_CMD, + + /* Request busy status */ + AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, + + /* Check if path is a mountpoint */ + AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, +}; + +#define AUTOFS_IOCTL 0x93 + +#define AUTOFS_DEV_IOCTL_VERSION \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_PROTOVER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_PROTOSUBVER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_OPENMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_READY \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_FAIL \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_SETPIPEFD \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_CATATONIC \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_TIMEOUT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_REQUESTER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_EXPIRE \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_ASKUMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl) + +#endif /* _LINUX_AUTO_DEV_IOCTL_H */ diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index aa96a04ae02..2253716d4b9 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 0 +#define AUTOFS_PROTO_SUBVERSION 1 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 -- cgit v1.2.3 From 3d599d1ca57f443e5c4ff5af1e69d90350082f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 15 Oct 2008 22:03:12 -0700 Subject: gpio_free might sleep, generic part MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the documentation gpio_free should only be called from task context only. To make this more explicit add a might sleep to all implementations. This is the generic part which changes gpiolib and the fallback implementation only. Signed-off-by: Uwe Kleine-König Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gpio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 730a20b8357..e10c49a5b96 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -8,6 +8,7 @@ #else +#include #include #include @@ -32,6 +33,8 @@ static inline int gpio_request(unsigned gpio, const char *label) static inline void gpio_free(unsigned gpio) { + might_sleep(); + /* GPIO can never have been requested */ WARN_ON(1); } -- cgit v1.2.3 From e3a1938805d2e81b27d3d348788644f3bad004f2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 15 Oct 2008 22:03:52 -0700 Subject: matroxfb: support G200eV chip Support the Matrox G200eV chip, based on timings that I found in the X.org matrox driver. Signed-off-by: Darrick J. Wong Acked-by: Krzysztof Helt Cc: Petr Vandrovec Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1176f1f177e..8edddc240e4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -587,6 +587,7 @@ #define PCI_DEVICE_ID_MATROX_G200_PCI 0x0520 #define PCI_DEVICE_ID_MATROX_G200_AGP 0x0521 #define PCI_DEVICE_ID_MATROX_G400 0x0525 +#define PCI_DEVICE_ID_MATROX_G200EV_PCI 0x0530 #define PCI_DEVICE_ID_MATROX_G550 0x2527 #define PCI_DEVICE_ID_MATROX_VIA 0x4536 -- cgit v1.2.3 From b53cde3557b8f97e6a635782875d442551a89bf1 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 15 Oct 2008 22:03:55 -0700 Subject: fbdev: add new TMIO framebuffer driver Add driver for TMIO framebuffer cells as found e.g. in Toshiba TC6393XB chips. Signed-off-by: Dmitry Baryshkov Cc: Ian Molton Acked-by: Samuel Ortiz Acked-by: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mfd/tmio.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index ec612e66391..516d955ab8a 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -1,6 +1,8 @@ #ifndef MFD_TMIO_H #define MFD_TMIO_H +#include + #define tmio_ioread8(addr) readb(addr) #define tmio_ioread16(addr) readw(addr) #define tmio_ioread16_rep(r, b, l) readsw(r, b, l) @@ -25,4 +27,21 @@ struct tmio_nand_data { unsigned int num_partitions; }; +#define FBIO_TMIO_ACC_WRITE 0x7C639300 +#define FBIO_TMIO_ACC_SYNC 0x7C639301 + +struct tmio_fb_data { + int (*lcd_set_power)(struct platform_device *fb_dev, + bool on); + int (*lcd_mode)(struct platform_device *fb_dev, + const struct fb_videomode *mode); + int num_modes; + struct fb_videomode *modes; + + /* in mm: size of screen */ + int height; + int width; +}; + + #endif -- cgit v1.2.3 From b563cf59c4d67da7d671788a9848416bfa4180ab Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Wed, 15 Oct 2008 22:03:58 -0700 Subject: pnp: make the resource type an unsigned long PnP encodes the resource type directly as its struct resource->flags value which is an unsigned long. Make it so... Signed-off-by: Rene Herman Cc: "H. Peter Anvin" Acked-by: Bjorn Helgaas Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pnp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index be764e514e3..53b70fd1d9a 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -22,9 +22,11 @@ struct pnp_dev; * Resource Management */ #ifdef CONFIG_PNP -struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int); +struct resource *pnp_get_resource(struct pnp_dev *dev, unsigned long type, + unsigned int num); #else -static inline struct resource *pnp_get_resource(struct pnp_dev *dev, unsigned int type, unsigned int num) +static inline struct resource *pnp_get_resource(struct pnp_dev *dev, + unsigned long type, unsigned int num) { return NULL; } -- cgit v1.2.3 From 60836eb63b941f407dc2a609f3f0f34fd74ef6c3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 15 Oct 2008 22:04:00 -0700 Subject: telephony: remove CVS keywords Remove CVS keywords that weren't updated for a long time from comments. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/telephony.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/telephony.h b/include/linux/telephony.h index 0d0cf2a1e7b..5b2b6261f19 100644 --- a/include/linux/telephony.h +++ b/include/linux/telephony.h @@ -28,10 +28,6 @@ * ON AN "AS IS" BASIS, AND QUICKNET TECHNOLOGIES, INC. HAS NO OBLIGATION * TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. * - * Version: $Revision: 4.2 $ - * - * $Id: telephony.h,v 4.2 2001/08/06 07:09:43 craigs Exp $ - * *****************************************************************************/ #ifndef TELEPHONY_H -- cgit v1.2.3 From b73c29f6b0ddbcf07b43c5c5e6354e5839b5e68d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 15 Oct 2008 22:04:13 -0700 Subject: quota: remove CVS keywords Remove CVS keywords that weren't updated for a long time from comments. Signed-off-by: Adrian Bunk Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/quota.h | 2 -- include/linux/quotaops.h | 3 --- 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 376a05048bc..40401b55448 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -28,8 +28,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * Version: $Id: quota.h,v 2.0 1996/11/17 16:48:14 mvw Exp mvw $ */ #ifndef _LINUX_QUOTA_ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index ca6b9b5c8d5..a558a4c1d35 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -3,9 +3,6 @@ * macros expand to the right source-code. * * Author: Marco van Wieringen - * - * Version: $Id: quotaops.h,v 1.2 1998/01/15 16:22:26 ecd Exp $ - * */ #ifndef _LINUX_QUOTAOPS_ #define _LINUX_QUOTAOPS_ -- cgit v1.2.3 From f221e726bf4e082a05dcd573379ac859bfba7126 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 15 Oct 2008 22:04:23 -0700 Subject: sysctl: simplify ->strategy name and nlen parameters passed to ->strategy hook are unused, remove them. In general ->strategy hook should know what it's doing, and don't do something tricky for which, say, pointer to original userspace array may be needed (name). Signed-off-by: Alexey Dobriyan Acked-by: David S. Miller [ networking bits ] Cc: Ralf Baechle Cc: David Howells Cc: Matt Mackall Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index d0437f36921..39d471d1163 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -972,7 +972,7 @@ extern int sysctl_perm(struct ctl_table_root *root, typedef struct ctl_table ctl_table; -typedef int ctl_handler (struct ctl_table *table, int __user *name, int nlen, +typedef int ctl_handler (struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -- cgit v1.2.3 From 25cbe53ef1cb828ae012f3955a5aa18117114439 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 15 Oct 2008 22:04:25 -0700 Subject: pid_ns: kill the now unused task_child_reaper() task_child_reaper() has no callers anymore, kill it. Signed-off-by: Oleg Nesterov Acked-by: Serge Hallyn Acked-by: Pavel Emelyanov Acked-by: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 1af82c4e17d..d82fe825d62 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -84,12 +84,6 @@ static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) return tsk->nsproxy->pid_ns; } -static inline struct task_struct *task_child_reaper(struct task_struct *tsk) -{ - BUG_ON(tsk != current); - return tsk->nsproxy->pid_ns->child_reaper; -} - void pidhash_init(void); void pidmap_init(void); -- cgit v1.2.3 From 612de10db06c0704a66bbe7fd13990cb1c2cb958 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 15 Oct 2008 22:04:33 -0700 Subject: parport: remove CVS keywords Remove CVS keywords that weren't updated for a long time from comments. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/parport.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/parport.h b/include/linux/parport.h index 6a0d7cdb577..e1f83c5065c 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -1,5 +1,3 @@ -/* $Id: parport.h,v 1.1 1998/05/17 10:57:52 andrea Exp andrea $ */ - /* * Any part of this program may be used in documents licensed under * the GNU Free Documentation License, Version 1.1 or any later version -- cgit v1.2.3 From ebf3f09c634906d371f2bfd71b41c7e0c52efe7e Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 15 Oct 2008 22:05:12 -0700 Subject: Configure out AIO support This patchs adds the CONFIG_AIO option which allows to remove support for asynchronous I/O operations, that are not necessarly used by applications, particularly on embedded devices. As this is a size-reduction option, it depends on CONFIG_EMBEDDED. It allows to save ~7 kilobytes of kernel code/data: text data bss dec hex filename 1115067 119180 217088 1451335 162547 vmlinux 1108025 119048 217088 1444161 160941 vmlinux.new -7042 -132 0 -7174 -1C06 +/- This patch has been originally written by Matt Mackall , and is part of the Linux Tiny project. [randy.dunlap@oracle.com: build fix] Signed-off-by: Thomas Petazzoni Cc: Benjamin LaHaise Cc: Zach Brown Signed-off-by: Matt Mackall Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 09b276c3522..f6b8cf99b59 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -204,12 +204,21 @@ struct kioctx { /* prototypes */ extern unsigned aio_max_size; +#ifdef CONFIG_AIO extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); extern int aio_put_req(struct kiocb *iocb); extern void kick_iocb(struct kiocb *iocb); extern int aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); +#else +static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } +static inline int aio_put_req(struct kiocb *iocb) { return 0; } +static inline void kick_iocb(struct kiocb *iocb) { } +static inline int aio_complete(struct kiocb *iocb, long res, long res2) { return 0; } +struct mm_struct; +static inline void exit_aio(struct mm_struct *mm) { } +#endif /* CONFIG_AIO */ #define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait) -- cgit v1.2.3 From c9f66169f1c696f9489503d7de92daff135c1efd Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 15 Oct 2008 22:05:15 -0700 Subject: resource: add resource_type() and IORESOURCE_TYPE_BITS Add resource_type() and IORESOURCE_TYPE_BITS. They make it easier to add more resource types without having to rewrite tons of code. Signed-off-by: Magnus Damm Cc: Ben Dooks Cc: Jean Delvare Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index ee9bcc6f32b..0dde77272d7 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -34,7 +34,8 @@ struct resource_list { */ #define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ -#define IORESOURCE_IO 0x00000100 /* Resource type */ +#define IORESOURCE_TYPE_BITS 0x00000f00 /* Resource type */ +#define IORESOURCE_IO 0x00000100 #define IORESOURCE_MEM 0x00000200 #define IORESOURCE_IRQ 0x00000400 #define IORESOURCE_DMA 0x00000800 @@ -126,6 +127,10 @@ static inline resource_size_t resource_size(struct resource *res) { return res->end - res->start + 1; } +static inline unsigned long resource_type(struct resource *res) +{ + return res->flags & IORESOURCE_TYPE_BITS; +} /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) -- cgit v1.2.3 From 3e624fc72fba09b6f999a9fbb87b64efccd38036 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 16 Oct 2008 20:00:24 -0400 Subject: ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback The multiblock allocator needs to be able to release blocks (and issue a blkdev discard request) when the transaction which freed those blocks is committed. Previously this was done via a polling mechanism when blocks are allocated or freed. A much better way of doing things is to create a jbd2 callback function and attaching the list of blocks to be freed directly to the transaction structure. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 463d6f10b64..c7d106ef22e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -641,6 +641,11 @@ struct transaction_s */ int t_handle_count; + /* + * For use by the filesystem to store fs-specific data + * structures associated with the transaction + */ + struct list_head t_private_list; }; struct transaction_run_stats_s { @@ -935,6 +940,10 @@ struct journal_s pid_t j_last_sync_writer; + /* This function is called when a transaction is closed */ + void (*j_commit_callback)(journal_t *, + transaction_t *); + /* * Journal statistics */ -- cgit v1.2.3 From 54514a70adefe356afe854e2d3912d46668068e6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 22:15:57 -0700 Subject: softirq: Add support for triggering softirq work on softirqs. This is basically a genericization of Jens Axboe's block layer remote softirq changes. Signed-off-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/interrupt.h | 21 +++++++++++++++++++++ include/linux/smp.h | 4 +++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 54b3623434e..35a61dc60d5 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -273,6 +275,25 @@ extern void softirq_init(void); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +/* This is the worklist that queues up per-cpu softirq work. + * + * send_remote_sendirq() adds work to these lists, and + * the softirq handler itself dequeues from them. The queues + * are protected by disabling local cpu interrupts and they must + * only be accessed by the local cpu that they are for. + */ +DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); + +/* Try to send a softirq to a remote cpu. If this cannot be done, the + * work will be queued to the local cpu. + */ +extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq); + +/* Like send_remote_softirq(), but the caller must disable local cpu interrupts + * and compute the current cpu, passed in as 'this_cpu'. + */ +extern void __send_remote_softirq(struct call_single_data *cp, int cpu, + int this_cpu, int softirq); /* Tasklets --- multithreaded analogue of BHs. diff --git a/include/linux/smp.h b/include/linux/smp.h index 66484d4a845..2e4d58b26c0 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -7,6 +7,7 @@ */ #include +#include #include #include @@ -16,7 +17,8 @@ struct call_single_data { struct list_head list; void (*func) (void *info); void *info; - unsigned int flags; + u16 flags; + u16 priv; }; #ifdef CONFIG_SMP -- cgit v1.2.3 From 8677142710516d986d932d6f1fba7be8382c1fec Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 13 Oct 2008 14:19:05 +0200 Subject: block: fix nr_phys_segments miscalculation bug This fixes the bug reported by Nikanth Karthikesan : http://lkml.org/lkml/2008/10/2/203 The root cause of the bug is that blk_phys_contig_segment miscalculates q->max_segment_size. blk_phys_contig_segment checks: req->biotail->bi_size + next_req->bio->bi_size > q->max_segment_size But blk_recalc_rq_segments might expect that req->biotail and the previous bio in the req are supposed be merged into one segment. blk_recalc_rq_segments might also expect that next_req->bio and the next bio in the next_req are supposed be merged into one segment. In such case, we merge two requests that can't be merged here. Later, blk_rq_map_sg gives more segments than it should. We need to keep track of segment size in blk_recalc_rq_segments and use it to see if two requests can be merged. This patch implements it in the similar way that we used to do for hw merging (virtual merging). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index ff5b4cf9e2d..dc3cec386a9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -79,6 +79,13 @@ struct bio { unsigned int bi_size; /* residual I/O count */ + /* + * To keep track of the max segment size, we account for the + * sizes of the first and last mergeable segments in this bio. + */ + unsigned int bi_seg_front_size; + unsigned int bi_seg_back_size; + unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ unsigned int bi_comp_cpu; /* completion CPU */ -- cgit v1.2.3 From 756f8243188e013bd067811cdc0cc60760abfdf9 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 16 Oct 2008 08:23:21 +0200 Subject: blktrace: add support for driver data This patch adds the new api call blk_add_driver_data() to blktrace. It allows to trace device driver-specific binary data. Signed-off-by: Stefan Raspl Signed-off-by: Martin Peschke Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3a31eb50616..bdf505d33e7 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -24,6 +24,7 @@ enum blktrace_cat { BLK_TC_AHEAD = 1 << 11, /* readahead */ BLK_TC_META = 1 << 12, /* metadata */ BLK_TC_DISCARD = 1 << 13, /* discard requests */ + BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ }; @@ -51,6 +52,7 @@ enum blktrace_act { __BLK_TA_BOUNCE, /* bio was bounced */ __BLK_TA_REMAP, /* bio was remapped */ __BLK_TA_ABORT, /* request aborted */ + __BLK_TA_DRV_DATA, /* driver-specific binary data */ }; /* @@ -82,6 +84,7 @@ enum blktrace_notify { #define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) #define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) #define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA)) #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) @@ -317,6 +320,34 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); } +/** + * blk_add_driver_data - Add binary message with driver-specific data + * @q: queue the io is for + * @rq: io request + * @data: driver-specific data + * @len: length of driver-specific data + * + * Description: + * Some drivers might want to write driver-specific data per request. + * + **/ +static inline void blk_add_driver_data(struct request_queue *q, + struct request *rq, + void *data, size_t len) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (blk_pc_request(rq)) + __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, + rq->errors, len, data); + else + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + 0, BLK_TA_DRV_DATA, rq->errors, len, data); +} + extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); @@ -330,6 +361,7 @@ extern int blk_trace_remove(struct request_queue *q); #define blk_add_trace_generic(q, rq, rw, what) do { } while (0) #define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) #define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) +#define blk_add_driver_data(q, rq, data, len) do {} while (0) #define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) #define blk_trace_setup(q, name, dev, arg) (-ENOTTY) #define blk_trace_startstop(q, start) (-ENOTTY) -- cgit v1.2.3 From f73e2d13a16cc88c4faa4729967f92bfeec8a142 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Oct 2008 14:03:08 +0200 Subject: block: remove __generic_unplug_device() from exports The only out-of-core user is IDE, and that should be using blk_start_queueing() instead. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a92d9e4ea96..8eed8b15f99 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -856,7 +856,6 @@ extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); extern void generic_unplug_device(struct request_queue *); -extern void __generic_unplug_device(struct request_queue *); extern long nr_blockdev_pages(void); int blk_get_queue(struct request_queue *); -- cgit v1.2.3 From fe11edfaabf1787c05d782a7b33e6497d1118b1d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 17 Oct 2008 18:09:11 +0200 Subject: ide: IDE_AFLAG_MEDIA_CHANGED -> IDE_DFLAG_MEDIA_CHANGED There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index c47e371554c..155a57f55c6 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -464,7 +464,6 @@ struct ide_acpi_hwif_link; /* ATAPI device flags */ enum { IDE_AFLAG_DRQ_INTERRUPT = (1 << 0), - IDE_AFLAG_MEDIA_CHANGED = (1 << 1), /* Drive cannot lock the door. */ IDE_AFLAG_NO_DOORLOCK = (1 << 2), @@ -578,7 +577,8 @@ enum { /* don't unload heads */ IDE_DFLAG_NO_UNLOAD = (1 << 27), /* heads unloaded, please don't reset port */ - IDE_DFLAG_PARKED = (1 << 28) + IDE_DFLAG_PARKED = (1 << 28), + IDE_DFLAG_MEDIA_CHANGED = (1 << 29), }; struct ide_drive_s { -- cgit v1.2.3 From da167876bd0f71f1c646e5dd98997544d8d90e8e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 17 Oct 2008 18:09:11 +0200 Subject: ide: IDE_AFLAG_WP -> IDE_DFLAG_WP There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 155a57f55c6..bd0a4d36b6d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -503,8 +503,6 @@ enum { IDE_AFLAG_CLIK_DRIVE = (1 << 19), /* Requires BH algorithm for packets */ IDE_AFLAG_ZIP_DRIVE = (1 << 20), - /* Write protect */ - IDE_AFLAG_WP = (1 << 21), /* Supports format progress report */ IDE_AFLAG_SRFP = (1 << 22), @@ -579,6 +577,8 @@ enum { /* heads unloaded, please don't reset port */ IDE_DFLAG_PARKED = (1 << 28), IDE_DFLAG_MEDIA_CHANGED = (1 << 29), + /* write protect */ + IDE_DFLAG_WP = (1 << 30), }; struct ide_drive_s { -- cgit v1.2.3 From e01286282eef85e4783b06fb2e0ed84fc111eb32 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 17 Oct 2008 18:09:11 +0200 Subject: ide: IDE_AFLAG_FORMAT_IN_PROGRESS -> IDE_DFLAG_FORMAT_IN_PROGRESS There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index bd0a4d36b6d..d111c3ebbba 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -497,8 +497,6 @@ enum { IDE_AFLAG_LE_SPEED_FIELDS = (1 << 17), /* ide-floppy */ - /* Format in progress */ - IDE_AFLAG_FORMAT_IN_PROGRESS = (1 << 18), /* Avoid commands not supported in Clik drive */ IDE_AFLAG_CLIK_DRIVE = (1 << 19), /* Requires BH algorithm for packets */ @@ -579,6 +577,7 @@ enum { IDE_DFLAG_MEDIA_CHANGED = (1 << 29), /* write protect */ IDE_DFLAG_WP = (1 << 30), + IDE_DFLAG_FORMAT_IN_PROGRESS = (1 << 31), }; struct ide_drive_s { -- cgit v1.2.3 From 42619d35c7af2f88cad56425fe3981f1f65ff0bd Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 17 Oct 2008 18:09:11 +0200 Subject: ide: remove IDE_AFLAG_NO_DOORLOCKING Just use IDE_DFLAG_DOORLOCKING instead. There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index d111c3ebbba..ba51a93fa54 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -464,8 +464,6 @@ struct ide_acpi_hwif_link; /* ATAPI device flags */ enum { IDE_AFLAG_DRQ_INTERRUPT = (1 << 0), - /* Drive cannot lock the door. */ - IDE_AFLAG_NO_DOORLOCK = (1 << 2), /* ide-cd */ /* Drive cannot eject the disc. */ -- cgit v1.2.3 From 79cb380397c834a35952d8497651d93b543ef968 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 17 Oct 2008 18:09:13 +0200 Subject: ide: allow device drivers to specify per-device type /proc settings Turn ide_driver_t's 'proc' field into ->proc_entries method (and also 'settings' field into ->proc_devsets method). Then update all device drivers accordingly. There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ba51a93fa54..488808891ac 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1120,8 +1120,8 @@ struct ide_driver_s { void (*resume)(ide_drive_t *); void (*shutdown)(ide_drive_t *); #ifdef CONFIG_IDE_PROC_FS - ide_proc_entry_t *proc; - const struct ide_proc_devset *settings; + ide_proc_entry_t * (*proc_entries)(ide_drive_t *); + const struct ide_proc_devset * (*proc_devsets)(ide_drive_t *); #endif }; -- cgit v1.2.3 From 806f80a6fc203ad0bde84e5a9e94572617d2ae45 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 17 Oct 2008 18:09:14 +0200 Subject: ide: add generic ATA/ATAPI disk driver * Add struct ide_disk_ops containing protocol specific methods. * Add 'struct ide_disk_ops *' to ide_drive_t. * Convert ide-{disk,floppy} drivers to use struct ide_disk_ops. * Merge ide-{disk,floppy} drivers into generic ide-gd driver. While at it: - ide_disk_init_capacity() -> ide_disk_get_capacity() Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 488808891ac..89e53cfbc78 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -461,6 +461,23 @@ struct ide_acpi_drive_link; struct ide_acpi_hwif_link; #endif +struct ide_drive_s; + +struct ide_disk_ops { + int (*check)(struct ide_drive_s *, const char *); + int (*get_capacity)(struct ide_drive_s *); + void (*setup)(struct ide_drive_s *); + void (*flush)(struct ide_drive_s *); + int (*init_media)(struct ide_drive_s *, struct gendisk *); + int (*set_doorlock)(struct ide_drive_s *, struct gendisk *, + int); + ide_startstop_t (*do_request)(struct ide_drive_s *, struct request *, + sector_t); + int (*end_request)(struct ide_drive_s *, int, int); + int (*ioctl)(struct ide_drive_s *, struct inode *, + struct file *, unsigned int, unsigned long); +}; + /* ATAPI device flags */ enum { IDE_AFLAG_DRQ_INTERRUPT = (1 << 0), @@ -594,6 +611,8 @@ struct ide_drive_s { #endif struct hwif_s *hwif; /* actually (ide_hwif_t *) */ + const struct ide_disk_ops *disk_ops; + unsigned long dev_flags; unsigned long sleep; /* sleep until this time */ -- cgit v1.2.3 From 719254faa17ffedc87ba0fadb9b34e535c9758d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 17 Oct 2008 09:59:47 +0200 Subject: NOHZ: unify the nohz function calls in irq_enter() We have two separate nohz function calls in irq_enter() for no good reason. Just call a single NOHZ function from irq_enter() and call the bits in the tick code. Signed-off-by: Thomas Gleixner --- include/linux/tick.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 98921a3e1aa..b6ec8189ac0 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -96,9 +96,11 @@ extern cpumask_t *tick_get_broadcast_oneshot_mask(void); extern void tick_clock_notify(void); extern int tick_check_oneshot_change(int allow_nohz); extern struct tick_sched *tick_get_tick_sched(int cpu); +extern void tick_check_idle(int cpu); # else static inline void tick_clock_notify(void) { } static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } +static inline void tick_check_idle(int cpu) { } # endif #else /* CONFIG_GENERIC_CLOCKEVENTS */ @@ -106,26 +108,23 @@ static inline void tick_init(void) { } static inline void tick_cancel_sched_timer(int cpu) { } static inline void tick_clock_notify(void) { } static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } +static inline void tick_check_idle(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ extern void tick_nohz_stop_sched_tick(int inidle); extern void tick_nohz_restart_sched_tick(void); -extern void tick_nohz_update_jiffies(void); extern ktime_t tick_nohz_get_sleep_length(void); -extern void tick_nohz_stop_idle(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); # else static inline void tick_nohz_stop_sched_tick(int inidle) { } static inline void tick_nohz_restart_sched_tick(void) { } -static inline void tick_nohz_update_jiffies(void) { } static inline ktime_t tick_nohz_get_sleep_length(void) { ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; return len; } -static inline void tick_nohz_stop_idle(int cpu) { } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ -- cgit v1.2.3 From 504e518953a330c8d44a95bdd65a5c9f50f1012e Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Thu, 16 Oct 2008 14:15:16 +1100 Subject: Make nfs_file_cred more robust. As not all files have an associated open_context (e.g. device special files), it is safest to test for the existence of the open context before de-referencing it. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ac8d0233b05..4eaa8347a0d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -367,8 +367,12 @@ static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) static inline struct rpc_cred *nfs_file_cred(struct file *file) { - if (file != NULL) - return nfs_file_open_context(file)->cred; + if (file != NULL) { + struct nfs_open_context *ctx = + nfs_file_open_context(file); + if (ctx) + return ctx->cred; + } return NULL; } -- cgit v1.2.3 From 97854829b97093ae172144a2597fc49ea203dcf3 Mon Sep 17 00:00:00 2001 From: Manu Abraham Date: Tue, 14 Oct 2008 19:48:07 -0300 Subject: V4L/DVB (9195): Frontend API Fix: 32APSK is a valid modulation for the DVB-S2 delivery Signed-off-by: Manu Abraham Signed-off-by: Mauro Carvalho Chehab --- include/linux/dvb/frontend.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 6e4ace27027..38942f731b9 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -166,6 +166,7 @@ typedef enum fe_modulation { VSB_16, PSK_8, APSK_16, + APSK_32, DQPSK, } fe_modulation_t; -- cgit v1.2.3 From 5ba4ecc8b0166de4363cc31aa68d52abe0dff8de Mon Sep 17 00:00:00 2001 From: Manu Abraham Date: Tue, 14 Oct 2008 19:50:03 -0300 Subject: V4L/DVB (9196): Add support for DSS delivery Signed-off-by: Manu Abraham Signed-off-by: Mauro Carvalho Chehab --- include/linux/dvb/frontend.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 38942f731b9..79a8ed8e6a7 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -296,6 +296,7 @@ typedef enum fe_delivery_system { SYS_DVBC_ANNEX_AC, SYS_DVBC_ANNEX_B, SYS_DVBT, + SYS_DSS, SYS_DVBS, SYS_DVBS2, SYS_DVBH, -- cgit v1.2.3 From 2a1d245b70f3f966f96767aaea1a2db6823e2f6e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 15 Oct 2008 14:47:36 -0300 Subject: V4L/DVB (9240): saa7127: Fix two typos Signed-off-by: Jean Delvare Signed-off-by: Mauro Carvalho Chehab --- include/linux/i2c-id.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 493435bcdbe..01d67ba9e98 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -60,7 +60,7 @@ #define I2C_DRIVERID_WM8775 69 /* wm8775 audio processor */ #define I2C_DRIVERID_CS53L32A 70 /* cs53l32a audio processor */ #define I2C_DRIVERID_CX25840 71 /* cx2584x video encoder */ -#define I2C_DRIVERID_SAA7127 72 /* saa7124 video encoder */ +#define I2C_DRIVERID_SAA7127 72 /* saa7127 video encoder */ #define I2C_DRIVERID_SAA711X 73 /* saa711x video encoders */ #define I2C_DRIVERID_AKITAIOEXP 74 /* IO Expander on Sharp SL-C1000 */ #define I2C_DRIVERID_INFRARED 75 /* I2C InfraRed on Video boards */ -- cgit v1.2.3 From 5b775f672cc993ba9dba5626811ab1f2ac42883b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 26 Aug 2008 16:22:06 -0700 Subject: USB: add USB test and measurement class driver This driver was originaly written by Stefan Kopp, but massively reworked by Greg for submission. Thanks to Felipe Balbi for lots of work in cleaning up this driver. Thanks to Oliver Neukum for reviewing previous versions and pointing out problems. Cc: Stefan Kopp Cc: Marcel Janssen Cc: Felipe Balbi Cc: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/Kbuild | 2 +- include/linux/usb/tmc.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 include/linux/usb/tmc.h (limited to 'include/linux') diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild index 42e84fc315e..29fd73b0bff 100644 --- a/include/linux/usb/Kbuild +++ b/include/linux/usb/Kbuild @@ -4,4 +4,4 @@ header-y += ch9.h header-y += gadgetfs.h header-y += midi.h header-y += g_printer.h - +header-y += tmc.h diff --git a/include/linux/usb/tmc.h b/include/linux/usb/tmc.h new file mode 100644 index 00000000000..c045ae12556 --- /dev/null +++ b/include/linux/usb/tmc.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2007 Stefan Kopp, Gechingen, Germany + * Copyright (C) 2008 Novell, Inc. + * Copyright (C) 2008 Greg Kroah-Hartman + * + * This file holds USB constants defined by the USB Device Class + * Definition for Test and Measurement devices published by the USB-IF. + * + * It also has the ioctl definitions for the usbtmc kernel driver that + * userspace needs to know about. + */ + +#ifndef __LINUX_USB_TMC_H +#define __LINUX_USB_TMC_H + +/* USB TMC status values */ +#define USBTMC_STATUS_SUCCESS 0x01 +#define USBTMC_STATUS_PENDING 0x02 +#define USBTMC_STATUS_FAILED 0x80 +#define USBTMC_STATUS_TRANSFER_NOT_IN_PROGRESS 0x81 +#define USBTMC_STATUS_SPLIT_NOT_IN_PROGRESS 0x82 +#define USBTMC_STATUS_SPLIT_IN_PROGRESS 0x83 + +/* USB TMC requests values */ +#define USBTMC_REQUEST_INITIATE_ABORT_BULK_OUT 1 +#define USBTMC_REQUEST_CHECK_ABORT_BULK_OUT_STATUS 2 +#define USBTMC_REQUEST_INITIATE_ABORT_BULK_IN 3 +#define USBTMC_REQUEST_CHECK_ABORT_BULK_IN_STATUS 4 +#define USBTMC_REQUEST_INITIATE_CLEAR 5 +#define USBTMC_REQUEST_CHECK_CLEAR_STATUS 6 +#define USBTMC_REQUEST_GET_CAPABILITIES 7 +#define USBTMC_REQUEST_INDICATOR_PULSE 64 + +/* Request values for USBTMC driver's ioctl entry point */ +#define USBTMC_IOC_NR 91 +#define USBTMC_IOCTL_INDICATOR_PULSE _IO(USBTMC_IOC_NR, 1) +#define USBTMC_IOCTL_CLEAR _IO(USBTMC_IOC_NR, 2) +#define USBTMC_IOCTL_ABORT_BULK_OUT _IO(USBTMC_IOC_NR, 3) +#define USBTMC_IOCTL_ABORT_BULK_IN _IO(USBTMC_IOC_NR, 4) +#define USBTMC_IOCTL_CLEAR_OUT_HALT _IO(USBTMC_IOC_NR, 6) +#define USBTMC_IOCTL_CLEAR_IN_HALT _IO(USBTMC_IOC_NR, 7) + +#endif -- cgit v1.2.3 From 55b447bf79ad25591437d24b78caa9d0ae4fec82 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 29 Jul 2008 15:26:15 +0200 Subject: USB: kill URBs permanently looking at usb_kill_urb() it seems to me that it is unnecessarily lenient. In the use case of disconnect() you never want to use the URB again (for the same device) But leaving urb->reject elevated will make it easier to avoid races between read/write and disconnect. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 94ac74aba6b..3371c91e7ff 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1459,6 +1459,8 @@ extern struct urb *usb_get_urb(struct urb *urb); extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_unlink_urb(struct urb *urb); extern void usb_kill_urb(struct urb *urb); +extern void usb_poison_urb(struct urb *urb); +extern void usb_unpoison_urb(struct urb *urb); extern void usb_kill_anchored_urbs(struct usb_anchor *anchor); extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor); extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); -- cgit v1.2.3 From 6a2839bedc1502b3f0366cc3ad1099a1d92cf8fb Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 29 Jul 2008 16:18:47 +0200 Subject: USB: extend poisoning to anchors this extends the poisoning concept to anchors. This way poisoning will work with fire and forget drivers. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 3371c91e7ff..d97927970f5 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1135,6 +1135,7 @@ struct usb_anchor { struct list_head urb_list; wait_queue_head_t wait; spinlock_t lock; + unsigned int poisoned:1; }; static inline void init_usb_anchor(struct usb_anchor *anchor) @@ -1462,6 +1463,7 @@ extern void usb_kill_urb(struct urb *urb); extern void usb_poison_urb(struct urb *urb); extern void usb_unpoison_urb(struct urb *urb); extern void usb_kill_anchored_urbs(struct usb_anchor *anchor); +extern void usb_poison_anchored_urbs(struct usb_anchor *anchor); extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor); extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); extern void usb_unanchor_urb(struct urb *urb); -- cgit v1.2.3 From 60beed95e38793c0baff7f94433c1f639d8d5efd Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 18 Aug 2008 17:38:22 -0700 Subject: usb gadget: function activation/deactivation Add a new mechanism to the composite gadget framework, letting functions deactivate (and reactivate) themselves. Think of it as a refcounted wrapper for the software pullup control. A key example of why to use this mechanism involves functions that require a userspace daemon. Those functions shuld use this new mechanism to prevent the gadget from enumerating until those daemons are activated. Without this mechanism, hosts would see devices that malfunction until the relevant daemons start. Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index c932390c6da..935c380ffe4 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -130,6 +130,9 @@ struct usb_function { int usb_add_function(struct usb_configuration *, struct usb_function *); +int usb_function_deactivate(struct usb_function *); +int usb_function_activate(struct usb_function *); + int usb_interface_id(struct usb_configuration *, struct usb_function *); /** @@ -316,9 +319,13 @@ struct usb_composite_dev { struct usb_composite_driver *driver; u8 next_string_id; - spinlock_t lock; + /* the gadget driver won't enable the data pullup + * while the deactivation count is nonzero. + */ + unsigned deactivations; - /* REVISIT use and existence of lock ... */ + /* protects at least deactivation count */ + spinlock_t lock; }; extern int usb_string_id(struct usb_composite_dev *c); -- cgit v1.2.3 From 3086775a4916b0fe128d924d83f4e7d7c39e4d0e Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 18 Aug 2008 17:39:30 -0700 Subject: usb gadget: cdc obex glue The following patch introduces a new f_obex.c function driver. It allows userspace obex servers to use usb as transport layer for their messages. [ dbrownell@users.sourceforge.net: various fixes and cleanups ] Signed-off-by: Felipe Balbi Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/cdc.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index ca228bb9421..18a729343ff 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -160,6 +160,15 @@ struct usb_cdc_mdlm_detail_desc { __u8 bDetailData[0]; } __attribute__ ((packed)); +/* "OBEX Control Model Functional Descriptor" */ +struct usb_cdc_obex_desc { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + + __le16 bcdVersion; +} __attribute__ ((packed)); + /*-------------------------------------------------------------------------*/ /* -- cgit v1.2.3 From 0b14c3881d4b91272b779f4037e263d392de058f Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Sat, 20 Sep 2008 14:41:47 -0700 Subject: USB: Fix spelling in usb/serial.h Fixes a minor typo in the comments for usb_set_serial_data. Signed-off-by: Geoff Levand Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/serial.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 655341d0f53..0b8617a9176 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -192,7 +192,7 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver (remember * it will show up in sysfs, so it needs to be short and to the point. - * Useing the module name is a good idea.) + * Using the module name is a good idea.) */ struct usb_serial_driver { const char *description; -- cgit v1.2.3 From cbc30118d7a376dab4113f299c0c8f035737a5c3 Mon Sep 17 00:00:00 2001 From: Stephen Ware Date: Tue, 30 Sep 2008 11:39:38 -0700 Subject: usb: vstusb.c : new driver for spectrometers used by Vernier Software & Technology, Inc. This patch adds the vstusb driver to the drivers/usb/misc directory. This driver provides support for Vernier Software & Technology spectrometers, all made by Ocean Optics. The driver provides both IOCTL and read()/write() methods for sending raw data to spectrometers across the bulk channel. Each method allows for a configured timeout. From: Stephen Ware Signed-off-by: Dennis O'Brien Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/Kbuild | 1 + include/linux/usb/vstusb.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 include/linux/usb/vstusb.h (limited to 'include/linux') diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild index 29fd73b0bff..54c446309a2 100644 --- a/include/linux/usb/Kbuild +++ b/include/linux/usb/Kbuild @@ -5,3 +5,4 @@ header-y += gadgetfs.h header-y += midi.h header-y += g_printer.h header-y += tmc.h +header-y += vstusb.h diff --git a/include/linux/usb/vstusb.h b/include/linux/usb/vstusb.h new file mode 100644 index 00000000000..1cfac67191f --- /dev/null +++ b/include/linux/usb/vstusb.h @@ -0,0 +1,71 @@ +/***************************************************************************** + * File: drivers/usb/misc/vstusb.h + * + * Purpose: Support for the bulk USB Vernier Spectrophotometers + * + * Author: EQware Engineering, Inc. + * Oregon City, OR, USA 97045 + * + * Copyright: 2007, 2008 + * Vernier Software & Technology + * Beaverton, OR, USA 97005 + * + * Web: www.vernier.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + *****************************************************************************/ +/***************************************************************************** + * + * The vstusb module is a standard usb 'client' driver running on top of the + * standard usb host controller stack. + * + * In general, vstusb supports standard bulk usb pipes. It supports multiple + * devices and multiple pipes per device. + * + * The vstusb driver supports two interfaces: + * 1 - ioctl SEND_PIPE/RECV_PIPE - a general bulk write/read msg + * interface to any pipe with timeout support; + * 2 - standard read/write with ioctl config - offers standard read/write + * interface with ioctl configured pipes and timeouts. + * + * Both interfaces can be signal from other process and will abort its i/o + * operation. + * + * A timeout of 0 means NO timeout. The user can still terminate the read via + * signal. + * + * If using multiple threads with this driver, the user should ensure that + * any reads, writes, or ioctls are complete before closing the device. + * Changing read/write timeouts or pipes takes effect on next read/write. + * + *****************************************************************************/ + +struct vstusb_args { + union { + /* this struct is used for IOCTL_VSTUSB_SEND_PIPE, * + * IOCTL_VSTUSB_RECV_PIPE, and read()/write() fops */ + struct { + void __user *buffer; + size_t count; + unsigned int timeout_ms; + int pipe; + }; + + /* this one is used for IOCTL_VSTUSB_CONFIG_RW */ + struct { + int rd_pipe; + int rd_timeout_ms; + int wr_pipe; + int wr_timeout_ms; + }; + }; +}; + +#define VST_IOC_MAGIC 'L' +#define VST_IOC_FIRST 0x20 +#define IOCTL_VSTUSB_SEND_PIPE _IO(VST_IOC_MAGIC, VST_IOC_FIRST) +#define IOCTL_VSTUSB_RECV_PIPE _IO(VST_IOC_MAGIC, VST_IOC_FIRST + 1) +#define IOCTL_VSTUSB_CONFIG_RW _IO(VST_IOC_MAGIC, VST_IOC_FIRST + 2) -- cgit v1.2.3 From 1987625226a918cd20c334ffce5e2a224cba0718 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 25 Aug 2008 22:40:25 +0200 Subject: USB: anchor API changes needed for btusb This extends the anchor API as btusb needs for autosuspend. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index d97927970f5..8fa973bede5 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1469,6 +1469,9 @@ extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); extern void usb_unanchor_urb(struct urb *urb); extern int usb_wait_anchor_empty_timeout(struct usb_anchor *anchor, unsigned int timeout); +extern struct urb *usb_get_from_anchor(struct usb_anchor *anchor); +extern void usb_scuttle_anchored_urbs(struct usb_anchor *anchor); +extern int usb_anchor_empty(struct usb_anchor *anchor); /** * usb_urb_dir_in - check if an URB describes an IN transfer -- cgit v1.2.3 From aaf7ea20000436df3cbb397ccb734ad1e2e5164d Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 15 Oct 2008 08:38:49 +0200 Subject: [MTD] [NAND] GPIO NAND flash driver The patch adds support for NAND flashes connected to GPIOs. Signed-off-by: Russell King Signed-off-by: Mike Rapoport Signed-off-by: David Woodhouse --- include/linux/mtd/nand-gpio.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/mtd/nand-gpio.h (limited to 'include/linux') diff --git a/include/linux/mtd/nand-gpio.h b/include/linux/mtd/nand-gpio.h new file mode 100644 index 00000000000..51534e50f7f --- /dev/null +++ b/include/linux/mtd/nand-gpio.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_MTD_NAND_GPIO_H +#define __LINUX_MTD_NAND_GPIO_H + +#include + +struct gpio_nand_platdata { + int gpio_nce; + int gpio_nwp; + int gpio_cle; + int gpio_ale; + int gpio_rdy; + void (*adjust_parts)(struct gpio_nand_platdata *, size_t); + struct mtd_partition *parts; + unsigned int num_parts; + unsigned int options; + int chip_delay; +}; + +#endif -- cgit v1.2.3 From dd3a1db900f2a215a7d7dd71b836e149a6cf5fed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 18:20:58 +0200 Subject: genirq: improve include files Move the irq_desc related iterators out of irq.h, into irqnr.h, also available via interrupt.h. This way non-genirq (and even non-hardirq) architectures get the common definitions and iterators. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 1 + include/linux/irq.h | 20 +------------------- include/linux/irqnr.h | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 19 deletions(-) create mode 100644 include/linux/irqnr.h (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 58ff4e74b2f..72fcfcff563 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/irq.h b/include/linux/irq.h index 0618fb362cb..d058c57be02 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -11,25 +11,6 @@ #include -#ifndef CONFIG_GENERIC_HARDIRQS -# define nr_irqs NR_IRQS - -# define for_each_irq_desc(irq, desc) \ - for (irq = 0; irq < nr_irqs; irq++) -#else -extern int nr_irqs; - -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) - -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 ); \ - irq > 0; irq--, desc--) -#endif - -#define for_each_irq_nr(irq) \ - for (irq = 0; irq < nr_irqs; irq++) - #ifndef CONFIG_S390 #include @@ -37,6 +18,7 @@ extern int nr_irqs; #include #include #include +#include #include #include diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h new file mode 100644 index 00000000000..3171ddc3b39 --- /dev/null +++ b/include/linux/irqnr.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_IRQNR_H +#define _LINUX_IRQNR_H + +#ifndef CONFIG_GENERIC_HARDIRQS +#include +# define nr_irqs NR_IRQS + +# define for_each_irq_desc(irq, desc) \ + for (irq = 0; irq < nr_irqs; irq++) +#else +extern int nr_irqs; + +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) + +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 ); \ + irq > 0; irq--, desc--) +#endif + +#define for_each_irq_nr(irq) \ + for (irq = 0; irq < nr_irqs; irq++) + +#endif -- cgit v1.2.3 From 1c1b6ffce5737d764cc474b9bd6677bb9a344094 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 24 Sep 2008 23:36:23 +0200 Subject: mfd: provide and use setup hook for tc6393xb Instead of using bitfields for initial gpio setup, provide generic setup/teardown hooks that can be used to set the gpio states, register child devices, etc. Signed-off-by: Dmitry Baryshkov Signed-off-by: Samuel Ortiz --- include/linux/mfd/tc6393xb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h index fec7b3f7a81..1fa820646d9 100644 --- a/include/linux/mfd/tc6393xb.h +++ b/include/linux/mfd/tc6393xb.h @@ -21,8 +21,6 @@ struct tc6393xb_platform_data { u16 scr_pll2cr; /* PLL2 Control */ u16 scr_gper; /* GP Enable */ - u32 scr_gpo_doecr; /* GPO Data OE Control */ - u32 scr_gpo_dsr; /* GPO Data Set */ int (*enable)(struct platform_device *dev); int (*disable)(struct platform_device *dev); @@ -31,6 +29,8 @@ struct tc6393xb_platform_data { int irq_base; /* base for subdevice irqs */ int gpio_base; + int (*setup)(struct platform_device *dev); + void (*teardown)(struct platform_device *dev); struct tmio_nand_data *nand_data; }; -- cgit v1.2.3 From f98a0bd0e4b77b12e49ce01f4c9f04503931c291 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 24 Sep 2008 23:46:10 +0200 Subject: mfd: do tcb6393xb state restore on resume only if requested As requested by Ian make state restore only if it's requested by platform data: some platforms do correctly save the state of the chip during suspend/resume, but some (like tosa) incorrectly power off the chip at suspend, so the driver supports restoring some bits of the tc6393xb state (not full, merely enough to support resume on tosa). With this patch this code is disabled by default. Signed-off-by: Dmitry Baryshkov Acked-by: Ian Molton Signed-off-by: Samuel Ortiz --- include/linux/mfd/tc6393xb.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h index 1fa820646d9..3ce10ae0f39 100644 --- a/include/linux/mfd/tc6393xb.h +++ b/include/linux/mfd/tc6393xb.h @@ -33,6 +33,10 @@ struct tc6393xb_platform_data { void (*teardown)(struct platform_device *dev); struct tmio_nand_data *nand_data; + + unsigned resume_restore : 1; /* make special actions + to preserve the state + on suspend/resume */ }; /* -- cgit v1.2.3 From 51a55623565c6ca864f7cf19e87c2d4bde1c0c5e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 3 Oct 2008 20:11:36 +0200 Subject: mfd: add OHCI cell to tc6393xb Add information regarding OHCI cell of the tc6393xb Signed-off-by: Dmitry Baryshkov Acked-by: Ian Molton Signed-off-by: Samuel Ortiz --- include/linux/mfd/tc6393xb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h index 3ce10ae0f39..4437736ebe1 100644 --- a/include/linux/mfd/tc6393xb.h +++ b/include/linux/mfd/tc6393xb.h @@ -44,6 +44,7 @@ struct tc6393xb_platform_data { */ #define IRQ_TC6393_NAND 0 #define IRQ_TC6393_MMC 1 +#define IRQ_TC6393_OHCI 2 #define TC6393XB_NR_IRQS 8 -- cgit v1.2.3 From 9e78cfe53f3c2bc1b37870697c3cde1543fefa8b Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 4 Oct 2008 00:50:36 +0200 Subject: mfd: support tmiofb cell on tc6393xb Add support for tmiofb cell found in tc6393xb chip. Signed-off-by: Dmitry Baryshkov Cc: Ian Molton Signed-off-by: Samuel Ortiz --- include/linux/mfd/tc6393xb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h index 4437736ebe1..626e448205c 100644 --- a/include/linux/mfd/tc6393xb.h +++ b/include/linux/mfd/tc6393xb.h @@ -17,6 +17,8 @@ #ifndef MFD_TC6393XB_H #define MFD_TC6393XB_H +#include + /* Also one should provide the CK3P6MI clock */ struct tc6393xb_platform_data { u16 scr_pll2cr; /* PLL2 Control */ @@ -33,18 +35,24 @@ struct tc6393xb_platform_data { void (*teardown)(struct platform_device *dev); struct tmio_nand_data *nand_data; + struct tmio_fb_data *fb_data; unsigned resume_restore : 1; /* make special actions to preserve the state on suspend/resume */ }; +extern int tc6393xb_lcd_mode(struct platform_device *fb, + const struct fb_videomode *mode); +extern int tc6393xb_lcd_set_power(struct platform_device *fb, bool on); + /* * Relative to irq_base */ #define IRQ_TC6393_NAND 0 #define IRQ_TC6393_MMC 1 #define IRQ_TC6393_OHCI 2 +#define IRQ_TC6393_FB 4 #define TC6393XB_NR_IRQS 8 -- cgit v1.2.3 From a603a7fa8717fb778bba91b5a879babf333dc6a3 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 15 Oct 2008 12:15:39 +0200 Subject: mfd: TWL4030 core driver This patch adds the core of the TWL4030 driver, which supports chips including the TPS65950. These chips are multi-function; see http://focus.ti.com/docs/prod/folders/print/tps65950.html Public specs are in the works. For now, the block diagram on the second page of the datasheet is fairly informative. There are some known issues with this core code. Most notably, the IRQ dispatching needs simplification (to use more of genirq), generalization (integrating support for secondary IRQ dispatch as well as primary, and removing the build dependency on OMAP), and then probably updating to leverage threaded IRQ support (expected to arrive in mainline "soon"). Once the core is in mainline, drivers for other parts of this chip can follow its lead and start swimming upstream too. Signed-off-by: David Brownell Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl4030.h | 339 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 include/linux/i2c/twl4030.h (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h new file mode 100644 index 00000000000..cdb453162a9 --- /dev/null +++ b/include/linux/i2c/twl4030.h @@ -0,0 +1,339 @@ +/* + * twl4030.h - header for TWL4030 PM and audio CODEC device + * + * Copyright (C) 2005-2006 Texas Instruments, Inc. + * + * Based on tlv320aic23.c: + * Copyright (c) by Kai Svahn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __TWL4030_H_ +#define __TWL4030_H_ + +/* + * Using the twl4030 core we address registers using a pair + * { module id, relative register offset } + * which that core then maps to the relevant + * { i2c slave, absolute register address } + * + * The module IDs are meaningful only to the twl4030 core code, + * which uses them as array indices to look up the first register + * address each module uses within a given i2c slave. + */ + +/* Slave 0 (i2c address 0x48) */ +#define TWL4030_MODULE_USB 0x00 + +/* Slave 1 (i2c address 0x49) */ +#define TWL4030_MODULE_AUDIO_VOICE 0x01 +#define TWL4030_MODULE_GPIO 0x02 +#define TWL4030_MODULE_INTBR 0x03 +#define TWL4030_MODULE_PIH 0x04 +#define TWL4030_MODULE_TEST 0x05 + +/* Slave 2 (i2c address 0x4a) */ +#define TWL4030_MODULE_KEYPAD 0x06 +#define TWL4030_MODULE_MADC 0x07 +#define TWL4030_MODULE_INTERRUPTS 0x08 +#define TWL4030_MODULE_LED 0x09 +#define TWL4030_MODULE_MAIN_CHARGE 0x0A +#define TWL4030_MODULE_PRECHARGE 0x0B +#define TWL4030_MODULE_PWM0 0x0C +#define TWL4030_MODULE_PWM1 0x0D +#define TWL4030_MODULE_PWMA 0x0E +#define TWL4030_MODULE_PWMB 0x0F + +/* Slave 3 (i2c address 0x4b) */ +#define TWL4030_MODULE_BACKUP 0x10 +#define TWL4030_MODULE_INT 0x11 +#define TWL4030_MODULE_PM_MASTER 0x12 +#define TWL4030_MODULE_PM_RECEIVER 0x13 +#define TWL4030_MODULE_RTC 0x14 +#define TWL4030_MODULE_SECURED_REG 0x15 + +/* + * Read and write single 8-bit registers + */ +int twl4030_i2c_write_u8(u8 mod_no, u8 val, u8 reg); +int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg); + +/* + * Read and write several 8-bit registers at once. + * + * IMPORTANT: For twl4030_i2c_write(), allocate num_bytes + 1 + * for the value, and populate your data starting at offset 1. + */ +int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes); +int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes); + +/*----------------------------------------------------------------------*/ + +/* + * NOTE: at up to 1024 registers, this is a big chip. + * + * Avoid putting register declarations in this file, instead of into + * a driver-private file, unless some of the registers in a block + * need to be shared with other drivers. One example is blocks that + * have Secondary IRQ Handler (SIH) registers. + */ + +#define TWL4030_SIH_CTRL_EXCLEN_MASK BIT(0) +#define TWL4030_SIH_CTRL_PENDDIS_MASK BIT(1) +#define TWL4030_SIH_CTRL_COR_MASK BIT(2) + +/*----------------------------------------------------------------------*/ + +/* + * GPIO Block Register offsets (use TWL4030_MODULE_GPIO) + */ + +#define REG_GPIODATAIN1 0x0 +#define REG_GPIODATAIN2 0x1 +#define REG_GPIODATAIN3 0x2 +#define REG_GPIODATADIR1 0x3 +#define REG_GPIODATADIR2 0x4 +#define REG_GPIODATADIR3 0x5 +#define REG_GPIODATAOUT1 0x6 +#define REG_GPIODATAOUT2 0x7 +#define REG_GPIODATAOUT3 0x8 +#define REG_CLEARGPIODATAOUT1 0x9 +#define REG_CLEARGPIODATAOUT2 0xA +#define REG_CLEARGPIODATAOUT3 0xB +#define REG_SETGPIODATAOUT1 0xC +#define REG_SETGPIODATAOUT2 0xD +#define REG_SETGPIODATAOUT3 0xE +#define REG_GPIO_DEBEN1 0xF +#define REG_GPIO_DEBEN2 0x10 +#define REG_GPIO_DEBEN3 0x11 +#define REG_GPIO_CTRL 0x12 +#define REG_GPIOPUPDCTR1 0x13 +#define REG_GPIOPUPDCTR2 0x14 +#define REG_GPIOPUPDCTR3 0x15 +#define REG_GPIOPUPDCTR4 0x16 +#define REG_GPIOPUPDCTR5 0x17 +#define REG_GPIO_ISR1A 0x19 +#define REG_GPIO_ISR2A 0x1A +#define REG_GPIO_ISR3A 0x1B +#define REG_GPIO_IMR1A 0x1C +#define REG_GPIO_IMR2A 0x1D +#define REG_GPIO_IMR3A 0x1E +#define REG_GPIO_ISR1B 0x1F +#define REG_GPIO_ISR2B 0x20 +#define REG_GPIO_ISR3B 0x21 +#define REG_GPIO_IMR1B 0x22 +#define REG_GPIO_IMR2B 0x23 +#define REG_GPIO_IMR3B 0x24 +#define REG_GPIO_EDR1 0x28 +#define REG_GPIO_EDR2 0x29 +#define REG_GPIO_EDR3 0x2A +#define REG_GPIO_EDR4 0x2B +#define REG_GPIO_EDR5 0x2C +#define REG_GPIO_SIH_CTRL 0x2D + +/* Up to 18 signals are available as GPIOs, when their + * pins are not assigned to another use (such as ULPI/USB). + */ +#define TWL4030_GPIO_MAX 18 + +/*----------------------------------------------------------------------*/ + +/* + * Keypad register offsets (use TWL4030_MODULE_KEYPAD) + * ... SIH/interrupt only + */ + +#define TWL4030_KEYPAD_KEYP_ISR1 0x11 +#define TWL4030_KEYPAD_KEYP_IMR1 0x12 +#define TWL4030_KEYPAD_KEYP_ISR2 0x13 +#define TWL4030_KEYPAD_KEYP_IMR2 0x14 +#define TWL4030_KEYPAD_KEYP_SIR 0x15 /* test register */ +#define TWL4030_KEYPAD_KEYP_EDR 0x16 +#define TWL4030_KEYPAD_KEYP_SIH_CTRL 0x17 + +/*----------------------------------------------------------------------*/ + +/* + * Multichannel ADC register offsets (use TWL4030_MODULE_MADC) + * ... SIH/interrupt only + */ + +#define TWL4030_MADC_ISR1 0x61 +#define TWL4030_MADC_IMR1 0x62 +#define TWL4030_MADC_ISR2 0x63 +#define TWL4030_MADC_IMR2 0x64 +#define TWL4030_MADC_SIR 0x65 /* test register */ +#define TWL4030_MADC_EDR 0x66 +#define TWL4030_MADC_SIH_CTRL 0x67 + +/*----------------------------------------------------------------------*/ + +/* + * Battery charger register offsets (use TWL4030_MODULE_INTERRUPTS) + */ + +#define TWL4030_INTERRUPTS_BCIISR1A 0x0 +#define TWL4030_INTERRUPTS_BCIISR2A 0x1 +#define TWL4030_INTERRUPTS_BCIIMR1A 0x2 +#define TWL4030_INTERRUPTS_BCIIMR2A 0x3 +#define TWL4030_INTERRUPTS_BCIISR1B 0x4 +#define TWL4030_INTERRUPTS_BCIISR2B 0x5 +#define TWL4030_INTERRUPTS_BCIIMR1B 0x6 +#define TWL4030_INTERRUPTS_BCIIMR2B 0x7 +#define TWL4030_INTERRUPTS_BCISIR1 0x8 /* test register */ +#define TWL4030_INTERRUPTS_BCISIR2 0x9 /* test register */ +#define TWL4030_INTERRUPTS_BCIEDR1 0xa +#define TWL4030_INTERRUPTS_BCIEDR2 0xb +#define TWL4030_INTERRUPTS_BCIEDR3 0xc +#define TWL4030_INTERRUPTS_BCISIHCTRL 0xd + +/*----------------------------------------------------------------------*/ + +/* + * Power Interrupt block register offsets (use TWL4030_MODULE_INT) + */ + +#define TWL4030_INT_PWR_ISR1 0x0 +#define TWL4030_INT_PWR_IMR1 0x1 +#define TWL4030_INT_PWR_ISR2 0x2 +#define TWL4030_INT_PWR_IMR2 0x3 +#define TWL4030_INT_PWR_SIR 0x4 /* test register */ +#define TWL4030_INT_PWR_EDR1 0x5 +#define TWL4030_INT_PWR_EDR2 0x6 +#define TWL4030_INT_PWR_SIH_CTRL 0x7 + +/*----------------------------------------------------------------------*/ + +struct twl4030_bci_platform_data { + int *battery_tmp_tbl; + unsigned int tblsize; +}; + +/* TWL4030_GPIO_MAX (18) GPIOs, with interrupts */ +struct twl4030_gpio_platform_data { + int gpio_base; + unsigned irq_base, irq_end; + + /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup + * should be enabled. Else, if that bit is set in "pulldowns", + * that pulldown is enabled. Don't waste power by letting any + * digital inputs float... + */ + u32 pullups; + u32 pulldowns; + + int (*setup)(struct device *dev, + unsigned gpio, unsigned ngpio); + int (*teardown)(struct device *dev, + unsigned gpio, unsigned ngpio); +}; + +struct twl4030_madc_platform_data { + int irq_line; +}; + +struct twl4030_keypad_data { + int rows; + int cols; + int *keymap; + int irq; + unsigned int keymapsize; + unsigned int rep:1; +}; + +enum twl4030_usb_mode { + T2_USB_MODE_ULPI = 1, + T2_USB_MODE_CEA2011_3PIN = 2, +}; + +struct twl4030_usb_data { + enum twl4030_usb_mode usb_mode; +}; + +struct twl4030_platform_data { + unsigned irq_base, irq_end; + struct twl4030_bci_platform_data *bci; + struct twl4030_gpio_platform_data *gpio; + struct twl4030_madc_platform_data *madc; + struct twl4030_keypad_data *keypad; + struct twl4030_usb_data *usb; + + /* REVISIT more to come ... _nothing_ should be hard-wired */ +}; + +/*----------------------------------------------------------------------*/ + +/* + * FIXME completely stop using TWL4030_IRQ_BASE ... instead, pass the + * IRQ data to subsidiary devices using platform device resources. + */ + +/* IRQ information-need base */ +#include +/* TWL4030 interrupts */ + +/* #define TWL4030_MODIRQ_GPIO (TWL4030_IRQ_BASE + 0) */ +#define TWL4030_MODIRQ_KEYPAD (TWL4030_IRQ_BASE + 1) +#define TWL4030_MODIRQ_BCI (TWL4030_IRQ_BASE + 2) +#define TWL4030_MODIRQ_MADC (TWL4030_IRQ_BASE + 3) +/* #define TWL4030_MODIRQ_USB (TWL4030_IRQ_BASE + 4) */ +#define TWL4030_MODIRQ_PWR (TWL4030_IRQ_BASE + 5) + +#define TWL4030_PWRIRQ_PWRBTN (TWL4030_PWR_IRQ_BASE + 0) +#define TWL4030_PWRIRQ_CHG_PRES (TWL4030_PWR_IRQ_BASE + 1) +#define TWL4030_PWRIRQ_USB_PRES (TWL4030_PWR_IRQ_BASE + 2) +#define TWL4030_PWRIRQ_RTC (TWL4030_PWR_IRQ_BASE + 3) +#define TWL4030_PWRIRQ_HOT_DIE (TWL4030_PWR_IRQ_BASE + 4) +#define TWL4030_PWRIRQ_PWROK_TIMEOUT (TWL4030_PWR_IRQ_BASE + 5) +#define TWL4030_PWRIRQ_MBCHG (TWL4030_PWR_IRQ_BASE + 6) +#define TWL4030_PWRIRQ_SC_DETECT (TWL4030_PWR_IRQ_BASE + 7) + +/* Rest are unsued currently*/ + +/* Offsets to Power Registers */ +#define TWL4030_VDAC_DEV_GRP 0x3B +#define TWL4030_VDAC_DEDICATED 0x3E +#define TWL4030_VAUX1_DEV_GRP 0x17 +#define TWL4030_VAUX1_DEDICATED 0x1A +#define TWL4030_VAUX2_DEV_GRP 0x1B +#define TWL4030_VAUX2_DEDICATED 0x1E +#define TWL4030_VAUX3_DEV_GRP 0x1F +#define TWL4030_VAUX3_DEDICATED 0x22 + +/* TWL4030 GPIO interrupt definitions */ + +#define TWL4030_GPIO_IRQ_NO(n) (TWL4030_GPIO_IRQ_BASE + (n)) +#define TWL4030_GPIO_IS_ENABLE 1 + +/* + * Exported TWL4030 GPIO APIs + * + * WARNING -- use standard GPIO and IRQ calls instead; these will vanish. + */ +int twl4030_get_gpio_datain(int gpio); +int twl4030_request_gpio(int gpio); +int twl4030_set_gpio_debounce(int gpio, int enable); +int twl4030_free_gpio(int gpio); + +#if defined(CONFIG_TWL4030_BCI_BATTERY) || \ + defined(CONFIG_TWL4030_BCI_BATTERY_MODULE) + extern int twl4030charger_usb_en(int enable); +#else + static inline int twl4030charger_usb_en(int enable) { return 0; } +#endif + +#endif /* End of __TWL4030_H */ -- cgit v1.2.3 From 26b8f5e1e2d1229c186d8e61d26513c43a058c5e Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 15 Oct 2008 12:20:06 +0200 Subject: mfd: add base support for Dialog DA9030/DA9034 PMICs DA9030 (a.k.a ARAVA) and DA9034 (a.k.a MICCO) are PMICs designed by Dialog Semiconductor, usually found on PXA-based platforms. These PMICs are I2C-based, multi-function devices, usually with LEDs, PWMs for backlight, BUCKs and LDOs, ADCs and touchscreen controller (on DA9034). This is the base support for the I2C operations, event registration and handling, sub-devices management. Signed-off-by: Mike Rapoport Signed-off-by: Eric Miao Signed-off-by: Liam Girdwood Signed-off-by: Samuel Ortiz --- include/linux/mfd/da903x.h | 201 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 include/linux/mfd/da903x.h (limited to 'include/linux') diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h new file mode 100644 index 00000000000..cad314c1243 --- /dev/null +++ b/include/linux/mfd/da903x.h @@ -0,0 +1,201 @@ +#ifndef __LINUX_PMIC_DA903X_H +#define __LINUX_PMIC_DA903X_H + +/* Unified sub device IDs for DA9030/DA9034 */ +enum { + DA9030_ID_LED_1, + DA9030_ID_LED_2, + DA9030_ID_LED_3, + DA9030_ID_LED_4, + DA9030_ID_LED_PC, + DA9030_ID_VIBRA, + DA9030_ID_WLED, + DA9030_ID_BUCK1, + DA9030_ID_BUCK2, + DA9030_ID_LDO1, + DA9030_ID_LDO2, + DA9030_ID_LDO3, + DA9030_ID_LDO4, + DA9030_ID_LDO5, + DA9030_ID_LDO6, + DA9030_ID_LDO7, + DA9030_ID_LDO8, + DA9030_ID_LDO9, + DA9030_ID_LDO10, + DA9030_ID_LDO11, + DA9030_ID_LDO12, + DA9030_ID_LDO13, + DA9030_ID_LDO14, + DA9030_ID_LDO15, + DA9030_ID_LDO16, + DA9030_ID_LDO17, + DA9030_ID_LDO18, + DA9030_ID_LDO19, + DA9030_ID_LDO_INT, /* LDO Internal */ + + DA9034_ID_LED_1, + DA9034_ID_LED_2, + DA9034_ID_VIBRA, + DA9034_ID_WLED, + DA9034_ID_TOUCH, + + DA9034_ID_BUCK1, + DA9034_ID_BUCK2, + DA9034_ID_LDO1, + DA9034_ID_LDO2, + DA9034_ID_LDO3, + DA9034_ID_LDO4, + DA9034_ID_LDO5, + DA9034_ID_LDO6, + DA9034_ID_LDO7, + DA9034_ID_LDO8, + DA9034_ID_LDO9, + DA9034_ID_LDO10, + DA9034_ID_LDO11, + DA9034_ID_LDO12, + DA9034_ID_LDO13, + DA9034_ID_LDO14, + DA9034_ID_LDO15, +}; + +/* + * DA9030/DA9034 LEDs sub-devices uses generic "struct led_info" + * as the platform_data + */ + +/* DA9030 flags for "struct led_info" + */ +#define DA9030_LED_RATE_ON (0 << 5) +#define DA9030_LED_RATE_052S (1 << 5) +#define DA9030_LED_DUTY_1_16 (0 << 3) +#define DA9030_LED_DUTY_1_8 (1 << 3) +#define DA9030_LED_DUTY_1_4 (2 << 3) +#define DA9030_LED_DUTY_1_2 (3 << 3) + +#define DA9030_VIBRA_MODE_1P3V (0 << 1) +#define DA9030_VIBRA_MODE_2P7V (1 << 1) +#define DA9030_VIBRA_FREQ_1HZ (0 << 2) +#define DA9030_VIBRA_FREQ_2HZ (1 << 2) +#define DA9030_VIBRA_FREQ_4HZ (2 << 2) +#define DA9030_VIBRA_FREQ_8HZ (3 << 2) +#define DA9030_VIBRA_DUTY_ON (0 << 4) +#define DA9030_VIBRA_DUTY_75P (1 << 4) +#define DA9030_VIBRA_DUTY_50P (2 << 4) +#define DA9030_VIBRA_DUTY_25P (3 << 4) + +/* DA9034 flags for "struct led_info" */ +#define DA9034_LED_RAMP (1 << 7) + +/* DA9034 touch screen platform data */ +struct da9034_touch_pdata { + int interval_ms; /* sampling interval while pen down */ + int x_inverted; + int y_inverted; +}; + +struct da903x_subdev_info { + int id; + const char *name; + void *platform_data; +}; + +struct da903x_platform_data { + int num_subdevs; + struct da903x_subdev_info *subdevs; +}; + +/* bit definitions for DA9030 events */ +#define DA9030_EVENT_ONKEY (1 << 0) +#define DA9030_EVENT_PWREN (1 << 1) +#define DA9030_EVENT_EXTON (1 << 2) +#define DA9030_EVENT_CHDET (1 << 3) +#define DA9030_EVENT_TBAT (1 << 4) +#define DA9030_EVENT_VBATMON (1 << 5) +#define DA9030_EVENT_VBATMON_TXON (1 << 6) +#define DA9030_EVENT_CHIOVER (1 << 7) +#define DA9030_EVENT_TCTO (1 << 8) +#define DA9030_EVENT_CCTO (1 << 9) +#define DA9030_EVENT_ADC_READY (1 << 10) +#define DA9030_EVENT_VBUS_4P4 (1 << 11) +#define DA9030_EVENT_VBUS_4P0 (1 << 12) +#define DA9030_EVENT_SESS_VALID (1 << 13) +#define DA9030_EVENT_SRP_DETECT (1 << 14) +#define DA9030_EVENT_WATCHDOG (1 << 15) +#define DA9030_EVENT_LDO15 (1 << 16) +#define DA9030_EVENT_LDO16 (1 << 17) +#define DA9030_EVENT_LDO17 (1 << 18) +#define DA9030_EVENT_LDO18 (1 << 19) +#define DA9030_EVENT_LDO19 (1 << 20) +#define DA9030_EVENT_BUCK2 (1 << 21) + +/* bit definitions for DA9034 events */ +#define DA9034_EVENT_ONKEY (1 << 0) +#define DA9034_EVENT_EXTON (1 << 2) +#define DA9034_EVENT_CHDET (1 << 3) +#define DA9034_EVENT_TBAT (1 << 4) +#define DA9034_EVENT_VBATMON (1 << 5) +#define DA9034_EVENT_REV_IOVER (1 << 6) +#define DA9034_EVENT_CH_IOVER (1 << 7) +#define DA9034_EVENT_CH_TCTO (1 << 8) +#define DA9034_EVENT_CH_CCTO (1 << 9) +#define DA9034_EVENT_USB_DEV (1 << 10) +#define DA9034_EVENT_OTGCP_IOVER (1 << 11) +#define DA9034_EVENT_VBUS_4P55 (1 << 12) +#define DA9034_EVENT_VBUS_3P8 (1 << 13) +#define DA9034_EVENT_SESS_1P8 (1 << 14) +#define DA9034_EVENT_SRP_READY (1 << 15) +#define DA9034_EVENT_ADC_MAN (1 << 16) +#define DA9034_EVENT_ADC_AUTO4 (1 << 17) +#define DA9034_EVENT_ADC_AUTO5 (1 << 18) +#define DA9034_EVENT_ADC_AUTO6 (1 << 19) +#define DA9034_EVENT_PEN_DOWN (1 << 20) +#define DA9034_EVENT_TSI_READY (1 << 21) +#define DA9034_EVENT_UART_TX (1 << 22) +#define DA9034_EVENT_UART_RX (1 << 23) +#define DA9034_EVENT_HEADSET (1 << 25) +#define DA9034_EVENT_HOOKSWITCH (1 << 26) +#define DA9034_EVENT_WATCHDOG (1 << 27) + +extern int da903x_register_notifier(struct device *dev, + struct notifier_block *nb, unsigned int events); +extern int da903x_unregister_notifier(struct device *dev, + struct notifier_block *nb, unsigned int events); + +/* Status Query Interface */ +#define DA9030_STATUS_ONKEY (1 << 0) +#define DA9030_STATUS_PWREN1 (1 << 1) +#define DA9030_STATUS_EXTON (1 << 2) +#define DA9030_STATUS_CHDET (1 << 3) +#define DA9030_STATUS_TBAT (1 << 4) +#define DA9030_STATUS_VBATMON (1 << 5) +#define DA9030_STATUS_VBATMON_TXON (1 << 6) +#define DA9030_STATUS_MCLKDET (1 << 7) + +#define DA9034_STATUS_ONKEY (1 << 0) +#define DA9034_STATUS_EXTON (1 << 2) +#define DA9034_STATUS_CHDET (1 << 3) +#define DA9034_STATUS_TBAT (1 << 4) +#define DA9034_STATUS_VBATMON (1 << 5) +#define DA9034_STATUS_PEN_DOWN (1 << 6) +#define DA9034_STATUS_MCLKDET (1 << 7) +#define DA9034_STATUS_USB_DEV (1 << 8) +#define DA9034_STATUS_HEADSET (1 << 9) +#define DA9034_STATUS_HOOKSWITCH (1 << 10) +#define DA9034_STATUS_REMCON (1 << 11) +#define DA9034_STATUS_VBUS_VALID_4P55 (1 << 12) +#define DA9034_STATUS_VBUS_VALID_3P8 (1 << 13) +#define DA9034_STATUS_SESS_VALID_1P8 (1 << 14) +#define DA9034_STATUS_SRP_READY (1 << 15) + +extern int da903x_query_status(struct device *dev, unsigned int status); + + +/* NOTE: the two functions below are not intended for use outside + * of the DA9034 sub-device drivers + */ +extern int da903x_write(struct device *dev, int reg, uint8_t val); +extern int da903x_read(struct device *dev, int reg, uint8_t *val); +extern int da903x_update(struct device *dev, int reg, uint8_t val, uint8_t mask); +extern int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask); +extern int da903x_clr_bits(struct device *dev, int reg, uint8_t bit_mask); +#endif /* __LINUX_PMIC_DA903X_H */ -- cgit v1.2.3 From 7acb706ca97fce84bda4a902a33de2f3dae10260 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Thu, 9 Oct 2008 20:06:09 +0200 Subject: mfd: update TMIO drivers to use the clock API This patch updates the remaining two TMIO drivers to use the clock API rather than callback hooks into platform code. Signed-off-by: Ian Molton Signed-off-by: Samuel Ortiz --- include/linux/mfd/t7l66xb.h | 2 -- include/linux/mfd/tc6387xb.h | 3 --- 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h index e83c7f2036f..b4629818aea 100644 --- a/include/linux/mfd/t7l66xb.h +++ b/include/linux/mfd/t7l66xb.h @@ -15,8 +15,6 @@ #include struct t7l66xb_platform_data { - int (*enable_clk32k)(struct platform_device *dev); - void (*disable_clk32k)(struct platform_device *dev); int (*enable)(struct platform_device *dev); int (*disable)(struct platform_device *dev); int (*suspend)(struct platform_device *dev); diff --git a/include/linux/mfd/tc6387xb.h b/include/linux/mfd/tc6387xb.h index fa06e0610b8..b4888209494 100644 --- a/include/linux/mfd/tc6387xb.h +++ b/include/linux/mfd/tc6387xb.h @@ -11,9 +11,6 @@ #define MFD_TC6387XB_H struct tc6387xb_platform_data { - int (*enable_clk32k)(struct platform_device *dev); - void (*disable_clk32k)(struct platform_device *dev); - int (*enable)(struct platform_device *dev); int (*disable)(struct platform_device *dev); int (*suspend)(struct platform_device *dev); -- cgit v1.2.3 From 62695a84eb8f2e718bf4dfb21700afaa7a08e0ea Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:26:09 -0700 Subject: vmscan: move isolate_lru_page() to vmscan.c On large memory systems, the VM can spend way too much time scanning through pages that it cannot (or should not) evict from memory. Not only does it use up CPU time, but it also provokes lock contention and can leave large systems under memory presure in a catatonic state. This patch series improves VM scalability by: 1) putting filesystem backed, swap backed and unevictable pages onto their own LRUs, so the system only scans the pages that it can/should evict from memory 2) switching to two handed clock replacement for the anonymous LRUs, so the number of pages that need to be scanned when the system starts swapping is bound to a reasonable number 3) keeping unevictable pages off the LRU completely, so the VM does not waste CPU time scanning them. ramfs, ramdisk, SHM_LOCKED shared memory segments and mlock()ed VMA pages are keept on the unevictable list. This patch: isolate_lru_page logically belongs to be in vmscan.c than migrate.c. It is tough, because we don't need that function without memory migration so there is a valid argument to have it in migrate.c. However a subsequent patch needs to make use of it in the core mm, so we can happily move it to vmscan.c. Also, make the function a little more generic by not requiring that it adds an isolated page to a given list. Callers can do that. Note that we now have '__isolate_lru_page()', that does something quite different, visible outside of vmscan.c for use with memory controller. Methinks we need to rationalize these names/purposes. --lts [akpm@linux-foundation.org: fix mm/memory_hotplug.c build] Signed-off-by: Nick Piggin Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 03aea612d28..3f34005068d 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -7,7 +7,6 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); #ifdef CONFIG_MIGRATION -extern int isolate_lru_page(struct page *p, struct list_head *pagelist); extern int putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *); @@ -21,8 +20,6 @@ extern int migrate_vmas(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, unsigned long flags); #else -static inline int isolate_lru_page(struct page *p, struct list_head *list) - { return -ENOSYS; } static inline int putback_lru_pages(struct list_head *l) { return 0; } static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private) { return -ENOSYS; } -- cgit v1.2.3 From b69408e88bd86b98feb7b9a38fd865e1ddb29827 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 18 Oct 2008 20:26:14 -0700 Subject: vmscan: Use an indexed array for LRU variables Currently we are defining explicit variables for the inactive and active list. An indexed array can be more generic and avoid repeating similar code in several places in the reclaim code. We are saving a few bytes in terms of code size: Before: text data bss dec hex filename 4097753 573120 4092484 8763357 85b7dd vmlinux After: text data bss dec hex filename 4097729 573120 4092484 8763333 85b7c5 vmlinux Having an easy way to add new lru lists may ease future work on the reclaim code. Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Signed-off-by: Christoph Lameter Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 17 +++++----------- include/linux/mm_inline.h | 49 +++++++++++++++++++++++++++++++++++----------- include/linux/mmzone.h | 26 ++++++++++++++++++------ 3 files changed, 63 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fdf3967e139..a6ac0d491fe 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -69,10 +69,8 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority); -extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, - struct zone *zone, int priority); -extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, - struct zone *zone, int priority); +extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, + int priority, enum lru_list lru); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline void page_reset_bad_cgroup(struct page *page) @@ -159,14 +157,9 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, { } -static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, - struct zone *zone, int priority) -{ - return 0; -} - -static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, - struct zone *zone, int priority) +static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, + struct zone *zone, int priority, + enum lru_list lru) { return 0; } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 895bc4e9303..2704729777e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -1,40 +1,67 @@ +static inline void +add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) +{ + list_add(&page->lru, &zone->lru[l].list); + __inc_zone_state(zone, NR_LRU_BASE + l); +} + +static inline void +del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) +{ + list_del(&page->lru); + __dec_zone_state(zone, NR_LRU_BASE + l); +} + static inline void add_page_to_active_list(struct zone *zone, struct page *page) { - list_add(&page->lru, &zone->active_list); - __inc_zone_state(zone, NR_ACTIVE); + add_page_to_lru_list(zone, page, LRU_ACTIVE); } static inline void add_page_to_inactive_list(struct zone *zone, struct page *page) { - list_add(&page->lru, &zone->inactive_list); - __inc_zone_state(zone, NR_INACTIVE); + add_page_to_lru_list(zone, page, LRU_INACTIVE); } static inline void del_page_from_active_list(struct zone *zone, struct page *page) { - list_del(&page->lru); - __dec_zone_state(zone, NR_ACTIVE); + del_page_from_lru_list(zone, page, LRU_ACTIVE); } static inline void del_page_from_inactive_list(struct zone *zone, struct page *page) { - list_del(&page->lru); - __dec_zone_state(zone, NR_INACTIVE); + del_page_from_lru_list(zone, page, LRU_INACTIVE); } static inline void del_page_from_lru(struct zone *zone, struct page *page) { + enum lru_list l = LRU_INACTIVE; + list_del(&page->lru); if (PageActive(page)) { __ClearPageActive(page); - __dec_zone_state(zone, NR_ACTIVE); - } else { - __dec_zone_state(zone, NR_INACTIVE); + l = LRU_ACTIVE; } + __dec_zone_state(zone, NR_LRU_BASE + l); } +/** + * page_lru - which LRU list should a page be on? + * @page: the page to test + * + * Returns the LRU list a page should be on, as an index + * into the array of LRU lists. + */ +static inline enum lru_list page_lru(struct page *page) +{ + enum lru_list lru = LRU_BASE; + + if (PageActive(page)) + lru += LRU_ACTIVE; + + return lru; +} diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 428328a05fa..156e18f3919 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -81,8 +81,9 @@ struct zone_padding { enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, - NR_INACTIVE, - NR_ACTIVE, + NR_LRU_BASE, + NR_INACTIVE = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ + NR_ACTIVE, /* " " " " " */ NR_ANON_PAGES, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ @@ -107,6 +108,19 @@ enum zone_stat_item { #endif NR_VM_ZONE_STAT_ITEMS }; +enum lru_list { + LRU_BASE, + LRU_INACTIVE=LRU_BASE, /* must match order of NR_[IN]ACTIVE */ + LRU_ACTIVE, /* " " " " " */ + NR_LRU_LISTS }; + +#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) + +static inline int is_active_lru(enum lru_list l) +{ + return (l == LRU_ACTIVE); +} + struct per_cpu_pages { int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ @@ -251,10 +265,10 @@ struct zone { /* Fields commonly accessed by the page reclaim scanner */ spinlock_t lru_lock; - struct list_head active_list; - struct list_head inactive_list; - unsigned long nr_scan_active; - unsigned long nr_scan_inactive; + struct { + struct list_head list; + unsigned long nr_scan; + } lru[NR_LRU_LISTS]; unsigned long pages_scanned; /* since last reclaim */ unsigned long flags; /* zone flags, see below */ -- cgit v1.2.3 From f04e9ebbe4909f9a41efd55149bc353299f4e83b Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Sat, 18 Oct 2008 20:26:19 -0700 Subject: swap: use an array for the LRU pagevecs Turn the pagevecs into an array just like the LRUs. This significantly cleans up the source code and reduces the size of the kernel by about 13kB after all the LRU lists have been created further down in the split VM patch series. Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 13 +++++++++++-- include/linux/swap.h | 18 ++++++++++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 8eb7fa76c1d..6b8f11bcc94 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -23,8 +23,7 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_release_nonlru(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); -void __pagevec_lru_add(struct pagevec *pvec); -void __pagevec_lru_add_active(struct pagevec *pvec); +void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); @@ -81,6 +80,16 @@ static inline void pagevec_free(struct pagevec *pvec) __pagevec_free(pvec); } +static inline void __pagevec_lru_add(struct pagevec *pvec) +{ + ____pagevec_lru_add(pvec, LRU_INACTIVE); +} + +static inline void __pagevec_lru_add_active(struct pagevec *pvec) +{ + ____pagevec_lru_add(pvec, LRU_ACTIVE); +} + static inline void pagevec_lru_add(struct pagevec *pvec) { if (pagevec_count(pvec)) diff --git a/include/linux/swap.h b/include/linux/swap.h index de40f169a4e..fcc169610d0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -171,8 +171,8 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ -extern void lru_cache_add(struct page *); -extern void lru_cache_add_active(struct page *); +extern void __lru_cache_add(struct page *, enum lru_list lru); +extern void lru_cache_add_lru(struct page *, enum lru_list lru); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); @@ -180,6 +180,20 @@ extern int lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void swap_setup(void); +/** + * lru_cache_add: add a page to the page lists + * @page: the page to add + */ +static inline void lru_cache_add(struct page *page) +{ + __lru_cache_add(page, LRU_INACTIVE); +} + +static inline void lru_cache_add_active(struct page *page) +{ + __lru_cache_add(page, LRU_ACTIVE); +} + /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); -- cgit v1.2.3 From 68a22394c286a2daf06ee8d65d8835f738faefa5 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 18 Oct 2008 20:26:23 -0700 Subject: vmscan: free swap space on swap-in/activation If vm_swap_full() (swap space more than 50% full), the system will free swap space at swapin time. With this patch, the system will also free the swap space in the pageout code, when we decide that the page is not a candidate for swapout (and just wasting swap space). Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Signed-off-by: MinChan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 1 + include/linux/swap.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 6b8f11bcc94..fea3a982ee5 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -25,6 +25,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); +void pagevec_swap_free(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); unsigned pagevec_lookup_tag(struct pagevec *pvec, diff --git a/include/linux/swap.h b/include/linux/swap.h index fcc169610d0..833be56ad83 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -265,6 +265,7 @@ extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); extern int can_share_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); +extern int remove_exclusive_swap_page_ref(struct page *); struct backing_dev_info; /* linux/mm/thrash.c */ @@ -353,6 +354,11 @@ static inline int remove_exclusive_swap_page(struct page *p) return 0; } +static inline int remove_exclusive_swap_page_ref(struct page *page) +{ + return 0; +} + static inline swp_entry_t get_swap_page(void) { swp_entry_t entry; -- cgit v1.2.3 From b2e185384f534781fd22f5ce170b2ad26f97df70 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 18 Oct 2008 20:26:30 -0700 Subject: define page_file_cache() function Define page_file_cache() function to answer the question: is page backed by a file? Originally part of Rik van Riel's split-lru patch. Extracted to make available for other, independent reclaim patches. Moved inline function to linux/mm_inline.h where it will be needed by subsequent "split LRU" and "noreclaim" patches. Unfortunately this needs to use a page flag, since the PG_swapbacked state needs to be preserved all the way to the point where the page is last removed from the LRU. Trying to derive the status from other info in the page resulted in wrong VM statistics in earlier split VM patchsets. The total number of page flags in use on a 32 bit machine after this patch is 19. [akpm@linux-foundation.org: fix up out-of-order merge fallout] [hugh@veritas.com: splitlru: shmem_getpage SetPageSwapBacked sooner[ Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Signed-off-by: MinChan Kim Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 27 +++++++++++++++++++++++++++ include/linux/page-flags.h | 8 ++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 2704729777e..96e970485b6 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -1,3 +1,28 @@ +#ifndef LINUX_MM_INLINE_H +#define LINUX_MM_INLINE_H + +/** + * page_is_file_cache - should the page be on a file LRU or anon LRU? + * @page: the page to test + * + * Returns !0 if @page is page cache page backed by a regular filesystem, + * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. + * Used by functions that manipulate the LRU lists, to sort a page + * onto the right LRU list. + * + * We would like to get this info without a page flag, but the state + * needs to survive until the page is last deleted from the LRU, which + * could be as far down as __page_cache_release. + */ +static inline int page_is_file_cache(struct page *page) +{ + if (PageSwapBacked(page)) + return 0; + + /* The page is page cache backed by a normal filesystem. */ + return 1; +} + static inline void add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) { @@ -65,3 +90,5 @@ static inline enum lru_list page_lru(struct page *page) return lru; } + +#endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c74d3e87531..57b688cfb5e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -93,6 +93,7 @@ enum pageflags { PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_buddy, /* Page is free, on buddy lists */ + PG_swapbacked, /* Page is backed by RAM/swap */ #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR PG_uncached, /* Page has been mapped as uncached */ #endif @@ -176,6 +177,7 @@ PAGEFLAG(SavePinned, savepinned); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) +PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) __PAGEFLAG(SlobPage, slob_page) __PAGEFLAG(SlobFree, slob_free) @@ -334,7 +336,8 @@ static inline void __ClearPageTail(struct page *page) * Flags checked in bad_page(). Pages on the free list should not have * these flags set. It they are, there is a problem. */ -#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | 1 << PG_reclaim | 1 << PG_dirty) +#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \ + 1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked) /* * Flags checked when a page is freed. Pages being freed should not have @@ -347,7 +350,8 @@ static inline void __ClearPageTail(struct page *page) * Pages being prepped should not have these flags set. It they are, there * is a problem. */ -#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | 1 << PG_reserved | 1 << PG_dirty) +#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \ + 1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked) #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ -- cgit v1.2.3 From 4f98a2fee8acdb4ac84545df98cccecfd130f8db Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 18 Oct 2008 20:26:32 -0700 Subject: vmscan: split LRU lists into anon & file sets Split the LRU lists in two, one set for pages that are backed by real file systems ("file") and one for pages that are backed by memory and swap ("anon"). The latter includes tmpfs. The advantage of doing this is that the VM will not have to scan over lots of anonymous pages (which we generally do not want to swap out), just to find the page cache pages that it should evict. This patch has the infrastructure and a basic policy to balance how much we scan the anon lists and how much we scan the file lists. The big policy changes are in separate patches. [lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset] [kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru] [kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page] [hugh@veritas.com: memcg swapbacked pages active] [hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED] [akpm@linux-foundation.org: fix /proc/vmstat units] [nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration] [kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo] [kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()] Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Signed-off-by: KOSAKI Motohiro Signed-off-by: Hugh Dickins Signed-off-by: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 13 ++++++++++++ include/linux/memcontrol.h | 2 +- include/linux/mm_inline.h | 50 ++++++++++++++++++++++++++++++++++----------- include/linux/mmzone.h | 47 +++++++++++++++++++++++++++++++++++------- include/linux/pagevec.h | 29 ++++++++++++++++++++------ include/linux/swap.h | 20 +++++++++++++----- include/linux/vmstat.h | 10 +++++++++ 7 files changed, 140 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0a24d5550eb..bee52abb8a4 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_READ_MAP: Can be mapped for reading * BDI_CAP_WRITE_MAP: Can be mapped for writing * BDI_CAP_EXEC_MAP: Can be mapped for execution + * + * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 @@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_WRITE_MAP 0x00000020 #define BDI_CAP_EXEC_MAP 0x00000040 #define BDI_CAP_NO_ACCT_WB 0x00000080 +#define BDI_CAP_SWAP_BACKED 0x00000100 #define BDI_CAP_VMFLAGS \ (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) @@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi) BDI_CAP_NO_WRITEBACK)); } +static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi) +{ + return bdi->capabilities & BDI_CAP_SWAP_BACKED; +} + static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { return bdi_cap_writeback_dirty(mapping->backing_dev_info); @@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping) return bdi_cap_account_dirty(mapping->backing_dev_info); } +static inline bool mapping_cap_swap_backed(struct address_space *mapping) +{ + return bdi_cap_swap_backed(mapping->backing_dev_info); +} + #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a6ac0d491fe..8d8f05c1515 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -44,7 +44,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, unsigned long *scanned, int order, int mode, struct zone *z, struct mem_cgroup *mem_cont, - int active); + int active, int file); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 96e970485b6..2eb599465d5 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -5,7 +5,7 @@ * page_is_file_cache - should the page be on a file LRU or anon LRU? * @page: the page to test * - * Returns !0 if @page is page cache page backed by a regular filesystem, + * Returns LRU_FILE if @page is page cache page backed by a regular filesystem, * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. * Used by functions that manipulate the LRU lists, to sort a page * onto the right LRU list. @@ -20,7 +20,7 @@ static inline int page_is_file_cache(struct page *page) return 0; /* The page is page cache backed by a normal filesystem. */ - return 1; + return LRU_FILE; } static inline void @@ -38,39 +38,64 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) } static inline void -add_page_to_active_list(struct zone *zone, struct page *page) +add_page_to_inactive_anon_list(struct zone *zone, struct page *page) { - add_page_to_lru_list(zone, page, LRU_ACTIVE); + add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON); } static inline void -add_page_to_inactive_list(struct zone *zone, struct page *page) +add_page_to_active_anon_list(struct zone *zone, struct page *page) { - add_page_to_lru_list(zone, page, LRU_INACTIVE); + add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON); } static inline void -del_page_from_active_list(struct zone *zone, struct page *page) +add_page_to_inactive_file_list(struct zone *zone, struct page *page) { - del_page_from_lru_list(zone, page, LRU_ACTIVE); + add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE); } static inline void -del_page_from_inactive_list(struct zone *zone, struct page *page) +add_page_to_active_file_list(struct zone *zone, struct page *page) { - del_page_from_lru_list(zone, page, LRU_INACTIVE); + add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE); +} + +static inline void +del_page_from_inactive_anon_list(struct zone *zone, struct page *page) +{ + del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON); +} + +static inline void +del_page_from_active_anon_list(struct zone *zone, struct page *page) +{ + del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON); +} + +static inline void +del_page_from_inactive_file_list(struct zone *zone, struct page *page) +{ + del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE); +} + +static inline void +del_page_from_active_file_list(struct zone *zone, struct page *page) +{ + del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE); } static inline void del_page_from_lru(struct zone *zone, struct page *page) { - enum lru_list l = LRU_INACTIVE; + enum lru_list l = LRU_BASE; list_del(&page->lru); if (PageActive(page)) { __ClearPageActive(page); - l = LRU_ACTIVE; + l += LRU_ACTIVE; } + l += page_is_file_cache(page); __dec_zone_state(zone, NR_LRU_BASE + l); } @@ -87,6 +112,7 @@ static inline enum lru_list page_lru(struct page *page) if (PageActive(page)) lru += LRU_ACTIVE; + lru += page_is_file_cache(page); return lru; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 156e18f3919..59a4c8fd6eb 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -82,21 +82,23 @@ enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, NR_LRU_BASE, - NR_INACTIVE = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ - NR_ACTIVE, /* " " " " " */ + NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ + NR_ACTIVE_ANON, /* " " " " " */ + NR_INACTIVE_FILE, /* " " " " " */ + NR_ACTIVE_FILE, /* " " " " " */ NR_ANON_PAGES, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, NR_FILE_DIRTY, NR_WRITEBACK, - /* Second 128 byte cacheline */ NR_SLAB_RECLAIMABLE, NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, NR_VMSCAN_WRITE, + /* Second 128 byte cacheline */ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ @@ -108,17 +110,36 @@ enum zone_stat_item { #endif NR_VM_ZONE_STAT_ITEMS }; +/* + * We do arithmetic on the LRU lists in various places in the code, + * so it is important to keep the active lists LRU_ACTIVE higher in + * the array than the corresponding inactive lists, and to keep + * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists. + * + * This has to be kept in sync with the statistics in zone_stat_item + * above and the descriptions in vmstat_text in mm/vmstat.c + */ +#define LRU_BASE 0 +#define LRU_ACTIVE 1 +#define LRU_FILE 2 + enum lru_list { - LRU_BASE, - LRU_INACTIVE=LRU_BASE, /* must match order of NR_[IN]ACTIVE */ - LRU_ACTIVE, /* " " " " " */ + LRU_INACTIVE_ANON = LRU_BASE, + LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, + LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, + LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, NR_LRU_LISTS }; #define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) +static inline int is_file_lru(enum lru_list l) +{ + return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE); +} + static inline int is_active_lru(enum lru_list l) { - return (l == LRU_ACTIVE); + return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE); } struct per_cpu_pages { @@ -269,6 +290,18 @@ struct zone { struct list_head list; unsigned long nr_scan; } lru[NR_LRU_LISTS]; + + /* + * The pageout code in vmscan.c keeps track of how many of the + * mem/swap backed and file backed pages are refeferenced. + * The higher the rotated/scanned ratio, the more valuable + * that cache is. + * + * The anon LRU stats live in [0], file LRU stats in [1] + */ + unsigned long recent_rotated[2]; + unsigned long recent_scanned[2]; + unsigned long pages_scanned; /* since last reclaim */ unsigned long flags; /* zone flags, see below */ diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index fea3a982ee5..5fc96a4e760 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -81,20 +81,37 @@ static inline void pagevec_free(struct pagevec *pvec) __pagevec_free(pvec); } -static inline void __pagevec_lru_add(struct pagevec *pvec) +static inline void __pagevec_lru_add_anon(struct pagevec *pvec) { - ____pagevec_lru_add(pvec, LRU_INACTIVE); + ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON); } -static inline void __pagevec_lru_add_active(struct pagevec *pvec) +static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec) { - ____pagevec_lru_add(pvec, LRU_ACTIVE); + ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON); } -static inline void pagevec_lru_add(struct pagevec *pvec) +static inline void __pagevec_lru_add_file(struct pagevec *pvec) +{ + ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE); +} + +static inline void __pagevec_lru_add_active_file(struct pagevec *pvec) +{ + ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE); +} + + +static inline void pagevec_lru_add_file(struct pagevec *pvec) +{ + if (pagevec_count(pvec)) + __pagevec_lru_add_file(pvec); +} + +static inline void pagevec_lru_add_anon(struct pagevec *pvec) { if (pagevec_count(pvec)) - __pagevec_lru_add(pvec); + __pagevec_lru_add_anon(pvec); } #endif /* _LINUX_PAGEVEC_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 833be56ad83..7d09d79997a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -184,14 +184,24 @@ extern void swap_setup(void); * lru_cache_add: add a page to the page lists * @page: the page to add */ -static inline void lru_cache_add(struct page *page) +static inline void lru_cache_add_anon(struct page *page) { - __lru_cache_add(page, LRU_INACTIVE); + __lru_cache_add(page, LRU_INACTIVE_ANON); } -static inline void lru_cache_add_active(struct page *page) +static inline void lru_cache_add_active_anon(struct page *page) { - __lru_cache_add(page, LRU_ACTIVE); + __lru_cache_add(page, LRU_ACTIVE_ANON); +} + +static inline void lru_cache_add_file(struct page *page) +{ + __lru_cache_add(page, LRU_INACTIVE_FILE); +} + +static inline void lru_cache_add_active_file(struct page *page) +{ + __lru_cache_add(page, LRU_ACTIVE_FILE); } /* linux/mm/vmscan.c */ @@ -199,7 +209,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask); -extern int __isolate_lru_page(struct page *page, int mode); +extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 58334d43951..ff5179f2b15 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -159,6 +159,16 @@ static inline unsigned long zone_page_state(struct zone *zone, return x; } +extern unsigned long global_lru_pages(void); + +static inline unsigned long zone_lru_pages(struct zone *zone) +{ + return (zone_page_state(zone, NR_ACTIVE_ANON) + + zone_page_state(zone, NR_ACTIVE_FILE) + + zone_page_state(zone, NR_INACTIVE_ANON) + + zone_page_state(zone, NR_INACTIVE_FILE)); +} + #ifdef CONFIG_NUMA /* * Determine the per node value of a stat item. This function -- cgit v1.2.3 From 556adecba110bf5f1db6c6b56416cfab5bcab698 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 18 Oct 2008 20:26:34 -0700 Subject: vmscan: second chance replacement for anonymous pages We avoid evicting and scanning anonymous pages for the most part, but under some workloads we can end up with most of memory filled with anonymous pages. At that point, we suddenly need to clear the referenced bits on all of memory, which can take ages on very large memory systems. We can reduce the maximum number of pages that need to be scanned by not taking the referenced state into account when deactivating an anonymous page. After all, every anonymous page starts out referenced, so why check? If an anonymous page gets referenced again before it reaches the end of the inactive list, we move it back to the active list. To keep the maximum amount of necessary work reasonable, we scale the active to inactive ratio with the size of memory, using the formula active:inactive ratio = sqrt(memory in GB * 10). Kswapd CPU use now seems to scale by the amount of pageout bandwidth, instead of by the amount of memory present in the system. [kamezawa.hiroyu@jp.fujitsu.com: fix OOM with memcg] [kamezawa.hiroyu@jp.fujitsu.com: memcg: lru scan fix] Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 19 +++++++++++++++++++ include/linux/mmzone.h | 6 ++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 2eb599465d5..f451fedd1e7 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -117,4 +117,23 @@ static inline enum lru_list page_lru(struct page *page) return lru; } +/** + * inactive_anon_is_low - check if anonymous pages need to be deactivated + * @zone: zone to check + * + * Returns true if the zone does not have enough inactive anon pages, + * meaning some active anon pages need to be deactivated. + */ +static inline int inactive_anon_is_low(struct zone *zone) +{ + unsigned long active, inactive; + + active = zone_page_state(zone, NR_ACTIVE_ANON); + inactive = zone_page_state(zone, NR_INACTIVE_ANON); + + if (inactive * zone->inactive_ratio < active) + return 1; + + return 0; +} #endif diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 59a4c8fd6eb..9c5111f49a3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -323,6 +323,12 @@ struct zone { */ int prev_priority; + /* + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on + * this zone's LRU. Maintained by the pageout code. + */ + unsigned int inactive_ratio; + ZONE_PADDING(_pad2_) /* Rarely used or read-mostly fields */ -- cgit v1.2.3 From 8a7a8544a4f6554ec2d8048ac9f9672f442db5a2 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:37 -0700 Subject: pageflag helpers for configed-out flags Define proper false/noop inline functions for noreclaim page flags when !defined(CONFIG_UNEVICTABLE_LRU) Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 57b688cfb5e..3d31616dcd2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -162,6 +162,18 @@ static inline int Page##uname(struct page *page) \ #define TESTSCFLAG(uname, lname) \ TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) +#define SETPAGEFLAG_NOOP(uname) \ +static inline void SetPage##uname(struct page *page) { } + +#define CLEARPAGEFLAG_NOOP(uname) \ +static inline void ClearPage##uname(struct page *page) { } + +#define __CLEARPAGEFLAG_NOOP(uname) \ +static inline void __ClearPage##uname(struct page *page) { } + +#define TESTCLEARFLAG_FALSE(uname) \ +static inline int TestClearPage##uname(struct page *page) { return 0; } + struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) -- cgit v1.2.3 From 894bc310419ac95f4fa4142dc364401a7e607f65 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:39 -0700 Subject: Unevictable LRU Infrastructure When the system contains lots of mlocked or otherwise unevictable pages, the pageout code (kswapd) can spend lots of time scanning over these pages. Worse still, the presence of lots of unevictable pages can confuse kswapd into thinking that more aggressive pageout modes are required, resulting in all kinds of bad behaviour. Infrastructure to manage pages excluded from reclaim--i.e., hidden from vmscan. Based on a patch by Larry Woodman of Red Hat. Reworked to maintain "unevictable" pages on a separate per-zone LRU list, to "hide" them from vmscan. Kosaki Motohiro added the support for the memory controller unevictable lru list. Pages on the unevictable list have both PG_unevictable and PG_lru set. Thus, PG_unevictable is analogous to and mutually exclusive with PG_active--it specifies which LRU list the page is on. The unevictable infrastructure is enabled by a new mm Kconfig option [CONFIG_]UNEVICTABLE_LRU. A new function 'page_evictable(page, vma)' in vmscan.c tests whether or not a page may be evictable. Subsequent patches will add the various !evictable tests. We'll want to keep these tests light-weight for use in shrink_active_list() and, possibly, the fault path. To avoid races between tasks putting pages [back] onto an LRU list and tasks that might be moving the page from non-evictable to evictable state, the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()' -- tests the "evictability" of a page after placing it on the LRU, before dropping the reference. If the page has become unevictable, putback_lru_page() will redo the 'putback', thus moving the page to the unevictable list. This way, we avoid "stranding" evictable pages on the unevictable list. [akpm@linux-foundation.org: fix fallout from out-of-order merge] [riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build] [nishimura@mxp.nes.nec.co.jp: remove redundant mapping check] [kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework] [kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c] [kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure] [kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch] [kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Debugged-by: Benjamin Kidwell Signed-off-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- include/linux/mm_inline.h | 23 ++++++++++++++++------- include/linux/mmzone.h | 24 +++++++++++++++++++++++- include/linux/page-flags.h | 22 +++++++++++++++++++++- include/linux/pagevec.h | 1 - include/linux/swap.h | 12 ++++++++++++ 6 files changed, 73 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8d8f05c1515..ee1b2fcb441 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -34,9 +34,9 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern void mem_cgroup_move_lists(struct page *page, bool active); extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index f451fedd1e7..67d7697fd01 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -91,11 +91,16 @@ del_page_from_lru(struct zone *zone, struct page *page) enum lru_list l = LRU_BASE; list_del(&page->lru); - if (PageActive(page)) { - __ClearPageActive(page); - l += LRU_ACTIVE; + if (PageUnevictable(page)) { + __ClearPageUnevictable(page); + l = LRU_UNEVICTABLE; + } else { + if (PageActive(page)) { + __ClearPageActive(page); + l += LRU_ACTIVE; + } + l += page_is_file_cache(page); } - l += page_is_file_cache(page); __dec_zone_state(zone, NR_LRU_BASE + l); } @@ -110,9 +115,13 @@ static inline enum lru_list page_lru(struct page *page) { enum lru_list lru = LRU_BASE; - if (PageActive(page)) - lru += LRU_ACTIVE; - lru += page_is_file_cache(page); + if (PageUnevictable(page)) + lru = LRU_UNEVICTABLE; + else { + if (PageActive(page)) + lru += LRU_ACTIVE; + lru += page_is_file_cache(page); + } return lru; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9c5111f49a3..d1f60d5fe2e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -86,6 +86,11 @@ enum zone_stat_item { NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ +#ifdef CONFIG_UNEVICTABLE_LRU + NR_UNEVICTABLE, /* " " " " " */ +#else + NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */ +#endif NR_ANON_PAGES, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ @@ -128,10 +133,18 @@ enum lru_list { LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, - NR_LRU_LISTS }; +#ifdef CONFIG_UNEVICTABLE_LRU + LRU_UNEVICTABLE, +#else + LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */ +#endif + NR_LRU_LISTS +}; #define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) +#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++) + static inline int is_file_lru(enum lru_list l) { return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE); @@ -142,6 +155,15 @@ static inline int is_active_lru(enum lru_list l) return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE); } +static inline int is_unevictable_lru(enum lru_list l) +{ +#ifdef CONFIG_UNEVICTABLE_LRU + return (l == LRU_UNEVICTABLE); +#else + return 0; +#endif +} + struct per_cpu_pages { int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 3d31616dcd2..ec1a1baad34 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -94,6 +94,9 @@ enum pageflags { PG_reclaim, /* To be reclaimed asap */ PG_buddy, /* Page is free, on buddy lists */ PG_swapbacked, /* Page is backed by RAM/swap */ +#ifdef CONFIG_UNEVICTABLE_LRU + PG_unevictable, /* Page is "unevictable" */ +#endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR PG_uncached, /* Page has been mapped as uncached */ #endif @@ -182,6 +185,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) + TESTCLEARFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, checked) /* Used by some filesystems */ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ @@ -225,6 +229,15 @@ PAGEFLAG(SwapCache, swapcache) PAGEFLAG_FALSE(SwapCache) #endif +#ifdef CONFIG_UNEVICTABLE_LRU +PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) + TESTCLEARFLAG(Unevictable, unevictable) +#else +PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) + SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) + __CLEARPAGEFLAG_NOOP(Unevictable) +#endif + #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR PAGEFLAG(Uncached, uncached) #else @@ -340,9 +353,16 @@ static inline void __ClearPageTail(struct page *page) #endif /* !PAGEFLAGS_EXTENDED */ +#ifdef CONFIG_UNEVICTABLE_LRU +#define __PG_UNEVICTABLE (1 << PG_unevictable) +#else +#define __PG_UNEVICTABLE 0 +#endif + #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ 1 << PG_buddy | 1 << PG_writeback | \ - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active) + 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ + __PG_UNEVICTABLE) /* * Flags checked in bad_page(). Pages on the free list should not have diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 5fc96a4e760..e90a2cb0291 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -101,7 +101,6 @@ static inline void __pagevec_lru_add_active_file(struct pagevec *pvec) ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE); } - static inline void pagevec_lru_add_file(struct pagevec *pvec) { if (pagevec_count(pvec)) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7d09d79997a..a2113044d20 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -180,6 +180,8 @@ extern int lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void swap_setup(void); +extern void add_page_to_unevictable_list(struct page *page); + /** * lru_cache_add: add a page to the page lists * @page: the page to add @@ -228,6 +230,16 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) } #endif +#ifdef CONFIG_UNEVICTABLE_LRU +extern int page_evictable(struct page *page, struct vm_area_struct *vma); +#else +static inline int page_evictable(struct page *page, + struct vm_area_struct *vma) +{ + return 1; +} +#endif + extern int kswapd_run(int nid); #ifdef CONFIG_MMU -- cgit v1.2.3 From bbfd28eee9fbd73e780b19beb3dc562befbb94fa Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:40 -0700 Subject: unevictable lru: add event counting with statistics Fix to unevictable-lru-page-statistics.patch Add unevictable lru infrastructure vm events to the statistics patch. Rename the "NORECL_" and "noreclaim_" symbols and text strings to "UNEVICTABLE_" and "unevictable_", respectively. Currently, both the infrastructure and the mlocked pages event are added by a single patch later in the series. This makes it difficult to add or rework the incremental patches. The events actually "belong" with the stats, so pull them up to here. Also, restore the event counting to putback_lru_page(). This was removed from previous patch in series where it was "misplaced". The actual events weren't defined that early. Signed-off-by: Lee Schermerhorn Cc: Rik van Riel Reviewed-by: KOSAKI Motohiro Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index ff5179f2b15..135840cd7fe 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -40,6 +40,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PAGEOUTRUN, ALLOCSTALL, PGROTATED, #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, +#endif +#ifdef CONFIG_UNEVICTABLE_LRU + UNEVICTABLE_PGCULLED, /* culled to noreclaim list */ + UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */ + UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */ #endif NR_VM_EVENT_ITEMS }; -- cgit v1.2.3 From ba9ddf49391645e6bb93219131a40446538a5e76 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:42 -0700 Subject: Ramfs and Ram Disk pages are unevictable Christoph Lameter pointed out that ram disk pages also clutter the LRU lists. When vmscan finds them dirty and tries to clean them, the ram disk writeback function just redirties the page so that it goes back onto the active list. Round and round she goes... With the ram disk driver [rd.c] replaced by the newer 'brd.c', this is no longer the case, as ram disk pages are no longer maintained on the lru. [This makes them unmigratable for defrag or memory hot remove, but that can be addressed by a separate patch series.] However, the ramfs pages behave like ram disk pages used to, so: Define new address_space flag [shares address_space flags member with mapping's gfp mask] to indicate that the address space contains all unevictable pages. This will provide for efficient testing of ramfs pages in page_evictable(). Also provide wrapper functions to set/test the unevictable state to minimize #ifdefs in ramfs driver and any other users of this facility. Set the unevictable state on address_space structures for new ramfs inodes. Test the unevictable state in page_evictable() to cull unevictable pages. These changes depend on [CONFIG_]UNEVICTABLE_LRU. [riel@redhat.com: undo the brd.c part] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Debugged-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 5da31c12101..09164d2c5c2 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -32,6 +32,28 @@ static inline void mapping_set_error(struct address_space *mapping, int error) } } +#ifdef CONFIG_UNEVICTABLE_LRU +#define AS_UNEVICTABLE (__GFP_BITS_SHIFT + 2) /* e.g., ramdisk, SHM_LOCK */ + +static inline void mapping_set_unevictable(struct address_space *mapping) +{ + set_bit(AS_UNEVICTABLE, &mapping->flags); +} + +static inline int mapping_unevictable(struct address_space *mapping) +{ + if (mapping && (mapping->flags & AS_UNEVICTABLE)) + return 1; + return 0; +} +#else +static inline void mapping_set_unevictable(struct address_space *mapping) { } +static inline int mapping_unevictable(struct address_space *mapping) +{ + return 0; +} +#endif + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; -- cgit v1.2.3 From 89e004ea55abe201b29e2d6e35124101f1288ef7 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:43 -0700 Subject: SHM_LOCKED pages are unevictable Shmem segments locked into memory via shmctl(SHM_LOCKED) should not be kept on the normal LRU, since scanning them is a waste of time and might throw off kswapd's balancing algorithms. Place them on the unevictable LRU list instead. Use the AS_UNEVICTABLE flag to mark address_space of SHM_LOCKed shared memory regions as unevictable. Then these pages will be culled off the normal LRU lists during vmscan. Add new wrapper function to clear the mapping's unevictable state when/if shared memory segment is munlocked. Add 'scan_mapping_unevictable_page()' to mm/vmscan.c to scan all pages in the shmem segment's mapping [struct address_space] for evictability now that they're no longer locked. If so, move them to the appropriate zone lru list. Changes depend on [CONFIG_]UNEVICTABLE_LRU. [kosaki.motohiro@jp.fujitsu.com: revert shm change] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: Kosaki Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- include/linux/pagemap.h | 12 +++++++++--- include/linux/swap.h | 4 ++++ 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c61ba10768e..40236290e2a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -700,10 +700,10 @@ static inline int page_mapped(struct page *page) extern void show_free_areas(void); #ifdef CONFIG_SHMEM -int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern int shmem_lock(struct file *file, int lock, struct user_struct *user); #else static inline int shmem_lock(struct file *file, int lock, - struct user_struct *user) + struct user_struct *user) { return 0; } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 09164d2c5c2..4b6c4d8d26b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -40,14 +40,20 @@ static inline void mapping_set_unevictable(struct address_space *mapping) set_bit(AS_UNEVICTABLE, &mapping->flags); } +static inline void mapping_clear_unevictable(struct address_space *mapping) +{ + clear_bit(AS_UNEVICTABLE, &mapping->flags); +} + static inline int mapping_unevictable(struct address_space *mapping) { - if (mapping && (mapping->flags & AS_UNEVICTABLE)) - return 1; - return 0; + if (likely(mapping)) + return test_bit(AS_UNEVICTABLE, &mapping->flags); + return !!mapping; } #else static inline void mapping_set_unevictable(struct address_space *mapping) { } +static inline void mapping_clear_unevictable(struct address_space *mapping) { } static inline int mapping_unevictable(struct address_space *mapping) { return 0; diff --git a/include/linux/swap.h b/include/linux/swap.h index a2113044d20..7edb4cbc29f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -232,12 +232,16 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) #ifdef CONFIG_UNEVICTABLE_LRU extern int page_evictable(struct page *page, struct vm_area_struct *vma); +extern void scan_mapping_unevictable_pages(struct address_space *); #else static inline int page_evictable(struct page *page, struct vm_area_struct *vma) { return 1; } +static inline void scan_mapping_unevictable_pages(struct address_space *mapping) +{ +} #endif extern int kswapd_run(int nid); -- cgit v1.2.3 From b291f000393f5a0b679012b39d79fbc85c018233 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:26:44 -0700 Subject: mlock: mlocked pages are unevictable Make sure that mlocked pages also live on the unevictable LRU, so kswapd will not scan them over and over again. This is achieved through various strategies: 1) add yet another page flag--PG_mlocked--to indicate that the page is locked for efficient testing in vmscan and, optionally, fault path. This allows early culling of unevictable pages, preventing them from getting to page_referenced()/try_to_unmap(). Also allows separate accounting of mlock'd pages, as Nick's original patch did. Note: Nick's original mlock patch used a PG_mlocked flag. I had removed this in favor of the PG_unevictable flag + an mlock_count [new page struct member]. I restored the PG_mlocked flag to eliminate the new count field. 2) add the mlock/unevictable infrastructure to mm/mlock.c, with internal APIs in mm/internal.h. This is a rework of Nick's original patch to these files, taking into account that mlocked pages are now kept on unevictable LRU list. 3) update vmscan.c:page_evictable() to check PageMlocked() and, if vma passed in, the vm_flags. Note that the vma will only be passed in for new pages in the fault path; and then only if the "cull unevictable pages in fault path" patch is included. 4) add try_to_unlock() to rmap.c to walk a page's rmap and ClearPageMlocked() if no other vmas have it mlocked. Reuses as much of try_to_unmap() as possible. This effectively replaces the use of one of the lru list links as an mlock count. If this mechanism let's pages in mlocked vmas leak through w/o PG_mlocked set [I don't know that it does], we should catch them later in try_to_unmap(). One hopes this will be rare, as it will be relatively expensive. Original mm/internal.h, mm/rmap.c and mm/mlock.c changes: Signed-off-by: Nick Piggin splitlru: introduce __get_user_pages(): New munlock processing need to GUP_FLAGS_IGNORE_VMA_PERMISSIONS. because current get_user_pages() can't grab PROT_NONE pages theresore it cause PROT_NONE pages can't munlock. [akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch] [akpm@linux-foundation.org: untangle patch interdependencies] [akpm@linux-foundation.org: fix things after out-of-order merging] [hugh@veritas.com: fix page-flags mess] [lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm'] [kosaki.motohiro@jp.fujitsu.com: build fix] [kosaki.motohiro@jp.fujitsu.com: fix truncate race and sevaral comments] [kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()] Signed-off-by: KOSAKI Motohiro Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Cc: Nick Piggin Cc: Dave Hansen Cc: Matt Mackall Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++++ include/linux/page-flags.h | 19 ++++++++++++++++--- include/linux/rmap.h | 14 ++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 40236290e2a..ffee2f74341 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -131,6 +131,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) +/* + * special vmas that are non-mergable, non-mlock()able + */ +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ec1a1baad34..b12f93a3c34 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -96,6 +96,7 @@ enum pageflags { PG_swapbacked, /* Page is backed by RAM/swap */ #ifdef CONFIG_UNEVICTABLE_LRU PG_unevictable, /* Page is "unevictable" */ + PG_mlocked, /* Page is vma mlocked */ #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR PG_uncached, /* Page has been mapped as uncached */ @@ -232,7 +233,17 @@ PAGEFLAG_FALSE(SwapCache) #ifdef CONFIG_UNEVICTABLE_LRU PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) TESTCLEARFLAG(Unevictable, unevictable) + +#define MLOCK_PAGES 1 +PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) + TESTSCFLAG(Mlocked, mlocked) + #else + +#define MLOCK_PAGES 0 +PAGEFLAG_FALSE(Mlocked) + SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) + PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) __CLEARPAGEFLAG_NOOP(Unevictable) @@ -354,15 +365,17 @@ static inline void __ClearPageTail(struct page *page) #endif /* !PAGEFLAGS_EXTENDED */ #ifdef CONFIG_UNEVICTABLE_LRU -#define __PG_UNEVICTABLE (1 << PG_unevictable) +#define __PG_UNEVICTABLE (1 << PG_unevictable) +#define __PG_MLOCKED (1 << PG_mlocked) #else -#define __PG_UNEVICTABLE 0 +#define __PG_UNEVICTABLE 0 +#define __PG_MLOCKED 0 #endif #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ 1 << PG_buddy | 1 << PG_writeback | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - __PG_UNEVICTABLE) + __PG_UNEVICTABLE | __PG_MLOCKED) /* * Flags checked in bad_page(). Pages on the free list should not have diff --git a/include/linux/rmap.h b/include/linux/rmap.h index fed6f5e0b41..955667e6a52 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -117,6 +117,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); */ int page_mkclean(struct page *); +#ifdef CONFIG_UNEVICTABLE_LRU +/* + * called in munlock()/munmap() path to check for other vmas holding + * the page mlocked. + */ +int try_to_munlock(struct page *); +#else +static inline int try_to_munlock(struct page *page) +{ + return 0; /* a.k.a. SWAP_SUCCESS */ +} +#endif + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) @@ -140,5 +153,6 @@ static inline int page_mkclean(struct page *page) #define SWAP_SUCCESS 0 #define SWAP_AGAIN 1 #define SWAP_FAIL 2 +#define SWAP_MLOCK 3 #endif /* _LINUX_RMAP_H */ -- cgit v1.2.3 From 5344b7e648980cc2ca613ec03a56a8222ff48820 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:26:51 -0700 Subject: vmstat: mlocked pages statistics Add NR_MLOCK zone page state, which provides a (conservative) count of mlocked pages (actually, the number of mlocked pages moved off the LRU). Reworked by lts to fit in with the modified mlock page support in the Reclaim Scalability series. [kosaki.motohiro@jp.fujitsu.com: fix incorrect Mlocked field of /proc/meminfo] [lee.schermerhorn@hp.com: mlocked-pages: add event counting with statistics] Signed-off-by: Nick Piggin Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ include/linux/vmstat.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d1f60d5fe2e..da2d053a95f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -88,8 +88,10 @@ enum zone_stat_item { NR_ACTIVE_FILE, /* " " " " " */ #ifdef CONFIG_UNEVICTABLE_LRU NR_UNEVICTABLE, /* " " " " " */ + NR_MLOCK, /* mlock()ed pages found and moved off LRU */ #else NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */ + NR_MLOCK = NR_ACTIVE_FILE, #endif NR_ANON_PAGES, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 135840cd7fe..05b805020be 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -45,6 +45,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, UNEVICTABLE_PGCULLED, /* culled to noreclaim list */ UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */ UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */ + UNEVICTABLE_PGMLOCKED, + UNEVICTABLE_PGMUNLOCKED, + UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ + UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ #endif NR_VM_EVENT_ITEMS }; -- cgit v1.2.3 From 64d6519dda3905dfb94d3f93c07c5f263f41813f Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:52 -0700 Subject: swap: cull unevictable pages in fault path In the fault paths that install new anonymous pages, check whether the page is evictable or not using lru_cache_add_active_or_unevictable(). If the page is evictable, just add it to the active lru list [via the pagevec cache], else add it to the unevictable list. This "proactive" culling in the fault path mimics the handling of mlocked pages in Nick Piggin's series to keep mlocked pages off the lru lists. Notes: 1) This patch is optional--e.g., if one is concerned about the additional test in the fault path. We can defer the moving of nonreclaimable pages until when vmscan [shrink_*_list()] encounters them. Vmscan will only need to handle such pages once, but if there are a lot of them it could impact system performance. 2) The 'vma' argument to page_evictable() is require to notice that we're faulting a page into an mlock()ed vma w/o having to scan the page's rmap in the fault path. Culling mlock()ed anon pages is currently the only reason for this patch. 3) We can't cull swap pages in read_swap_cache_async() because the vma argument doesn't necessarily correspond to the swap cache offset passed in by swapin_readahead(). This could [did!] result in mlocking pages in non-VM_LOCKED vmas if [when] we tried to cull in this path. 4) Move set_pte_at() to after where we add page to lru to keep it hidden from other tasks that might walk the page table. We already do it in this order in do_anonymous() page. And, these are COW'd anon pages. Is this safe? [riel@redhat.com: undo an overzealous code cleanup] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7edb4cbc29f..07eda69412f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -173,6 +173,8 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); +extern void lru_cache_add_active_or_unevictable(struct page *, + struct vm_area_struct *); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); -- cgit v1.2.3 From af936a1606246a10c145feac3770f6287f483f02 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:53 -0700 Subject: vmscan: unevictable LRU scan sysctl This patch adds a function to scan individual or all zones' unevictable lists and move any pages that have become evictable onto the respective zone's inactive list, where shrink_inactive_list() will deal with them. Adds sysctl to scan all nodes, and per node attributes to individual nodes' zones. Kosaki: If evictable page found in unevictable lru when write /proc/sys/vm/scan_unevictable_pages, print filename and file offset of these pages. [akpm@linux-foundation.org: fix one CONFIG_MMU=n build error] [kosaki.motohiro@jp.fujitsu.com: adapt vmscan-unevictable-lru-scan-sysctl.patch to new sysfs API] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Signed-off-by: KOSAKI Motohiro Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 3 +++ include/linux/swap.h | 15 +++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 955667e6a52..1da48db8db0 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -75,6 +75,9 @@ void anon_vma_unlink(struct vm_area_struct *); void anon_vma_link(struct vm_area_struct *); void __anon_vma_link(struct vm_area_struct *); +extern struct anon_vma *page_lock_anon_vma(struct page *page); +extern void page_unlock_anon_vma(struct anon_vma *anon_vma); + /* * rmap interfaces called when adding or removing pte of page */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 07eda69412f..a3af95b2cb6 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -235,15 +236,29 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) #ifdef CONFIG_UNEVICTABLE_LRU extern int page_evictable(struct page *page, struct vm_area_struct *vma); extern void scan_mapping_unevictable_pages(struct address_space *); + +extern unsigned long scan_unevictable_pages; +extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int scan_unevictable_register_node(struct node *node); +extern void scan_unevictable_unregister_node(struct node *node); #else static inline int page_evictable(struct page *page, struct vm_area_struct *vma) { return 1; } + static inline void scan_mapping_unevictable_pages(struct address_space *mapping) { } + +static inline int scan_unevictable_register_node(struct node *node) +{ + return 0; +} + +static inline void scan_unevictable_unregister_node(struct node *node) { } #endif extern int kswapd_run(int nid); -- cgit v1.2.3 From 985737cf2ea096ea946aed82c7484d40defc71a8 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:53 -0700 Subject: mlock: count attempts to free mlocked page Allow free of mlock()ed pages. This shouldn't happen, but during developement, it occasionally did. This patch allows us to survive that condition, while keeping the statistics and events correct for debug. Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 05b805020be..9cd3ab0f554 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -49,6 +49,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, UNEVICTABLE_PGMUNLOCKED, UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ + UNEVICTABLE_MLOCKFREED, #endif NR_VM_EVENT_ITEMS }; -- cgit v1.2.3 From 902d2e8ae0de29f483840ba1134af27343b9564d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Sat, 18 Oct 2008 20:26:54 -0700 Subject: vmscan: kill unused lru functions Several LRU manupuration function are not used now. So they can be removed. Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 48 ----------------------------------------------- 1 file changed, 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 67d7697fd01..c948350c378 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -37,54 +37,6 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) __dec_zone_state(zone, NR_LRU_BASE + l); } -static inline void -add_page_to_inactive_anon_list(struct zone *zone, struct page *page) -{ - add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON); -} - -static inline void -add_page_to_active_anon_list(struct zone *zone, struct page *page) -{ - add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON); -} - -static inline void -add_page_to_inactive_file_list(struct zone *zone, struct page *page) -{ - add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE); -} - -static inline void -add_page_to_active_file_list(struct zone *zone, struct page *page) -{ - add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE); -} - -static inline void -del_page_from_inactive_anon_list(struct zone *zone, struct page *page) -{ - del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON); -} - -static inline void -del_page_from_active_anon_list(struct zone *zone, struct page *page) -{ - del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON); -} - -static inline void -del_page_from_inactive_file_list(struct zone *zone, struct page *page) -{ - del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE); -} - -static inline void -del_page_from_active_file_list(struct zone *zone, struct page *page) -{ - del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE); -} - static inline void del_page_from_lru(struct zone *zone, struct page *page) { -- cgit v1.2.3 From f45840b5c128445da70e7ec33adc47b4a12bdaf4 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:26:57 -0700 Subject: mm: pagecache insertion fewer atomics Setting and clearing the page locked when inserting it into swapcache / pagecache when it has no other references can use non-atomic page flags operations because no other CPU may be operating on it at this time. This saves one atomic operation when inserting a page into pagecache. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b6c4d8d26b..7334b2b6c4c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -299,14 +299,14 @@ extern int __lock_page_killable(struct page *page); extern void __lock_page_nosync(struct page *page); extern void unlock_page(struct page *page); -static inline void set_page_locked(struct page *page) +static inline void __set_page_locked(struct page *page) { - set_bit(PG_locked, &page->flags); + __set_bit(PG_locked, &page->flags); } -static inline void clear_page_locked(struct page *page) +static inline void __clear_page_locked(struct page *page) { - clear_bit(PG_locked, &page->flags); + __clear_bit(PG_locked, &page->flags); } static inline int trylock_page(struct page *page) @@ -438,17 +438,17 @@ extern void __remove_from_page_cache(struct page *page); /* * Like add_to_page_cache_locked, but used to add newly allocated pages: - * the page is new, so we can just run set_page_locked() against it. + * the page is new, so we can just run __set_page_locked() against it. */ static inline int add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) { int error; - set_page_locked(page); + __set_page_locked(page); error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); if (unlikely(error)) - clear_page_locked(page); + __clear_page_locked(page); return error; } -- cgit v1.2.3 From 8413ac9d8c9a1366a4f57880723126cd24e5a5c3 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:26:59 -0700 Subject: mm: page lock use lock bitops trylock_page, unlock_page open and close a critical section. Hence, we can use the lock bitops to get the desired memory ordering. Also, mark trylock as likely to succeed (and remove the annotation from callers). Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7334b2b6c4c..709742be02f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -311,7 +311,7 @@ static inline void __clear_page_locked(struct page *page) static inline int trylock_page(struct page *page) { - return !test_and_set_bit(PG_locked, &page->flags); + return (likely(!test_and_set_bit_lock(PG_locked, &page->flags))); } /* -- cgit v1.2.3 From 51b07fc3c5c830bb49c80fc5eac041e1f66a72e7 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:27:00 -0700 Subject: fs: buffer lock use lock bitops trylock_buffer and unlock_buffer open and close a critical section. Hence, we can use the lock bitops to get the desired memory ordering. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index eadaab44015..3ce64b90118 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -322,7 +322,7 @@ static inline void wait_on_buffer(struct buffer_head *bh) static inline int trylock_buffer(struct buffer_head *bh) { - return likely(!test_and_set_bit(BH_Lock, &bh->b_state)); + return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state)); } static inline void lock_buffer(struct buffer_head *bh) -- cgit v1.2.3 From db64fe02258f1507e13fe5212a989922323685ce Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:27:03 -0700 Subject: mm: rewrite vmap layer Rewrite the vmap allocator to use rbtrees and lazy tlb flushing, and provide a fast, scalable percpu frontend for small vmaps (requires a slightly different API, though). The biggest problem with vmap is actually vunmap. Presently this requires a global kernel TLB flush, which on most architectures is a broadcast IPI to all CPUs to flush the cache. This is all done under a global lock. As the number of CPUs increases, so will the number of vunmaps a scaled workload will want to perform, and so will the cost of a global TLB flush. This gives terrible quadratic scalability characteristics. Another problem is that the entire vmap subsystem works under a single lock. It is a rwlock, but it is actually taken for write in all the fast paths, and the read locking would likely never be run concurrently anyway, so it's just pointless. This is a rewrite of vmap subsystem to solve those problems. The existing vmalloc API is implemented on top of the rewritten subsystem. The TLB flushing problem is solved by using lazy TLB unmapping. vmap addresses do not have to be flushed immediately when they are vunmapped, because the kernel will not reuse them again (would be a use-after-free) until they are reallocated. So the addresses aren't allocated again until a subsequent TLB flush. A single TLB flush then can flush multiple vunmaps from each CPU. XEN and PAT and such do not like deferred TLB flushing because they can't always handle multiple aliasing virtual addresses to a physical address. They now call vm_unmap_aliases() in order to flush any deferred mappings. That call is very expensive (well, actually not a lot more expensive than a single vunmap under the old scheme), however it should be OK if not called too often. The virtual memory extent information is stored in an rbtree rather than a linked list to improve the algorithmic scalability. There is a per-CPU allocator for small vmaps, which amortizes or avoids global locking. To use the per-CPU interface, the vm_map_ram / vm_unmap_ram interfaces must be used in place of vmap and vunmap. Vmalloc does not use these interfaces at the moment, so it will not be quite so scalable (although it will use lazy TLB flushing). As a quick test of performance, I ran a test that loops in the kernel, linearly mapping then touching then unmapping 4 pages. Different numbers of tests were run in parallel on an 4 core, 2 socket opteron. Results are in nanoseconds per map+touch+unmap. threads vanilla vmap rewrite 1 14700 2900 2 33600 3000 4 49500 2800 8 70631 2900 So with a 8 cores, the rewritten version is already 25x faster. In a slightly more realistic test (although with an older and less scalable version of the patch), I ripped the not-very-good vunmap batching code out of XFS, and implemented the large buffer mapping with vm_map_ram and vm_unmap_ram... along with a couple of other tricks, I was able to speed up a large directory workload by 20x on a 64 CPU system. I believe vmap/vunmap is actually sped up a lot more than 20x on such a system, but I'm running into other locks now. vmap is pretty well blown off the profiles. Before: 1352059 total 0.1401 798784 _write_lock 8320.6667 <- vmlist_lock 529313 default_idle 1181.5022 15242 smp_call_function 15.8771 <- vmap tlb flushing 2472 __get_vm_area_node 1.9312 <- vmap 1762 remove_vm_area 4.5885 <- vunmap 316 map_vm_area 0.2297 <- vmap 312 kfree 0.1950 300 _spin_lock 3.1250 252 sn_send_IPI_phys 0.4375 <- tlb flushing 238 vmap 0.8264 <- vmap 216 find_lock_page 0.5192 196 find_next_bit 0.3603 136 sn2_send_IPI 0.2024 130 pio_phys_write_mmr 2.0312 118 unmap_kernel_range 0.1229 After: 78406 total 0.0081 40053 default_idle 89.4040 33576 ia64_spinlock_contention 349.7500 1650 _spin_lock 17.1875 319 __reg_op 0.5538 281 _atomic_dec_and_lock 1.0977 153 mutex_unlock 1.5938 123 iget_locked 0.1671 117 xfs_dir_lookup 0.1662 117 dput 0.1406 114 xfs_iget_core 0.0268 92 xfs_da_hashname 0.1917 75 d_alloc 0.0670 68 vmap_page_range 0.0462 <- vmap 58 kmem_cache_alloc 0.0604 57 memset 0.0540 52 rb_next 0.1625 50 __copy_user 0.0208 49 bitmap_find_free_region 0.2188 <- vmap 46 ia64_sn_udelay 0.1106 45 find_inode_fast 0.1406 42 memcmp 0.2188 42 finish_task_switch 0.1094 42 __d_lookup 0.0410 40 radix_tree_lookup_slot 0.1250 37 _spin_unlock_irqrestore 0.3854 36 xfs_bmapi 0.0050 36 kmem_cache_free 0.0256 35 xfs_vn_getattr 0.0322 34 radix_tree_lookup 0.1062 33 __link_path_walk 0.0035 31 xfs_da_do_buf 0.0091 30 _xfs_buf_find 0.0204 28 find_get_page 0.0875 27 xfs_iread 0.0241 27 __strncpy_from_user 0.2812 26 _xfs_buf_initialize 0.0406 24 _xfs_buf_lookup_pages 0.0179 24 vunmap_page_range 0.0250 <- vunmap 23 find_lock_page 0.0799 22 vm_map_ram 0.0087 <- vmap 20 kfree 0.0125 19 put_page 0.0330 18 __kmalloc 0.0176 17 xfs_da_node_lookup_int 0.0086 17 _read_lock 0.0885 17 page_waitqueue 0.0664 vmap has gone from being the top 5 on the profiles and flushing the crap out of all TLBs, to using less than 1% of kernel time. [akpm@linux-foundation.org: cleanups, section fix] [akpm@linux-foundation.org: fix build on alpha] Signed-off-by: Nick Piggin Cc: Jeremy Fitzhardinge Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 328eb402272..4c28c4d564e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -2,6 +2,7 @@ #define _LINUX_VMALLOC_H #include +#include #include /* pgprot_t */ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ @@ -23,7 +24,6 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #endif struct vm_struct { - /* keep next,addr,size together to speedup lookups */ struct vm_struct *next; void *addr; unsigned long size; @@ -37,6 +37,19 @@ struct vm_struct { /* * Highlevel APIs for driver use */ +extern void vm_unmap_ram(const void *mem, unsigned int count); +extern void *vm_map_ram(struct page **pages, unsigned int count, + int node, pgprot_t prot); +extern void vm_unmap_aliases(void); + +#ifdef CONFIG_MMU +extern void __init vmalloc_init(void); +#else +static inline void vmalloc_init(void) +{ +} +#endif + extern void *vmalloc(unsigned long size); extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); -- cgit v1.2.3 From e575f111dc0f27044e170580e7de50985ab3e011 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Sat, 18 Oct 2008 20:27:08 -0700 Subject: coredump_filter: add hugepage dumping Presently hugepage's vma has a VM_RESERVED flag in order not to be swapped. But a VM_RESERVED vma isn't core dumped because this flag is often used for some kernel vmas (e.g. vmalloc, sound related). Thus hugepages are never dumped and it can't be debugged easily. Many developers want hugepages to be included into core-dump. However, We can't read generic VM_RESERVED area because this area is often IO mapping area. then these area reading may change device state. it is definitly undesiable side-effect. So adding a hugepage specific bit to the coredump filter is better. It will be able to hugepage core dumping and doesn't cause any side-effect to any i/o devices. In additional, libhugetlb use hugetlb private mapping pages as anonymous page. Then, hugepage private mapping pages should be core dumped by default. Then, /proc/[pid]/core_dump_filter has two new bits. - bit 5 mean hugetlb private mapping pages are dumped or not. (default: yes) - bit 6 mean hugetlb shared mapping pages are dumped or not. (default: no) I tested by following method. % ulimit -c unlimited % ./crash_hugepage 50 % ./crash_hugepage 50 -p % ls -lh % gdb ./crash_hugepage core % % echo 0x43 > /proc/self/coredump_filter % ./crash_hugepage 50 % ./crash_hugepage 50 -p % ls -lh % gdb ./crash_hugepage core #include #include #include #include #include #include "hugetlbfs.h" int main(int argc, char** argv){ char* p; int ch; int mmap_flags = MAP_SHARED; int fd; int nr_pages; while((ch = getopt(argc, argv, "p")) != -1) { switch (ch) { case 'p': mmap_flags &= ~MAP_SHARED; mmap_flags |= MAP_PRIVATE; break; default: /* nothing*/ break; } } argc -= optind; argv += optind; if (argc == 0){ printf("need # of pages\n"); exit(1); } nr_pages = atoi(argv[0]); if (nr_pages < 2) { printf("nr_pages must >2\n"); exit(1); } fd = hugetlbfs_unlinked_fd(); p = mmap(NULL, nr_pages * gethugepagesize(), PROT_READ|PROT_WRITE, mmap_flags, fd, 0); sleep(2); *(p + gethugepagesize()) = 1; /* COW */ sleep(2); /* crash! */ *(int*)0 = 1; return 0; } Signed-off-by: KOSAKI Motohiro Reviewed-by: Kawai Hidehiro Cc: Hugh Dickins Cc: William Irwin Cc: Adam Litke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c226c7b8294..017cc914ef1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -403,12 +403,15 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_DUMP_MAPPED_PRIVATE 4 #define MMF_DUMP_MAPPED_SHARED 5 #define MMF_DUMP_ELF_HEADERS 6 +#define MMF_DUMP_HUGETLB_PRIVATE 7 +#define MMF_DUMP_HUGETLB_SHARED 8 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 5 +#define MMF_DUMP_FILTER_BITS 7 #define MMF_DUMP_FILTER_MASK \ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED)) + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ + (1 << MMF_DUMP_HUGETLB_PRIVATE)) struct sighand_struct { atomic_t count; -- cgit v1.2.3 From 8174f1503f4bf7e9a14b3fbbfdb30c6be6e29f77 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Sat, 18 Oct 2008 20:27:19 -0700 Subject: container freezer: make refrigerator always available Now that the TIF_FREEZE flag is available in all architectures, extract the refrigerator() and freeze_task() from kernel/power/process.c and make it available to all. The refrigerator() can now be used in a control group subsystem implementing a control group freezer. Signed-off-by: Cedric Le Goater Signed-off-by: Matt Helsley Acked-by: Serge E. Hallyn Tested-by: Matt Helsley Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/freezer.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index deddeedf325..17e3bb42dd3 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -6,7 +6,7 @@ #include #include -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_FREEZER /* * Check if a process has been frozen */ @@ -39,6 +39,11 @@ static inline void clear_freeze_flag(struct task_struct *p) clear_tsk_thread_flag(p, TIF_FREEZE); } +static inline bool should_send_signal(struct task_struct *p) +{ + return !(p->flags & PF_FREEZER_NOSIG); +} + /* * Wake up a frozen process * @@ -75,6 +80,9 @@ static inline int try_to_freeze(void) return 0; } +extern bool freeze_task(struct task_struct *p, bool sig_only); +extern void cancel_freezing(struct task_struct *p); + /* * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it * calls wait_for_completion(&vfork) and reset right after it returns from this @@ -166,7 +174,7 @@ static inline void set_freezable_with_signal(void) } while (try_to_freeze()); \ __retval; \ }) -#else /* !CONFIG_PM_SLEEP */ +#else /* !CONFIG_FREEZER */ static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } static inline void set_freeze_flag(struct task_struct *p) {} @@ -191,6 +199,6 @@ static inline void set_freezable_with_signal(void) {} #define wait_event_freezable_timeout(wq, condition, timeout) \ wait_event_interruptible_timeout(wq, condition, timeout) -#endif /* !CONFIG_PM_SLEEP */ +#endif /* !CONFIG_FREEZER */ #endif /* FREEZER_H_INCLUDED */ -- cgit v1.2.3 From dc52ddc0e6f45b04780b26fc0813509f8e798c42 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Sat, 18 Oct 2008 20:27:21 -0700 Subject: container freezer: implement freezer cgroup subsystem This patch implements a new freezer subsystem in the control groups framework. It provides a way to stop and resume execution of all tasks in a cgroup by writing in the cgroup filesystem. The freezer subsystem in the container filesystem defines a file named freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the cgroup. Subsequently writing "RUNNING" will unfreeze the tasks in the cgroup. Reading will return the current state. * Examples of usage : # mkdir /containers/freezer # mount -t cgroup -ofreezer freezer /containers # mkdir /containers/0 # echo $some_pid > /containers/0/tasks to get status of the freezer subsystem : # cat /containers/0/freezer.state RUNNING to freeze all tasks in the container : # echo FROZEN > /containers/0/freezer.state # cat /containers/0/freezer.state FREEZING # cat /containers/0/freezer.state FROZEN to unfreeze all tasks in the container : # echo RUNNING > /containers/0/freezer.state # cat /containers/0/freezer.state RUNNING This is the basic mechanism which should do the right thing for user space task in a simple scenario. It's important to note that freezing can be incomplete. In that case we return EBUSY. This means that some tasks in the cgroup are busy doing something that prevents us from completely freezing the cgroup at this time. After EBUSY, the cgroup will remain partially frozen -- reflected by freezer.state reporting "FREEZING" when read. The state will remain "FREEZING" until one of these things happens: 1) Userspace cancels the freezing operation by writing "RUNNING" to the freezer.state file 2) Userspace retries the freezing operation by writing "FROZEN" to the freezer.state file (writing "FREEZING" is not legal and returns EIO) 3) The tasks that blocked the cgroup from entering the "FROZEN" state disappear from the cgroup's set of tasks. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: export thaw_process] Signed-off-by: Cedric Le Goater Signed-off-by: Matt Helsley Acked-by: Serge E. Hallyn Tested-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup_subsys.h | 6 ++++++ include/linux/freezer.h | 29 ++++++++++------------------- 2 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e2877454ec8..9c22396e8b5 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -48,3 +48,9 @@ SUBSYS(devices) #endif /* */ + +#ifdef CONFIG_CGROUP_FREEZER +SUBSYS(freezer) +#endif + +/* */ diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 17e3bb42dd3..8f225339eee 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -46,26 +46,11 @@ static inline bool should_send_signal(struct task_struct *p) /* * Wake up a frozen process - * - * task_lock() is taken to prevent the race with refrigerator() which may - * occur if the freezing of tasks fails. Namely, without the lock, if the - * freezing of tasks failed, thaw_tasks() might have run before a task in - * refrigerator() could call frozen_process(), in which case the task would be - * frozen and no one would thaw it. */ -static inline int thaw_process(struct task_struct *p) -{ - task_lock(p); - if (frozen(p)) { - p->flags &= ~PF_FROZEN; - task_unlock(p); - wake_up_process(p); - return 1; - } - clear_freeze_flag(p); - task_unlock(p); - return 0; -} +extern int __thaw_process(struct task_struct *p); + +/* Takes and releases task alloc lock using task_lock() */ +extern int thaw_process(struct task_struct *p); extern void refrigerator(void); extern int freeze_processes(void); @@ -83,6 +68,12 @@ static inline int try_to_freeze(void) extern bool freeze_task(struct task_struct *p, bool sig_only); extern void cancel_freezing(struct task_struct *p); +#ifdef CONFIG_CGROUP_FREEZER +extern int cgroup_frozen(struct task_struct *task); +#else /* !CONFIG_CGROUP_FREEZER */ +static inline int cgroup_frozen(struct task_struct *task) { return 0; } +#endif /* !CONFIG_CGROUP_FREEZER */ + /* * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it * calls wait_for_completion(&vfork) and reset right after it returns from this -- cgit v1.2.3 From 3e680aae4e53ab54cdbb0c29257dae0cbb158e1c Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sat, 18 Oct 2008 20:27:51 -0700 Subject: fb: convert lock/unlock_kernel() into local fb mutex Change lock_kernel()/unlock_kernel() to local fb mutex. Each frame buffer instance has its own mutex. The one line try_to_load() function is unrolled to request_module() in two places for readability. [righi.andrea@gmail.com: fb: fix NULL pointer BUG dereference in fb_open()] Signed-off-by: Krzysztof Helt Signed-off-by: Andrea Righi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 531ccd5f596..75a81eaf343 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -808,6 +808,7 @@ struct fb_tile_ops { struct fb_info { int node; int flags; + struct mutex lock; /* Lock for open/release/ioctl funcs */ struct fb_var_screeninfo var; /* Current var */ struct fb_fix_screeninfo fix; /* Current fix */ struct fb_monspecs monspecs; /* Current Monitor specs */ -- cgit v1.2.3 From 0e4fb5e283870757024294bc4567a7c59d936f0b Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Sat, 18 Oct 2008 20:27:57 -0700 Subject: ext3: add an option to control error handling on file data If the journal doesn't abort when it gets an IO error in file data blocks, the file data corruption will spread silently. Because most of applications and commands do buffered writes without fsync(), they don't notice the IO error. It's scary for mission critical systems. On the other hand, if the journal aborts whenever it gets an IO error in file data blocks, the system will easily become inoperable. So this patch introduces a filesystem option to determine whether it aborts the journal or just call printk() when it gets an IO error in file data. If you mount a ext3 fs with data_err=abort option, it aborts on file data write error. If you mount it with data_err=ignore, it doesn't abort, just call printk(). data_err=ignore is the default. Signed-off-by: Hidehiro Kawai Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 2 ++ include/linux/jbd.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 159d9b476cd..d14f0291848 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -380,6 +380,8 @@ struct ext3_inode { #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write + * error in ordered mode */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 7ebbcb1c9ba..35d4f6342fa 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -816,6 +816,9 @@ struct journal_s #define JFS_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JFS_LOADED 0x010 /* The journal superblock has been loaded */ #define JFS_BARRIER 0x020 /* Use IDE barriers */ +#define JFS_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file + * data write error in ordered + * mode */ /* * Function declarations for the journaling transaction and buffer -- cgit v1.2.3 From 146aa1bd0511f88ddb4e92fafa2b8aad4f2f65f3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 18 Oct 2008 20:28:03 -0700 Subject: cgroups: fix probable race with put_css_set[_taskexit] and find_css_set put_css_set_taskexit may be called when find_css_set is called on other cpu. And the race will occur: put_css_set_taskexit side find_css_set side | atomic_dec_and_test(&kref->refcount) | /* kref->refcount = 0 */ | .................................................................... | read_lock(&css_set_lock) | find_existing_css_set | get_css_set | read_unlock(&css_set_lock); .................................................................... __release_css_set | .................................................................... | /* use a released css_set */ | [put_css_set is the same. But in the current code, all put_css_set are put into cgroup mutex critical region as the same as find_css_set.] [akpm@linux-foundation.org: repair comments] [menage@google.com: eliminate race in css_set refcounting] Signed-off-by: Lai Jiangshan Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 30934e4bfaa..7166023e07d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -9,7 +9,6 @@ */ #include -#include #include #include #include @@ -149,7 +148,7 @@ struct cgroup { struct css_set { /* Reference count */ - struct kref ref; + atomic_t refcount; /* * List running through all cgroup groups in the same hash -- cgit v1.2.3 From cc31edceee04a7b87f2be48f9489ebb72d264844 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Sat, 18 Oct 2008 20:28:04 -0700 Subject: cgroups: convert tasks file to use a seq_file with shared pid array Rather than pre-generating the entire text for the "tasks" file each time the file is opened, we instead just generate/update the array of process ids and use a seq_file to report these to userspace. All open file handles on the same "tasks" file can share a pid array, which may be updated any time that no thread is actively reading the array. By sharing the array, the potential for userspace to DoS the system by opening many handles on the same "tasks" file is removed. [Based on a patch by Lai Jiangshan, extended to use seq_file] Signed-off-by: Paul Menage Reviewed-by: Lai Jiangshan Cc: Serge Hallyn Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7166023e07d..8ab91880a0a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -136,6 +137,15 @@ struct cgroup { * release_list_lock */ struct list_head release_list; + + /* pids_mutex protects the fields below */ + struct rw_semaphore pids_mutex; + /* Array of process ids in the cgroup */ + pid_t *tasks_pids; + /* How many files are using the current tasks_pids array */ + int pids_use_count; + /* Length of the current tasks_pids array */ + int pids_length; }; /* A css_set is a structure holding pointers to a set of -- cgit v1.2.3 From 886465f407e57d6c3c81013c919ea670ce1ae0d0 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Sat, 18 Oct 2008 20:28:05 -0700 Subject: cgroups: fix declaration of cgroup_mm_owner_callbacks The choice of real/dummy declaration for cgroup_mm_owner_callbacks() shouldn't be based on CONFIG_MM_OWNER, but on CONFIG_CGROUPS. Otherwise kernel/exit.c fails to compile when something other than a cgroups controller selects CONFIG_MM_OWNER Signed-off-by: Paul Menage Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8ab91880a0a..8b00f6643e9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -403,6 +403,9 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); +void cgroup_mm_owner_callbacks(struct task_struct *old, + struct task_struct *new); + #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } @@ -421,15 +424,9 @@ static inline int cgroupstats_build(struct cgroupstats *stats, return -EINVAL; } +static inline void cgroup_mm_owner_callbacks(struct task_struct *old, + struct task_struct *new) {} + #endif /* !CONFIG_CGROUPS */ -#ifdef CONFIG_MM_OWNER -extern void -cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new); -#else /* !CONFIG_MM_OWNER */ -static inline void -cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) -{ -} -#endif /* CONFIG_MM_OWNER */ #endif /* _LINUX_CGROUP_H */ -- cgit v1.2.3 From 52d4b9ac0b985168009c2a57098324e67bae171f Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Sat, 18 Oct 2008 20:28:16 -0700 Subject: memcg: allocate all page_cgroup at boot Allocate all page_cgroup at boot and remove page_cgroup poitner from struct page. This patch adds an interface as struct page_cgroup *lookup_page_cgroup(struct page*) All FLATMEM/DISCONTIGMEM/SPARSEMEM and MEMORY_HOTPLUG is supported. Remove page_cgroup pointer reduces the amount of memory by - 4 bytes per PAGE_SIZE. - 8 bytes per PAGE_SIZE if memory controller is disabled. (even if configured.) On usual 8GB x86-32 server, this saves 8MB of NORMAL_ZONE memory. On my x86-64 server with 48GB of memory, this saves 96MB of memory. I think this reduction makes sense. By pre-allocation, kmalloc/kfree in charge/uncharge are removed. This means - we're not necessary to be afraid of kmalloc faiulre. (this can happen because of gfp_mask type.) - we can avoid calling kmalloc/kfree. - we can avoid allocating tons of small objects which can be fragmented. - we can know what amount of memory will be used for this extra-lru handling. I added printk message as "allocated %ld bytes of page_cgroup" "please try cgroup_disable=memory option if you don't want" maybe enough informative for users. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 13 +----- include/linux/mm_types.h | 3 -- include/linux/mmzone.h | 14 +++++- include/linux/page_cgroup.h | 103 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+), 16 deletions(-) create mode 100644 include/linux/page_cgroup.h (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ee1b2fcb441..1fbe14d3952 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,9 +27,6 @@ struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR -#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) - -extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, @@ -72,16 +69,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ -static inline void page_reset_bad_cgroup(struct page *page) -{ -} - -static inline struct page_cgroup *page_get_page_cgroup(struct page *page) -{ - return NULL; -} +#else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9d49fa36bbe..fe825471d5a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -94,9 +94,6 @@ struct page { void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR - unsigned long page_cgroup; -#endif }; /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index da2d053a95f..35a7b5e1946 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -601,8 +601,11 @@ typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; struct zonelist node_zonelists[MAX_ZONELISTS]; int nr_zones; -#ifdef CONFIG_FLAT_NODE_MEM_MAP +#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ struct page *node_mem_map; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR + struct page_cgroup *node_page_cgroup; +#endif #endif struct bootmem_data *bdata; #ifdef CONFIG_MEMORY_HOTPLUG @@ -931,6 +934,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #endif struct page; +struct page_cgroup; struct mem_section { /* * This is, logically, a pointer to an array of struct @@ -948,6 +952,14 @@ struct mem_section { /* See declaration of similar field in struct zone */ unsigned long *pageblock_flags; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR + /* + * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use + * section. (see memcontrol.h/page_cgroup.h about this.) + */ + struct page_cgroup *page_cgroup; + unsigned long pad; +#endif }; #ifdef CONFIG_SPARSEMEM_EXTREME diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h new file mode 100644 index 00000000000..0fd39f2231e --- /dev/null +++ b/include/linux/page_cgroup.h @@ -0,0 +1,103 @@ +#ifndef __LINUX_PAGE_CGROUP_H +#define __LINUX_PAGE_CGROUP_H + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#include +/* + * Page Cgroup can be considered as an extended mem_map. + * A page_cgroup page is associated with every page descriptor. The + * page_cgroup helps us identify information about the cgroup + * All page cgroups are allocated at boot or memory hotplug event, + * then the page cgroup for pfn always exists. + */ +struct page_cgroup { + unsigned long flags; + struct mem_cgroup *mem_cgroup; + struct page *page; + struct list_head lru; /* per cgroup LRU list */ +}; + +void __init pgdat_page_cgroup_init(struct pglist_data *pgdat); +void __init page_cgroup_init(void); +struct page_cgroup *lookup_page_cgroup(struct page *page); + +enum { + /* flags for mem_cgroup */ + PCG_LOCK, /* page cgroup is locked */ + PCG_CACHE, /* charged as cache */ + PCG_USED, /* this object is in use. */ + /* flags for LRU placement */ + PCG_ACTIVE, /* page is active in this cgroup */ + PCG_FILE, /* page is file system backed */ + PCG_UNEVICTABLE, /* page is unevictableable */ +}; + +#define TESTPCGFLAG(uname, lname) \ +static inline int PageCgroup##uname(struct page_cgroup *pc) \ + { return test_bit(PCG_##lname, &pc->flags); } + +#define SETPCGFLAG(uname, lname) \ +static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ + { set_bit(PCG_##lname, &pc->flags); } + +#define CLEARPCGFLAG(uname, lname) \ +static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ + { clear_bit(PCG_##lname, &pc->flags); } + +/* Cache flag is set only once (at allocation) */ +TESTPCGFLAG(Cache, CACHE) + +TESTPCGFLAG(Used, USED) +CLEARPCGFLAG(Used, USED) + +/* LRU management flags (from global-lru definition) */ +TESTPCGFLAG(File, FILE) +SETPCGFLAG(File, FILE) +CLEARPCGFLAG(File, FILE) + +TESTPCGFLAG(Active, ACTIVE) +SETPCGFLAG(Active, ACTIVE) +CLEARPCGFLAG(Active, ACTIVE) + +TESTPCGFLAG(Unevictable, UNEVICTABLE) +SETPCGFLAG(Unevictable, UNEVICTABLE) +CLEARPCGFLAG(Unevictable, UNEVICTABLE) + +static inline int page_cgroup_nid(struct page_cgroup *pc) +{ + return page_to_nid(pc->page); +} + +static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc) +{ + return page_zonenum(pc->page); +} + +static inline void lock_page_cgroup(struct page_cgroup *pc) +{ + bit_spin_lock(PCG_LOCK, &pc->flags); +} + +static inline int trylock_page_cgroup(struct page_cgroup *pc) +{ + return bit_spin_trylock(PCG_LOCK, &pc->flags); +} + +static inline void unlock_page_cgroup(struct page_cgroup *pc) +{ + bit_spin_unlock(PCG_LOCK, &pc->flags); +} + +#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +struct page_cgroup; + +static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat) +{ +} + +static inline struct page_cgroup *lookup_page_cgroup(struct page *page) +{ + return NULL; +} +#endif +#endif -- cgit v1.2.3 From 3eda20118000941e7e8994fc5fac8706d8c10f00 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 18 Oct 2008 20:28:19 -0700 Subject: seq_file: add seq_cpumask_list(), seq_nodemask_list() seq_cpumask_list(), seq_nodemask_list() are very like seq_cpumask(), seq_nodemask(), but they print human readable string. Signed-off-by: Lai Jiangshan Cc: Alexey Dobriyan Cc: Paul Menage Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index a1783b229ef..dc50bcc282a 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -60,6 +60,19 @@ static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask) return seq_bitmap(m, mask->bits, MAX_NUMNODES); } +int seq_bitmap_list(struct seq_file *m, unsigned long *bits, + unsigned int nr_bits); + +static inline int seq_cpumask_list(struct seq_file *m, cpumask_t *mask) +{ + return seq_bitmap_list(m, mask->bits, NR_CPUS); +} + +static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask) +{ + return seq_bitmap_list(m, mask->bits, MAX_NUMNODES); +} + int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); -- cgit v1.2.3 From c4596435404976b0ded9cdf18b456ca2e1408ddd Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 18 Oct 2008 20:28:21 -0700 Subject: bitmask: remove bitmap_scnprintf_len() bitmap_scnprintf_len() is not used now, so we remove it. Otherwise we have to maintain it and make its return value always equal to bitmap_scnprintf()'s return value. Signed-off-by: Lai Jiangshan Cc: Alexey Dobriyan Cc: Paul Menage Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 89781fd4885..1abfe664c44 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -110,7 +110,6 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits); extern int bitmap_scnprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); -extern int bitmap_scnprintf_len(unsigned int nr_bits); extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, -- cgit v1.2.3 From b747c8c102cc0677a7a8056a093f58d7c9b500e7 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 18 Oct 2008 20:28:21 -0700 Subject: make ptrace_untrace() static ptrace_untrace() can now become static. Signed-off-by: Adrian Bunk Cc: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index ea7416c901d..22641d5d45d 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -94,7 +94,6 @@ extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); -extern void ptrace_untrace(struct task_struct *child); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ -- cgit v1.2.3 From 656eb2cd5da153762f2e8419ca117ce12ef522c3 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 18 Oct 2008 20:28:23 -0700 Subject: add CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS This adds a kconfig option to change the /proc/PID/coredump_filter default. Fedora has been carrying a trivial patch to change the hard-wired value for this default, since Fedora 8. The default default can't change safely because there are old GDB versions out there (all before 6.7) that are confused by the core dump files created by the MMF_DUMP_ELF_HEADERS setting. Signed-off-by: Roland McGrath Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Alan Cox Cc: Andi Kleen Cc: KOSAKI Motohiro Cc: Kawai Hidehiro Cc: Ingo Molnar Cc: David Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 017cc914ef1..f52dbd3587a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -411,7 +411,13 @@ extern int get_dumpable(struct mm_struct *mm); (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE)) + (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS +# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +#else +# define MMF_DUMP_MASK_DEFAULT_ELF 0 +#endif struct sighand_struct { atomic_t count; -- cgit v1.2.3 From 57cac4d1880527e0baf6c2fda529d2ad1d815aec Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sat, 18 Oct 2008 20:28:25 -0700 Subject: kdump: make elfcorehdr_addr independent of CONFIG_PROC_VMCORE o elfcorehdr_addr is used by not only the code under CONFIG_PROC_VMCORE but also by the code which is not inside CONFIG_PROC_VMCORE. For example, is_kdump_kernel() is used by powerpc code to determine if kernel is booting after a panic then use previous kernel's TCE table. So even if CONFIG_PROC_VMCORE is not set in second kernel, one should be able to correctly determine that we are booting after a panic and setup calgary iommu accordingly. o So remove the assumption that elfcorehdr_addr is under CONFIG_PROC_VMCORE. o Move definition of elfcorehdr_addr to arch dependent crash files. (Unfortunately crash dump does not have an arch independent file otherwise that would have been the best place). o kexec.c is not the right place as one can Have CRASH_DUMP enabled in second kernel without KEXEC being enabled. o I don't see sh setup code parsing the command line for elfcorehdr_addr. I am wondering how does vmcore interface work on sh. Anyway, I am atleast defining elfcoredhr_addr so that compilation is not broken on sh. Signed-off-by: Vivek Goyal Acked-by: "Eric W. Biederman" Acked-by: Simon Horman Acked-by: Paul Mundt Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_dump.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 025e4f57510..de027d1db74 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -9,11 +9,7 @@ #define ELFCORE_ADDR_MAX (-1ULL) -#ifdef CONFIG_PROC_VMCORE extern unsigned long long elfcorehdr_addr; -#else -static const unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; -#endif extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); @@ -28,6 +24,16 @@ extern struct proc_dir_entry *proc_vmcore; #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) +/* + * is_kdump_kernel() checks whether this kernel is booting after a panic of + * previous kernel or not. This is determined by checking if previous kernel + * has passed the elf core header address on command line. + * + * This is not just a test if CONFIG_CRASH_DUMP is enabled or not. It will + * return 1 if CONFIG_CRASH_DUMP=y and if kernel is booting after a panic of + * previous kernel. + */ + static inline int is_kdump_kernel(void) { return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0; -- cgit v1.2.3 From 85a0ee342e0c06c19d78fdf48307211c6cf18fcb Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sat, 18 Oct 2008 20:28:29 -0700 Subject: kdump: add is_vmcore_usable() and vmcore_unusable() The usage of elfcorehdr_addr has changed recently such that being set to ELFCORE_ADDR_MAX is used by is_kdump_kernel() to indicate if the code is executing in a kernel executed as a crash kernel. However, arch/ia64/kernel/setup.c:reserve_elfcorehdr will rest elfcorehdr_addr to ELFCORE_ADDR_MAX on error, which means any subsequent calls to is_kdump_kernel() will return 0, even though they should return 1. Ok, at this point in time there are no subsequent calls, but I think its fair to say that there is ample scope for error or at the very least confusion. This patch add an extra state, ELFCORE_ADDR_ERR, which indicates that elfcorehdr_addr was passed on the command line, and thus execution is taking place in a crashdump kernel, but vmcore can't be used for some reason. This is tested for using is_vmcore_usable() and set using vmcore_unusable(). A subsequent patch makes use of this new code. To summarise, the states that elfcorehdr_addr can now be in are as follows: ELFCORE_ADDR_MAX: not a crashdump kernel ELFCORE_ADDR_ERR: crashdump kernel but vmcore is unusable any other value: crash dump kernel and vmcore is usable Signed-off-by: Simon Horman Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_dump.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index de027d1db74..0acf3b737e2 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -8,6 +8,7 @@ #include #define ELFCORE_ADDR_MAX (-1ULL) +#define ELFCORE_ADDR_ERR (-2ULL) extern unsigned long long elfcorehdr_addr; @@ -38,6 +39,29 @@ static inline int is_kdump_kernel(void) { return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0; } + +/* is_vmcore_usable() checks if the kernel is booting after a panic and + * the vmcore region is usable. + * + * This makes use of the fact that due to alignment -2ULL is not + * a valid pointer, much in the vain of IS_ERR(), except + * dealing directly with an unsigned long long rather than a pointer. + */ + +static inline int is_vmcore_usable(void) +{ + return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; +} + +/* vmcore_unusable() marks the vmcore as unusable, + * without disturbing the logic of is_kdump_kernel() + */ + +static inline void vmcore_unusable(void) +{ + if (is_kdump_kernel()) + elfcorehdr_addr = ELFCORE_ADDR_ERR; +} #else /* !CONFIG_CRASH_DUMP */ static inline int is_kdump_kernel(void) { return 0; } #endif /* CONFIG_CRASH_DUMP */ -- cgit v1.2.3 From b8e465f4945bc0e9f324e3bbe15f5180a8e9a6fe Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 18 Oct 2008 20:28:35 -0700 Subject: byteorder: add new headers for make headers-install Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index bf9aca548f1..e531783e5d7 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -183,6 +183,7 @@ unifdef-y += auto_fs.h unifdef-y += auxvec.h unifdef-y += binfmts.h unifdef-y += blktrace_api.h +unifdef-y += byteorder.h unifdef-y += capability.h unifdef-y += capi.h unifdef-y += cciss_ioctl.h @@ -340,6 +341,7 @@ unifdef-y += soundcard.h unifdef-y += stat.h unifdef-y += stddef.h unifdef-y += string.h +unifdef-y += swab.h unifdef-y += synclink.h unifdef-y += sysctl.h unifdef-y += tcp.h -- cgit v1.2.3 From acf0108a84edae22b99655eb2f6f6c9f7ec4d449 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 18 Oct 2008 20:28:36 -0700 Subject: byteorder: use generic C version for value byteswapping This makes the new implementation of the byteorder helpers match the old in how it degraded when an arch-defined version was not available: 1) swab() - look for arch defined - if not, use generic c version 2) swabp() - look for arch-defined - if not, deref pointer and use swab() 3) swabs() - look for arch defined - if not, use swabp Signed-off-by: Harvey Harrison Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swab.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swab.h b/include/linux/swab.h index 270d5c208a8..bbed279f3b3 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -47,8 +47,6 @@ static inline __attribute_const__ __u16 ___swab16(__u16 val) { #ifdef __arch_swab16 return __arch_swab16(val); -#elif defined(__arch_swab16p) - return __arch_swab16p(&val); #else return __const_swab16(val); #endif @@ -58,8 +56,6 @@ static inline __attribute_const__ __u32 ___swab32(__u32 val) { #ifdef __arch_swab32 return __arch_swab32(val); -#elif defined(__arch_swab32p) - return __arch_swab32p(&val); #else return __const_swab32(val); #endif @@ -69,8 +65,6 @@ static inline __attribute_const__ __u64 ___swab64(__u64 val) { #ifdef __arch_swab64 return __arch_swab64(val); -#elif defined(__arch_swab64p) - return __arch_swab64p(&val); #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; __u32 l = val & ((1ULL << 32) - 1); @@ -84,8 +78,6 @@ static inline __attribute_const__ __u32 ___swahw32(__u32 val) { #ifdef __arch_swahw32 return __arch_swahw32(val); -#elif defined(__arch_swahw32p) - return __arch_swahw32p(&val); #else return __const_swahw32(val); #endif @@ -95,8 +87,6 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val) { #ifdef __arch_swahb32 return __arch_swahb32(val); -#elif defined(__arch_swahb32p) - return __arch_swahb32p(&val); #else return __const_swahb32(val); #endif -- cgit v1.2.3 From 1d8cca44b6a244b7e378546d719041819049a0f9 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 18 Oct 2008 20:28:37 -0700 Subject: byteorder: provide swabb.h generically in asm/byteorder.h This is needed during the transition to the new byteorder headers as the swabb.h functionality will be provided from asm/byteorder.h in the new version. To avoid breakage on arches still using the old implementation, provide swabb.h from asm/byteorder.h as well. Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/byteorder/Kbuild | 1 + include/linux/byteorder/big_endian.h | 1 + include/linux/byteorder/little_endian.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild index 1133d5f9d81..fbaa7f9cee3 100644 --- a/include/linux/byteorder/Kbuild +++ b/include/linux/byteorder/Kbuild @@ -1,3 +1,4 @@ unifdef-y += big_endian.h unifdef-y += little_endian.h unifdef-y += swab.h +unifdef-y += swabb.h diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 44f95b92393..1cba3f3efe5 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -10,6 +10,7 @@ #include #include +#include #define __constant_htonl(x) ((__force __be32)(__u32)(x)) #define __constant_ntohl(x) ((__force __u32)(__be32)(x)) diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index 4cc170a3176..cedc1b5a289 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -10,6 +10,7 @@ #include #include +#include #define __constant_htonl(x) ((__force __be32)___constant_swab32((x))) #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x)) -- cgit v1.2.3 From fdd2e5f88a259a537bb239e0c03c973cb6ea402a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 18 Oct 2008 20:28:38 -0700 Subject: make mm/rmap.c:anon_vma_cachep static This patch makes the needlessly global anon_vma_cachep static. Signed-off-by: Adrian Bunk Reviewed-by: KOSAKI Motohiro Acked-by: Rik van Riel Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1da48db8db0..89f0564b10c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -39,18 +39,6 @@ struct anon_vma { #ifdef CONFIG_MMU -extern struct kmem_cache *anon_vma_cachep; - -static inline struct anon_vma *anon_vma_alloc(void) -{ - return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); -} - -static inline void anon_vma_free(struct anon_vma *anon_vma) -{ - kmem_cache_free(anon_vma_cachep, anon_vma); -} - static inline void anon_vma_lock(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; -- cgit v1.2.3 From a0098efd6ee4e8c04d82d761aa1bb9ec7a0aa32d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 18 Oct 2008 20:28:47 -0700 Subject: remove the obsolete BCD*BIN/BIN*BCD macros Remove the following obsolete macros: - BCD2BIN - BIN2BCD - BCD_TO_BIN - BIN_TO_BCD Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bcd.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcd.h b/include/linux/bcd.h index 7ac518e3c15..75f6d6e699a 100644 --- a/include/linux/bcd.h +++ b/include/linux/bcd.h @@ -15,11 +15,4 @@ unsigned bcd2bin(unsigned char val) __attribute_const__; unsigned char bin2bcd(unsigned val) __attribute_const__; -#define BCD2BIN(val) bcd2bin(val) -#define BIN2BCD(val) bin2bcd(val) - -/* backwards compat */ -#define BCD_TO_BIN(val) ((val)=BCD2BIN(val)) -#define BIN_TO_BCD(val) ((val)=BIN2BCD(val)) - #endif /* _BCD_H */ -- cgit v1.2.3 From 5a85a7dda15f88b7f9c96c67fe826b5d0486d601 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 18 Oct 2008 20:28:48 -0700 Subject: include/linux/bcd.h: remove comments - the macros are gone - there's no more code in this file, LGPL + GPL = GPL, and the code that was moved to lib/bcd.c is anyway trivial Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bcd.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcd.h b/include/linux/bcd.h index 75f6d6e699a..22ea563ba3e 100644 --- a/include/linux/bcd.h +++ b/include/linux/bcd.h @@ -1,12 +1,3 @@ -/* Permission is hereby granted to copy, modify and redistribute this code - * in terms of the GNU Library General Public License, Version 2 or later, - * at your option. - */ - -/* macros to translate to/from binary and binary-coded decimal (frequently - * found in RTC chips). - */ - #ifndef _BCD_H #define _BCD_H -- cgit v1.2.3 From 01e8ef11bc1a74e65678ed55795f59266d4add01 Mon Sep 17 00:00:00 2001 From: Parag Warudkar Date: Sat, 18 Oct 2008 20:28:50 -0700 Subject: x86: sysfs: kill owner field from attribute Tejun's commit 7b595756ec1f49e0049a9e01a1298d53a7faaa15 made sysfs attribute->owner unnecessary. But the field was left in the structure to ease the merge. It's been over a year since that change and it is now time to start killing attribute->owner along with its users - one arch at a time! This patch is attempt #1 to get rid of attribute->owner only for CONFIG_X86_64 or CONFIG_X86_32 . We will deal with other arches later on as and when possible - avr32 will be the next since that is something I can test. Compile (make allyesconfig / make allmodconfig / custom config) and boot tested. akpm: the idea is that we put the declaration of sttribute.owner inside `#ifndef CONFIG_X86'. But that proved to be too ambitious for now because new usages kept on turning up in subsystem trees. [akpm: remove the ifdef for now] Signed-off-by: Parag Warudkar Cc: Greg KH Cc: Ingo Molnar Cc: Tejun Heo Cc: Len Brown Cc: Jens Axboe Cc: Jean Delvare Cc: Roland Dreier Cc: David Brownell Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index b330e289d71..9d68fed50f1 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -21,8 +21,9 @@ struct kobject; struct module; /* FIXME - * The *owner field is no longer used, but leave around - * until the tree gets cleaned up fully. + * The *owner field is no longer used. + * x86 tree has been cleaned up. The owner + * attribute is still left for other arches. */ struct attribute { const char *name; -- cgit v1.2.3 From edbc25caaa492a82e19baa915f1f6b0a0db6554d Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 10 Jul 2008 16:29:37 -0500 Subject: PCI: remove dynids.use_driver_data The driver flag dynids.use_driver_data is almost consistently not set, and causes more problems than it solves. It was initially intended as a flag to indicate whether a driver's usage of driver_data had been carefully inspected and was ready for values from userspace. That audit was never done, so most drivers just get a 0 for driver_data when new IDs are added from userspace via sysfs. So remove the flag, allowing drivers to see the data directly (a followon patch validates the passed driver_data value against what the drivers expect). Acked-by: Greg Kroah-Hartman Acked-by: Jean Delvare Signed-off-by: Milton Miller Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index acf8f24037c..c989f58d09b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -347,7 +347,6 @@ struct pci_bus_region { struct pci_dynids { spinlock_t lock; /* protects list, index */ struct list_head list; /* for IDs added at runtime */ - unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */ }; /* ---------------------------------------------------------------- */ -- cgit v1.2.3 From 0235c4fc7fc6f621dc0dd89eba102ad5aa373390 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 18 Aug 2008 21:38:00 +0200 Subject: PCI PM: Introduce function pci_wake_from_d3 Many device drivers use the following sequence of statements to enable the device to wake up the system while being in the D3_hot or D3_cold low power state: pci_enable_wake(pdev, PCI_D3hot, 1); pci_enable_wake(pdev, PCI_D3cold, 1); However, the second call is not necessary if the first one succeeds (the ordering of the statements above doesn't matter here) and it may even be harmful, because we are not supposed to enable PME# after the wake-up power has been enabled for the device. To allow drivers to overcome this problem, introduce function pci_wake_from_d3() that will enable the device to wake up the system from any of D3_hot and D3_cold as long as the wake-up from at least one of them is supported. Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c989f58d09b..f7e7dbc0919 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -644,6 +644,7 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable); +int pci_wake_from_d3(struct pci_dev *dev, bool enable); pci_power_t pci_target_state(struct pci_dev *dev); int pci_prepare_to_sleep(struct pci_dev *dev); int pci_back_from_sleep(struct pci_dev *dev); -- cgit v1.2.3 From 16dbef4a831782466b10d4ae56837c5ba17d1948 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 Aug 2008 19:36:45 -0700 Subject: PCI: change MSI-x vector to 32bit We are using 28bit pci (bus/dev/fn + 12 bits) as irq number, so the cache for irq number should be 32 bit too. Signed-off-by: Yinghai Lu Cc: Andrew Vasquez Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f7e7dbc0919..8a4d0bebc31 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -725,7 +725,7 @@ enum pci_dma_burst_strategy { }; struct msix_entry { - u16 vector; /* kernel uses to write allocated vector */ + u32 vector; /* kernel uses to write allocated vector */ u16 entry; /* driver uses to specify entry, OS writes */ }; -- cgit v1.2.3 From 37a84ec668ba251ae02cf2c2c664baf6b247ae1f Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Thu, 28 Aug 2008 15:40:59 -0700 Subject: x86/PCI: irq and pci_ids patch for Intel Ibex Peak DeviceIDs This patch updates the Intel Ibex Peak (PCH) LPC and SMBus Controller DeviceIDs. The LPC Controller ID is set by Firmware within the range of 0x3b00-3b1f. This range is included in pci_ids.h using min and max values, and irq.c now has code to handle the range (in lieu of 32 additions to a SWITCH statement). The SMBus Controller ID is a fixed-value and will not change. Signed-off-by: Seth Heasley Acked-by: Jean Delvare Signed-off-by: Jesse Barnes --- include/linux/pci_ids.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 8edddc240e4..e5d344bfcb7 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2454,9 +2454,9 @@ #define PCI_DEVICE_ID_INTEL_ICH10_3 0x3a1a #define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30 #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 -#define PCI_DEVICE_ID_INTEL_PCH_0 0x3b10 -#define PCI_DEVICE_ID_INTEL_PCH_1 0x3b11 -#define PCI_DEVICE_ID_INTEL_PCH_2 0x3b30 +#define PCI_DEVICE_ID_INTEL_PCH_LPC_MIN 0x3b00 +#define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX 0x3b1f +#define PCI_DEVICE_ID_INTEL_PCH_SMBUS 0x3b30 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 -- cgit v1.2.3 From c322b28a04c084a467a862766f74c40c917a721c Mon Sep 17 00:00:00 2001 From: "Zhao, Yu" Date: Mon, 13 Oct 2008 19:36:05 +0800 Subject: PCI: use same arg names in PCI_VDEVICE comment This cleanup makes the argument names in PCI_VDEVICE comment consistent with those used in its definition. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 8a4d0bebc31..008005674b6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -455,8 +455,8 @@ struct pci_driver { /** * PCI_VDEVICE - macro used to describe a specific pci device in short form - * @vend: the vendor name - * @dev: the 16 bit PCI Device ID + * @vendor: the vendor name + * @device: the 16 bit PCI Device ID * * This macro is used to create a struct pci_device_id that matches a * specific PCI device. The subvendor, and subdevice fields will be set -- cgit v1.2.3 From 58c3a727cb73b75a9104d295f096cca12959a5a5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 14 Oct 2008 14:02:53 +0800 Subject: PCI: support PCIe ARI capability This patch adds support for PCI Express Alternative Routing-ID Interpretation (ARI) capability. The ARI capability extends the Function Number field of the PCI Express Endpoint by reusing the Device Number which is otherwise hardwired to 0. With ARI, an Endpoint can have up to 256 functions. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + include/linux/pci_regs.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 008005674b6..7e9a1f0715e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -214,6 +214,7 @@ struct pci_dev { unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int msi_enabled:1; unsigned int msix_enabled:1; + unsigned int ari_enabled:1; /* ARI forwarding */ unsigned int is_managed:1; unsigned int is_pcie:1; pci_dev_flags_t dev_flags; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 450684f7eaa..eb6686b88f9 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -419,6 +419,10 @@ #define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ #define PCI_EXP_RTCAP 30 /* Root Capabilities */ #define PCI_EXP_RTSTA 32 /* Root Status */ +#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ +#define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ /* Extended Capabilities (PCI-X 2.0 and Express) */ #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) @@ -429,6 +433,7 @@ #define PCI_EXT_CAP_ID_VC 2 #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 +#define PCI_EXT_CAP_ID_ARI 14 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -536,5 +541,14 @@ #define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 hypertransport configuration */ #define HT_CAPTYPE_PM 0xE0 /* Hypertransport powermanagement configuration */ +/* Alternative Routing-ID Interpretation */ +#define PCI_ARI_CAP 0x04 /* ARI Capability Register */ +#define PCI_ARI_CAP_MFVC 0x0001 /* MFVC Function Groups Capability */ +#define PCI_ARI_CAP_ACS 0x0002 /* ACS Function Groups Capability */ +#define PCI_ARI_CAP_NFN(x) (((x) >> 8) & 0xff) /* Next Function Number */ +#define PCI_ARI_CTRL 0x06 /* ARI Control Register */ +#define PCI_ARI_CTRL_MFVC 0x0001 /* MFVC Function Groups Enable */ +#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ +#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ #endif /* LINUX_PCI_REGS_H */ -- cgit v1.2.3 From aa42d7c6138afdc54f74e971456a0fbfec16b77b Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 28 Sep 2008 16:36:11 -0700 Subject: PCI: introduce an pci_ioremap(pdev, barnr) function A common thing in many PCI drivers is to ioremap() an entire bar. This is a slightly fragile thing right now, needing both an address and a size, and many driver writers do.. various things there. This patch introduces an pci_ioremap() function taking just a PCI device struct and the bar number as arguments, and figures this all out itself, in one place. In addition, we can add various sanity checks to this function (the patch already checks to make sure that the bar in question really is a MEM bar; few to no drivers do that sort of thing). Hopefully with this type of API we get less chance of mistakes in drivers with ioremap() operations. Signed-off-by: Arjan van de Ven Signed-off-by: Jesse Barnes --- include/linux/pci.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7e9a1f0715e..46ad282ffe4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1119,5 +1119,18 @@ static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif +static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar) +{ + /* + * Make sure the BAR is actually a memory resource, not an IO resource + */ + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { + WARN_ON(1); + return NULL; + } + return ioremap_nocache(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar)); +} + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From 0927678f55c9a50c296f7e6dae85e87b8236e155 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Sat, 18 Oct 2008 17:33:19 -0700 Subject: PCI: use pci_find_ext_capability everywhere Remove some open coded (and buggy) versions of pci_find_ext_capability in favor of the real routine in the PCI core. Tested-by: Tomasz Czernecki Acked-by: Andrew Vasquez Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- include/linux/aer.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index f2518141de8..a2383a72356 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -18,10 +18,6 @@ static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) { return -EINVAL; } -static inline int pci_find_aer_capability(struct pci_dev *dev) -{ - return 0; -} static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) { return -EINVAL; -- cgit v1.2.3 From 270c66be9b4a6f2be53ef3aec5dc8e7b07782ec9 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sun, 19 Oct 2008 20:35:20 +0800 Subject: PCI: fix AER capability check The 'use pci_find_ext_capability everywhere' cleanup brought a new bug, which makes the AER stop working. Fix it by actually using find_ext_cap instead of just find_cap. Drop the unused config space size define while we're at it. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/aer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index a2383a72356..f7df1eefc10 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -10,7 +10,6 @@ #if defined(CONFIG_PCIEAER) /* pci-e port driver needs this function to enable aer */ extern int pci_enable_pcie_error_reporting(struct pci_dev *dev); -extern int pci_find_aer_capability(struct pci_dev *dev); extern int pci_disable_pcie_error_reporting(struct pci_dev *dev); extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); #else -- cgit v1.2.3 From 96499871f45b9126157b1a5c512d6e30f1635225 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Oct 2008 19:45:43 +0200 Subject: PCI: fix pci_ioremap_bar() on s390 s390 doesn't have ioremap_*, so protect the definition of the new pci_ioremap_bar function with CONFIG_HAS_IOMEM to avoid build breakage. Acked-by: Arjan van de Ven Signed-off-by: Heiko Carstens Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 46ad282ffe4..085187be29c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1119,6 +1119,7 @@ static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif +#ifdef CONFIG_HAS_IOMEM static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar) { /* @@ -1131,6 +1132,7 @@ static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar) return ioremap_nocache(pci_resource_start(pdev, bar), pci_resource_len(pdev, bar)); } +#endif #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3