From 4446a36ff8c74ac3b32feb009b651048e129c6af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add call_rcu_sched() Fourth cut of patch to provide the call_rcu_sched(). This is again to synchronize_sched() as call_rcu() is to synchronize_rcu(). Should be fine for experimental and -rt use, but not ready for inclusion. With some luck, I will be able to tell Andrew to come out of hiding on the next round. Passes multi-day rcutorture sessions with concurrent CPU hotplugging. Fixes since the first version include a bug that could result in indefinite blocking (spotted by Gautham Shenoy), better resiliency against CPU-hotplug operations, and other minor fixes. Fixes since the second version include reworking grace-period detection to avoid deadlocks that could happen when running concurrently with CPU hotplug, adding Mathieu's fix to avoid the softlockup messages, as well as Mathieu's fix to allow use earlier in boot. Fixes since the third version include a wrong-CPU bug spotted by Andrew, getting rid of the obsolete synchronize_kernel API that somehow snuck back in, merging spin_unlock() and local_irq_restore() in a few places, commenting the code that checks for quiescent states based on interrupting from user-mode execution or the idle loop, removing some inline attributes, and some code-style changes. Known/suspected shortcomings: o I still do not entirely trust the sleep/wakeup logic. Next step will be to use a private snapshot of the CPU online mask in rcu_sched_grace_period() -- if the CPU wasn't there at the start of the grace period, we don't need to hear from it. And the bit about accounting for changes in online CPUs inside of rcu_sched_grace_period() is ugly anyway. o It might be good for rcu_sched_grace_period() to invoke resched_cpu() when a given CPU wasn't responding quickly, but resched_cpu() is declared static... This patch also fixes a long-standing bug in the earlier preemptable-RCU implementation of synchronize_rcu() that could result in loss of concurrent external changes to a task's CPU affinity mask. I still cannot remember who reported this... Signed-off-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- init/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'init') diff --git a/init/main.c b/init/main.c index f7fb20021d4..a9cc3e0803d 100644 --- a/init/main.c +++ b/init/main.c @@ -758,6 +758,7 @@ static void __init do_initcalls(void) */ static void __init do_basic_setup(void) { + rcu_init_sched(); /* needed by module_init stage. */ /* drivers will send hotplug events */ init_workqueues(); usermodehelper_init(); -- cgit v1.2.3 From 3da757daf86e498872855f0b5e101f763ba79499 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 20 Jun 2008 15:06:33 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation On the x86 platform we can use the value of tsc_khz computed during tsc calibration to calculate the loops_per_jiffy value. Its very important to keep the error in lpj values to minimum as any error in that may result in kernel panic in check_timer. In virtualization environment, On a highly overloaded host the guest delay calibration may sometimes result in errors beyond the ~50% that timer_irq_works can handle, resulting in the guest panicking. Does some formating changes to lpj_setup code to now have a single printk to print the bogomips value. We do this only for the boot processor because the AP's can have different base frequencies or the BIOS might boot a AP at a different frequency. Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- init/calibrate.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'init') diff --git a/init/calibrate.c b/init/calibrate.c index ecb3822d4f7..86286974dad 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -8,7 +8,9 @@ #include #include #include +#include +unsigned long lpj_tsc; unsigned long preset_lpj; static int __init lpj_setup(char *str) { @@ -108,6 +110,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} * This is the number of bits of precision for the loops_per_jiffy. Each * bit takes on average 1.5/HZ seconds. This (like the original) is a little * better than 1% + * For the boot cpu we can skip the delay calibration and assign it a value + * calculated based on the tsc frequency. + * For the rest of the CPUs we cannot assume that the tsc frequency is same as + * the cpu frequency, hence do the calibration for those. */ #define LPS_PREC 8 @@ -118,20 +124,20 @@ void __cpuinit calibrate_delay(void) if (preset_lpj) { loops_per_jiffy = preset_lpj; - printk("Calibrating delay loop (skipped)... " - "%lu.%02lu BogoMIPS preset\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100); + printk(KERN_INFO + "Calibrating delay loop (skipped) preset value.. "); + } else if ((smp_processor_id() == 0) && lpj_tsc) { + loops_per_jiffy = lpj_tsc; + printk(KERN_INFO + "Calibrating delay loop (skipped), " + "using tsc calculated value.. "); } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { - printk("Calibrating delay using timer specific routine.. "); - printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, - loops_per_jiffy); + printk(KERN_INFO + "Calibrating delay using timer specific routine.. "); } else { loops_per_jiffy = (1<<12); - printk(KERN_DEBUG "Calibrating delay loop... "); + printk(KERN_INFO "Calibrating delay loop... "); while ((loops_per_jiffy <<= 1) != 0) { /* wait for "start of" clock tick */ ticks = jiffies; @@ -161,12 +167,8 @@ void __cpuinit calibrate_delay(void) if (jiffies != ticks) /* longer than 1 tick */ loops_per_jiffy &= ~loopbit; } - - /* Round the value and print it */ - printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, - loops_per_jiffy); } - + printk(KERN_INFO "%lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); } -- cgit v1.2.3 From f3f3149f35b9195ef4b761b1353fc0766b5f53be Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 23 Jun 2008 18:21:56 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation, cleanup As suggested by Ingo, remove all references to tsc from init/calibrate.c TSC is x86 specific, and using tsc in variable names in a generic file should be avoided. lpj_tsc is now called lpj_fine, since it is related to fine tuning of lpj value. Also tsc_rate_* is called timer_rate_* Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- init/calibrate.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'init') diff --git a/init/calibrate.c b/init/calibrate.c index 86286974dad..7963e3fc51d 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -10,7 +10,7 @@ #include #include -unsigned long lpj_tsc; +unsigned long lpj_fine; unsigned long preset_lpj; static int __init lpj_setup(char *str) { @@ -35,9 +35,9 @@ static unsigned long __cpuinit calibrate_delay_direct(void) unsigned long pre_start, start, post_start; unsigned long pre_end, end, post_end; unsigned long start_jiffies; - unsigned long tsc_rate_min, tsc_rate_max; - unsigned long good_tsc_sum = 0; - unsigned long good_tsc_count = 0; + unsigned long timer_rate_min, timer_rate_max; + unsigned long good_timer_sum = 0; + unsigned long good_timer_count = 0; int i; if (read_current_timer(&pre_start) < 0 ) @@ -81,22 +81,24 @@ static unsigned long __cpuinit calibrate_delay_direct(void) } read_current_timer(&post_end); - tsc_rate_max = (post_end - pre_start) / DELAY_CALIBRATION_TICKS; - tsc_rate_min = (pre_end - post_start) / DELAY_CALIBRATION_TICKS; + timer_rate_max = (post_end - pre_start) / + DELAY_CALIBRATION_TICKS; + timer_rate_min = (pre_end - post_start) / + DELAY_CALIBRATION_TICKS; /* - * If the upper limit and lower limit of the tsc_rate is + * If the upper limit and lower limit of the timer_rate is * >= 12.5% apart, redo calibration. */ if (pre_start != 0 && pre_end != 0 && - (tsc_rate_max - tsc_rate_min) < (tsc_rate_max >> 3)) { - good_tsc_count++; - good_tsc_sum += tsc_rate_max; + (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) { + good_timer_count++; + good_timer_sum += timer_rate_max; } } - if (good_tsc_count) - return (good_tsc_sum/good_tsc_count); + if (good_timer_count) + return (good_timer_sum/good_timer_count); printk(KERN_WARNING "calibrate_delay_direct() failed to get a good " "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n"); @@ -111,8 +113,8 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} * bit takes on average 1.5/HZ seconds. This (like the original) is a little * better than 1% * For the boot cpu we can skip the delay calibration and assign it a value - * calculated based on the tsc frequency. - * For the rest of the CPUs we cannot assume that the tsc frequency is same as + * calculated based on the timer frequency. + * For the rest of the CPUs we cannot assume that the timer frequency is same as * the cpu frequency, hence do the calibration for those. */ #define LPS_PREC 8 @@ -126,11 +128,11 @@ void __cpuinit calibrate_delay(void) loops_per_jiffy = preset_lpj; printk(KERN_INFO "Calibrating delay loop (skipped) preset value.. "); - } else if ((smp_processor_id() == 0) && lpj_tsc) { - loops_per_jiffy = lpj_tsc; + } else if ((smp_processor_id() == 0) && lpj_fine) { + loops_per_jiffy = lpj_fine; printk(KERN_INFO "Calibrating delay loop (skipped), " - "using tsc calculated value.. "); + "value calculated using timer frequency.. "); } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { printk(KERN_INFO "Calibrating delay using timer specific routine.. "); -- cgit v1.2.3 From 3d4422332711ef48ef0f132f1fcbfcbd56c7f3d1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 26 Jun 2008 11:21:34 +0200 Subject: Add generic helpers for arch IPI function calls This adds kernel/smp.c which contains helpers for IPI function calls. In addition to supporting the existing smp_call_function() in a more efficient manner, it also adds a more scalable variant called smp_call_function_single() for calling a given function on a single CPU only. The core of this is based on the x86-64 patch from Nick Piggin, lots of changes since then. "Alan D. Brunelle" has contributed lots of fixes and suggestions as well. Also thanks to Paul E. McKenney for reviewing RCU usage and getting rid of the data allocation fallback deadlock. Acked-by: Ingo Molnar Reviewed-by: Paul E. McKenney Signed-off-by: Jens Axboe --- init/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'init') diff --git a/init/main.c b/init/main.c index f7fb20021d4..1efcccff1bd 100644 --- a/init/main.c +++ b/init/main.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -779,6 +780,7 @@ static void __init do_pre_smp_initcalls(void) { extern int spawn_ksoftirqd(void); + init_call_single_data(); migration_init(); spawn_ksoftirqd(); if (!nosoftlockup) -- cgit v1.2.3 From 2d62f488585405bc9108c47cb06957397cfd1059 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 31 Jan 2008 17:25:00 +0200 Subject: do_mounts: allow UBI root device name Similarly to MTD devices, allow UBI devices. Signed-off-by: Adrian Hunter --- init/do_mounts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'init') diff --git a/init/do_mounts.c b/init/do_mounts.c index 660c1e50c91..a1de1bf3d6b 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -372,7 +372,8 @@ void __init prepare_namespace(void) if (saved_root_name[0]) { root_device_name = saved_root_name; - if (!strncmp(root_device_name, "mtd", 3)) { + if (!strncmp(root_device_name, "mtd", 3) || + !strncmp(root_device_name, "ubi", 3)) { mount_block_root(root_device_name, root_mountflags); goto out; } -- cgit v1.2.3