From c5829cd07ec4c08daa7ff91c821af9b2ac7748df Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Wed, 17 Oct 2007 16:52:08 -0500 Subject: [CPUFREQ] architectural pstate driver for powernow-k8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch should apply cleanly to the 2.6.23-git7 kernel.  It changes the powernow-k8 driver code that deals with 3rd generation Opteron, Phenom, and later processors to match the architectural pstate driver described in the AMD64 Architecture Programmer's Manual Volume 2 Chapter 18.  The initial implementation of the hardware pstate driver for PowerNow! used some processor-version specific features, and would not be maintainable in the long term as the processor features changed. This architectural driver should work on all future AMD processors.   Signed-off-by: Mark Langsdorf Signed-off-by: Andreas Herrmann Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 90 ++++++++----------------------- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 20 +++---- 2 files changed, 29 insertions(+), 81 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index b273b69cfdd..f105ffd0996 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -46,7 +46,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 2.00.00" +#define VERSION "version 2.20.00" #include "powernow-k8.h" /* serialize freq changes */ @@ -73,33 +73,11 @@ static u32 find_khz_freq_from_fid(u32 fid) return 1000 * find_freq_from_fid(fid); } -/* Return a frequency in MHz, given an input fid and did */ -static u32 find_freq_from_fiddid(u32 fid, u32 did) +static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate) { - if (current_cpu_data.x86 == 0x10) - return 100 * (fid + 0x10) >> did; - else - return 100 * (fid + 0x8) >> did; -} - -static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) -{ - return 1000 * find_freq_from_fiddid(fid, did); -} - -static u32 find_fid_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return lo & HW_PSTATE_FID_MASK; + return data[pstate].frequency; } -static u32 find_did_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; -} /* Return the vco fid for an input fid * @@ -142,9 +120,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) if (cpu_family == CPU_HW_PSTATE) { rdmsr(MSR_PSTATE_STATUS, lo, hi); i = lo & HW_PSTATE_MASK; - rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); - data->currfid = lo & HW_PSTATE_FID_MASK; - data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + data->currpstate = i; return 0; } do { @@ -295,7 +271,7 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, static int transition_pstate(struct powernow_k8_data *data, u32 pstate) { wrmsr(MSR_PSTATE_CTRL, pstate, 0); - data->currfid = find_fid_from_pstate(pstate); + data->currpstate = pstate; return 0; } @@ -845,17 +821,20 @@ err_out: static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) { int i; + u32 hi = 0, lo = 0; + rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); + data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; for (i = 0; i < data->acpi_data.state_count; i++) { u32 index; u32 hi = 0, lo = 0; - u32 fid; - u32 did; index = data->acpi_data.states[i].control & HW_PSTATE_MASK; - if (index > MAX_HW_PSTATE) { + if (index > data->max_hw_pstate) { printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; } rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); if (!(hi & HW_PSTATE_VALID_MASK)) { @@ -864,22 +843,9 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf continue; } - fid = lo & HW_PSTATE_FID_MASK; - did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + powernow_table[i].index = index; - dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); - - powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); - - powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); - - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } + powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; } return 0; } @@ -1020,22 +986,18 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i /* Take a frequency, and issue the hardware pstate transition command */ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) { - u32 fid = 0; - u32 did = 0; u32 pstate = 0; int res, i; struct cpufreq_freqs freqs; dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - /* get fid did for hardware pstate transition */ + /* get MSR index for hardware pstate transition */ pstate = index & HW_PSTATE_MASK; - if (pstate > MAX_HW_PSTATE) + if (pstate > data->max_hw_pstate) return 0; - fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; - did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; - freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); - freqs.new = find_khz_freq_from_fiddid(fid, did); + freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; @@ -1043,9 +1005,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i } res = transition_pstate(data, pstate); - data->currfid = find_fid_from_pstate(pstate); - data->currdid = find_did_from_pstate(pstate); - freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); + freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; @@ -1090,10 +1050,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi if (query_current_values_with_pending_wait(data)) goto err_out; - if (cpu_family == CPU_HW_PSTATE) - dprintk("targ: curr fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); - else { + if (cpu_family != CPU_HW_PSTATE) { dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1124,7 +1081,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi mutex_unlock(&fidvid_mutex); if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate); else pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -1223,7 +1180,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) + (3 * (1 << data->irt) * 10)) * 1000; if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else pol->cur = find_khz_freq_from_fid(data->currfid); dprintk("policy current frequency %d kHz\n", pol->cur); @@ -1240,8 +1197,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); + dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate); else dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1297,7 +1253,7 @@ static unsigned int powernowk8_get (unsigned int cpu) goto out; if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); + khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else khz = find_khz_freq_from_fid(data->currfid); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index b06c812208c..8ae88f18128 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -10,6 +10,7 @@ struct powernow_k8_data { u32 numps; /* number of p-states */ u32 batps; /* number of p-states supported on battery */ + u32 max_hw_pstate; /* maximum legal hardware pstate */ /* these values are constant when the PSB is used to determine * vid/fid pairings, but are modified during the ->target() call @@ -21,8 +22,8 @@ struct powernow_k8_data { u32 plllock; /* pll lock time, units 1 us */ u32 exttype; /* extended interface = 1 */ - /* keep track of the current fid / vid or did */ - u32 currvid, currfid, currdid; + /* keep track of the current fid / vid or pstate */ + u32 currvid, currfid, currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. @@ -87,23 +88,14 @@ struct powernow_k8_data { /* Hardware Pstate _PSS and MSR definitions */ #define USE_HW_PSTATE 0x00000080 -#define HW_PSTATE_FID_MASK 0x0000003f -#define HW_PSTATE_DID_MASK 0x000001c0 -#define HW_PSTATE_DID_SHIFT 6 #define HW_PSTATE_MASK 0x00000007 #define HW_PSTATE_VALID_MASK 0x80000000 -#define HW_FID_INDEX_SHIFT 8 -#define HW_FID_INDEX_MASK 0x0000ff00 -#define HW_DID_INDEX_SHIFT 16 -#define HW_DID_INDEX_MASK 0x00ff0000 -#define HW_WATTS_MASK 0xff -#define HW_PWR_DVR_MASK 0x300 -#define HW_PWR_DVR_SHIFT 8 -#define HW_PWR_MAX_MULT 3 -#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ +#define HW_PSTATE_MAX_MASK 0x000000f0 +#define HW_PSTATE_MAX_SHIFT 4 #define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ #define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ #define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ +#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */ /* define the two driver architectures */ #define CPU_OPTERON 0 -- cgit v1.2.3 From a8d7c3bc2396aff14f9e920677072cb55b016040 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Mon, 22 Oct 2007 09:50:13 +0200 Subject: [CPUFREQ] Make cpufreq_conservative handle out-of-sync events properly Make cpufreq_conservative handle out-of-sync events properly Currently, the cpufreq_conservative governor doesn't get notified when the actual frequency the cpu is running at differs from what cpufreq thought it was. As a result the cpu may stay at the maximum frequency after a s2ram / resume cycle even though the system is idle. Signed-off-by: Elias Oltmanns Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4bd33ce8a6f..57d02e990af 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -116,6 +116,27 @@ static inline unsigned int get_cpu_idle_time(unsigned int cpu) return ret; } +/* keep track of frequency transitions */ +static int +dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, + freq->cpu); + + if (!this_dbs_info->enable) + return 0; + + this_dbs_info->requested_freq = freq->new; + + return 0; +} + +static struct notifier_block dbs_cpufreq_notifier_block = { + .notifier_call = dbs_cpufreq_notifier +}; + /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { @@ -511,6 +532,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_tuners_ins.sampling_rate = def_sampling_rate; dbs_timer_init(); + cpufreq_register_notifier( + &dbs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); } mutex_unlock(&dbs_mutex); @@ -525,9 +549,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, * Stop the timerschedule work, when this governor * is used for first time */ - if (dbs_enable == 0) + if (dbs_enable == 0) { dbs_timer_exit(); - + cpufreq_unregister_notifier( + &dbs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + mutex_unlock(&dbs_mutex); break; -- cgit v1.2.3 From 18a7247d1bb2e2dcbab628d7e786d03df5bf1eed Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 22 Oct 2007 16:49:09 -0400 Subject: [CPUFREQ] Fix up whitespace in conservative governor. Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 121 ++++++++++++++++----------------- 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 57d02e990af..1bba99747f5 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -37,17 +37,17 @@ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) -/* - * The polling frequency of this governor depends on the capability of +/* + * The polling frequency of this governor depends on the capability of * the processor. Default polling frequency is 1000 times the transition - * latency of the processor. The governor will work on any processor with - * transition latency <= 10mS, using appropriate sampling + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling * rate. * For CPUs with transition latency > 10mS (mostly drivers * with CPUFREQ_ETERNAL), this governor will not work. * All times here are in uS. */ -static unsigned int def_sampling_rate; +static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) /* for correct statistics, we need at least 10 ticks between each measure */ #define MIN_STAT_SAMPLING_RATE \ @@ -63,12 +63,12 @@ static unsigned int def_sampling_rate; static void do_dbs_timer(struct work_struct *work); struct cpu_dbs_info_s { - struct cpufreq_policy *cur_policy; - unsigned int prev_cpu_idle_up; - unsigned int prev_cpu_idle_down; - unsigned int enable; - unsigned int down_skip; - unsigned int requested_freq; + struct cpufreq_policy *cur_policy; + unsigned int prev_cpu_idle_up; + unsigned int prev_cpu_idle_down; + unsigned int enable; + unsigned int down_skip; + unsigned int requested_freq; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); @@ -82,24 +82,24 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */ * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock * is recursive for the same process. -Venki */ -static DEFINE_MUTEX (dbs_mutex); +static DEFINE_MUTEX (dbs_mutex); static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer); struct dbs_tuners { - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; - unsigned int down_threshold; - unsigned int ignore_nice; - unsigned int freq_step; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int down_threshold; + unsigned int ignore_nice; + unsigned int freq_step; }; static struct dbs_tuners dbs_tuners_ins = { - .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, - .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, - .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, - .ignore_nice = 0, - .freq_step = 5, + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .ignore_nice = 0, + .freq_step = 5, }; static inline unsigned int get_cpu_idle_time(unsigned int cpu) @@ -109,7 +109,7 @@ static inline unsigned int get_cpu_idle_time(unsigned int cpu) if (dbs_tuners_ins.ignore_nice) add_nice = kstat_cpu(cpu).cpustat.nice; - ret = kstat_cpu(cpu).cpustat.idle + + ret = kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait + add_nice; @@ -148,8 +148,8 @@ static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); } -#define define_one_ro(_name) \ -static struct freq_attr _name = \ +#define define_one_ro(_name) \ +static struct freq_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) define_one_ro(sampling_rate_max); @@ -169,7 +169,7 @@ show_one(down_threshold, down_threshold); show_one(ignore_nice_load, ignore_nice); show_one(freq_step, freq_step); -static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, +static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, const char *buf, size_t count) { unsigned int input; @@ -185,7 +185,7 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, return count; } -static ssize_t store_sampling_rate(struct cpufreq_policy *unused, +static ssize_t store_sampling_rate(struct cpufreq_policy *unused, const char *buf, size_t count) { unsigned int input; @@ -204,7 +204,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, return count; } -static ssize_t store_up_threshold(struct cpufreq_policy *unused, +static ssize_t store_up_threshold(struct cpufreq_policy *unused, const char *buf, size_t count) { unsigned int input; @@ -223,7 +223,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_down_threshold(struct cpufreq_policy *unused, +static ssize_t store_down_threshold(struct cpufreq_policy *unused, const char *buf, size_t count) { unsigned int input; @@ -249,16 +249,16 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, int ret; unsigned int j; - - ret = sscanf (buf, "%u", &input); - if ( ret != 1 ) + + ret = sscanf(buf, "%u", &input); + if (ret != 1) return -EINVAL; - if ( input > 1 ) + if (input > 1) input = 1; - + mutex_lock(&dbs_mutex); - if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */ + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ mutex_unlock(&dbs_mutex); return count; } @@ -282,14 +282,14 @@ static ssize_t store_freq_step(struct cpufreq_policy *policy, unsigned int input; int ret; - ret = sscanf (buf, "%u", &input); + ret = sscanf(buf, "%u", &input); - if ( ret != 1 ) + if (ret != 1) return -EINVAL; - if ( input > 100 ) + if (input > 100) input = 100; - + /* no need to test here if freq_step is zero as the user might actually * want this, they would be crazy though :) */ mutex_lock(&dbs_mutex); @@ -343,18 +343,18 @@ static void dbs_check_cpu(int cpu) policy = this_dbs_info->cur_policy; - /* - * The default safe range is 20% to 80% + /* + * The default safe range is 20% to 80% * Every sampling_rate, we check - * - If current idle time is less than 20%, then we try to - * increase frequency + * - If current idle time is less than 20%, then we try to + * increase frequency * Every sampling_rate*sampling_down_factor, we check - * - If current idle time is more than 80%, then we try to - * decrease frequency + * - If current idle time is more than 80%, then we try to + * decrease frequency * - * Any frequency increase takes it to the maximum frequency. - * Frequency reduction happens at minimum steps of - * 5% (default) of max_frequency + * Any frequency increase takes it to the maximum frequency. + * Frequency reduction happens at minimum steps of + * 5% (default) of max_frequency */ /* Check for frequency increase */ @@ -382,13 +382,13 @@ static void dbs_check_cpu(int cpu) /* if we are already at full speed then break out early */ if (this_dbs_info->requested_freq == policy->max) return; - + freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; /* max freq cannot be less than 100. But who knows.... */ if (unlikely(freq_step == 0)) freq_step = 5; - + this_dbs_info->requested_freq += freq_step; if (this_dbs_info->requested_freq > policy->max) this_dbs_info->requested_freq = policy->max; @@ -448,15 +448,15 @@ static void dbs_check_cpu(int cpu) } static void do_dbs_timer(struct work_struct *work) -{ +{ int i; mutex_lock(&dbs_mutex); for_each_online_cpu(i) dbs_check_cpu(i); - schedule_delayed_work(&dbs_work, + schedule_delayed_work(&dbs_work, usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); mutex_unlock(&dbs_mutex); -} +} static inline void dbs_timer_init(void) { @@ -483,13 +483,12 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_START: - if ((!cpu_online(cpu)) || - (!policy->cur)) + if ((!cpu_online(cpu)) || (!policy->cur)) return -EINVAL; if (this_dbs_info->enable) /* Already enabled */ break; - + mutex_lock(&dbs_mutex); rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); @@ -502,7 +501,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; - + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu); j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; @@ -536,7 +535,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, &dbs_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } - + mutex_unlock(&dbs_mutex); break; @@ -565,11 +564,11 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (policy->max < this_dbs_info->cur_policy->cur) __cpufreq_driver_target( this_dbs_info->cur_policy, - policy->max, CPUFREQ_RELATION_H); + policy->max, CPUFREQ_RELATION_H); else if (policy->min > this_dbs_info->cur_policy->cur) __cpufreq_driver_target( this_dbs_info->cur_policy, - policy->min, CPUFREQ_RELATION_L); + policy->min, CPUFREQ_RELATION_L); mutex_unlock(&dbs_mutex); break; } -- cgit v1.2.3 From 6d6a54d1e4c2c6fbccd128ab4e6c8817db124866 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 11 Nov 2007 10:55:25 +0000 Subject: [ARM] remove useless setting of VM_RESERVED remap_pfn_range() takes care of setting the appropriate VM_* flags itself; there's no need for callers of remap_pfn_range() to set VM_RESERVED before it is called. Signed-off-by: Russell King --- arch/arm/mm/consistent.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index cefdf2f9f26..333a82a3717 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c @@ -322,7 +322,6 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, if (off < kern_size && user_size <= (kern_size - off)) { - vma->vm_flags |= VM_RESERVED; ret = remap_pfn_range(vma, vma->vm_start, page_to_pfn(c->vm_pages) + off, user_size << PAGE_SHIFT, -- cgit v1.2.3 From 2e21630ddc3fb717dc645356b75771c6a52dc627 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Sat, 10 Nov 2007 19:37:49 +0800 Subject: [CRYPTO] geode: Fix not inplace encryption Currently the Geode AES module fails to encrypt or decrypt if the coherent bits are not set what is currently the case if the encryption does not occur inplace. However, the encryption works on my Geode machine _only_ if the coherent bits are always set. Signed-off-by: Sebastian Siewior Acked-by: Jordan Crouse Signed-off-by: Herbert Xu --- drivers/crypto/geode-aes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c index f9a34abbf4f..711e246e1ef 100644 --- a/drivers/crypto/geode-aes.c +++ b/drivers/crypto/geode-aes.c @@ -110,8 +110,7 @@ geode_aes_crypt(struct geode_aes_op *op) * we don't need to worry */ - if (op->src == op->dst) - flags |= (AES_CTRL_DCA | AES_CTRL_SCA); + flags |= (AES_CTRL_DCA | AES_CTRL_SCA); if (op->dir == AES_DIR_ENCRYPT) flags |= AES_CTRL_ENCRYPT; -- cgit v1.2.3 From e8ef95227a66284e070d255f902e2e4139fd141a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 6 Nov 2007 19:35:19 +0100 Subject: x86: unification of cfufreq/Kconfig Merge the two Kconfig files to a single file. Checked using make allmodconfig for x86_64. No changes in build. Signed-off-by: Sam Ravnborg Cc: Adrian Bunk Cc: Dave Jones --- arch/x86/Kconfig.i386 | 2 +- arch/x86/Kconfig.x86_64 | 2 +- arch/x86/kernel/cpu/cpufreq/Kconfig | 275 +++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/cpufreq/Kconfig_32 | 250 ------------------------------ arch/x86/kernel/cpu/cpufreq/Kconfig_64 | 108 ------------- 5 files changed, 277 insertions(+), 360 deletions(-) create mode 100644 arch/x86/kernel/cpu/cpufreq/Kconfig delete mode 100644 arch/x86/kernel/cpu/cpufreq/Kconfig_32 delete mode 100644 arch/x86/kernel/cpu/cpufreq/Kconfig_64 diff --git a/arch/x86/Kconfig.i386 b/arch/x86/Kconfig.i386 index 7331efe891a..b6f2fd0e443 100644 --- a/arch/x86/Kconfig.i386 +++ b/arch/x86/Kconfig.i386 @@ -1092,7 +1092,7 @@ config APM_REAL_MODE_POWER_OFF endif # APM -source "arch/x86/kernel/cpu/cpufreq/Kconfig_32" +source "arch/x86/kernel/cpu/cpufreq/Kconfig" source "drivers/cpuidle/Kconfig" diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64 index cc468ea6124..8d6b5342544 100644 --- a/arch/x86/Kconfig.x86_64 +++ b/arch/x86/Kconfig.x86_64 @@ -719,7 +719,7 @@ config ARCH_HIBERNATION_HEADER source "drivers/acpi/Kconfig" -source "arch/x86/kernel/cpu/cpufreq/Kconfig_64" +source "arch/x86/kernel/cpu/cpufreq/Kconfig" source "drivers/cpuidle/Kconfig" diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig new file mode 100644 index 00000000000..151eda0a23f --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -0,0 +1,275 @@ +# +# CPU Frequency scaling +# + +menu "CPU Frequency scaling" + +source "drivers/cpufreq/Kconfig" + +if CPU_FREQ + +comment "CPUFreq processor drivers" + +config X86_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. + This driver also supports Intel Enhanced Speedstep. + + To compile this driver as a module, choose M here: the + module will be called acpi-cpufreq. + + For details, take a look at . + + If in doubt, say N. + +config ELAN_CPUFREQ + tristate "AMD Elan SC400 and SC410" + select CPU_FREQ_TABLE + depends on X86_32 && X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC400 and SC410 + processors. + + You need to specify the processor maximum speed as boot + parameter: elanfreq=maxspeed (in kHz) or as module + parameter "max_freq". + + For details, take a look at . + + If in doubt, say N. + +config SC520_CPUFREQ + tristate "AMD Elan SC520" + select CPU_FREQ_TABLE + depends on X86_32 && X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC520 processor. + + For details, take a look at . + + If in doubt, say N. + + +config X86_POWERNOW_K6 + tristate "AMD Mobile K6-2/K6-3 PowerNow!" + select CPU_FREQ_TABLE + depends on X86_32 + help + This adds the CPUFreq driver for mobile AMD K6-2+ and mobile + AMD K6-3+ processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K7 + tristate "AMD Mobile Athlon/Duron PowerNow!" + select CPU_FREQ_TABLE + depends on X86_32 + help + This adds the CPUFreq driver for mobile AMD K7 mobile processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K7_ACPI + bool + depends on X86_POWERNOW_K7 && ACPI_PROCESSOR + depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) + depends on X86_32 + default y + +config X86_POWERNOW_K8 + tristate "AMD Opteron/Athlon64 PowerNow!" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + + To compile this driver as a module, choose M here: the + module will be called powernow-k8. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K8_ACPI + bool + prompt "ACPI Support" if X86_32 + depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR + depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) + default y + help + This provides access to the K8s Processor Performance States via ACPI. + This driver is probably required for CPUFreq to work with multi-socket and + SMP systems. It is not required on at least some single-socket yet + multi-core systems, even if SMP is enabled. + + It is safe to say Y here. + +config X86_GX_SUSPMOD + tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" + depends on X86_32 && PCI + help + This add the CPUFreq driver for NatSemi Geode processors which + support suspend modulation. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_CENTRINO + tristate "Intel Enhanced SpeedStep (deprecated)" + select CPU_FREQ_TABLE + select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32 + depends on X86_32 || (X86_64 && ACPI_PROCESSOR) + help + This is deprecated and this functionality is now merged into + acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of + speedstep_centrino. + This adds the CPUFreq driver for Enhanced SpeedStep enabled + mobile CPUs. This means Intel Pentium M (Centrino) CPUs + or 64bit enabled Intel Xeons. + + To compile this driver as a module, choose M here: the + module will be called speedstep-centrino. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_CENTRINO_TABLE + bool "Built-in tables for Banias CPUs" + depends on X86_32 && X86_SPEEDSTEP_CENTRINO + default y + help + Use built-in tables for Banias CPUs if ACPI encoding + is not available. + + If in doubt, say N. + +config X86_SPEEDSTEP_ICH + tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" + select CPU_FREQ_TABLE + depends on X86_32 + help + This adds the CPUFreq driver for certain mobile Intel Pentium III + (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all + mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, + ICH3 or ICH4 southbridge. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_SMI + tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" + select CPU_FREQ_TABLE + depends on X86_32 && EXPERIMENTAL + help + This adds the CPUFreq driver for certain mobile Intel Pentium III + (Coppermine), all mobile Intel Pentium III-M (Tualatin) + on systems which have an Intel 440BX/ZX/MX southbridge. + + For details, take a look at . + + If in doubt, say N. + +config X86_P4_CLOCKMOD + tristate "Intel Pentium 4 clock modulation" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for Intel Pentium 4 / XEON + processors. When enabled it will lower CPU temperature by skipping + clocks. + + This driver should be only used in exceptional + circumstances when very low power is needed because it causes severe + slowdowns and noticeable latencies. Normally Speedstep should be used + instead. + + To compile this driver as a module, choose M here: the + module will be called p4-clockmod. + + For details, take a look at . + + Unless you are absolutely sure say N. + +config X86_CPUFREQ_NFORCE2 + tristate "nVidia nForce2 FSB changing" + depends on X86_32 && EXPERIMENTAL + help + This adds the CPUFreq driver for FSB changing on nVidia nForce2 + platforms. + + For details, take a look at . + + If in doubt, say N. + +config X86_LONGRUN + tristate "Transmeta LongRun" + depends on X86_32 + help + This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors + which support LongRun. + + For details, take a look at . + + If in doubt, say N. + +config X86_LONGHAUL + tristate "VIA Cyrix III Longhaul" + select CPU_FREQ_TABLE + depends on X86_32 && ACPI_PROCESSOR + help + This adds the CPUFreq driver for VIA Samuel/CyrixIII, + VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T + processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_E_POWERSAVER + tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" + select CPU_FREQ_TABLE + depends on X86_32 && EXPERIMENTAL + help + This adds the CPUFreq driver for VIA C7 processors. + + If in doubt, say N. + +comment "shared options" + +config X86_ACPI_CPUFREQ_PROC_INTF + bool "/proc/acpi/processor/../performance interface (deprecated)" + depends on PROC_FS + depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI + help + This enables the deprecated /proc/acpi/processor/../performance + interface. While it is helpful for debugging, the generic, + cross-architecture cpufreq interfaces should be used. + + If in doubt, say N. + +config X86_SPEEDSTEP_LIB + tristate + default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) + +config X86_SPEEDSTEP_RELAXED_CAP_CHECK + bool "Relaxed speedstep capability checks" + depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) + help + Don't perform all checks for a speedstep capable system which would + normally be done. Some ancient or strange systems, though speedstep + capable, don't always indicate that they are speedstep capable. This + option lets the probing code bypass some of those checks if the + parameter "relaxed_check=1" is passed to the module. + +endif # CPU_FREQ + +endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig_32 b/arch/x86/kernel/cpu/cpufreq/Kconfig_32 deleted file mode 100644 index d8c6f132dc7..00000000000 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig_32 +++ /dev/null @@ -1,250 +0,0 @@ -# -# CPU Frequency scaling -# - -menu "CPU Frequency scaling" - -source "drivers/cpufreq/Kconfig" - -if CPU_FREQ - -comment "CPUFreq processor drivers" - -config X86_ACPI_CPUFREQ - tristate "ACPI Processor P-States driver" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This driver adds a CPUFreq driver which utilizes the ACPI - Processor Performance States. - This driver also supports Intel Enhanced Speedstep. - - For details, take a look at . - - If in doubt, say N. - -config ELAN_CPUFREQ - tristate "AMD Elan SC400 and SC410" - select CPU_FREQ_TABLE - depends on X86_ELAN - ---help--- - This adds the CPUFreq driver for AMD Elan SC400 and SC410 - processors. - - You need to specify the processor maximum speed as boot - parameter: elanfreq=maxspeed (in kHz) or as module - parameter "max_freq". - - For details, take a look at . - - If in doubt, say N. - -config SC520_CPUFREQ - tristate "AMD Elan SC520" - select CPU_FREQ_TABLE - depends on X86_ELAN - ---help--- - This adds the CPUFreq driver for AMD Elan SC520 processor. - - For details, take a look at . - - If in doubt, say N. - - -config X86_POWERNOW_K6 - tristate "AMD Mobile K6-2/K6-3 PowerNow!" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for mobile AMD K6-2+ and mobile - AMD K6-3+ processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K7 - tristate "AMD Mobile Athlon/Duron PowerNow!" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for mobile AMD K7 mobile processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K7_ACPI - bool - depends on X86_POWERNOW_K7 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) - default y - -config X86_POWERNOW_K8 - tristate "AMD Opteron/Athlon64 PowerNow!" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K8_ACPI - bool "ACPI Support" - select ACPI_PROCESSOR - depends on ACPI && X86_POWERNOW_K8 - default y - help - This provides access to the K8s Processor Performance States via ACPI. - This driver is probably required for CPUFreq to work with multi-socket and - SMP systems. It is not required on at least some single-socket yet - multi-core systems, even if SMP is enabled. - - It is safe to say Y here. - -config X86_GX_SUSPMOD - tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" - depends on PCI - help - This add the CPUFreq driver for NatSemi Geode processors which - support suspend modulation. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep" - select CPU_FREQ_TABLE - select X86_SPEEDSTEP_CENTRINO_TABLE - help - This adds the CPUFreq driver for Enhanced SpeedStep enabled - mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, - you also need to say Y to "Use ACPI tables to decode..." below - [which might imply enabling ACPI] if you want to use this driver - on non-Banias CPUs. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_CENTRINO_TABLE - bool "Built-in tables for Banias CPUs" - depends on X86_SPEEDSTEP_CENTRINO - default y - help - Use built-in tables for Banias CPUs if ACPI encoding - is not available. - - If in doubt, say N. - -config X86_SPEEDSTEP_ICH - tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all - mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, - ICH3 or ICH4 southbridge. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_SMI - tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) - on systems which have an Intel 440BX/ZX/MX southbridge. - - For details, take a look at . - - If in doubt, say N. - -config X86_P4_CLOCKMOD - tristate "Intel Pentium 4 clock modulation" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for Intel Pentium 4 / XEON - processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_CPUFREQ_NFORCE2 - tristate "nVidia nForce2 FSB changing" - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for FSB changing on nVidia nForce2 - platforms. - - For details, take a look at . - - If in doubt, say N. - -config X86_LONGRUN - tristate "Transmeta LongRun" - help - This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors - which support LongRun. - - For details, take a look at . - - If in doubt, say N. - -config X86_LONGHAUL - tristate "VIA Cyrix III Longhaul" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This adds the CPUFreq driver for VIA Samuel/CyrixIII, - VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T - processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for VIA C7 processors. - - If in doubt, say N. - -comment "shared options" - -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. - -config X86_SPEEDSTEP_LIB - tristate - default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD - -config X86_SPEEDSTEP_RELAXED_CAP_CHECK - bool "Relaxed speedstep capability checks" - depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) - help - Don't perform all checks for a speedstep capable system which would - normally be done. Some ancient or strange systems, though speedstep - capable, don't always indicate that they are speedstep capable. This - option lets the probing code bypass some of those checks if the - parameter "relaxed_check=1" is passed to the module. - -endif # CPU_FREQ - -endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig_64 b/arch/x86/kernel/cpu/cpufreq/Kconfig_64 deleted file mode 100644 index 9c9699fdcf5..00000000000 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig_64 +++ /dev/null @@ -1,108 +0,0 @@ -# -# CPU Frequency scaling -# - -menu "CPU Frequency scaling" - -source "drivers/cpufreq/Kconfig" - -if CPU_FREQ - -comment "CPUFreq processor drivers" - -config X86_POWERNOW_K8 - tristate "AMD Opteron/Athlon64 PowerNow!" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. - - To compile this driver as a module, choose M here: the - module will be called powernow-k8. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K8_ACPI - bool - depends on X86_POWERNOW_K8 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) - default y - -config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep (deprecated)" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This is deprecated and this functionality is now merged into - acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of - speedstep_centrino. - This adds the CPUFreq driver for Enhanced SpeedStep enabled - mobile CPUs. This means Intel Pentium M (Centrino) CPUs - or 64bit enabled Intel Xeons. - - To compile this driver as a module, choose M here: the - module will be called speedstep-centrino. - - For details, take a look at . - - If in doubt, say N. - -config X86_ACPI_CPUFREQ - tristate "ACPI Processor P-States driver" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This driver adds a CPUFreq driver which utilizes the ACPI - Processor Performance States. - This driver also supports Intel Enhanced Speedstep. - - To compile this driver as a module, choose M here: the - module will be called acpi-cpufreq. - - For details, take a look at . - - If in doubt, say N. - -comment "shared options" - -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. - -config X86_P4_CLOCKMOD - tristate "Intel Pentium 4 clock modulation" - depends on EMBEDDED - select CPU_FREQ_TABLE - help - This adds the clock modulation driver for Intel Pentium 4 / XEON - processors. When enabled it will lower CPU temperature by skipping - clocks. - - This driver should be only used in exceptional - circumstances when very low power is needed because it causes severe - slowdowns and noticeable latencies. Normally Speedstep should be used - instead. - - To compile this driver as a module, choose M here: the - module will be called p4-clockmod. - - For details, take a look at . - - Unless you are absolutely sure say N. - - -config X86_SPEEDSTEP_LIB - tristate - default X86_P4_CLOCKMOD - -endif - -endmenu -- cgit v1.2.3 From e279b6c1d329e50b766bce96aacc197eae8a053b Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 6 Nov 2007 20:41:05 +0100 Subject: x86: start unification of arch/x86/Kconfig.* This step introduces the file arch/x86/Kconfig which contains all the menu's from "Power Management" and below. The main part of the new Kconfig file is shared and the remaining i386/x86_64 specific symbols are covered by dependencies. A x86_64 allmodconfig build did not show any differences. Signed-off-by: Sam Ravnborg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig | 401 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/Kconfig.i386 | 326 +-------------------------------------- arch/x86/Kconfig.x86_64 | 135 +--------------- 3 files changed, 404 insertions(+), 458 deletions(-) create mode 100644 arch/x86/Kconfig diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig new file mode 100644 index 00000000000..d1382c51295 --- /dev/null +++ b/arch/x86/Kconfig @@ -0,0 +1,401 @@ +menu "Power management options" + depends on !X86_VOYAGER + +config ARCH_HIBERNATION_HEADER + bool + depends on X86_64 && HIBERNATION + default y + +source "kernel/power/Kconfig" + +source "drivers/acpi/Kconfig" + +menuconfig APM + tristate "APM (Advanced Power Management) BIOS support" + depends on X86_32 && PM_SLEEP && !X86_VISWS + ---help--- + APM is a BIOS specification for saving power using several different + techniques. This is mostly useful for battery powered laptops with + APM compliant BIOSes. If you say Y here, the system time will be + reset after a RESUME operation, the /proc/apm device will provide + battery status information, and user-space programs will receive + notification of APM "events" (e.g. battery status change). + + If you select "Y" here, you can disable actual use of the APM + BIOS by passing the "apm=off" option to the kernel at boot time. + + Note that the APM support is almost completely disabled for + machines with more than one CPU. + + In order to use APM, you will need supporting software. For location + and more information, read and the + Battery Powered Linux mini-HOWTO, available from + . + + This driver does not spin down disk drives (see the hdparm(8) + manpage ("man 8 hdparm") for that), and it doesn't turn off + VESA-compliant "green" monitors. + + This driver does not support the TI 4000M TravelMate and the ACER + 486/DX4/75 because they don't have compliant BIOSes. Many "green" + desktop machines also don't have compliant BIOSes, and this driver + may cause those machines to panic during the boot phase. + + Generally, if you don't have a battery in your machine, there isn't + much point in using this driver and you should say N. If you get + random kernel OOPSes or reboots that don't seem to be related to + anything, try disabling/enabling this option (or disabling/enabling + APM in your BIOS). + + Some other things you should try when experiencing seemingly random, + "weird" problems: + + 1) make sure that you have enough swap space and that it is + enabled. + 2) pass the "no-hlt" option to the kernel + 3) switch on floating point emulation in the kernel and pass + the "no387" option to the kernel + 4) pass the "floppy=nodma" option to the kernel + 5) pass the "mem=4M" option to the kernel (thereby disabling + all but the first 4 MB of RAM) + 6) make sure that the CPU is not over clocked. + 7) read the sig11 FAQ at + 8) disable the cache from your BIOS settings + 9) install a fan for the video card or exchange video RAM + 10) install a better fan for the CPU + 11) exchange RAM chips + 12) exchange the motherboard. + + To compile this driver as a module, choose M here: the + module will be called apm. + +if APM + +config APM_IGNORE_USER_SUSPEND + bool "Ignore USER SUSPEND" + help + This option will ignore USER SUSPEND requests. On machines with a + compliant APM BIOS, you want to say N. However, on the NEC Versa M + series notebooks, it is necessary to say Y because of a BIOS bug. + +config APM_DO_ENABLE + bool "Enable PM at boot time" + ---help--- + Enable APM features at boot time. From page 36 of the APM BIOS + specification: "When disabled, the APM BIOS does not automatically + power manage devices, enter the Standby State, enter the Suspend + State, or take power saving steps in response to CPU Idle calls." + This driver will make CPU Idle calls when Linux is idle (unless this + feature is turned off -- see "Do CPU IDLE calls", below). This + should always save battery power, but more complicated APM features + will be dependent on your BIOS implementation. You may need to turn + this option off if your computer hangs at boot time when using APM + support, or if it beeps continuously instead of suspending. Turn + this off if you have a NEC UltraLite Versa 33/C or a Toshiba + T400CDT. This is off by default since most machines do fine without + this feature. + +config APM_CPU_IDLE + bool "Make CPU Idle calls when idle" + help + Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. + On some machines, this can activate improved power savings, such as + a slowed CPU clock rate, when the machine is idle. These idle calls + are made after the idle loop has run for some length of time (e.g., + 333 mS). On some machines, this will cause a hang at boot time or + whenever the CPU becomes idle. (On machines with more than one CPU, + this option does nothing.) + +config APM_DISPLAY_BLANK + bool "Enable console blanking using APM" + help + Enable console blanking using the APM. Some laptops can use this to + turn off the LCD backlight when the screen blanker of the Linux + virtual console blanks the screen. Note that this is only used by + the virtual console screen blanker, and won't turn off the backlight + when using the X Window system. This also doesn't have anything to + do with your VESA-compliant power-saving monitor. Further, this + option doesn't work for all laptops -- it might not turn off your + backlight at all, or it might print a lot of errors to the console, + especially if you are using gpm. + +config APM_ALLOW_INTS + bool "Allow interrupts during APM BIOS calls" + help + Normally we disable external interrupts while we are making calls to + the APM BIOS as a measure to lessen the effects of a badly behaving + BIOS implementation. The BIOS should reenable interrupts if it + needs to. Unfortunately, some BIOSes do not -- especially those in + many of the newer IBM Thinkpads. If you experience hangs when you + suspend, try setting this to Y. Otherwise, say N. + +config APM_REAL_MODE_POWER_OFF + bool "Use real mode APM BIOS call to power off" + help + Use real mode APM BIOS calls to switch off the computer. This is + a work-around for a number of buggy BIOSes. Switch this option on if + your computer crashes instead of powering off properly. + +endif # APM + +source "arch/x86/kernel/cpu/cpufreq/Kconfig" + +source "drivers/cpuidle/Kconfig" + +endmenu + + +menu "Bus options (PCI etc.)" + +config PCI + bool "PCI support" if !X86_VISWS + depends on !X86_VOYAGER + default y if X86_VISWS + select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) + help + Find out whether you have a PCI motherboard. PCI is the name of a + bus system, i.e. the way the CPU talks to the other stuff inside + your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or + VESA. If you have PCI, say Y, otherwise N. + + The PCI-HOWTO, available from + , contains valuable + information about which PCI hardware does work under Linux and which + doesn't. + +choice + prompt "PCI access mode" + depends on X86_32 && PCI && !X86_VISWS + default PCI_GOANY + ---help--- + On PCI systems, the BIOS can be used to detect the PCI devices and + determine their configuration. However, some old PCI motherboards + have BIOS bugs and may crash if this is done. Also, some embedded + PCI-based systems don't have any BIOS at all. Linux can also try to + detect the PCI hardware directly without using the BIOS. + + With this option, you can specify how Linux should detect the + PCI devices. If you choose "BIOS", the BIOS will be used, + if you choose "Direct", the BIOS won't be used, and if you + choose "MMConfig", then PCI Express MMCONFIG will be used. + If you choose "Any", the kernel will try MMCONFIG, then the + direct access method and falls back to the BIOS if that doesn't + work. If unsure, go with the default, which is "Any". + +config PCI_GOBIOS + bool "BIOS" + +config PCI_GOMMCONFIG + bool "MMConfig" + +config PCI_GODIRECT + bool "Direct" + +config PCI_GOANY + bool "Any" + +endchoice + +config PCI_BIOS + bool + depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) + default y + +# x86-64 doesn't support PCI BIOS access from long mode so always go direct. +config PCI_DIRECT + bool + depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS) + default y + +config PCI_MMCONFIG + bool + depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) + default y + +config PCI_DOMAINS + bool + depends on PCI + default y + +config PCI_MMCONFIG + bool "Support mmconfig PCI config space access" + depends on X86_64 && PCI && ACPI + +config DMAR + bool "Support for DMA Remapping Devices (EXPERIMENTAL)" + depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL + help + DMA remapping (DMAR) devices support enables independent address + translations for Direct Memory Access (DMA) from devices. + These DMA remapping devices are reported via ACPI tables + and include PCI device scope covered by these DMA + remapping devices. + +config DMAR_GFX_WA + bool "Support for Graphics workaround" + depends on DMAR + default y + help + Current Graphics drivers tend to use physical address + for DMA and avoid using DMA APIs. Setting this config + option permits the IOMMU driver to set a unity map for + all the OS-visible memory. Hence the driver can continue + to use physical addresses for DMA. + +config DMAR_FLOPPY_WA + bool + depends on DMAR + default y + help + Floppy disk drivers are know to bypass DMA API calls + thereby failing to work when IOMMU is enabled. This + workaround will setup a 1:1 mapping for the first + 16M to make floppy (an ISA device) work. + +source "drivers/pci/pcie/Kconfig" + +source "drivers/pci/Kconfig" + +# x86_64 have no ISA slots, but do have ISA-style DMA. +config ISA_DMA_API + bool + default y + +if X86_32 + +config ISA + bool "ISA support" + depends on !(X86_VOYAGER || X86_VISWS) + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. Other bus systems are PCI, EISA, MicroChannel + (MCA) or VESA. ISA is an older system, now being displaced by PCI; + newer boards don't support it. If you have ISA, say Y, otherwise N. + +config EISA + bool "EISA support" + depends on ISA + ---help--- + The Extended Industry Standard Architecture (EISA) bus was + developed as an open alternative to the IBM MicroChannel bus. + + The EISA bus provided some of the features of the IBM MicroChannel + bus while maintaining backward compatibility with cards made for + the older ISA bus. The EISA bus saw limited use between 1988 and + 1995 when it was made obsolete by the PCI bus. + + Say Y here if you are building a kernel for an EISA-based machine. + + Otherwise, say N. + +source "drivers/eisa/Kconfig" + +config MCA + bool "MCA support" if !(X86_VISWS || X86_VOYAGER) + default y if X86_VOYAGER + help + MicroChannel Architecture is found in some IBM PS/2 machines and + laptops. It is a bus system similar to PCI or ISA. See + (and especially the web page given + there) before attempting to build an MCA bus kernel. + +source "drivers/mca/Kconfig" + +config SCx200 + tristate "NatSemi SCx200 support" + depends on !X86_VOYAGER + help + This provides basic support for National Semiconductor's + (now AMD's) Geode processors. The driver probes for the + PCI-IDs of several on-chip devices, so its a good dependency + for other scx200_* drivers. + + If compiled as a module, the driver is named scx200. + +config SCx200HR_TIMER + tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" + depends on SCx200 && GENERIC_TIME + default y + help + This driver provides a clocksource built upon the on-chip + 27MHz high-resolution timer. Its also a workaround for + NSC Geode SC-1100's buggy TSC, which loses time when the + processor goes idle (as is done by the scheduler). The + other workaround is idle=poll boot option. + +config GEODE_MFGPT_TIMER + bool "Geode Multi-Function General Purpose Timer (MFGPT) events" + depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS + default y + help + This driver provides a clock event source based on the MFGPT + timer(s) in the CS5535 and CS5536 companion chip for the geode. + MFGPTs have a better resolution and max interval than the + generic PIT, and are suitable for use as high-res timers. + +config K8_NB + def_bool y + depends on AGP_AMD64 + +endif # X86_32 + +source "drivers/pcmcia/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +endmenu + + +menu "Executable file formats / Emulations" + +source "fs/Kconfig.binfmt" + +config IA32_EMULATION + bool "IA32 Emulation" + depends on X86_64 + help + Include code to run 32-bit programs under a 64-bit kernel. You should + likely turn this on, unless you're 100% sure that you don't have any + 32-bit programs left. + +config IA32_AOUT + tristate "IA32 a.out support" + depends on IA32_EMULATION + help + Support old a.out binaries in the 32bit emulation. + +config COMPAT + bool + depends on IA32_EMULATION + default y + +config COMPAT_FOR_U64_ALIGNMENT + def_bool COMPAT + depends on X86_64 + +config SYSVIPC_COMPAT + bool + depends on X86_64 && COMPAT && SYSVIPC + default y + +endmenu + + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "drivers/firmware/Kconfig" + +source "fs/Kconfig" + +source "kernel/Kconfig.instrumentation" + +source "arch/x86/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/x86/Kconfig.i386 b/arch/x86/Kconfig.i386 index b6f2fd0e443..9fe63f10e57 100644 --- a/arch/x86/Kconfig.i386 +++ b/arch/x86/Kconfig.i386 @@ -517,8 +517,6 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -source "drivers/firmware/Kconfig" - choice prompt "High Memory Support" default HIGHMEM4G if !X86_NUMAQ @@ -957,328 +955,6 @@ config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y depends on HIGHMEM -menu "Power management options (ACPI, APM)" - depends on !X86_VOYAGER - -source kernel/power/Kconfig - -source "drivers/acpi/Kconfig" - -menuconfig APM - tristate "APM (Advanced Power Management) BIOS support" - depends on PM_SLEEP && !X86_VISWS - ---help--- - APM is a BIOS specification for saving power using several different - techniques. This is mostly useful for battery powered laptops with - APM compliant BIOSes. If you say Y here, the system time will be - reset after a RESUME operation, the /proc/apm device will provide - battery status information, and user-space programs will receive - notification of APM "events" (e.g. battery status change). - - If you select "Y" here, you can disable actual use of the APM - BIOS by passing the "apm=off" option to the kernel at boot time. - - Note that the APM support is almost completely disabled for - machines with more than one CPU. - - In order to use APM, you will need supporting software. For location - and more information, read and the - Battery Powered Linux mini-HOWTO, available from - . - - This driver does not spin down disk drives (see the hdparm(8) - manpage ("man 8 hdparm") for that), and it doesn't turn off - VESA-compliant "green" monitors. - - This driver does not support the TI 4000M TravelMate and the ACER - 486/DX4/75 because they don't have compliant BIOSes. Many "green" - desktop machines also don't have compliant BIOSes, and this driver - may cause those machines to panic during the boot phase. - - Generally, if you don't have a battery in your machine, there isn't - much point in using this driver and you should say N. If you get - random kernel OOPSes or reboots that don't seem to be related to - anything, try disabling/enabling this option (or disabling/enabling - APM in your BIOS). - - Some other things you should try when experiencing seemingly random, - "weird" problems: - - 1) make sure that you have enough swap space and that it is - enabled. - 2) pass the "no-hlt" option to the kernel - 3) switch on floating point emulation in the kernel and pass - the "no387" option to the kernel - 4) pass the "floppy=nodma" option to the kernel - 5) pass the "mem=4M" option to the kernel (thereby disabling - all but the first 4 MB of RAM) - 6) make sure that the CPU is not over clocked. - 7) read the sig11 FAQ at - 8) disable the cache from your BIOS settings - 9) install a fan for the video card or exchange video RAM - 10) install a better fan for the CPU - 11) exchange RAM chips - 12) exchange the motherboard. - - To compile this driver as a module, choose M here: the - module will be called apm. - -if APM - -config APM_IGNORE_USER_SUSPEND - bool "Ignore USER SUSPEND" - help - This option will ignore USER SUSPEND requests. On machines with a - compliant APM BIOS, you want to say N. However, on the NEC Versa M - series notebooks, it is necessary to say Y because of a BIOS bug. - -config APM_DO_ENABLE - bool "Enable PM at boot time" - ---help--- - Enable APM features at boot time. From page 36 of the APM BIOS - specification: "When disabled, the APM BIOS does not automatically - power manage devices, enter the Standby State, enter the Suspend - State, or take power saving steps in response to CPU Idle calls." - This driver will make CPU Idle calls when Linux is idle (unless this - feature is turned off -- see "Do CPU IDLE calls", below). This - should always save battery power, but more complicated APM features - will be dependent on your BIOS implementation. You may need to turn - this option off if your computer hangs at boot time when using APM - support, or if it beeps continuously instead of suspending. Turn - this off if you have a NEC UltraLite Versa 33/C or a Toshiba - T400CDT. This is off by default since most machines do fine without - this feature. - -config APM_CPU_IDLE - bool "Make CPU Idle calls when idle" - help - Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. - On some machines, this can activate improved power savings, such as - a slowed CPU clock rate, when the machine is idle. These idle calls - are made after the idle loop has run for some length of time (e.g., - 333 mS). On some machines, this will cause a hang at boot time or - whenever the CPU becomes idle. (On machines with more than one CPU, - this option does nothing.) - -config APM_DISPLAY_BLANK - bool "Enable console blanking using APM" - help - Enable console blanking using the APM. Some laptops can use this to - turn off the LCD backlight when the screen blanker of the Linux - virtual console blanks the screen. Note that this is only used by - the virtual console screen blanker, and won't turn off the backlight - when using the X Window system. This also doesn't have anything to - do with your VESA-compliant power-saving monitor. Further, this - option doesn't work for all laptops -- it might not turn off your - backlight at all, or it might print a lot of errors to the console, - especially if you are using gpm. - -config APM_ALLOW_INTS - bool "Allow interrupts during APM BIOS calls" - help - Normally we disable external interrupts while we are making calls to - the APM BIOS as a measure to lessen the effects of a badly behaving - BIOS implementation. The BIOS should reenable interrupts if it - needs to. Unfortunately, some BIOSes do not -- especially those in - many of the newer IBM Thinkpads. If you experience hangs when you - suspend, try setting this to Y. Otherwise, say N. - -config APM_REAL_MODE_POWER_OFF - bool "Use real mode APM BIOS call to power off" - help - Use real mode APM BIOS calls to switch off the computer. This is - a work-around for a number of buggy BIOSes. Switch this option on if - your computer crashes instead of powering off properly. - -endif # APM - -source "arch/x86/kernel/cpu/cpufreq/Kconfig" - -source "drivers/cpuidle/Kconfig" - -endmenu - -menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" - -config PCI - bool "PCI support" if !X86_VISWS - depends on !X86_VOYAGER - default y if X86_VISWS - select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) - help - Find out whether you have a PCI motherboard. PCI is the name of a - bus system, i.e. the way the CPU talks to the other stuff inside - your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or - VESA. If you have PCI, say Y, otherwise N. - - The PCI-HOWTO, available from - , contains valuable - information about which PCI hardware does work under Linux and which - doesn't. - -choice - prompt "PCI access mode" - depends on PCI && !X86_VISWS - default PCI_GOANY - ---help--- - On PCI systems, the BIOS can be used to detect the PCI devices and - determine their configuration. However, some old PCI motherboards - have BIOS bugs and may crash if this is done. Also, some embedded - PCI-based systems don't have any BIOS at all. Linux can also try to - detect the PCI hardware directly without using the BIOS. - - With this option, you can specify how Linux should detect the - PCI devices. If you choose "BIOS", the BIOS will be used, - if you choose "Direct", the BIOS won't be used, and if you - choose "MMConfig", then PCI Express MMCONFIG will be used. - If you choose "Any", the kernel will try MMCONFIG, then the - direct access method and falls back to the BIOS if that doesn't - work. If unsure, go with the default, which is "Any". - -config PCI_GOBIOS - bool "BIOS" - -config PCI_GOMMCONFIG - bool "MMConfig" - -config PCI_GODIRECT - bool "Direct" - -config PCI_GOANY - bool "Any" - -endchoice - -config PCI_BIOS - bool - depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) - default y - -config PCI_DIRECT - bool - depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS) - default y - -config PCI_MMCONFIG - bool - depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) - default y - -config PCI_DOMAINS - bool - depends on PCI - default y - -source "drivers/pci/pcie/Kconfig" - -source "drivers/pci/Kconfig" - -config ISA_DMA_API - bool - default y - -config ISA - bool "ISA support" - depends on !(X86_VOYAGER || X86_VISWS) - help - Find out whether you have ISA slots on your motherboard. ISA is the - name of a bus system, i.e. the way the CPU talks to the other stuff - inside your box. Other bus systems are PCI, EISA, MicroChannel - (MCA) or VESA. ISA is an older system, now being displaced by PCI; - newer boards don't support it. If you have ISA, say Y, otherwise N. - -config EISA - bool "EISA support" - depends on ISA - ---help--- - The Extended Industry Standard Architecture (EISA) bus was - developed as an open alternative to the IBM MicroChannel bus. - - The EISA bus provided some of the features of the IBM MicroChannel - bus while maintaining backward compatibility with cards made for - the older ISA bus. The EISA bus saw limited use between 1988 and - 1995 when it was made obsolete by the PCI bus. - - Say Y here if you are building a kernel for an EISA-based machine. - - Otherwise, say N. - -source "drivers/eisa/Kconfig" - -config MCA - bool "MCA support" if !(X86_VISWS || X86_VOYAGER) - default y if X86_VOYAGER - help - MicroChannel Architecture is found in some IBM PS/2 machines and - laptops. It is a bus system similar to PCI or ISA. See - (and especially the web page given - there) before attempting to build an MCA bus kernel. - -source "drivers/mca/Kconfig" - -config SCx200 - tristate "NatSemi SCx200 support" - depends on !X86_VOYAGER - help - This provides basic support for National Semiconductor's - (now AMD's) Geode processors. The driver probes for the - PCI-IDs of several on-chip devices, so its a good dependency - for other scx200_* drivers. - - If compiled as a module, the driver is named scx200. - -config SCx200HR_TIMER - tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" - depends on SCx200 && GENERIC_TIME - default y - help - This driver provides a clocksource built upon the on-chip - 27MHz high-resolution timer. Its also a workaround for - NSC Geode SC-1100's buggy TSC, which loses time when the - processor goes idle (as is done by the scheduler). The - other workaround is idle=poll boot option. - -config GEODE_MFGPT_TIMER - bool "Geode Multi-Function General Purpose Timer (MFGPT) events" - depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS - default y - help - This driver provides a clock event source based on the MFGPT - timer(s) in the CS5535 and CS5536 companion chip for the geode. - MFGPTs have a better resolution and max interval than the - generic PIT, and are suitable for use as high-res timers. - -config K8_NB - def_bool y - depends on AGP_AMD64 - -source "drivers/pcmcia/Kconfig" - -source "drivers/pci/hotplug/Kconfig" - -endmenu - -menu "Executable file formats" - -source "fs/Kconfig.binfmt" - -endmenu - -source "net/Kconfig" - -source "drivers/Kconfig" - -source "fs/Kconfig" - -source "kernel/Kconfig.instrumentation" - -source "arch/x86/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" - -source "lib/Kconfig" # # Use the generic interrupt handling code in kernel/irq/: @@ -1319,3 +995,5 @@ config X86_TRAMPOLINE config KTIME_SCALAR bool default y + +source "arch/x86/Kconfig" diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64 index 8d6b5342544..264623c30d5 100644 --- a/arch/x86/Kconfig.x86_64 +++ b/arch/x86/Kconfig.x86_64 @@ -698,142 +698,9 @@ config GENERIC_IRQ_PROBE bool default y -# we have no ISA slots, but we do have ISA-style DMA. -config ISA_DMA_API - bool - default y - config GENERIC_PENDING_IRQ bool depends on GENERIC_HARDIRQS && SMP default y -menu "Power management options" - -source kernel/power/Kconfig - -config ARCH_HIBERNATION_HEADER - bool - depends on HIBERNATION - default y - -source "drivers/acpi/Kconfig" - -source "arch/x86/kernel/cpu/cpufreq/Kconfig" - -source "drivers/cpuidle/Kconfig" - -endmenu - -menu "Bus options (PCI etc.)" - -config PCI - bool "PCI support" - select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) - -# x86-64 doesn't support PCI BIOS access from long mode so always go direct. -config PCI_DIRECT - bool - depends on PCI - default y - -config PCI_MMCONFIG - bool "Support mmconfig PCI config space access" - depends on PCI && ACPI - -config PCI_DOMAINS - bool - depends on PCI - default y - -config DMAR - bool "Support for DMA Remapping Devices (EXPERIMENTAL)" - depends on PCI_MSI && ACPI && EXPERIMENTAL - help - DMA remapping (DMAR) devices support enables independent address - translations for Direct Memory Access (DMA) from devices. - These DMA remapping devices are reported via ACPI tables - and include PCI device scope covered by these DMA - remapping devices. - -config DMAR_GFX_WA - bool "Support for Graphics workaround" - depends on DMAR - default y - help - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA. - -config DMAR_FLOPPY_WA - bool - depends on DMAR - default y - help - Floppy disk drivers are know to bypass DMA API calls - thereby failing to work when IOMMU is enabled. This - workaround will setup a 1:1 mapping for the first - 16M to make floppy (an ISA device) work. - -source "drivers/pci/pcie/Kconfig" - -source "drivers/pci/Kconfig" - -source "drivers/pcmcia/Kconfig" - -source "drivers/pci/hotplug/Kconfig" - -endmenu - - -menu "Executable file formats / Emulations" - -source "fs/Kconfig.binfmt" - -config IA32_EMULATION - bool "IA32 Emulation" - help - Include code to run 32-bit programs under a 64-bit kernel. You should - likely turn this on, unless you're 100% sure that you don't have any - 32-bit programs left. - -config IA32_AOUT - tristate "IA32 a.out support" - depends on IA32_EMULATION - help - Support old a.out binaries in the 32bit emulation. - -config COMPAT - bool - depends on IA32_EMULATION - default y - -config COMPAT_FOR_U64_ALIGNMENT - def_bool COMPAT - -config SYSVIPC_COMPAT - bool - depends on COMPAT && SYSVIPC - default y - -endmenu - -source "net/Kconfig" - -source drivers/Kconfig - -source "drivers/firmware/Kconfig" - -source fs/Kconfig - -source "kernel/Kconfig.instrumentation" - -source "arch/x86/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" - -source "lib/Kconfig" +source "arch/x86/Kconfig" -- cgit v1.2.3 From 1032c0ba9da5c5b53173ad2dcf8b2a2da78f8b17 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 6 Nov 2007 21:35:08 +0100 Subject: x86: arch/x86/Kconfig.cpu unification Move all CPU definitions to Kconfig.cpu Always define X86_MINIMUM_CPU_FAMILY and do the obvious code cleanup in boot/cpucheck.c Comments from: Adrian Bunk incorporated. Signed-off-by: Sam Ravnborg Cc: Adrian Bunk Cc: Brian Gerst Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig | 19 ++++++++ arch/x86/Kconfig.cpu | 121 +++++++++++++++++++++++++++++------------------ arch/x86/Kconfig.x86_64 | 83 +------------------------------- arch/x86/boot/cpucheck.c | 6 --- 4 files changed, 95 insertions(+), 134 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1382c51295..e741fc772da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1,3 +1,22 @@ +# x86 configuration + +### Arch settings +config RWSEM_GENERIC_SPINLOCK + def_bool !X86_XADD + +config RWSEM_XCHGADD_ALGORITHM + def_bool X86_XADD + +config ARCH_HAS_ILOG2_U32 + def_bool n + +config ARCH_HAS_ILOG2_U64 + def_bool n + +config GENERIC_CALIBRATE_DELAY + def_bool y + + menu "Power management options" depends on !X86_VOYAGER diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 0e2adadf590..c30162202dc 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -3,11 +3,12 @@ if !X86_ELAN choice prompt "Processor family" - default M686 + default M686 if X86_32 + default GENERIC_CPU if X86_64 config M386 bool "386" - depends on !UML + depends on X86_32 && !UML ---help--- This is the processor type of your CPU. This information is used for optimizing purposes. In order to compile a kernel that can run on @@ -49,6 +50,7 @@ config M386 config M486 bool "486" + depends on X86_32 help Select this for a 486 series processor, either Intel or one of the compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, @@ -57,6 +59,7 @@ config M486 config M586 bool "586/K5/5x86/6x86/6x86MX" + depends on X86_32 help Select this for an 586 or 686 series processor such as the AMD K5, the Cyrix 5x86, 6x86 and 6x86MX. This choice does not @@ -64,18 +67,21 @@ config M586 config M586TSC bool "Pentium-Classic" + depends on X86_32 help Select this for a Pentium Classic processor with the RDTSC (Read Time Stamp Counter) instruction for benchmarking. config M586MMX bool "Pentium-MMX" + depends on X86_32 help Select this for a Pentium with the MMX graphics/multimedia extended instructions. config M686 bool "Pentium-Pro" + depends on X86_32 help Select this for Intel Pentium Pro chips. This enables the use of Pentium Pro extended instructions, and disables the init-time guard @@ -83,6 +89,7 @@ config M686 config MPENTIUMII bool "Pentium-II/Celeron(pre-Coppermine)" + depends on X86_32 help Select this for Intel chips based on the Pentium-II and pre-Coppermine Celeron core. This option enables an unaligned @@ -92,6 +99,7 @@ config MPENTIUMII config MPENTIUMIII bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + depends on X86_32 help Select this for Intel chips based on the Pentium-III and Celeron-Coppermine core. This option enables use of some @@ -100,19 +108,14 @@ config MPENTIUMIII config MPENTIUMM bool "Pentium M" + depends on X86_32 help Select this for Intel Pentium M (not Pentium-4 M) notebook chips. -config MCORE2 - bool "Core 2/newer Xeon" - help - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) - CPUs. You can distinguish newer from older Xeons by the CPU family - in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) - config MPENTIUM4 bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" + depends on X86_32 help Select this for Intel Pentium 4 chips. This includes the Pentium 4, Pentium D, P4-based Celeron and Xeon, and @@ -148,6 +151,7 @@ config MPENTIUM4 config MK6 bool "K6/K6-II/K6-III" + depends on X86_32 help Select this for an AMD K6-family processor. Enables use of some extended instructions, and passes appropriate optimization @@ -155,6 +159,7 @@ config MK6 config MK7 bool "Athlon/Duron/K7" + depends on X86_32 help Select this for an AMD Athlon K7-family processor. Enables use of some extended instructions, and passes appropriate optimization @@ -169,6 +174,7 @@ config MK8 config MCRUSOE bool "Crusoe" + depends on X86_32 help Select this for a Transmeta Crusoe processor. Treats the processor like a 586 with TSC, and sets some GCC optimization flags (like a @@ -176,11 +182,13 @@ config MCRUSOE config MEFFICEON bool "Efficeon" + depends on X86_32 help Select this for a Transmeta Efficeon processor. config MWINCHIPC6 bool "Winchip-C6" + depends on X86_32 help Select this for an IDT Winchip C6 chip. Linux and GCC treat this chip as a 586TSC with some extended instructions @@ -188,6 +196,7 @@ config MWINCHIPC6 config MWINCHIP2 bool "Winchip-2" + depends on X86_32 help Select this for an IDT Winchip-2. Linux and GCC treat this chip as a 586TSC with some extended instructions @@ -195,6 +204,7 @@ config MWINCHIP2 config MWINCHIP3D bool "Winchip-2A/Winchip-3" + depends on X86_32 help Select this for an IDT Winchip-2A or 3. Linux and GCC treat this chip as a 586TSC with some extended instructions @@ -204,16 +214,19 @@ config MWINCHIP3D config MGEODEGX1 bool "GeodeGX1" + depends on X86_32 help Select this for a Geode GX1 (Cyrix MediaGX) chip. config MGEODE_LX bool "Geode GX/LX" + depends on X86_32 help Select this for AMD Geode GX and LX processors. config MCYRIXIII bool "CyrixIII/VIA-C3" + depends on X86_32 help Select this for a Cyrix III or C3 chip. Presently Linux and GCC treat this chip as a generic 586. Whilst the CPU is 686 class, @@ -225,6 +238,7 @@ config MCYRIXIII config MVIAC3_2 bool "VIA C3-2 (Nehemiah)" + depends on X86_32 help Select this for a VIA C3 "Nehemiah". Selecting this enables usage of SSE and tells gcc to treat the CPU as a 686. @@ -232,15 +246,42 @@ config MVIAC3_2 config MVIAC7 bool "VIA C7" + depends on X86_32 help Select this for a VIA C7. Selecting this uses the correct cache shift and tells gcc to treat the CPU as a 686. +config MPSC + bool "Intel P4 / older Netburst based Xeon" + depends on X86_64 + help + Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey + Xeon CPUs with Intel 64bit which is compatible with x86-64. + Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the + Netburst core and shouldn't use this option. You can distinguish them + using the cpu family field + in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. + +config MCORE2 + bool "Core 2/newer Xeon" + help + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) + CPUs. You can distinguish newer from older Xeons by the CPU family + in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + +config GENERIC_CPU + bool "Generic-x86-64" + depends on X86_64 + help + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs. + endchoice config X86_GENERIC - bool "Generic x86 support" - help + bool "Generic x86 support" + depends on X86_32 + help Instead of just including optimizations for the selected x86 variant (e.g. PII, Crusoe or Athlon), include some more generic optimizations as well. This will make the kernel @@ -253,44 +294,31 @@ endif # # Define implied options from the CPU selection here -# +config X86_L1_CACHE_BYTES + int + default "128" if GENERIC_CPU || MPSC + default "64" if MK8 || MCORE2 + depends on X86_64 + +config X86_INTERNODE_CACHE_BYTES + int + default "4096" if X86_VSMP + default X86_L1_CACHE_BYTES if !X86_VSMP + depends on X86_64 + config X86_CMPXCHG - bool - depends on !M386 - default y + def_bool X86_64 || (X86_32 && !M386) config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || X86_GENERIC + default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 config X86_XADD bool - depends on !M386 - default y - -config RWSEM_GENERIC_SPINLOCK - bool - depends on !X86_XADD - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - depends on X86_XADD - default y - -config ARCH_HAS_ILOG2_U32 - bool - default n - -config ARCH_HAS_ILOG2_U64 - bool - default n - -config GENERIC_CALIBRATE_DELAY - bool + depends on X86_32 && !M386 default y config X86_PPRO_FENCE @@ -305,22 +333,22 @@ config X86_F00F_BUG config X86_WP_WORKS_OK bool - depends on !M386 + depends on X86_32 && !M386 default y config X86_INVLPG bool - depends on !M386 + depends on X86_32 && !M386 default y config X86_BSWAP bool - depends on !M386 + depends on X86_32 && !M386 default y config X86_POPAD_OK bool - depends on !M386 + depends on X86_32 && !M386 default y config X86_ALIGNMENT_16 @@ -330,7 +358,7 @@ config X86_ALIGNMENT_16 config X86_GOOD_APIC bool - depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 + depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64 default y config X86_INTEL_USERCOPY @@ -355,7 +383,7 @@ config X86_OOSTORE config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ + depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 default y # this should be set for all -march=.. options where the compiler @@ -367,6 +395,7 @@ config X86_CMOV config X86_MINIMUM_CPU_FAMILY int - default "4" if X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK + default "64" if X86_64 + default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) default "3" diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64 index 264623c30d5..cdd1458202f 100644 --- a/arch/x86/Kconfig.x86_64 +++ b/arch/x86/Kconfig.x86_64 @@ -78,25 +78,10 @@ config ISA config SBUS bool -config RWSEM_GENERIC_SPINLOCK - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - config GENERIC_HWEIGHT bool default y -config GENERIC_CALIBRATE_DELAY - bool - default y - -config X86_CMPXCHG - bool - default y - config GENERIC_ISA_DMA bool default y @@ -125,13 +110,6 @@ config GENERIC_BUG default y depends on BUG -config ARCH_HAS_ILOG2_U32 - bool - default n - -config ARCH_HAS_ILOG2_U64 - bool - default n source "init/Kconfig" @@ -159,66 +137,7 @@ config X86_VSMP endchoice -choice - prompt "Processor family" - default GENERIC_CPU - -config MK8 - bool "AMD-Opteron/Athlon64" - help - Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs. - -config MPSC - bool "Intel P4 / older Netburst based Xeon" - help - Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey - Xeon CPUs with Intel 64bit which is compatible with x86-64. - Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the - Netburst core and shouldn't use this option. You can distinguish them - using the cpu family field - in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. - -config MCORE2 - bool "Intel Core2 / newer Xeon" - help - Optimize for Intel Core2 and newer Xeons (51xx) - You can distinguish the newer Xeons from the older ones using - the cpu family field in /proc/cpuinfo. 15 is an older Xeon - (use CONFIG_MPSC then), 6 is a newer one. - -config GENERIC_CPU - bool "Generic-x86-64" - help - Generic x86-64 CPU. - Run equally well on all x86-64 CPUs. - -endchoice - -# -# Define implied options from the CPU selection here -# -config X86_L1_CACHE_BYTES - int - default "128" if GENERIC_CPU || MPSC - default "64" if MK8 || MCORE2 - -config X86_L1_CACHE_SHIFT - int - default "7" if GENERIC_CPU || MPSC - default "6" if MK8 || MCORE2 - -config X86_INTERNODE_CACHE_BYTES - int - default "4096" if X86_VSMP - default X86_L1_CACHE_BYTES if !X86_VSMP - -config X86_TSC - bool - default y - -config X86_GOOD_APIC - bool - default y +source "arch/x86/Kconfig.cpu" config MICROCODE tristate "/dev/cpu/microcode - Intel CPU microcode support" diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index e655a89c551..769065bd23d 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -42,13 +42,7 @@ static struct cpu_features cpu; static u32 cpu_vendor[3]; static u32 err_flags[NCAPINTS]; -#ifdef CONFIG_X86_64 -static const int req_level = 64; -#elif defined(CONFIG_X86_MINIMUM_CPU_FAMILY) static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; -#else -static const int req_level = 3; -#endif static const u32 req_flags[NCAPINTS] = { -- cgit v1.2.3 From 012c6c72275e66f4e52ae2df629f53a8d127e39f Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 6 Nov 2007 21:57:20 +0100 Subject: x86: add X86_32 dependency to i386 specific symbols in Kconfig.i386 To ease unification of Kconfig.i386 and Kconfig.x86_64 add X86_32 dependencies to all i386 specific symbols. This patch introduce no functional changes but is one step towards unification. This smaller step is used to ease review of the patch set. Signed-off-by: Sam Ravnborg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig.i386 | 90 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/arch/x86/Kconfig.i386 b/arch/x86/Kconfig.i386 index 9fe63f10e57..174b9092b46 100644 --- a/arch/x86/Kconfig.i386 +++ b/arch/x86/Kconfig.i386 @@ -137,6 +137,7 @@ config X86_PC config X86_ELAN bool "AMD Elan" + depends on X86_32 help Select this for an AMD Elan processor. @@ -146,6 +147,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" + depends on X86_32 select SMP if !BROKEN help Voyager is an MCA-based 32-way capable SMP architecture proprietary @@ -160,6 +162,7 @@ config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" select SMP select NUMA + depends on X86_32 help This option is used for getting Linux to run on a (IBM/Sequent) NUMA multiquad box. This changes the way that processors are bootstrapped, @@ -169,7 +172,7 @@ config X86_NUMAQ config X86_SUMMIT bool "Summit/EXA (IBM x440)" - depends on SMP + depends on X86_32 && SMP help This option is needed for IBM systems that use the Summit/EXA chipset. In particular, it is needed for the x440. @@ -179,7 +182,7 @@ config X86_SUMMIT config X86_BIGSMP bool "Support for other sub-arch SMP systems with more than 8 CPUs" - depends on SMP + depends on X86_32 && SMP help This option is needed for the systems that have more than 8 CPUs and if the system is not of any sub-arch type above. @@ -188,6 +191,7 @@ config X86_BIGSMP config X86_VISWS bool "SGI 320/540 (Visual Workstation)" + depends on X86_32 help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -199,6 +203,7 @@ config X86_VISWS config X86_GENERICARCH bool "Generic architecture (Summit, bigsmp, ES7000, default)" + depends on X86_32 help This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. It is intended for a generic binary kernel. @@ -206,7 +211,7 @@ config X86_GENERICARCH config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" - depends on SMP + depends on X86_32 && SMP help Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. @@ -218,6 +223,7 @@ endchoice config SCHED_NO_NO_OMIT_FRAME_POINTER bool "Single-depth WCHAN output" default y + depends on X86_32 help Calculate simpler /proc//wchan values. If this option is disabled then wchan values will recurse back to the @@ -228,7 +234,7 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER config PARAVIRT bool - depends on !(X86_VISWS || X86_VOYAGER) + depends on X86_32 && !(X86_VISWS || X86_VOYAGER) help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly @@ -237,6 +243,7 @@ config PARAVIRT menuconfig PARAVIRT_GUEST bool "Paravirtualized guest support" + depends on X86_32 help Say Y here to get to see options related to running Linux under various hypervisors. This option alone does not add any kernel code. @@ -264,7 +271,7 @@ endif config ACPI_SRAT bool default y - depends on ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) + depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) select ACPI_NUMA config HAVE_ARCH_PARSE_SRAT @@ -275,12 +282,12 @@ config HAVE_ARCH_PARSE_SRAT config X86_SUMMIT_NUMA bool default y - depends on NUMA && (X86_SUMMIT || X86_GENERICARCH) + depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) config X86_CYCLONE_TIMER bool default y - depends on X86_SUMMIT || X86_GENERICARCH + depends on X86_32 && X86_SUMMIT || X86_GENERICARCH config ES7000_CLUSTERED_APIC bool @@ -290,7 +297,8 @@ config ES7000_CLUSTERED_APIC source "arch/x86/Kconfig.cpu" config HPET_TIMER - bool "HPET Timer Support" + bool + prompt "HPET Timer Support" if X86_32 help This enables the use of the HPET for the kernel's internal timer. HPET is the next generation timer replacing legacy 8254s. @@ -341,7 +349,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) + depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -366,17 +374,17 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC bool - depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH + depends on X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH) default y config X86_IO_APIC bool - depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH + depends on X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH) default y config X86_VISWS_APIC bool - depends on X86_VISWS + depends on X86_32 && X86_VISWS default y config X86_MCE @@ -398,7 +406,7 @@ config X86_MCE config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" - depends on X86_MCE + depends on X86_32 && X86_MCE help Enabling this feature starts a timer that triggers every 5 seconds which will look at the machine check registers to see if anything happened. @@ -411,14 +419,15 @@ config X86_MCE_NONFATAL config X86_MCE_P4THERMAL bool "check for P4 thermal throttling interrupt." - depends on X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS + depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS help Enabling this feature will cause a message to be printed when the P4 enters thermal throttling. config VM86 - default y bool "Enable VM86 support" if EMBEDDED + default y + depends on X86_32 help This option is required by programs like DOSEMU to run 16-bit legacy code on X86 processors. It also may be needed by software like @@ -427,6 +436,7 @@ config VM86 config TOSHIBA tristate "Toshiba Laptop support" + depends on X86_32 ---help--- This adds a driver to safely access the System Management Mode of the CPU on Toshiba portables with a genuine Toshiba BIOS. It does @@ -442,6 +452,7 @@ config TOSHIBA config I8K tristate "Dell laptop support" + depends on X86_32 ---help--- This adds a driver to safely access the System Management Mode of the CPU on the Dell Inspiron 8000. The System Management Mode @@ -462,7 +473,7 @@ config I8K config X86_REBOOTFIXUPS bool "Enable X86 board specific fixups for reboot" - depends on X86 + depends on X86_32 && X86 default n ---help--- This enables chipset and/or board specific fixups to be done @@ -521,6 +532,7 @@ choice prompt "High Memory Support" default HIGHMEM4G if !X86_NUMAQ default HIGHMEM64G if X86_NUMAQ + depends on X86_32 config NOHIGHMEM bool "off" @@ -580,6 +592,7 @@ choice depends on EXPERIMENTAL prompt "Memory split" if EMBEDDED default VMSPLIT_3G + depends on X86_32 help Select the desired split between kernel and user memory. @@ -617,16 +630,17 @@ config PAGE_OFFSET default 0x78000000 if VMSPLIT_2G_OPT default 0x40000000 if VMSPLIT_1G default 0xC0000000 + depends on X86_32 config HIGHMEM bool - depends on HIGHMEM64G || HIGHMEM4G + depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) default y config X86_PAE bool "PAE (Physical Address Extension) Support" default n - depends on !HIGHMEM4G + depends on X86_32 && !HIGHMEM4G select RESOURCES_64BIT help PAE is required for NX support, and furthermore enables @@ -637,7 +651,7 @@ config X86_PAE # Common NUMA Features config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" - depends on SMP && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL + depends on X86_32 && SMP && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) help @@ -646,7 +660,7 @@ config NUMA cause boot failures. comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" - depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) + depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) config NODES_SHIFT int @@ -656,27 +670,27 @@ config NODES_SHIFT config HAVE_ARCH_BOOTMEM_NODE bool - depends on NUMA + depends on X86_32 && NUMA default y config ARCH_HAVE_MEMORY_PRESENT bool - depends on DISCONTIGMEM + depends on X86_32 && DISCONTIGMEM default y config NEED_NODE_MEMMAP_SIZE bool - depends on DISCONTIGMEM || SPARSEMEM + depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) default y config HAVE_ARCH_ALLOC_REMAP bool - depends on NUMA + depends on X86_32 && NUMA default y config ARCH_FLATMEM_ENABLE def_bool y - depends on (ARCH_SELECT_MEMORY_MODEL && X86_PC) + depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC config ARCH_DISCONTIGMEM_ENABLE def_bool y @@ -689,11 +703,11 @@ config ARCH_DISCONTIGMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y depends on (NUMA || (X86_PC && EXPERIMENTAL)) - select SPARSEMEM_STATIC + select SPARSEMEM_STATIC if X86_32 config ARCH_SELECT_MEMORY_MODEL def_bool y - depends on ARCH_SPARSEMEM_ENABLE + depends on X86_32 && ARCH_SPARSEMEM_ENABLE config ARCH_POPULATES_NODE_MAP def_bool y @@ -702,7 +716,7 @@ source "mm/Kconfig" config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" - depends on HIGHMEM4G || HIGHMEM64G + depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) help The VM uses one page table entry for each page of physical memory. For systems with a lot of RAM, this can be wasteful of precious @@ -710,7 +724,8 @@ config HIGHPTE entries in high memory. config MATH_EMULATION - bool "Math emulation" + bool + prompt "Math emulation" if X86_32 ---help--- Linux can emulate a math coprocessor (used for floating point operations) if you don't have one. 486DX and Pentium processors have @@ -770,7 +785,7 @@ config MTRR config EFI bool "Boot from EFI support" - depends on ACPI + depends on X86_32 && ACPI default n ---help--- This enables the kernel to boot on EFI platforms using @@ -788,7 +803,7 @@ config EFI config IRQBALANCE bool "Enable kernel irq balancing" - depends on SMP && X86_IO_APIC + depends on X86_32 && SMP && X86_IO_APIC default y help The default yes will allow the kernel to do irq load balancing. @@ -798,7 +813,7 @@ config IRQBALANCE # Summit needs it only when NUMA is on config BOOT_IOREMAP bool - depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) + depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y config SECCOMP @@ -907,7 +922,8 @@ config RELOCATABLE kernel. config PHYSICAL_ALIGN - hex "Alignment value to which kernel should be aligned" + hex + prompt "Alignment value to which kernel should be aligned" if X86_32 default "0x100000" range 0x2000 0x400000 help @@ -940,6 +956,7 @@ config HOTPLUG_CPU config COMPAT_VDSO bool "Compat VDSO support" default y + depends on X86_32 help Map the VDSO to the predictable old-style address too. ---help--- @@ -974,7 +991,7 @@ config GENERIC_PENDING_IRQ config X86_SMP bool - depends on SMP && !X86_VOYAGER + depends on X86_32 && SMP && !X86_VOYAGER default y config X86_HT @@ -984,7 +1001,7 @@ config X86_HT config X86_BIOS_REBOOT bool - depends on !(X86_VISWS || X86_VOYAGER) + depends on X86_32 && !(X86_VISWS || X86_VOYAGER) default y config X86_TRAMPOLINE @@ -993,7 +1010,6 @@ config X86_TRAMPOLINE default y config KTIME_SCALAR - bool - default y + def_bool X86_32 source "arch/x86/Kconfig" -- cgit v1.2.3 From d2cacbcfaab96adfaea8d33241ed9ef4b8a5ce02 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 6 Nov 2007 22:12:16 +0100 Subject: x86: add X86_64 dependency to x86_64 specific symbols in Kconfig.x86_64 To ease unification of Kconfig.i386 and Kconfig.x86_64 add X86_64 dependencies to all x86_64 specific symbols. This patch introduce no functional changes but is one step towards unification. This smaller step is used to ease review of the patch set. Signed-off-by: Sam Ravnborg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig.x86_64 | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64 index cdd1458202f..36bb856f5a3 100644 --- a/arch/x86/Kconfig.x86_64 +++ b/arch/x86/Kconfig.x86_64 @@ -18,7 +18,7 @@ config X86_64 . config 64BIT - def_bool y + def_bool X86_64 config X86 bool @@ -129,7 +129,7 @@ config X86_PC config X86_VSMP bool "Support for ScaleMP vSMP" - depends on PCI + depends on X86_64 && PCI help Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option @@ -275,7 +275,7 @@ config NUMA config K8_NUMA bool "Old style AMD Opteron NUMA detection" - depends on NUMA && PCI + depends on X86_64 && NUMA && PCI default y help Enable K8 NUMA node topology detection. You should say Y here if @@ -286,14 +286,14 @@ config K8_NUMA config NODES_SHIFT int - default "6" + default "6" if X86_64 depends on NEED_MULTIPLE_NODES # Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig. config X86_64_ACPI_NUMA bool "ACPI NUMA detection" - depends on NUMA + depends on X86_64 && NUMA select ACPI select PCI select ACPI_NUMA @@ -303,7 +303,7 @@ config X86_64_ACPI_NUMA config NUMA_EMU bool "NUMA emulation" - depends on NUMA + depends on X86_64 && NUMA help Enable NUMA emulation. A flat machine will be split into virtual nodes when booted with "numa=fake=N", where N is the @@ -324,7 +324,7 @@ config ARCH_SPARSEMEM_ENABLE select SPARSEMEM_VMEMMAP_ENABLE config ARCH_MEMORY_PROBE - def_bool y + def_bool X86_64 depends on MEMORY_HOTPLUG config ARCH_FLATMEM_ENABLE @@ -334,15 +334,15 @@ config ARCH_FLATMEM_ENABLE source "mm/Kconfig" config MEMORY_HOTPLUG_RESERVE - def_bool y + def_bool X86_64 depends on (MEMORY_HOTPLUG && DISCONTIGMEM) config HAVE_ARCH_EARLY_PFN_TO_NID - def_bool y + def_bool X86_64 depends on NUMA config OUT_OF_LINE_PFN_TO_PAGE - def_bool y + def_bool X86_64 depends on DISCONTIGMEM config NR_CPUS @@ -360,7 +360,7 @@ config NR_CPUS config PHYSICAL_ALIGN hex - default "0x200000" + default "0x200000" if X86_64 config HOTPLUG_CPU bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" @@ -399,7 +399,7 @@ config GART_IOMMU default y select SWIOTLB select AGP - depends on PCI + depends on X86_64 && PCI help Support for full DMA access of devices with 32bit memory access only on systems with more than 3GB. This is usually needed for USB, @@ -414,7 +414,7 @@ config GART_IOMMU config CALGARY_IOMMU bool "IBM Calgary IOMMU support" select SWIOTLB - depends on PCI && EXPERIMENTAL + depends on X86_64 && PCI && EXPERIMENTAL help Support for hardware IOMMUs in IBM's xSeries x366 and x460 systems. Needed to run systems with more than 3GB of memory @@ -461,7 +461,7 @@ config X86_MCE config X86_MCE_INTEL bool "Intel MCE features" - depends on X86_MCE && X86_LOCAL_APIC + depends on X86_64 && X86_MCE && X86_LOCAL_APIC default y help Additional support for intel specific MCE features such as @@ -469,7 +469,7 @@ config X86_MCE_INTEL config X86_MCE_AMD bool "AMD MCE features" - depends on X86_MCE && X86_LOCAL_APIC + depends on X86_64 && X86_MCE && X86_LOCAL_APIC default y help Additional support for AMD specific MCE features such as @@ -576,7 +576,7 @@ config SECCOMP config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on X86_64 && EXPERIMENTAL help This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of critical functions, a canary @@ -601,7 +601,7 @@ config CC_STACKPROTECTOR_ALL source kernel/Kconfig.hz config K8_NB - def_bool y + def_bool X86_64 depends on AGP_AMD64 || GART_IOMMU || (PCI && NUMA) endmenu -- cgit v1.2.3 From bc0120fdb4798421a577630bf5cbd77fc2d6661d Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 6 Nov 2007 23:10:39 +0100 Subject: x86: copy x86_64 specific Kconfig symbols to Kconfig.i386 No functional changes. A prepatory step towards full unification. Signed-off-by: Sam Ravnborg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig | 6 +- arch/x86/Kconfig.i386 | 215 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 198 insertions(+), 23 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e741fc772da..9fbb049950d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -353,11 +353,11 @@ config GEODE_MFGPT_TIMER MFGPTs have a better resolution and max interval than the generic PIT, and are suitable for use as high-res timers. +endif # X86_32 + config K8_NB def_bool y - depends on AGP_AMD64 - -endif # X86_32 + depends on AGP_AMD64 || (X86_64 && (GART_IOMMU || (PCI && NUMA))) source "drivers/pcmcia/Kconfig" diff --git a/arch/x86/Kconfig.i386 b/arch/x86/Kconfig.i386 index 174b9092b46..3be76720e89 100644 --- a/arch/x86/Kconfig.i386 +++ b/arch/x86/Kconfig.i386 @@ -218,6 +218,14 @@ config X86_ES7000 Only choose this option if you have such a system, otherwise you should say N here. +config X86_VSMP + bool "Support for ScaleMP vSMP" + depends on X86_64 && PCI + help + Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is + supposed to run on these EM64T-based machines. Only choose this option + if you have one of these machines. + endchoice config SCHED_NO_NO_OMIT_FRAME_POINTER @@ -299,20 +307,87 @@ source "arch/x86/Kconfig.cpu" config HPET_TIMER bool prompt "HPET Timer Support" if X86_32 + default X86_64 help - This enables the use of the HPET for the kernel's internal timer. - HPET is the next generation timer replacing legacy 8254s. - You can safely choose Y here. However, HPET will only be - activated if the platform and the BIOS support this feature. - Otherwise the 8254 will be used for timing services. + Use the IA-PC HPET (High Precision Event Timer) to manage + time in preference to the PIT and RTC, if a HPET is + present. + HPET is the next generation timer replacing legacy 8254s. + The HPET provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. You can find the HPET spec at + . + + You can safely choose Y here. However, HPET will only be + activated if the platform and the BIOS support this feature. + Otherwise the 8254 will be used for timing services. - Choose N to continue using the legacy 8254 timer. + Choose N to continue using the legacy 8254 timer. config HPET_EMULATE_RTC bool depends on HPET_TIMER && RTC=y default y +# Mark as embedded because too many people got it wrong. +# The code disables itself when not needed. +config GART_IOMMU + bool "GART IOMMU support" if EMBEDDED + default y + select SWIOTLB + select AGP + depends on X86_64 && PCI + help + Support for full DMA access of devices with 32bit memory access only + on systems with more than 3GB. This is usually needed for USB, + sound, many IDE/SATA chipsets and some other devices. + Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART + based hardware IOMMU and a software bounce buffer based IOMMU used + on Intel systems and as fallback. + The code is only active when needed (enough memory and limited + device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified + too. + +config CALGARY_IOMMU + bool "IBM Calgary IOMMU support" + select SWIOTLB + depends on X86_64 && PCI && EXPERIMENTAL + help + Support for hardware IOMMUs in IBM's xSeries x366 and x460 + systems. Needed to run systems with more than 3GB of memory + properly with 32-bit PCI devices that do not support DAC + (Double Address Cycle). Calgary also supports bus level + isolation, where all DMAs pass through the IOMMU. This + prevents them from going anywhere except their intended + destination. This catches hard-to-find kernel bugs and + mis-behaving drivers and devices that do not use the DMA-API + properly to set up their DMA buffers. The IOMMU can be + turned off at boot time with the iommu=off parameter. + Normally the kernel will make the right choice by itself. + If unsure, say Y. + +config CALGARY_IOMMU_ENABLED_BY_DEFAULT + bool "Should Calgary be enabled by default?" + default y + depends on CALGARY_IOMMU + help + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. If you choose 'n' and would like to use + Calgary anyway, pass 'iommu=calgary' on the kernel command line. + If unsure, say Y. + +# need this always selected by IOMMU for the VIA workaround +config SWIOTLB + bool + help + Support for software bounce buffers used on x86-64 systems + which don't have a hardware IOMMU (e.g. the current generation + of Intel's x86-64 CPUs). Using this PCI devices which can only + access 32-bits of memory can be used on systems with more than + 3 GB of memory. If unsure, say Y. + + config NR_CPUS int "Maximum number of CPUs (2-255)" range 2 255 @@ -329,7 +404,7 @@ config NR_CPUS config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" - depends on X86_HT + depends on (X86_64 && SMP) || (X86_32 && X86_HT) help SMT scheduler support improves the CPU scheduler's decision making when dealing with Intel Pentium 4 chips with HyperThreading at a @@ -338,7 +413,7 @@ config SCHED_SMT config SCHED_MC bool "Multi-core scheduler support" - depends on X86_HT + depends on (X86_64 && SMP) || (X86_32 && X86_HT) default y help Multi-core scheduler support improves the CPU scheduler's decision @@ -374,12 +449,12 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC bool - depends on X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH) + depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH)) default y config X86_IO_APIC bool - depends on X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH) + depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH)) default y config X86_VISWS_APIC @@ -404,6 +479,22 @@ config X86_MCE to disable it. MCE support simply ignores non-MCE processors like the 386 and 486, so nearly everyone can say Y here. +config X86_MCE_INTEL + bool "Intel MCE features" + depends on X86_64 && X86_MCE && X86_LOCAL_APIC + default y + help + Additional support for intel specific MCE features such as + the thermal monitor. + +config X86_MCE_AMD + bool "AMD MCE features" + depends on X86_64 && X86_MCE && X86_LOCAL_APIC + default y + help + Additional support for AMD specific MCE features such as + the DRAM Error Threshold. + config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" depends on X86_32 && X86_MCE @@ -651,19 +742,55 @@ config X86_PAE # Common NUMA Features config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" - depends on X86_32 && SMP && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL + depends on SMP + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) help - NUMA support for i386. This is currently highly experimental - and should be only used for kernel development. It might also - cause boot failures. + Enable NUMA (Non Uniform Memory Access) support. + The kernel will try to allocate memory used by a CPU on the + local memory controller of the CPU and add some more + NUMA awareness to the kernel. + + For i386 this is currently highly experimental and should be only + used for kernel development. It might also cause boot failures. + For x86_64 this is recommended on all multiprocessor Opteron systems. + If the system is EM64T, you should say N unless your system is + EM64T NUMA. comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) +config K8_NUMA + bool "Old style AMD Opteron NUMA detection" + depends on X86_64 && NUMA && PCI + default y + help + Enable K8 NUMA node topology detection. You should say Y here if + you have a multi processor AMD K8 system. This uses an old + method to read the NUMA configuration directly from the builtin + Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA + instead, which also takes priority if both are compiled in. + +config X86_64_ACPI_NUMA + bool "ACPI NUMA detection" + depends on X86_64 && NUMA && ACPI && PCI + select ACPI_NUMA + default y + help + Enable ACPI SRAT based node topology detection. + +config NUMA_EMU + bool "NUMA emulation" + depends on X86_64 && NUMA + help + Enable NUMA emulation. A flat machine will be split + into virtual nodes when booted with "numa=fake=N", where N is the + number of nodes. This is only useful for debugging. + config NODES_SHIFT int + default "6" if X86_64 default "4" if X86_NUMAQ default "3" depends on NEED_MULTIPLE_NODES @@ -690,7 +817,7 @@ config HAVE_ARCH_ALLOC_REMAP config ARCH_FLATMEM_ENABLE def_bool y - depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC + depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA) config ARCH_DISCONTIGMEM_ENABLE def_bool y @@ -702,8 +829,9 @@ config ARCH_DISCONTIGMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on (NUMA || (X86_PC && EXPERIMENTAL)) + depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64)) select SPARSEMEM_STATIC if X86_32 + select SPARSEMEM_VMEMMAP_ENABLE if X86_64 config ARCH_SELECT_MEMORY_MODEL def_bool y @@ -712,6 +840,10 @@ config ARCH_SELECT_MEMORY_MODEL config ARCH_POPULATES_NODE_MAP def_bool y +config ARCH_MEMORY_PROBE + def_bool X86_64 + depends on MEMORY_HOTPLUG + source "mm/Kconfig" config HIGHPTE @@ -833,6 +965,30 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config CC_STACKPROTECTOR + bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" + depends on X86_64 && EXPERIMENTAL + help + This option turns on the -fstack-protector GCC feature. This + feature puts, at the beginning of critical functions, a canary + value on the stack just before the return address, and validates + the value just before actually returning. Stack based buffer + overflows (that need to overwrite this return address) now also + overwrite the canary, which gets detected and the attack is then + neutralized via a kernel panic. + + This feature requires gcc version 4.2 or above, or a distribution + gcc with the feature backported. Older versions are automatically + detected and for those versions, this configuration option is ignored. + +config CC_STACKPROTECTOR_ALL + bool "Use stack-protector for all functions" + depends on CC_STACKPROTECTOR + help + Normally, GCC only inserts the canary value protection for + functions that use large-ish on-stack buffers. By enabling + this option, GCC will be asked to do this for ALL functions. + source kernel/Kconfig.hz config KEXEC @@ -854,7 +1010,7 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps (EXPERIMENTAL)" depends on EXPERIMENTAL - depends on HIGHMEM + depends on X86_64 || (X86_32 && HIGHMEM) help Generate crash dump after being started by kexec. This should be normally only set in special crash dump kernels @@ -869,6 +1025,7 @@ config CRASH_DUMP config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) default "0x1000000" if X86_NUMAQ + default "0x200000" if X86_64 default "0x100000" help This gives the physical address where the kernel is loaded. @@ -921,10 +1078,15 @@ config RELOCATABLE must live at a different physical address than the primary kernel. + Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address + it has been loaded at and the compile time physical address + (CONFIG_PHYSICAL_START) is ignored. + config PHYSICAL_ALIGN hex prompt "Alignment value to which kernel should be aligned" if X86_32 - default "0x100000" + default "0x100000" if X86_32 + default "0x200000" if X86_64 range 0x2000 0x400000 help This value puts the alignment restrictions on physical address @@ -952,6 +1114,8 @@ config HOTPLUG_CPU Say Y here to experiment with turning CPUs off and on, and to enable suspend on SMP systems. CPUs can be controlled through /sys/devices/system/cpu. + Say N if you want to disable CPU hotplug and don't need to + suspend. config COMPAT_VDSO bool "Compat VDSO support" @@ -970,8 +1134,19 @@ endmenu config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y - depends on HIGHMEM + depends on X86_64 || (X86_32 && HIGHMEM) + +config MEMORY_HOTPLUG_RESERVE + def_bool X86_64 + depends on (MEMORY_HOTPLUG && DISCONTIGMEM) + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool X86_64 + depends on NUMA +config OUT_OF_LINE_PFN_TO_PAGE + def_bool X86_64 + depends on DISCONTIGMEM # # Use the generic interrupt handling code in kernel/irq/: @@ -996,7 +1171,7 @@ config X86_SMP config X86_HT bool - depends on SMP && !(X86_VISWS || X86_VOYAGER) + depends on SMP && !(X86_VISWS || X86_VOYAGER || MK8) default y config X86_BIOS_REBOOT -- cgit v1.2.3 From 8d5fffb928cd86a70823f66f8335fa41709ec109 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 6 Nov 2007 23:30:30 +0100 Subject: x86: move all simple arch settings to Kconfig Most of the arch settings were equal so combine them in the first part of Kconfig. Signed-off-by: Sam Ravnborg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig | 136 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/Kconfig.i386 | 119 ------------------------------------------ arch/x86/Kconfig.x86_64 | 115 ---------------------------------------- 3 files changed, 136 insertions(+), 234 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9fbb049950d..d47b5a2e4a3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1,6 +1,86 @@ # x86 configuration ### Arch settings +config X86 + bool + default y + +config 64BIT + def_bool X86_64 + +config GENERIC_TIME + bool + default y + +config GENERIC_CMOS_UPDATE + bool + default y + +config CLOCKSOURCE_WATCHDOG + bool + default y + +config GENERIC_CLOCKEVENTS + bool + default y + +config GENERIC_CLOCKEVENTS_BROADCAST + bool + default y + depends on X86_64 || (X86_32 && X86_LOCAL_APIC) + +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default y + +config SEMAPHORE_SLEEPERS + bool + default y + +config MMU + bool + default y + +config ZONE_DMA + bool + default y + +config QUICKLIST + bool + default X86_32 + +config SBUS + bool + +config GENERIC_ISA_DMA + bool + default y + +config GENERIC_IOMAP + bool + default y + +config GENERIC_BUG + bool + default y + depends on BUG + +config GENERIC_HWEIGHT + bool + default y + +config ARCH_MAY_HAVE_PC_FDC + bool + default y + +config DMI + bool + default y + config RWSEM_GENERIC_SPINLOCK def_bool !X86_XADD @@ -16,6 +96,62 @@ config ARCH_HAS_ILOG2_U64 config GENERIC_CALIBRATE_DELAY def_bool y +config GENERIC_TIME_VSYSCALL + bool + default X86_64 + + + + + +config ZONE_DMA32 + bool + default X86_64 + +config ARCH_POPULATES_NODE_MAP + def_bool y + +config AUDIT_ARCH + bool + default X86_64 + +# Use the generic interrupt handling code in kernel/irq/: +config GENERIC_HARDIRQS + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + +config X86_SMP + bool + depends on X86_32 && SMP && !X86_VOYAGER + default y + +config X86_HT + bool + depends on SMP && !(X86_VISWS || X86_VOYAGER || MK8) + default y + +config X86_BIOS_REBOOT + bool + depends on X86_32 && !(X86_VISWS || X86_VOYAGER) + default y + +config X86_TRAMPOLINE + bool + depends on X86_SMP || (X86_VOYAGER && SMP) + default y + +config KTIME_SCALAR + def_bool X86_32 + menu "Power management options" depends on !X86_VOYAGER diff --git a/arch/x86/Kconfig.i386 b/arch/x86/Kconfig.i386 index 3be76720e89..b8b462a91dc 100644 --- a/arch/x86/Kconfig.i386 +++ b/arch/x86/Kconfig.i386 @@ -14,83 +14,6 @@ config X86_32 486, 586, Pentiums, and various instruction-set-compatible chips by AMD, Cyrix, and others. -config GENERIC_TIME - bool - default y - -config GENERIC_CMOS_UPDATE - bool - default y - -config CLOCKSOURCE_WATCHDOG - bool - default y - -config GENERIC_CLOCKEVENTS - bool - default y - -config GENERIC_CLOCKEVENTS_BROADCAST - bool - default y - depends on X86_LOCAL_APIC - -config LOCKDEP_SUPPORT - bool - default y - -config STACKTRACE_SUPPORT - bool - default y - -config SEMAPHORE_SLEEPERS - bool - default y - -config X86 - bool - default y - -config MMU - bool - default y - -config ZONE_DMA - bool - default y - -config QUICKLIST - bool - default y - -config SBUS - bool - -config GENERIC_ISA_DMA - bool - default y - -config GENERIC_IOMAP - bool - default y - -config GENERIC_BUG - bool - default y - depends on BUG - -config GENERIC_HWEIGHT - bool - default y - -config ARCH_MAY_HAVE_PC_FDC - bool - default y - -config DMI - bool - default y - source "init/Kconfig" menu "Processor type and features" @@ -837,9 +760,6 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on X86_32 && ARCH_SPARSEMEM_ENABLE -config ARCH_POPULATES_NODE_MAP - def_bool y - config ARCH_MEMORY_PROBE def_bool X86_64 depends on MEMORY_HOTPLUG @@ -1148,43 +1068,4 @@ config OUT_OF_LINE_PFN_TO_PAGE def_bool X86_64 depends on DISCONTIGMEM -# -# Use the generic interrupt handling code in kernel/irq/: -# -config GENERIC_HARDIRQS - bool - default y - -config GENERIC_IRQ_PROBE - bool - default y - -config GENERIC_PENDING_IRQ - bool - depends on GENERIC_HARDIRQS && SMP - default y - -config X86_SMP - bool - depends on X86_32 && SMP && !X86_VOYAGER - default y - -config X86_HT - bool - depends on SMP && !(X86_VISWS || X86_VOYAGER || MK8) - default y - -config X86_BIOS_REBOOT - bool - depends on X86_32 && !(X86_VISWS || X86_VOYAGER) - default y - -config X86_TRAMPOLINE - bool - depends on X86_SMP || (X86_VOYAGER && SMP) - default y - -config KTIME_SCALAR - def_bool X86_32 - source "arch/x86/Kconfig" diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64 index 36bb856f5a3..e441062472a 100644 --- a/arch/x86/Kconfig.x86_64 +++ b/arch/x86/Kconfig.x86_64 @@ -17,100 +17,6 @@ config X86_64 classical 32-bit x86 architecture. For details see . -config 64BIT - def_bool X86_64 - -config X86 - bool - default y - -config GENERIC_TIME - bool - default y - -config GENERIC_TIME_VSYSCALL - bool - default y - -config GENERIC_CMOS_UPDATE - bool - default y - -config CLOCKSOURCE_WATCHDOG - bool - default y - -config GENERIC_CLOCKEVENTS - bool - default y - -config GENERIC_CLOCKEVENTS_BROADCAST - bool - default y - -config ZONE_DMA32 - bool - default y - -config LOCKDEP_SUPPORT - bool - default y - -config STACKTRACE_SUPPORT - bool - default y - -config SEMAPHORE_SLEEPERS - bool - default y - -config MMU - bool - default y - -config ZONE_DMA - bool - default y - -config ISA - bool - -config SBUS - bool - -config GENERIC_HWEIGHT - bool - default y - -config GENERIC_ISA_DMA - bool - default y - -config GENERIC_IOMAP - bool - default y - -config ARCH_MAY_HAVE_PC_FDC - bool - default y - -config ARCH_POPULATES_NODE_MAP - def_bool y - -config DMI - bool - default y - -config AUDIT_ARCH - bool - default y - -config GENERIC_BUG - bool - default y - depends on BUG - - source "init/Kconfig" @@ -179,11 +85,6 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -config X86_HT - bool - depends on SMP && !MK8 - default y - config MATH_EMULATION bool @@ -606,20 +507,4 @@ config K8_NB endmenu -# -# Use the generic interrupt handling code in kernel/irq/: -# -config GENERIC_HARDIRQS - bool - default y - -config GENERIC_IRQ_PROBE - bool - default y - -config GENERIC_PENDING_IRQ - bool - depends on GENERIC_HARDIRQS && SMP - default y - source "arch/x86/Kconfig" -- cgit v1.2.3 From 506f1d07b310815d11527d3360b09d79d0bd59f1 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 9 Nov 2007 21:56:54 +0100 Subject: x86: move the rest of the menu's to Kconfig With this patch we have all the Kconfig file shared between i386 and x86_64. Signed-off-by: Sam Ravnborg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig | 1052 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/Kconfig.i386 | 1053 ----------------------------------------------- arch/x86/Kconfig.x86_64 | 490 ---------------------- 3 files changed, 1052 insertions(+), 1543 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d47b5a2e4a3..34517bf14ba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -151,7 +151,1059 @@ config X86_TRAMPOLINE config KTIME_SCALAR def_bool X86_32 +source "init/Kconfig" +menu "Processor type and features" + +source "kernel/time/Kconfig" + +config SMP + bool "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, like most personal computers, say N. If + you have a system with more than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + singleprocessor machines. On a singleprocessor machine, the kernel + will run faster if you say N here. + + Note that if you say Y here and choose architecture "586" or + "Pentium" under "Processor family", the kernel will not work on 486 + architectures. Similarly, multiprocessor kernels for the "PPro" + architecture may not work on all Pentium based boards. + + People using multiprocessor machines who say Y here should also say + Y to "Enhanced Real Time Clock Support", below. The "Advanced Power + Management" code will be disabled if you say Y here. + + See also the , + , + and the SMP-HOWTO available at + . + + If you don't know what to do here, say N. + +choice + prompt "Subarchitecture Type" + default X86_PC + +config X86_PC + bool "PC-compatible" + help + Choose this option if your computer is a standard PC or compatible. + +config X86_ELAN + bool "AMD Elan" + depends on X86_32 + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + +config X86_VOYAGER + bool "Voyager (NCR)" + depends on X86_32 + select SMP if !BROKEN + help + Voyager is an MCA-based 32-way capable SMP architecture proprietary + to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. + + *** WARNING *** + + If you do not specifically know you have a Voyager based machine, + say N here, otherwise the kernel you build will not be bootable. + +config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" + select SMP + select NUMA + depends on X86_32 + help + This option is used for getting Linux to run on a (IBM/Sequent) NUMA + multiquad box. This changes the way that processors are bootstrapped, + and uses Clustered Logical APIC addressing mode instead of Flat Logical. + You will need a new lynxer.elf file to flash your firmware with - send + email to . + +config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on X86_32 && SMP + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. + + If you don't have one of these computers, you should say N here. + If you want to build a NUMA kernel, you must select ACPI. + +config X86_BIGSMP + bool "Support for other sub-arch SMP systems with more than 8 CPUs" + depends on X86_32 && SMP + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + + If you don't have such a system, you should say N here. + +config X86_VISWS + bool "SGI 320/540 (Visual Workstation)" + depends on X86_32 + help + The SGI Visual Workstation series is an IA32-based workstation + based on SGI systems chips with some legacy PC hardware attached. + + Say Y here to create a kernel to run on the SGI 320 or 540. + + A kernel compiled for the Visual Workstation will not run on PCs + and vice versa. See for details. + +config X86_GENERICARCH + bool "Generic architecture (Summit, bigsmp, ES7000, default)" + depends on X86_32 + help + This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. + It is intended for a generic binary kernel. + If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. + +config X86_ES7000 + bool "Support for Unisys ES7000 IA32 series" + depends on X86_32 && SMP + help + Support for Unisys ES7000 systems. Say 'Y' here if this kernel is + supposed to run on an IA32-based Unisys ES7000 system. + Only choose this option if you have such a system, otherwise you + should say N here. + +config X86_VSMP + bool "Support for ScaleMP vSMP" + depends on X86_64 && PCI + help + Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is + supposed to run on these EM64T-based machines. Only choose this option + if you have one of these machines. + +endchoice + +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool "Single-depth WCHAN output" + default y + depends on X86_32 + help + Calculate simpler /proc//wchan values. If this option + is disabled then wchan values will recurse back to the + caller function. This provides more accurate wchan values, + at the expense of slightly more scheduling overhead. + + If in doubt, say "Y". + +config PARAVIRT + bool + depends on X86_32 && !(X86_VISWS || X86_VOYAGER) + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + +menuconfig PARAVIRT_GUEST + bool "Paravirtualized guest support" + depends on X86_32 + help + Say Y here to get to see options related to running Linux under + various hypervisors. This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if PARAVIRT_GUEST + +source "arch/x86/xen/Kconfig" + +config VMI + bool "VMI Guest support" + select PARAVIRT + depends on !(X86_VISWS || X86_VOYAGER) + help + VMI provides a paravirtualized interface to the VMware ESX server + (it could be used by other hypervisors in theory too, but is not + at the moment), by linking the kernel to a GPL-ed ROM module + provided by the hypervisor. + +source "arch/x86/lguest/Kconfig" + +endif + +config ACPI_SRAT + bool + default y + depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) + select ACPI_NUMA + +config HAVE_ARCH_PARSE_SRAT + bool + default y + depends on ACPI_SRAT + +config X86_SUMMIT_NUMA + bool + default y + depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) + +config X86_CYCLONE_TIMER + bool + default y + depends on X86_32 && X86_SUMMIT || X86_GENERICARCH + +config ES7000_CLUSTERED_APIC + bool + default y + depends on SMP && X86_ES7000 && MPENTIUMIII + +source "arch/x86/Kconfig.cpu" + +config HPET_TIMER + bool + prompt "HPET Timer Support" if X86_32 + default X86_64 + help + Use the IA-PC HPET (High Precision Event Timer) to manage + time in preference to the PIT and RTC, if a HPET is + present. + HPET is the next generation timer replacing legacy 8254s. + The HPET provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. You can find the HPET spec at + . + + You can safely choose Y here. However, HPET will only be + activated if the platform and the BIOS support this feature. + Otherwise the 8254 will be used for timing services. + + Choose N to continue using the legacy 8254 timer. + +config HPET_EMULATE_RTC + bool + depends on HPET_TIMER && RTC=y + default y + +# Mark as embedded because too many people got it wrong. +# The code disables itself when not needed. +config GART_IOMMU + bool "GART IOMMU support" if EMBEDDED + default y + select SWIOTLB + select AGP + depends on X86_64 && PCI + help + Support for full DMA access of devices with 32bit memory access only + on systems with more than 3GB. This is usually needed for USB, + sound, many IDE/SATA chipsets and some other devices. + Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART + based hardware IOMMU and a software bounce buffer based IOMMU used + on Intel systems and as fallback. + The code is only active when needed (enough memory and limited + device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified + too. + +config CALGARY_IOMMU + bool "IBM Calgary IOMMU support" + select SWIOTLB + depends on X86_64 && PCI && EXPERIMENTAL + help + Support for hardware IOMMUs in IBM's xSeries x366 and x460 + systems. Needed to run systems with more than 3GB of memory + properly with 32-bit PCI devices that do not support DAC + (Double Address Cycle). Calgary also supports bus level + isolation, where all DMAs pass through the IOMMU. This + prevents them from going anywhere except their intended + destination. This catches hard-to-find kernel bugs and + mis-behaving drivers and devices that do not use the DMA-API + properly to set up their DMA buffers. The IOMMU can be + turned off at boot time with the iommu=off parameter. + Normally the kernel will make the right choice by itself. + If unsure, say Y. + +config CALGARY_IOMMU_ENABLED_BY_DEFAULT + bool "Should Calgary be enabled by default?" + default y + depends on CALGARY_IOMMU + help + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. If you choose 'n' and would like to use + Calgary anyway, pass 'iommu=calgary' on the kernel command line. + If unsure, say Y. + +# need this always selected by IOMMU for the VIA workaround +config SWIOTLB + bool + help + Support for software bounce buffers used on x86-64 systems + which don't have a hardware IOMMU (e.g. the current generation + of Intel's x86-64 CPUs). Using this PCI devices which can only + access 32-bits of memory can be used on systems with more than + 3 GB of memory. If unsure, say Y. + + +config NR_CPUS + int "Maximum number of CPUs (2-255)" + range 2 255 + depends on SMP + default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 + default "8" + help + This allows you to specify the maximum number of CPUs which this + kernel will support. The maximum supported value is 255 and the + minimum value which makes sense is 2. + + This is purely to save memory - each supported CPU adds + approximately eight kilobytes to the kernel image. + +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on (X86_64 && SMP) || (X86_32 && X86_HT) + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. + +config SCHED_MC + bool "Multi-core scheduler support" + depends on (X86_64 && SMP) || (X86_32 && X86_HT) + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +source "kernel/Kconfig.preempt" + +config X86_UP_APIC + bool "Local APIC support on uniprocessors" + depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) + help + A local APIC (Advanced Programmable Interrupt Controller) is an + integrated interrupt controller in the CPU. If you have a single-CPU + system which has a processor with a local APIC, you can say Y here to + enable and use it. If you say Y here even though your machine doesn't + have a local APIC, then the kernel will still run with no slowdown at + all. The local APIC supports CPU-generated self-interrupts (timer, + performance counters), and the NMI watchdog which detects hard + lockups. + +config X86_UP_IOAPIC + bool "IO-APIC support on uniprocessors" + depends on X86_UP_APIC + help + An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an + SMP-capable replacement for PC-style interrupt controllers. Most + SMP systems and many recent uniprocessor systems have one. + + If you have a single-CPU system with an IO-APIC, you can say Y here + to use it. If you say Y here even though your machine doesn't have + an IO-APIC, then the kernel will still run with no slowdown at all. + +config X86_LOCAL_APIC + bool + depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH)) + default y + +config X86_IO_APIC + bool + depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH)) + default y + +config X86_VISWS_APIC + bool + depends on X86_32 && X86_VISWS + default y + +config X86_MCE + bool "Machine Check Exception" + depends on !X86_VOYAGER + ---help--- + Machine Check Exception support allows the processor to notify the + kernel if it detects a problem (e.g. overheating, component failure). + The action the kernel takes depends on the severity of the problem, + ranging from a warning message on the console, to halting the machine. + Your processor must be a Pentium or newer to support this - check the + flags in /proc/cpuinfo for mce. Note that some older Pentium systems + have a design flaw which leads to false MCE events - hence MCE is + disabled on all P5 processors, unless explicitly enabled with "mce" + as a boot argument. Similarly, if MCE is built in and creates a + problem on some new non-standard machine, you can boot with "nomce" + to disable it. MCE support simply ignores non-MCE processors like + the 386 and 486, so nearly everyone can say Y here. + +config X86_MCE_INTEL + bool "Intel MCE features" + depends on X86_64 && X86_MCE && X86_LOCAL_APIC + default y + help + Additional support for intel specific MCE features such as + the thermal monitor. + +config X86_MCE_AMD + bool "AMD MCE features" + depends on X86_64 && X86_MCE && X86_LOCAL_APIC + default y + help + Additional support for AMD specific MCE features such as + the DRAM Error Threshold. + +config X86_MCE_NONFATAL + tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" + depends on X86_32 && X86_MCE + help + Enabling this feature starts a timer that triggers every 5 seconds which + will look at the machine check registers to see if anything happened. + Non-fatal problems automatically get corrected (but still logged). + Disable this if you don't want to see these messages. + Seeing the messages this option prints out may be indicative of dying + or out-of-spec (ie, overclocked) hardware. + This option only does something on certain CPUs. + (AMD Athlon/Duron and Intel Pentium 4) + +config X86_MCE_P4THERMAL + bool "check for P4 thermal throttling interrupt." + depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS + help + Enabling this feature will cause a message to be printed when the P4 + enters thermal throttling. + +config VM86 + bool "Enable VM86 support" if EMBEDDED + default y + depends on X86_32 + help + This option is required by programs like DOSEMU to run 16-bit legacy + code on X86 processors. It also may be needed by software like + XFree86 to initialize some video cards via BIOS. Disabling this + option saves about 6k. + +config TOSHIBA + tristate "Toshiba Laptop support" + depends on X86_32 + ---help--- + This adds a driver to safely access the System Management Mode of + the CPU on Toshiba portables with a genuine Toshiba BIOS. It does + not work on models with a Phoenix BIOS. The System Management Mode + is used to set the BIOS and power saving options on Toshiba portables. + + For information on utilities to make use of this driver see the + Toshiba Linux utilities web site at: + . + + Say Y if you intend to run this kernel on a Toshiba portable. + Say N otherwise. + +config I8K + tristate "Dell laptop support" + depends on X86_32 + ---help--- + This adds a driver to safely access the System Management Mode + of the CPU on the Dell Inspiron 8000. The System Management Mode + is used to read cpu temperature and cooling fan status and to + control the fans on the I8K portables. + + This driver has been tested only on the Inspiron 8000 but it may + also work with other Dell laptops. You can force loading on other + models by passing the parameter `force=1' to the module. Use at + your own risk. + + For information on utilities to make use of this driver see the + I8K Linux utilities web site at: + + + Say Y if you intend to run this kernel on a Dell Inspiron 8000. + Say N otherwise. + +config X86_REBOOTFIXUPS + bool "Enable X86 board specific fixups for reboot" + depends on X86_32 && X86 + default n + ---help--- + This enables chipset and/or board specific fixups to be done + in order to get reboot to work correctly. This is only needed on + some combinations of hardware and BIOS. The symptom, for which + this config is intended, is when reboot ends with a stalled/hung + system. + + Currently, the only fixup is for the Geode machines using + CS5530A and CS5536 chipsets. + + Say Y if you want to enable the fixup. Currently, it's safe to + enable this option even if you don't need it. + Say N otherwise. + +config MICROCODE + tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support" + select FW_LOADER + ---help--- + If you say Y here, you will be able to update the microcode on + Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II, + Pentium III, Pentium 4, Xeon etc. You will obviously need the + actual microcode binary data itself which is not shipped with the + Linux kernel. + + For latest news and information on obtaining all the required + ingredients for this driver, check: + . + + To compile this driver as a module, choose M here: the + module will be called microcode. + +config MICROCODE_OLD_INTERFACE + bool + depends on MICROCODE + default y + +config X86_MSR + tristate "/dev/cpu/*/msr - Model-specific register support" + help + This device gives privileged processes access to the x86 + Model-Specific Registers (MSRs). It is a character device with + major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. + MSR accesses are directed to a specific CPU on multi-processor + systems. + +config X86_CPUID + tristate "/dev/cpu/*/cpuid - CPU information support" + help + This device gives processes access to the x86 CPUID instruction to + be executed on a specific processor. It is a character device + with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to + /dev/cpu/31/cpuid. + +choice + prompt "High Memory Support" + default HIGHMEM4G if !X86_NUMAQ + default HIGHMEM64G if X86_NUMAQ + depends on X86_32 + +config NOHIGHMEM + bool "off" + depends on !X86_NUMAQ + ---help--- + Linux can use up to 64 Gigabytes of physical memory on x86 systems. + However, the address space of 32-bit x86 processors is only 4 + Gigabytes large. That means that, if you have a large amount of + physical memory, not all of it can be "permanently mapped" by the + kernel. The physical memory that's not permanently mapped is called + "high memory". + + If you are compiling a kernel which will never run on a machine with + more than 1 Gigabyte total physical RAM, answer "off" here (default + choice and suitable for most users). This will result in a "3GB/1GB" + split: 3GB are mapped so that each process sees a 3GB virtual memory + space and the remaining part of the 4GB virtual memory space is used + by the kernel to permanently map as much physical memory as + possible. + + If the machine has between 1 and 4 Gigabytes physical RAM, then + answer "4GB" here. + + If more than 4 Gigabytes is used then answer "64GB" here. This + selection turns Intel PAE (Physical Address Extension) mode on. + PAE implements 3-level paging on IA32 processors. PAE is fully + supported by Linux, PAE mode is implemented on all recent Intel + processors (Pentium Pro and better). NOTE: If you say "64GB" here, + then the kernel will not boot on CPUs that don't support PAE! + + The actual amount of total physical memory will either be + auto detected or can be forced by using a kernel command line option + such as "mem=256M". (Try "man bootparam" or see the documentation of + your boot loader (lilo or loadlin) about how to pass options to the + kernel at boot time.) + + If unsure, say "off". + +config HIGHMEM4G + bool "4GB" + depends on !X86_NUMAQ + help + Select this if you have a 32-bit processor and between 1 and 4 + gigabytes of physical RAM. + +config HIGHMEM64G + bool "64GB" + depends on !M386 && !M486 + select X86_PAE + help + Select this if you have a 32-bit processor and more than 4 + gigabytes of physical RAM. + +endchoice + +choice + depends on EXPERIMENTAL + prompt "Memory split" if EMBEDDED + default VMSPLIT_3G + depends on X86_32 + help + Select the desired split between kernel and user memory. + + If the address range available to the kernel is less than the + physical memory installed, the remaining memory will be available + as "high memory". Accessing high memory is a little more costly + than low memory, as it needs to be mapped into the kernel first. + Note that increasing the kernel address space limits the range + available to user programs, making the address space there + tighter. Selecting anything other than the default 3G/1G split + will also likely make your kernel incompatible with binary-only + kernel modules. + + If you are not absolutely sure what you are doing, leave this + option alone! + + config VMSPLIT_3G + bool "3G/1G user/kernel split" + config VMSPLIT_3G_OPT + depends on !X86_PAE + bool "3G/1G user/kernel split (for full 1G low memory)" + config VMSPLIT_2G + bool "2G/2G user/kernel split" + config VMSPLIT_2G_OPT + depends on !X86_PAE + bool "2G/2G user/kernel split (for full 2G low memory)" + config VMSPLIT_1G + bool "1G/3G user/kernel split" +endchoice + +config PAGE_OFFSET + hex + default 0xB0000000 if VMSPLIT_3G_OPT + default 0x80000000 if VMSPLIT_2G + default 0x78000000 if VMSPLIT_2G_OPT + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + depends on X86_32 + +config HIGHMEM + bool + depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) + default y + +config X86_PAE + bool "PAE (Physical Address Extension) Support" + default n + depends on X86_32 && !HIGHMEM4G + select RESOURCES_64BIT + help + PAE is required for NX support, and furthermore enables + larger swapspace support for non-overcommit purposes. It + has the cost of more pagetable lookup overhead, and also + consumes more pagetable space per process. + +# Common NUMA Features +config NUMA + bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" + depends on SMP + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) + default n if X86_PC + default y if (X86_NUMAQ || X86_SUMMIT) + help + Enable NUMA (Non Uniform Memory Access) support. + The kernel will try to allocate memory used by a CPU on the + local memory controller of the CPU and add some more + NUMA awareness to the kernel. + + For i386 this is currently highly experimental and should be only + used for kernel development. It might also cause boot failures. + For x86_64 this is recommended on all multiprocessor Opteron systems. + If the system is EM64T, you should say N unless your system is + EM64T NUMA. + +comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" + depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) + +config K8_NUMA + bool "Old style AMD Opteron NUMA detection" + depends on X86_64 && NUMA && PCI + default y + help + Enable K8 NUMA node topology detection. You should say Y here if + you have a multi processor AMD K8 system. This uses an old + method to read the NUMA configuration directly from the builtin + Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA + instead, which also takes priority if both are compiled in. + +config X86_64_ACPI_NUMA + bool "ACPI NUMA detection" + depends on X86_64 && NUMA && ACPI && PCI + select ACPI_NUMA + default y + help + Enable ACPI SRAT based node topology detection. + +config NUMA_EMU + bool "NUMA emulation" + depends on X86_64 && NUMA + help + Enable NUMA emulation. A flat machine will be split + into virtual nodes when booted with "numa=fake=N", where N is the + number of nodes. This is only useful for debugging. + +config NODES_SHIFT + int + default "6" if X86_64 + default "4" if X86_NUMAQ + default "3" + depends on NEED_MULTIPLE_NODES + +config HAVE_ARCH_BOOTMEM_NODE + bool + depends on X86_32 && NUMA + default y + +config ARCH_HAVE_MEMORY_PRESENT + bool + depends on X86_32 && DISCONTIGMEM + default y + +config NEED_NODE_MEMMAP_SIZE + bool + depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) + default y + +config HAVE_ARCH_ALLOC_REMAP + bool + depends on X86_32 && NUMA + default y + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA) + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64)) + select SPARSEMEM_STATIC if X86_32 + select SPARSEMEM_VMEMMAP_ENABLE if X86_64 + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on X86_32 && ARCH_SPARSEMEM_ENABLE + +config ARCH_MEMORY_PROBE + def_bool X86_64 + depends on MEMORY_HOTPLUG + +source "mm/Kconfig" + +config HIGHPTE + bool "Allocate 3rd-level pagetables from highmem" + depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) + help + The VM uses one page table entry for each page of physical memory. + For systems with a lot of RAM, this can be wasteful of precious + low memory. Setting this option will put user-space page table + entries in high memory. + +config MATH_EMULATION + bool + prompt "Math emulation" if X86_32 + ---help--- + Linux can emulate a math coprocessor (used for floating point + operations) if you don't have one. 486DX and Pentium processors have + a math coprocessor built in, 486SX and 386 do not, unless you added + a 487DX or 387, respectively. (The messages during boot time can + give you some hints here ["man dmesg"].) Everyone needs either a + coprocessor or this emulation. + + If you don't have a math coprocessor, you need to say Y here; if you + say Y here even though you have a coprocessor, the coprocessor will + be used nevertheless. (This behavior can be changed with the kernel + command line option "no387", which comes handy if your coprocessor + is broken. Try "man bootparam" or see the documentation of your boot + loader (lilo or loadlin) about how to pass options to the kernel at + boot time.) This means that it is a good idea to say Y here if you + intend to use this kernel on different machines. + + More information about the internals of the Linux math coprocessor + emulation can be found in . + + If you are not sure, say Y; apart from resulting in a 66 KB bigger + kernel, it won't hurt. + +config MTRR + bool "MTRR (Memory Type Range Register) support" + ---help--- + On Intel P6 family processors (Pentium Pro, Pentium II and later) + the Memory Type Range Registers (MTRRs) may be used to control + processor access to memory ranges. This is most useful if you have + a video (VGA) card on a PCI or AGP bus. Enabling write-combining + allows bus write transfers to be combined into a larger transfer + before bursting over the PCI/AGP bus. This can increase performance + of image write operations 2.5 times or more. Saying Y here creates a + /proc/mtrr file which may be used to manipulate your processor's + MTRRs. Typically the X server should use this. + + This code has a reasonably generic interface so that similar + control registers on other processors can be easily supported + as well: + + The Cyrix 6x86, 6x86MX and M II processors have Address Range + Registers (ARRs) which provide a similar functionality to MTRRs. For + these, the ARRs are used to emulate the MTRRs. + The AMD K6-2 (stepping 8 and above) and K6-3 processors have two + MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing + write-combining. All of these processors are supported by this code + and it makes sense to say Y here if you have one of them. + + Saying Y here also fixes a problem with buggy SMP BIOSes which only + set the MTRRs for the boot CPU and not for the secondary CPUs. This + can lead to all sorts of problems, so it's good to say Y here. + + You can safely say Y even if your machine doesn't have MTRRs, you'll + just add about 9 KB to your kernel. + + See for more information. + +config EFI + bool "Boot from EFI support" + depends on X86_32 && ACPI + default n + ---help--- + This enables the kernel to boot on EFI platforms using + system configuration information passed to it from the firmware. + This also enables the kernel to use any EFI runtime services that are + available (such as the EFI variable services). + + This option is only useful on systems that have EFI firmware + and will result in a kernel image that is ~8k larger. In addition, + you must use the latest ELILO loader available at + in order to take advantage of + kernel initialization using EFI information (neither GRUB nor LILO know + anything about EFI). However, even with this option, the resultant + kernel should continue to boot on existing non-EFI platforms. + +config IRQBALANCE + bool "Enable kernel irq balancing" + depends on X86_32 && SMP && X86_IO_APIC + default y + help + The default yes will allow the kernel to do irq load balancing. + Saying no will keep the kernel from doing irq load balancing. + +# turning this on wastes a bunch of space. +# Summit needs it only when NUMA is on +config BOOT_IOREMAP + bool + depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) + default y + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc//seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + +config CC_STACKPROTECTOR + bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" + depends on X86_64 && EXPERIMENTAL + help + This option turns on the -fstack-protector GCC feature. This + feature puts, at the beginning of critical functions, a canary + value on the stack just before the return address, and validates + the value just before actually returning. Stack based buffer + overflows (that need to overwrite this return address) now also + overwrite the canary, which gets detected and the attack is then + neutralized via a kernel panic. + + This feature requires gcc version 4.2 or above, or a distribution + gcc with the feature backported. Older versions are automatically + detected and for those versions, this configuration option is ignored. + +config CC_STACKPROTECTOR_ALL + bool "Use stack-protector for all functions" + depends on CC_STACKPROTECTOR + help + Normally, GCC only inserts the canary value protection for + functions that use large-ish on-stack buffers. By enabling + this option, GCC will be asked to do this for ALL functions. + +source kernel/Kconfig.hz + +config KEXEC + bool "kexec system call" + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on X86_64 || (X86_32 && HIGHMEM) + help + Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels + which are loaded in the main kernel with kexec-tools into + a specially reserved region and then later executed after + a crash by kdump/kexec. The crash dump kernel must be compiled + to a memory address not used by the main kernel or BIOS using + PHYSICAL_START, or it must be built as a relocatable image + (CONFIG_RELOCATABLE=y). + For more details see Documentation/kdump/kdump.txt + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + default "0x1000000" if X86_NUMAQ + default "0x200000" if X86_64 + default "0x100000" + help + This gives the physical address where the kernel is loaded. + + If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then + bzImage will decompress itself to above physical address and + run from there. Otherwise, bzImage will run from the address where + it has been loaded by the boot loader and will ignore above physical + address. + + In normal kdump cases one does not have to set/change this option + as now bzImage can be compiled as a completely relocatable image + (CONFIG_RELOCATABLE=y) and be used to load and run from a different + address. This option is mainly useful for the folks who don't want + to use a bzImage for capturing the crash dump and want to use a + vmlinux instead. vmlinux is not relocatable hence a kernel needs + to be specifically compiled to run from a specific memory area + (normally a reserved region) and this option comes handy. + + So if you are using bzImage for capturing the crash dump, leave + the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y. + Otherwise if you plan to use vmlinux for capturing the crash dump + change this value to start of the reserved region (Typically 16MB + 0x1000000). In other words, it can be set based on the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel. Typically this parameter is set as + crashkernel=64M@16M. Please take a look at + Documentation/kdump/kdump.txt for more details about crash dumps. + + Usage of bzImage for capturing the crash dump is recommended as + one does not have to build two kernels. Same kernel can be used + as production kernel and capture kernel. Above option should have + gone away after relocatable bzImage support is introduced. But it + is present because there are users out there who continue to use + vmlinux for dump capture. This option should go away down the + line. + + Don't change this unless you know what you are doing. + +config RELOCATABLE + bool "Build a relocatable kernel (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + This builds a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations tend to make the kernel binary about 10% larger, + but are discarded at runtime. + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. + + Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address + it has been loaded at and the compile time physical address + (CONFIG_PHYSICAL_START) is ignored. + +config PHYSICAL_ALIGN + hex + prompt "Alignment value to which kernel should be aligned" if X86_32 + default "0x100000" if X86_32 + default "0x200000" if X86_64 + range 0x2000 0x400000 + help + This value puts the alignment restrictions on physical address + where kernel is loaded and run from. Kernel is compiled for an + address which meets above alignment restriction. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is set, kernel will move itself to nearest + address aligned to above value and run from there. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is not set, kernel will ignore the run time + load address and decompress itself to the address it has been + compiled for and run from there. The address for which kernel is + compiled already meets above alignment restrictions. Hence the + end result is that kernel runs from a physical address meeting + above alignment restrictions. + + Don't change this unless you know what you are doing. + +config HOTPLUG_CPU + bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER + ---help--- + Say Y here to experiment with turning CPUs off and on, and to + enable suspend on SMP systems. CPUs can be controlled through + /sys/devices/system/cpu. + Say N if you want to disable CPU hotplug and don't need to + suspend. + +config COMPAT_VDSO + bool "Compat VDSO support" + default y + depends on X86_32 + help + Map the VDSO to the predictable old-style address too. + ---help--- + Say N here if you are running a sufficiently recent glibc + version (2.3.3 or later), to remove the high-mapped + VDSO mapping and to exclusively use the randomized VDSO. + + If unsure, say Y. + +endmenu + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + depends on X86_64 || (X86_32 && HIGHMEM) + +config MEMORY_HOTPLUG_RESERVE + def_bool X86_64 + depends on (MEMORY_HOTPLUG && DISCONTIGMEM) + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool X86_64 + depends on NUMA + +config OUT_OF_LINE_PFN_TO_PAGE + def_bool X86_64 + depends on DISCONTIGMEM menu "Power management options" depends on !X86_VOYAGER diff --git a/arch/x86/Kconfig.i386 b/arch/x86/Kconfig.i386 index b8b462a91dc..7b8dc2604d5 100644 --- a/arch/x86/Kconfig.i386 +++ b/arch/x86/Kconfig.i386 @@ -14,1058 +14,5 @@ config X86_32 486, 586, Pentiums, and various instruction-set-compatible chips by AMD, Cyrix, and others. -source "init/Kconfig" - -menu "Processor type and features" - -source "kernel/time/Kconfig" - -config SMP - bool "Symmetric multi-processing support" - ---help--- - This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. - - If you say N here, the kernel will run on single and multiprocessor - machines, but will use only one CPU of a multiprocessor machine. If - you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel - will run faster if you say N here. - - Note that if you say Y here and choose architecture "586" or - "Pentium" under "Processor family", the kernel will not work on 486 - architectures. Similarly, multiprocessor kernels for the "PPro" - architecture may not work on all Pentium based boards. - - People using multiprocessor machines who say Y here should also say - Y to "Enhanced Real Time Clock Support", below. The "Advanced Power - Management" code will be disabled if you say Y here. - - See also the , - , - and the SMP-HOWTO available at - . - - If you don't know what to do here, say N. - -choice - prompt "Subarchitecture Type" - default X86_PC - -config X86_PC - bool "PC-compatible" - help - Choose this option if your computer is a standard PC or compatible. - -config X86_ELAN - bool "AMD Elan" - depends on X86_32 - help - Select this for an AMD Elan processor. - - Do not use this option for K6/Athlon/Opteron processors! - - If unsure, choose "PC-compatible" instead. - -config X86_VOYAGER - bool "Voyager (NCR)" - depends on X86_32 - select SMP if !BROKEN - help - Voyager is an MCA-based 32-way capable SMP architecture proprietary - to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. - - *** WARNING *** - - If you do not specifically know you have a Voyager based machine, - say N here, otherwise the kernel you build will not be bootable. - -config X86_NUMAQ - bool "NUMAQ (IBM/Sequent)" - select SMP - select NUMA - depends on X86_32 - help - This option is used for getting Linux to run on a (IBM/Sequent) NUMA - multiquad box. This changes the way that processors are bootstrapped, - and uses Clustered Logical APIC addressing mode instead of Flat Logical. - You will need a new lynxer.elf file to flash your firmware with - send - email to . - -config X86_SUMMIT - bool "Summit/EXA (IBM x440)" - depends on X86_32 && SMP - help - This option is needed for IBM systems that use the Summit/EXA chipset. - In particular, it is needed for the x440. - - If you don't have one of these computers, you should say N here. - If you want to build a NUMA kernel, you must select ACPI. - -config X86_BIGSMP - bool "Support for other sub-arch SMP systems with more than 8 CPUs" - depends on X86_32 && SMP - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - - If you don't have such a system, you should say N here. - -config X86_VISWS - bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 - help - The SGI Visual Workstation series is an IA32-based workstation - based on SGI systems chips with some legacy PC hardware attached. - - Say Y here to create a kernel to run on the SGI 320 or 540. - - A kernel compiled for the Visual Workstation will not run on PCs - and vice versa. See for details. - -config X86_GENERICARCH - bool "Generic architecture (Summit, bigsmp, ES7000, default)" - depends on X86_32 - help - This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. - It is intended for a generic binary kernel. - If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. - -config X86_ES7000 - bool "Support for Unisys ES7000 IA32 series" - depends on X86_32 && SMP - help - Support for Unisys ES7000 systems. Say 'Y' here if this kernel is - supposed to run on an IA32-based Unisys ES7000 system. - Only choose this option if you have such a system, otherwise you - should say N here. - -config X86_VSMP - bool "Support for ScaleMP vSMP" - depends on X86_64 && PCI - help - Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is - supposed to run on these EM64T-based machines. Only choose this option - if you have one of these machines. - -endchoice - -config SCHED_NO_NO_OMIT_FRAME_POINTER - bool "Single-depth WCHAN output" - default y - depends on X86_32 - help - Calculate simpler /proc//wchan values. If this option - is disabled then wchan values will recurse back to the - caller function. This provides more accurate wchan values, - at the expense of slightly more scheduling overhead. - - If in doubt, say "Y". - -config PARAVIRT - bool - depends on X86_32 && !(X86_VISWS || X86_VOYAGER) - help - This changes the kernel so it can modify itself when it is run - under a hypervisor, potentially improving performance significantly - over full virtualization. However, when run without a hypervisor - the kernel is theoretically slower and slightly larger. - -menuconfig PARAVIRT_GUEST - bool "Paravirtualized guest support" - depends on X86_32 - help - Say Y here to get to see options related to running Linux under - various hypervisors. This option alone does not add any kernel code. - - If you say N, all options in this submenu will be skipped and disabled. - -if PARAVIRT_GUEST - -source "arch/x86/xen/Kconfig" - -config VMI - bool "VMI Guest support" - select PARAVIRT - depends on !(X86_VISWS || X86_VOYAGER) - help - VMI provides a paravirtualized interface to the VMware ESX server - (it could be used by other hypervisors in theory too, but is not - at the moment), by linking the kernel to a GPL-ed ROM module - provided by the hypervisor. - -source "arch/x86/lguest/Kconfig" - -endif - -config ACPI_SRAT - bool - default y - depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) - select ACPI_NUMA - -config HAVE_ARCH_PARSE_SRAT - bool - default y - depends on ACPI_SRAT - -config X86_SUMMIT_NUMA - bool - default y - depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) - -config X86_CYCLONE_TIMER - bool - default y - depends on X86_32 && X86_SUMMIT || X86_GENERICARCH - -config ES7000_CLUSTERED_APIC - bool - default y - depends on SMP && X86_ES7000 && MPENTIUMIII - -source "arch/x86/Kconfig.cpu" - -config HPET_TIMER - bool - prompt "HPET Timer Support" if X86_32 - default X86_64 - help - Use the IA-PC HPET (High Precision Event Timer) to manage - time in preference to the PIT and RTC, if a HPET is - present. - HPET is the next generation timer replacing legacy 8254s. - The HPET provides a stable time base on SMP - systems, unlike the TSC, but it is more expensive to access, - as it is off-chip. You can find the HPET spec at - . - - You can safely choose Y here. However, HPET will only be - activated if the platform and the BIOS support this feature. - Otherwise the 8254 will be used for timing services. - - Choose N to continue using the legacy 8254 timer. - -config HPET_EMULATE_RTC - bool - depends on HPET_TIMER && RTC=y - default y - -# Mark as embedded because too many people got it wrong. -# The code disables itself when not needed. -config GART_IOMMU - bool "GART IOMMU support" if EMBEDDED - default y - select SWIOTLB - select AGP - depends on X86_64 && PCI - help - Support for full DMA access of devices with 32bit memory access only - on systems with more than 3GB. This is usually needed for USB, - sound, many IDE/SATA chipsets and some other devices. - Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART - based hardware IOMMU and a software bounce buffer based IOMMU used - on Intel systems and as fallback. - The code is only active when needed (enough memory and limited - device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified - too. - -config CALGARY_IOMMU - bool "IBM Calgary IOMMU support" - select SWIOTLB - depends on X86_64 && PCI && EXPERIMENTAL - help - Support for hardware IOMMUs in IBM's xSeries x366 and x460 - systems. Needed to run systems with more than 3GB of memory - properly with 32-bit PCI devices that do not support DAC - (Double Address Cycle). Calgary also supports bus level - isolation, where all DMAs pass through the IOMMU. This - prevents them from going anywhere except their intended - destination. This catches hard-to-find kernel bugs and - mis-behaving drivers and devices that do not use the DMA-API - properly to set up their DMA buffers. The IOMMU can be - turned off at boot time with the iommu=off parameter. - Normally the kernel will make the right choice by itself. - If unsure, say Y. - -config CALGARY_IOMMU_ENABLED_BY_DEFAULT - bool "Should Calgary be enabled by default?" - default y - depends on CALGARY_IOMMU - help - Should Calgary be enabled by default? if you choose 'y', Calgary - will be used (if it exists). If you choose 'n', Calgary will not be - used even if it exists. If you choose 'n' and would like to use - Calgary anyway, pass 'iommu=calgary' on the kernel command line. - If unsure, say Y. - -# need this always selected by IOMMU for the VIA workaround -config SWIOTLB - bool - help - Support for software bounce buffers used on x86-64 systems - which don't have a hardware IOMMU (e.g. the current generation - of Intel's x86-64 CPUs). Using this PCI devices which can only - access 32-bits of memory can be used on systems with more than - 3 GB of memory. If unsure, say Y. - - -config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 - depends on SMP - default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 - default "8" - help - This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 255 and the - minimum value which makes sense is 2. - - This is purely to save memory - each supported CPU adds - approximately eight kilobytes to the kernel image. - -config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on (X86_64 && SMP) || (X86_32 && X86_HT) - help - SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel Pentium 4 chips with HyperThreading at a - cost of slightly increased overhead in some places. If unsure say - N here. - -config SCHED_MC - bool "Multi-core scheduler support" - depends on (X86_64 && SMP) || (X86_32 && X86_HT) - default y - help - Multi-core scheduler support improves the CPU scheduler's decision - making when dealing with multi-core CPU chips at a cost of slightly - increased overhead in some places. If unsure say N here. - -source "kernel/Kconfig.preempt" - -config X86_UP_APIC - bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) - help - A local APIC (Advanced Programmable Interrupt Controller) is an - integrated interrupt controller in the CPU. If you have a single-CPU - system which has a processor with a local APIC, you can say Y here to - enable and use it. If you say Y here even though your machine doesn't - have a local APIC, then the kernel will still run with no slowdown at - all. The local APIC supports CPU-generated self-interrupts (timer, - performance counters), and the NMI watchdog which detects hard - lockups. - -config X86_UP_IOAPIC - bool "IO-APIC support on uniprocessors" - depends on X86_UP_APIC - help - An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an - SMP-capable replacement for PC-style interrupt controllers. Most - SMP systems and many recent uniprocessor systems have one. - - If you have a single-CPU system with an IO-APIC, you can say Y here - to use it. If you say Y here even though your machine doesn't have - an IO-APIC, then the kernel will still run with no slowdown at all. - -config X86_LOCAL_APIC - bool - depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH)) - default y - -config X86_IO_APIC - bool - depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH)) - default y - -config X86_VISWS_APIC - bool - depends on X86_32 && X86_VISWS - default y - -config X86_MCE - bool "Machine Check Exception" - depends on !X86_VOYAGER - ---help--- - Machine Check Exception support allows the processor to notify the - kernel if it detects a problem (e.g. overheating, component failure). - The action the kernel takes depends on the severity of the problem, - ranging from a warning message on the console, to halting the machine. - Your processor must be a Pentium or newer to support this - check the - flags in /proc/cpuinfo for mce. Note that some older Pentium systems - have a design flaw which leads to false MCE events - hence MCE is - disabled on all P5 processors, unless explicitly enabled with "mce" - as a boot argument. Similarly, if MCE is built in and creates a - problem on some new non-standard machine, you can boot with "nomce" - to disable it. MCE support simply ignores non-MCE processors like - the 386 and 486, so nearly everyone can say Y here. - -config X86_MCE_INTEL - bool "Intel MCE features" - depends on X86_64 && X86_MCE && X86_LOCAL_APIC - default y - help - Additional support for intel specific MCE features such as - the thermal monitor. - -config X86_MCE_AMD - bool "AMD MCE features" - depends on X86_64 && X86_MCE && X86_LOCAL_APIC - default y - help - Additional support for AMD specific MCE features such as - the DRAM Error Threshold. - -config X86_MCE_NONFATAL - tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" - depends on X86_32 && X86_MCE - help - Enabling this feature starts a timer that triggers every 5 seconds which - will look at the machine check registers to see if anything happened. - Non-fatal problems automatically get corrected (but still logged). - Disable this if you don't want to see these messages. - Seeing the messages this option prints out may be indicative of dying - or out-of-spec (ie, overclocked) hardware. - This option only does something on certain CPUs. - (AMD Athlon/Duron and Intel Pentium 4) - -config X86_MCE_P4THERMAL - bool "check for P4 thermal throttling interrupt." - depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS - help - Enabling this feature will cause a message to be printed when the P4 - enters thermal throttling. - -config VM86 - bool "Enable VM86 support" if EMBEDDED - default y - depends on X86_32 - help - This option is required by programs like DOSEMU to run 16-bit legacy - code on X86 processors. It also may be needed by software like - XFree86 to initialize some video cards via BIOS. Disabling this - option saves about 6k. - -config TOSHIBA - tristate "Toshiba Laptop support" - depends on X86_32 - ---help--- - This adds a driver to safely access the System Management Mode of - the CPU on Toshiba portables with a genuine Toshiba BIOS. It does - not work on models with a Phoenix BIOS. The System Management Mode - is used to set the BIOS and power saving options on Toshiba portables. - - For information on utilities to make use of this driver see the - Toshiba Linux utilities web site at: - . - - Say Y if you intend to run this kernel on a Toshiba portable. - Say N otherwise. - -config I8K - tristate "Dell laptop support" - depends on X86_32 - ---help--- - This adds a driver to safely access the System Management Mode - of the CPU on the Dell Inspiron 8000. The System Management Mode - is used to read cpu temperature and cooling fan status and to - control the fans on the I8K portables. - - This driver has been tested only on the Inspiron 8000 but it may - also work with other Dell laptops. You can force loading on other - models by passing the parameter `force=1' to the module. Use at - your own risk. - - For information on utilities to make use of this driver see the - I8K Linux utilities web site at: - - - Say Y if you intend to run this kernel on a Dell Inspiron 8000. - Say N otherwise. - -config X86_REBOOTFIXUPS - bool "Enable X86 board specific fixups for reboot" - depends on X86_32 && X86 - default n - ---help--- - This enables chipset and/or board specific fixups to be done - in order to get reboot to work correctly. This is only needed on - some combinations of hardware and BIOS. The symptom, for which - this config is intended, is when reboot ends with a stalled/hung - system. - - Currently, the only fixup is for the Geode machines using - CS5530A and CS5536 chipsets. - - Say Y if you want to enable the fixup. Currently, it's safe to - enable this option even if you don't need it. - Say N otherwise. - -config MICROCODE - tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support" - select FW_LOADER - ---help--- - If you say Y here, you will be able to update the microcode on - Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II, - Pentium III, Pentium 4, Xeon etc. You will obviously need the - actual microcode binary data itself which is not shipped with the - Linux kernel. - - For latest news and information on obtaining all the required - ingredients for this driver, check: - . - - To compile this driver as a module, choose M here: the - module will be called microcode. - -config MICROCODE_OLD_INTERFACE - bool - depends on MICROCODE - default y - -config X86_MSR - tristate "/dev/cpu/*/msr - Model-specific register support" - help - This device gives privileged processes access to the x86 - Model-Specific Registers (MSRs). It is a character device with - major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. - MSR accesses are directed to a specific CPU on multi-processor - systems. - -config X86_CPUID - tristate "/dev/cpu/*/cpuid - CPU information support" - help - This device gives processes access to the x86 CPUID instruction to - be executed on a specific processor. It is a character device - with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to - /dev/cpu/31/cpuid. - -choice - prompt "High Memory Support" - default HIGHMEM4G if !X86_NUMAQ - default HIGHMEM64G if X86_NUMAQ - depends on X86_32 - -config NOHIGHMEM - bool "off" - depends on !X86_NUMAQ - ---help--- - Linux can use up to 64 Gigabytes of physical memory on x86 systems. - However, the address space of 32-bit x86 processors is only 4 - Gigabytes large. That means that, if you have a large amount of - physical memory, not all of it can be "permanently mapped" by the - kernel. The physical memory that's not permanently mapped is called - "high memory". - - If you are compiling a kernel which will never run on a machine with - more than 1 Gigabyte total physical RAM, answer "off" here (default - choice and suitable for most users). This will result in a "3GB/1GB" - split: 3GB are mapped so that each process sees a 3GB virtual memory - space and the remaining part of the 4GB virtual memory space is used - by the kernel to permanently map as much physical memory as - possible. - - If the machine has between 1 and 4 Gigabytes physical RAM, then - answer "4GB" here. - - If more than 4 Gigabytes is used then answer "64GB" here. This - selection turns Intel PAE (Physical Address Extension) mode on. - PAE implements 3-level paging on IA32 processors. PAE is fully - supported by Linux, PAE mode is implemented on all recent Intel - processors (Pentium Pro and better). NOTE: If you say "64GB" here, - then the kernel will not boot on CPUs that don't support PAE! - - The actual amount of total physical memory will either be - auto detected or can be forced by using a kernel command line option - such as "mem=256M". (Try "man bootparam" or see the documentation of - your boot loader (lilo or loadlin) about how to pass options to the - kernel at boot time.) - - If unsure, say "off". - -config HIGHMEM4G - bool "4GB" - depends on !X86_NUMAQ - help - Select this if you have a 32-bit processor and between 1 and 4 - gigabytes of physical RAM. - -config HIGHMEM64G - bool "64GB" - depends on !M386 && !M486 - select X86_PAE - help - Select this if you have a 32-bit processor and more than 4 - gigabytes of physical RAM. - -endchoice - -choice - depends on EXPERIMENTAL - prompt "Memory split" if EMBEDDED - default VMSPLIT_3G - depends on X86_32 - help - Select the desired split between kernel and user memory. - - If the address range available to the kernel is less than the - physical memory installed, the remaining memory will be available - as "high memory". Accessing high memory is a little more costly - than low memory, as it needs to be mapped into the kernel first. - Note that increasing the kernel address space limits the range - available to user programs, making the address space there - tighter. Selecting anything other than the default 3G/1G split - will also likely make your kernel incompatible with binary-only - kernel modules. - - If you are not absolutely sure what you are doing, leave this - option alone! - - config VMSPLIT_3G - bool "3G/1G user/kernel split" - config VMSPLIT_3G_OPT - depends on !X86_PAE - bool "3G/1G user/kernel split (for full 1G low memory)" - config VMSPLIT_2G - bool "2G/2G user/kernel split" - config VMSPLIT_2G_OPT - depends on !X86_PAE - bool "2G/2G user/kernel split (for full 2G low memory)" - config VMSPLIT_1G - bool "1G/3G user/kernel split" -endchoice - -config PAGE_OFFSET - hex - default 0xB0000000 if VMSPLIT_3G_OPT - default 0x80000000 if VMSPLIT_2G - default 0x78000000 if VMSPLIT_2G_OPT - default 0x40000000 if VMSPLIT_1G - default 0xC0000000 - depends on X86_32 - -config HIGHMEM - bool - depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) - default y - -config X86_PAE - bool "PAE (Physical Address Extension) Support" - default n - depends on X86_32 && !HIGHMEM4G - select RESOURCES_64BIT - help - PAE is required for NX support, and furthermore enables - larger swapspace support for non-overcommit purposes. It - has the cost of more pagetable lookup overhead, and also - consumes more pagetable space per process. - -# Common NUMA Features -config NUMA - bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" - depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) - default n if X86_PC - default y if (X86_NUMAQ || X86_SUMMIT) - help - Enable NUMA (Non Uniform Memory Access) support. - The kernel will try to allocate memory used by a CPU on the - local memory controller of the CPU and add some more - NUMA awareness to the kernel. - - For i386 this is currently highly experimental and should be only - used for kernel development. It might also cause boot failures. - For x86_64 this is recommended on all multiprocessor Opteron systems. - If the system is EM64T, you should say N unless your system is - EM64T NUMA. - -comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" - depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) - -config K8_NUMA - bool "Old style AMD Opteron NUMA detection" - depends on X86_64 && NUMA && PCI - default y - help - Enable K8 NUMA node topology detection. You should say Y here if - you have a multi processor AMD K8 system. This uses an old - method to read the NUMA configuration directly from the builtin - Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA - instead, which also takes priority if both are compiled in. - -config X86_64_ACPI_NUMA - bool "ACPI NUMA detection" - depends on X86_64 && NUMA && ACPI && PCI - select ACPI_NUMA - default y - help - Enable ACPI SRAT based node topology detection. - -config NUMA_EMU - bool "NUMA emulation" - depends on X86_64 && NUMA - help - Enable NUMA emulation. A flat machine will be split - into virtual nodes when booted with "numa=fake=N", where N is the - number of nodes. This is only useful for debugging. - -config NODES_SHIFT - int - default "6" if X86_64 - default "4" if X86_NUMAQ - default "3" - depends on NEED_MULTIPLE_NODES - -config HAVE_ARCH_BOOTMEM_NODE - bool - depends on X86_32 && NUMA - default y - -config ARCH_HAVE_MEMORY_PRESENT - bool - depends on X86_32 && DISCONTIGMEM - default y - -config NEED_NODE_MEMMAP_SIZE - bool - depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) - default y - -config HAVE_ARCH_ALLOC_REMAP - bool - depends on X86_32 && NUMA - default y - -config ARCH_FLATMEM_ENABLE - def_bool y - depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA) - -config ARCH_DISCONTIGMEM_ENABLE - def_bool y - depends on NUMA - -config ARCH_DISCONTIGMEM_DEFAULT - def_bool y - depends on NUMA - -config ARCH_SPARSEMEM_ENABLE - def_bool y - depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64)) - select SPARSEMEM_STATIC if X86_32 - select SPARSEMEM_VMEMMAP_ENABLE if X86_64 - -config ARCH_SELECT_MEMORY_MODEL - def_bool y - depends on X86_32 && ARCH_SPARSEMEM_ENABLE - -config ARCH_MEMORY_PROBE - def_bool X86_64 - depends on MEMORY_HOTPLUG - -source "mm/Kconfig" - -config HIGHPTE - bool "Allocate 3rd-level pagetables from highmem" - depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) - help - The VM uses one page table entry for each page of physical memory. - For systems with a lot of RAM, this can be wasteful of precious - low memory. Setting this option will put user-space page table - entries in high memory. - -config MATH_EMULATION - bool - prompt "Math emulation" if X86_32 - ---help--- - Linux can emulate a math coprocessor (used for floating point - operations) if you don't have one. 486DX and Pentium processors have - a math coprocessor built in, 486SX and 386 do not, unless you added - a 487DX or 387, respectively. (The messages during boot time can - give you some hints here ["man dmesg"].) Everyone needs either a - coprocessor or this emulation. - - If you don't have a math coprocessor, you need to say Y here; if you - say Y here even though you have a coprocessor, the coprocessor will - be used nevertheless. (This behavior can be changed with the kernel - command line option "no387", which comes handy if your coprocessor - is broken. Try "man bootparam" or see the documentation of your boot - loader (lilo or loadlin) about how to pass options to the kernel at - boot time.) This means that it is a good idea to say Y here if you - intend to use this kernel on different machines. - - More information about the internals of the Linux math coprocessor - emulation can be found in . - - If you are not sure, say Y; apart from resulting in a 66 KB bigger - kernel, it won't hurt. - -config MTRR - bool "MTRR (Memory Type Range Register) support" - ---help--- - On Intel P6 family processors (Pentium Pro, Pentium II and later) - the Memory Type Range Registers (MTRRs) may be used to control - processor access to memory ranges. This is most useful if you have - a video (VGA) card on a PCI or AGP bus. Enabling write-combining - allows bus write transfers to be combined into a larger transfer - before bursting over the PCI/AGP bus. This can increase performance - of image write operations 2.5 times or more. Saying Y here creates a - /proc/mtrr file which may be used to manipulate your processor's - MTRRs. Typically the X server should use this. - - This code has a reasonably generic interface so that similar - control registers on other processors can be easily supported - as well: - - The Cyrix 6x86, 6x86MX and M II processors have Address Range - Registers (ARRs) which provide a similar functionality to MTRRs. For - these, the ARRs are used to emulate the MTRRs. - The AMD K6-2 (stepping 8 and above) and K6-3 processors have two - MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing - write-combining. All of these processors are supported by this code - and it makes sense to say Y here if you have one of them. - - Saying Y here also fixes a problem with buggy SMP BIOSes which only - set the MTRRs for the boot CPU and not for the secondary CPUs. This - can lead to all sorts of problems, so it's good to say Y here. - - You can safely say Y even if your machine doesn't have MTRRs, you'll - just add about 9 KB to your kernel. - - See for more information. - -config EFI - bool "Boot from EFI support" - depends on X86_32 && ACPI - default n - ---help--- - This enables the kernel to boot on EFI platforms using - system configuration information passed to it from the firmware. - This also enables the kernel to use any EFI runtime services that are - available (such as the EFI variable services). - - This option is only useful on systems that have EFI firmware - and will result in a kernel image that is ~8k larger. In addition, - you must use the latest ELILO loader available at - in order to take advantage of - kernel initialization using EFI information (neither GRUB nor LILO know - anything about EFI). However, even with this option, the resultant - kernel should continue to boot on existing non-EFI platforms. - -config IRQBALANCE - bool "Enable kernel irq balancing" - depends on X86_32 && SMP && X86_IO_APIC - default y - help - The default yes will allow the kernel to do irq load balancing. - Saying no will keep the kernel from doing irq load balancing. - -# turning this on wastes a bunch of space. -# Summit needs it only when NUMA is on -config BOOT_IOREMAP - bool - depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) - default y - -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - -config CC_STACKPROTECTOR - bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 && EXPERIMENTAL - help - This option turns on the -fstack-protector GCC feature. This - feature puts, at the beginning of critical functions, a canary - value on the stack just before the return address, and validates - the value just before actually returning. Stack based buffer - overflows (that need to overwrite this return address) now also - overwrite the canary, which gets detected and the attack is then - neutralized via a kernel panic. - - This feature requires gcc version 4.2 or above, or a distribution - gcc with the feature backported. Older versions are automatically - detected and for those versions, this configuration option is ignored. - -config CC_STACKPROTECTOR_ALL - bool "Use stack-protector for all functions" - depends on CC_STACKPROTECTOR - help - Normally, GCC only inserts the canary value protection for - functions that use large-ish on-stack buffers. By enabling - this option, GCC will be asked to do this for ALL functions. - -source kernel/Kconfig.hz - -config KEXEC - bool "kexec system call" - help - kexec is a system call that implements the ability to shutdown your - current kernel, and to start another kernel. It is like a reboot - but it is independent of the system firmware. And like a reboot - you can start any kernel with it, not just Linux. - - The name comes from the similarity to the exec system call. - - It is an ongoing process to be certain the hardware in a machine - is properly shutdown, so do not be surprised if this code does not - initially work for you. It may help to enable device hotplugging - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. - -config CRASH_DUMP - bool "kernel crash dumps (EXPERIMENTAL)" - depends on EXPERIMENTAL - depends on X86_64 || (X86_32 && HIGHMEM) - help - Generate crash dump after being started by kexec. - This should be normally only set in special crash dump kernels - which are loaded in the main kernel with kexec-tools into - a specially reserved region and then later executed after - a crash by kdump/kexec. The crash dump kernel must be compiled - to a memory address not used by the main kernel or BIOS using - PHYSICAL_START, or it must be built as a relocatable image - (CONFIG_RELOCATABLE=y). - For more details see Documentation/kdump/kdump.txt - -config PHYSICAL_START - hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) - default "0x1000000" if X86_NUMAQ - default "0x200000" if X86_64 - default "0x100000" - help - This gives the physical address where the kernel is loaded. - - If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then - bzImage will decompress itself to above physical address and - run from there. Otherwise, bzImage will run from the address where - it has been loaded by the boot loader and will ignore above physical - address. - - In normal kdump cases one does not have to set/change this option - as now bzImage can be compiled as a completely relocatable image - (CONFIG_RELOCATABLE=y) and be used to load and run from a different - address. This option is mainly useful for the folks who don't want - to use a bzImage for capturing the crash dump and want to use a - vmlinux instead. vmlinux is not relocatable hence a kernel needs - to be specifically compiled to run from a specific memory area - (normally a reserved region) and this option comes handy. - - So if you are using bzImage for capturing the crash dump, leave - the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y. - Otherwise if you plan to use vmlinux for capturing the crash dump - change this value to start of the reserved region (Typically 16MB - 0x1000000). In other words, it can be set based on the "X" value as - specified in the "crashkernel=YM@XM" command line boot parameter - passed to the panic-ed kernel. Typically this parameter is set as - crashkernel=64M@16M. Please take a look at - Documentation/kdump/kdump.txt for more details about crash dumps. - - Usage of bzImage for capturing the crash dump is recommended as - one does not have to build two kernels. Same kernel can be used - as production kernel and capture kernel. Above option should have - gone away after relocatable bzImage support is introduced. But it - is present because there are users out there who continue to use - vmlinux for dump capture. This option should go away down the - line. - - Don't change this unless you know what you are doing. - -config RELOCATABLE - bool "Build a relocatable kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This builds a kernel image that retains relocation information - so it can be loaded someplace besides the default 1MB. - The relocations tend to make the kernel binary about 10% larger, - but are discarded at runtime. - - One use is for the kexec on panic case where the recovery kernel - must live at a different physical address than the primary - kernel. - - Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address - it has been loaded at and the compile time physical address - (CONFIG_PHYSICAL_START) is ignored. - -config PHYSICAL_ALIGN - hex - prompt "Alignment value to which kernel should be aligned" if X86_32 - default "0x100000" if X86_32 - default "0x200000" if X86_64 - range 0x2000 0x400000 - help - This value puts the alignment restrictions on physical address - where kernel is loaded and run from. Kernel is compiled for an - address which meets above alignment restriction. - - If bootloader loads the kernel at a non-aligned address and - CONFIG_RELOCATABLE is set, kernel will move itself to nearest - address aligned to above value and run from there. - - If bootloader loads the kernel at a non-aligned address and - CONFIG_RELOCATABLE is not set, kernel will ignore the run time - load address and decompress itself to the address it has been - compiled for and run from there. The address for which kernel is - compiled already meets above alignment restrictions. Hence the - end result is that kernel runs from a physical address meeting - above alignment restrictions. - - Don't change this unless you know what you are doing. - -config HOTPLUG_CPU - bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER - ---help--- - Say Y here to experiment with turning CPUs off and on, and to - enable suspend on SMP systems. CPUs can be controlled through - /sys/devices/system/cpu. - Say N if you want to disable CPU hotplug and don't need to - suspend. - -config COMPAT_VDSO - bool "Compat VDSO support" - default y - depends on X86_32 - help - Map the VDSO to the predictable old-style address too. - ---help--- - Say N here if you are running a sufficiently recent glibc - version (2.3.3 or later), to remove the high-mapped - VDSO mapping and to exclusively use the randomized VDSO. - - If unsure, say Y. - -endmenu - -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - depends on X86_64 || (X86_32 && HIGHMEM) - -config MEMORY_HOTPLUG_RESERVE - def_bool X86_64 - depends on (MEMORY_HOTPLUG && DISCONTIGMEM) - -config HAVE_ARCH_EARLY_PFN_TO_NID - def_bool X86_64 - depends on NUMA - -config OUT_OF_LINE_PFN_TO_PAGE - def_bool X86_64 - depends on DISCONTIGMEM source "arch/x86/Kconfig" diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64 index e441062472a..b262aaec67c 100644 --- a/arch/x86/Kconfig.x86_64 +++ b/arch/x86/Kconfig.x86_64 @@ -17,494 +17,4 @@ config X86_64 classical 32-bit x86 architecture. For details see . -source "init/Kconfig" - - -menu "Processor type and features" - -source "kernel/time/Kconfig" - -choice - prompt "Subarchitecture Type" - default X86_PC - -config X86_PC - bool "PC-compatible" - help - Choose this option if your computer is a standard PC or compatible. - -config X86_VSMP - bool "Support for ScaleMP vSMP" - depends on X86_64 && PCI - help - Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is - supposed to run on these EM64T-based machines. Only choose this option - if you have one of these machines. - -endchoice - -source "arch/x86/Kconfig.cpu" - -config MICROCODE - tristate "/dev/cpu/microcode - Intel CPU microcode support" - select FW_LOADER - ---help--- - If you say Y here the 'File systems' section, you will be - able to update the microcode on Intel processors. You will - obviously need the actual microcode binary data itself which is - not shipped with the Linux kernel. - - For latest news and information on obtaining all the required - ingredients for this driver, check: - . - - To compile this driver as a module, choose M here: the - module will be called microcode. - If you use modprobe or kmod you may also want to add the line - 'alias char-major-10-184 microcode' to your /etc/modules.conf file. - -config MICROCODE_OLD_INTERFACE - bool - depends on MICROCODE - default y - -config X86_MSR - tristate "/dev/cpu/*/msr - Model-specific register support" - help - This device gives privileged processes access to the x86 - Model-Specific Registers (MSRs). It is a character device with - major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. - MSR accesses are directed to a specific CPU on multi-processor - systems. - -config X86_CPUID - tristate "/dev/cpu/*/cpuid - CPU information support" - help - This device gives processes access to the x86 CPUID instruction to - be executed on a specific processor. It is a character device - with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to - /dev/cpu/31/cpuid. - -config MATH_EMULATION - bool - -config MCA - bool - -config EISA - bool - -config X86_IO_APIC - bool - default y - -config X86_LOCAL_APIC - bool - default y - -config MTRR - bool "MTRR (Memory Type Range Register) support" - ---help--- - On Intel P6 family processors (Pentium Pro, Pentium II and later) - the Memory Type Range Registers (MTRRs) may be used to control - processor access to memory ranges. This is most useful if you have - a video (VGA) card on a PCI or AGP bus. Enabling write-combining - allows bus write transfers to be combined into a larger transfer - before bursting over the PCI/AGP bus. This can increase performance - of image write operations 2.5 times or more. Saying Y here creates a - /proc/mtrr file which may be used to manipulate your processor's - MTRRs. Typically the X server should use this. - - This code has a reasonably generic interface so that similar - control registers on other processors can be easily supported - as well. - - Saying Y here also fixes a problem with buggy SMP BIOSes which only - set the MTRRs for the boot CPU and not for the secondary CPUs. This - can lead to all sorts of problems, so it's good to say Y here. - - Just say Y here, all x86-64 machines support MTRRs. - - See for more information. - -config SMP - bool "Symmetric multi-processing support" - ---help--- - This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. - - If you say N here, the kernel will run on single and multiprocessor - machines, but will use only one CPU of a multiprocessor machine. If - you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel - will run faster if you say N here. - - If you don't know what to do here, say N. - -config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on SMP - default n - help - SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel Pentium 4 chips with HyperThreading at a - cost of slightly increased overhead in some places. If unsure say - N here. - -config SCHED_MC - bool "Multi-core scheduler support" - depends on SMP - default y - help - Multi-core scheduler support improves the CPU scheduler's decision - making when dealing with multi-core CPU chips at a cost of slightly - increased overhead in some places. If unsure say N here. - -source "kernel/Kconfig.preempt" - -config NUMA - bool "Non Uniform Memory Access (NUMA) Support" - depends on SMP - help - Enable NUMA (Non Uniform Memory Access) support. The kernel - will try to allocate memory used by a CPU on the local memory - controller of the CPU and add some more NUMA awareness to the kernel. - This code is recommended on all multiprocessor Opteron systems. - If the system is EM64T, you should say N unless your system is EM64T - NUMA. - -config K8_NUMA - bool "Old style AMD Opteron NUMA detection" - depends on X86_64 && NUMA && PCI - default y - help - Enable K8 NUMA node topology detection. You should say Y here if - you have a multi processor AMD K8 system. This uses an old - method to read the NUMA configuration directly from the builtin - Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA - instead, which also takes priority if both are compiled in. - -config NODES_SHIFT - int - default "6" if X86_64 - depends on NEED_MULTIPLE_NODES - -# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig. - -config X86_64_ACPI_NUMA - bool "ACPI NUMA detection" - depends on X86_64 && NUMA - select ACPI - select PCI - select ACPI_NUMA - default y - help - Enable ACPI SRAT based node topology detection. - -config NUMA_EMU - bool "NUMA emulation" - depends on X86_64 && NUMA - help - Enable NUMA emulation. A flat machine will be split - into virtual nodes when booted with "numa=fake=N", where N is the - number of nodes. This is only useful for debugging. - -config ARCH_DISCONTIGMEM_ENABLE - bool - depends on NUMA - default y - -config ARCH_DISCONTIGMEM_DEFAULT - def_bool y - depends on NUMA - -config ARCH_SPARSEMEM_ENABLE - def_bool y - depends on (NUMA || EXPERIMENTAL) - select SPARSEMEM_VMEMMAP_ENABLE - -config ARCH_MEMORY_PROBE - def_bool X86_64 - depends on MEMORY_HOTPLUG - -config ARCH_FLATMEM_ENABLE - def_bool y - depends on !NUMA - -source "mm/Kconfig" - -config MEMORY_HOTPLUG_RESERVE - def_bool X86_64 - depends on (MEMORY_HOTPLUG && DISCONTIGMEM) - -config HAVE_ARCH_EARLY_PFN_TO_NID - def_bool X86_64 - depends on NUMA - -config OUT_OF_LINE_PFN_TO_PAGE - def_bool X86_64 - depends on DISCONTIGMEM - -config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 - depends on SMP - default "8" - help - This allows you to specify the maximum number of CPUs which this - kernel will support. Current maximum is 255 CPUs due to - APIC addressing limits. Less depending on the hardware. - - This is purely to save memory - each supported CPU requires - memory in the static kernel configuration. - -config PHYSICAL_ALIGN - hex - default "0x200000" if X86_64 - -config HOTPLUG_CPU - bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && HOTPLUG && EXPERIMENTAL - help - Say Y here to experiment with turning CPUs off and on. CPUs - can be controlled through /sys/devices/system/cpu/cpu#. - This is also required for suspend/hibernation on SMP systems. - - Say N if you want to disable CPU hotplug and don't need to - suspend. - -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - -config HPET_TIMER - bool - default y - help - Use the IA-PC HPET (High Precision Event Timer) to manage - time in preference to the PIT and RTC, if a HPET is - present. The HPET provides a stable time base on SMP - systems, unlike the TSC, but it is more expensive to access, - as it is off-chip. You can find the HPET spec at - . - -config HPET_EMULATE_RTC - bool - depends on HPET_TIMER && RTC=y - default y - -# Mark as embedded because too many people got it wrong. -# The code disables itself when not needed. -config GART_IOMMU - bool "GART IOMMU support" if EMBEDDED - default y - select SWIOTLB - select AGP - depends on X86_64 && PCI - help - Support for full DMA access of devices with 32bit memory access only - on systems with more than 3GB. This is usually needed for USB, - sound, many IDE/SATA chipsets and some other devices. - Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART - based hardware IOMMU and a software bounce buffer based IOMMU used - on Intel systems and as fallback. - The code is only active when needed (enough memory and limited - device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified - too. - -config CALGARY_IOMMU - bool "IBM Calgary IOMMU support" - select SWIOTLB - depends on X86_64 && PCI && EXPERIMENTAL - help - Support for hardware IOMMUs in IBM's xSeries x366 and x460 - systems. Needed to run systems with more than 3GB of memory - properly with 32-bit PCI devices that do not support DAC - (Double Address Cycle). Calgary also supports bus level - isolation, where all DMAs pass through the IOMMU. This - prevents them from going anywhere except their intended - destination. This catches hard-to-find kernel bugs and - mis-behaving drivers and devices that do not use the DMA-API - properly to set up their DMA buffers. The IOMMU can be - turned off at boot time with the iommu=off parameter. - Normally the kernel will make the right choice by itself. - If unsure, say Y. - -config CALGARY_IOMMU_ENABLED_BY_DEFAULT - bool "Should Calgary be enabled by default?" - default y - depends on CALGARY_IOMMU - help - Should Calgary be enabled by default? if you choose 'y', Calgary - will be used (if it exists). If you choose 'n', Calgary will not be - used even if it exists. If you choose 'n' and would like to use - Calgary anyway, pass 'iommu=calgary' on the kernel command line. - If unsure, say Y. - -# need this always selected by IOMMU for the VIA workaround -config SWIOTLB - bool - help - Support for software bounce buffers used on x86-64 systems - which don't have a hardware IOMMU (e.g. the current generation - of Intel's x86-64 CPUs). Using this PCI devices which can only - access 32-bits of memory can be used on systems with more than - 3 GB of memory. If unsure, say Y. - -config X86_MCE - bool "Machine check support" if EMBEDDED - default y - help - Include a machine check error handler to report hardware errors. - This version will require the mcelog utility to decode some - machine check error logs. See - ftp://ftp.x86-64.org/pub/linux/tools/mcelog - -config X86_MCE_INTEL - bool "Intel MCE features" - depends on X86_64 && X86_MCE && X86_LOCAL_APIC - default y - help - Additional support for intel specific MCE features such as - the thermal monitor. - -config X86_MCE_AMD - bool "AMD MCE features" - depends on X86_64 && X86_MCE && X86_LOCAL_APIC - default y - help - Additional support for AMD specific MCE features such as - the DRAM Error Threshold. - -config KEXEC - bool "kexec system call" - help - kexec is a system call that implements the ability to shutdown your - current kernel, and to start another kernel. It is like a reboot - but it is independent of the system firmware. And like a reboot - you can start any kernel with it, not just Linux. - - The name comes from the similarity to the exec system call. - - It is an ongoing process to be certain the hardware in a machine - is properly shutdown, so do not be surprised if this code does not - initially work for you. It may help to enable device hotplugging - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. - -config CRASH_DUMP - bool "kernel crash dumps (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - Generate crash dump after being started by kexec. - This should be normally only set in special crash dump kernels - which are loaded in the main kernel with kexec-tools into - a specially reserved region and then later executed after - a crash by kdump/kexec. The crash dump kernel must be compiled - to a memory address not used by the main kernel or BIOS using - PHYSICAL_START, or it must be built as a relocatable image - (CONFIG_RELOCATABLE=y). - For more details see Documentation/kdump/kdump.txt - -config RELOCATABLE - bool "Build a relocatable kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - Builds a relocatable kernel. This enables loading and running - a kernel binary from a different physical address than it has - been compiled for. - - One use is for the kexec on panic case where the recovery kernel - must live at a different physical address than the primary - kernel. - - Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address - it has been loaded at and the compile time physical address - (CONFIG_PHYSICAL_START) is ignored. - -config PHYSICAL_START - hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) - default "0x200000" - help - This gives the physical address where the kernel is loaded. It - should be aligned to 2MB boundary. - - If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then - bzImage will decompress itself to above physical address and - run from there. Otherwise, bzImage will run from the address where - it has been loaded by the boot loader and will ignore above physical - address. - - In normal kdump cases one does not have to set/change this option - as now bzImage can be compiled as a completely relocatable image - (CONFIG_RELOCATABLE=y) and be used to load and run from a different - address. This option is mainly useful for the folks who don't want - to use a bzImage for capturing the crash dump and want to use a - vmlinux instead. - - So if you are using bzImage for capturing the crash dump, leave - the value here unchanged to 0x200000 and set CONFIG_RELOCATABLE=y. - Otherwise if you plan to use vmlinux for capturing the crash dump - change this value to start of the reserved region (Typically 16MB - 0x1000000). In other words, it can be set based on the "X" value as - specified in the "crashkernel=YM@XM" command line boot parameter - passed to the panic-ed kernel. Typically this parameter is set as - crashkernel=64M@16M. Please take a look at - Documentation/kdump/kdump.txt for more details about crash dumps. - - Usage of bzImage for capturing the crash dump is advantageous as - one does not have to build two kernels. Same kernel can be used - as production kernel and capture kernel. - - Don't change this unless you know what you are doing. - -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - -config CC_STACKPROTECTOR - bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 && EXPERIMENTAL - help - This option turns on the -fstack-protector GCC feature. This - feature puts, at the beginning of critical functions, a canary - value on the stack just before the return address, and validates - the value just before actually returning. Stack based buffer - overflows (that need to overwrite this return address) now also - overwrite the canary, which gets detected and the attack is then - neutralized via a kernel panic. - - This feature requires gcc version 4.2 or above, or a distribution - gcc with the feature backported. Older versions are automatically - detected and for those versions, this configuration option is ignored. - -config CC_STACKPROTECTOR_ALL - bool "Use stack-protector for all functions" - depends on CC_STACKPROTECTOR - help - Normally, GCC only inserts the canary value protection for - functions that use large-ish on-stack buffers. By enabling - this option, GCC will be asked to do this for ALL functions. - -source kernel/Kconfig.hz - -config K8_NB - def_bool X86_64 - depends on AGP_AMD64 || GART_IOMMU || (PCI && NUMA) - -endmenu - source "arch/x86/Kconfig" -- cgit v1.2.3 From 9c900a9c9d9351e55ab6a84e12e3a52c474c7c8b Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 10 Nov 2007 20:01:56 +0100 Subject: kconfig: factor out code in confdata.c This patch introduce no functional changes. Signed-off-by: Sam Ravnborg Cc: Roman Zippel --- scripts/kconfig/confdata.c | 119 ++++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 55 deletions(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index b2913e9da49..e0f402f3b75 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -83,6 +83,68 @@ char *conf_get_default_confname(void) return name; } +static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) +{ + char *p2; + + switch (sym->type) { + case S_TRISTATE: + if (p[0] == 'm') { + sym->def[def].tri = mod; + sym->flags |= def_flags; + break; + } + case S_BOOLEAN: + if (p[0] == 'y') { + sym->def[def].tri = yes; + sym->flags |= def_flags; + break; + } + if (p[0] == 'n') { + sym->def[def].tri = no; + sym->flags |= def_flags; + break; + } + conf_warning("symbol value '%s' invalid for %s", p, sym->name); + break; + case S_OTHER: + if (*p != '"') { + for (p2 = p; *p2 && !isspace(*p2); p2++) + ; + sym->type = S_STRING; + goto done; + } + case S_STRING: + if (*p++ != '"') + break; + for (p2 = p; (p2 = strpbrk(p2, "\"\\")); p2++) { + if (*p2 == '"') { + *p2 = 0; + break; + } + memmove(p2, p2 + 1, strlen(p2)); + } + if (!p2) { + conf_warning("invalid string found"); + return 1; + } + case S_INT: + case S_HEX: + done: + if (sym_string_valid(sym, p)) { + sym->def[def].val = strdup(p); + sym->flags |= def_flags; + } else { + conf_warning("symbol value '%s' invalid for %s", p, sym->name); + return 1; + } + break; + default: + ; + } + return 0; +} + int conf_read_simple(const char *name, int def) { FILE *in = NULL; @@ -213,61 +275,8 @@ load: conf_warning("trying to reassign symbol %s", sym->name); break; } - switch (sym->type) { - case S_TRISTATE: - if (p[0] == 'm') { - sym->def[def].tri = mod; - sym->flags |= def_flags; - break; - } - case S_BOOLEAN: - if (p[0] == 'y') { - sym->def[def].tri = yes; - sym->flags |= def_flags; - break; - } - if (p[0] == 'n') { - sym->def[def].tri = no; - sym->flags |= def_flags; - break; - } - conf_warning("symbol value '%s' invalid for %s", p, sym->name); - break; - case S_OTHER: - if (*p != '"') { - for (p2 = p; *p2 && !isspace(*p2); p2++) - ; - sym->type = S_STRING; - goto done; - } - case S_STRING: - if (*p++ != '"') - break; - for (p2 = p; (p2 = strpbrk(p2, "\"\\")); p2++) { - if (*p2 == '"') { - *p2 = 0; - break; - } - memmove(p2, p2 + 1, strlen(p2)); - } - if (!p2) { - conf_warning("invalid string found"); - continue; - } - case S_INT: - case S_HEX: - done: - if (sym_string_valid(sym, p)) { - sym->def[def].val = strdup(p); - sym->flags |= def_flags; - } else { - conf_warning("symbol value '%s' invalid for %s", p, sym->name); - continue; - } - break; - default: - ; - } + if (conf_set_sym_val(sym, def, def_flags, p)) + continue; break; case '\r': case '\n': -- cgit v1.2.3 From 0f855aa64b3f63d35a891510cf7db932a435c116 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 10 Nov 2007 20:40:05 +0100 Subject: kconfig: add helper to set config symbol from environment variable Add conf_set_env_sym() that can set an already defined symbol based on the value of an environment variable. Unknown symbols are silently ignored. A warning is printed if the value of the environment variable is unexpected. Signed-off-by: Sam Ravnborg Cc: Roman Zippel --- scripts/kconfig/confdata.c | 27 +++++++++++++++++++++++++++ scripts/kconfig/lkc_proto.h | 1 + 2 files changed, 28 insertions(+) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index e0f402f3b75..e4fa3f30254 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -145,6 +145,33 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) return 0; } +/* Read an environment variable and assign the value to the symbol */ +int conf_set_env_sym(const char *env, const char *symname, int def) +{ + struct symbol *sym; + char *p; + int def_flags; + + p = getenv(env); + if (p) { + char warning[200]; + sprintf(warning, "Environment variable (%s = \"%s\")", env, p); + conf_filename = warning; + def_flags = SYMBOL_DEF << def; + if (def == S_DEF_USER) { + sym = sym_find(symname); + if (!sym) + return 1; + } else { + sym = sym_lookup(symname, 0); + if (sym->type == S_UNKNOWN) + sym->type = S_OTHER; + } + conf_set_sym_val(sym, def, def_flags, p); + } + return 0; +} + int conf_read_simple(const char *name, int def) { FILE *in = NULL; diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h index 4d09f6ddefe..dca294e90cc 100644 --- a/scripts/kconfig/lkc_proto.h +++ b/scripts/kconfig/lkc_proto.h @@ -1,6 +1,7 @@ /* confdata.c */ P(conf_parse,void,(const char *name)); +P(conf_set_env_sym,int,(const char *envname, const char *symname, int def)); P(conf_read,int,(const char *name)); P(conf_read_simple,int,(const char *name, int)); P(conf_write,int,(const char *name)); -- cgit v1.2.3 From 2a113281f5cd2febbab21a93c8943f8d3eece4d3 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 10 Nov 2007 20:40:05 +0100 Subject: kconfig: use $K64BIT to set 64BIT with all*config targets The variable K64BIT can now be used to select the value of CONFIG_64BIT. This is for example useful for powerpc to generate allmodconfig for both bit sizes - like this: make ARCH=powerpc K64BIT=y make ARCH=powerpc K64BIT=n To use this the Kconfig file must use "64BIT" as the config value to select between 32 and 64 bit. Signed-off-by: Sam Ravnborg Cc: Roman Zippel --- README | 2 ++ scripts/kconfig/conf.c | 1 + 2 files changed, 3 insertions(+) diff --git a/README b/README index 159912cf515..592f8a23828 100644 --- a/README +++ b/README @@ -194,6 +194,8 @@ CONFIGURING the kernel: "make *config" checks for a file named "all{yes/mod/no/random}.config" for symbol values that are to be forced. If this file is not found, it checks for a file named "all.config" to contain forced values. + Finally it checks the environment variable K64BIT and if found, sets + the config symbol "64BIT" to the value of the K64BIT variable. NOTES on "make config": - having unnecessary drivers will make the kernel bigger, and can diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index a38787a881e..c6bee85c396 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -591,6 +591,7 @@ int main(int ac, char **av) conf_read_simple(name, S_DEF_USER); else if (!stat("all.config", &tmpstat)) conf_read_simple("all.config", S_DEF_USER); + conf_set_env_sym("K64BIT", "64BIT", S_DEF_USER); break; default: break; -- cgit v1.2.3 From d746d647f31bd3664f4a23985b78654129ffc1db Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 12 Nov 2007 20:14:19 +0100 Subject: x86: do not use $(ARCH) when not needed For x86 ARCH may say i386 or x86_64 and soon x86. Rely on CONFIG_X64_32 to select between 32/64 or just hardcode the value as appropriate. Signed-off-by: Sam Ravnborg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- Makefile | 9 +++++++-- arch/x86/Makefile | 10 +++++++--- arch/x86/Makefile_32 | 8 ++++---- arch/x86/Makefile_64 | 8 ++++---- arch/x86/boot/Makefile | 6 +++--- arch/x86/kernel/Makefile_32 | 3 ++- arch/x86/kernel/Makefile_64 | 2 ++ arch/x86/vdso/Makefile | 2 +- 8 files changed, 30 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index e28dde8887d..b32963adfb9 100644 --- a/Makefile +++ b/Makefile @@ -197,8 +197,13 @@ CROSS_COMPILE ?= UTS_MACHINE := $(ARCH) SRCARCH := $(ARCH) -# for i386 and x86_64 we use SRCARCH equal to x86 -SRCARCH := $(if $(filter x86_64 i386,$(SRCARCH)),x86,$(SRCARCH)) +# Additional ARCH settings for x86 +ifeq ($(ARCH),i386) + SRCARCH := x86 +endif +ifeq ($(ARCH),x86_64) + SRCARCH := x86 +endif KCONFIG_CONFIG ?= .config diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 309597386a7..116b03a4563 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -1,12 +1,16 @@ # Unified Makefile for i386 and x86_64 # select defconfig based on actual architecture -KBUILD_DEFCONFIG := $(ARCH)_defconfig +ifeq ($(ARCH),x86) + KBUILD_DEFCONFIG := i386_defconfig +else + KBUILD_DEFCONFIG := $(ARCH)_defconfig +endif -# # No need to remake these files +# No need to remake these files $(srctree)/arch/x86/Makefile%: ; -ifeq ($(ARCH),i386) +ifeq ($(CONFIG_X86_32),y) include $(srctree)/arch/x86/Makefile_32 else include $(srctree)/arch/x86/Makefile_64 diff --git a/arch/x86/Makefile_32 b/arch/x86/Makefile_32 index 346ac076687..50394da2f6c 100644 --- a/arch/x86/Makefile_32 +++ b/arch/x86/Makefile_32 @@ -160,7 +160,7 @@ archclean: $(Q)$(MAKE) $(clean)=arch/x86/boot define archhelp - echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)' + echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' echo ' install - Install kernel using' echo ' (your) ~/bin/installkernel or' echo ' (distribution) /sbin/installkernel or' @@ -170,6 +170,6 @@ define archhelp echo ' isoimage - Create a boot CD-ROM image' endef -CLEAN_FILES += arch/$(ARCH)/boot/fdimage \ - arch/$(ARCH)/boot/image.iso \ - arch/$(ARCH)/boot/mtools.conf +CLEAN_FILES += arch/x86/boot/fdimage \ + arch/x86/boot/image.iso \ + arch/x86/boot/mtools.conf diff --git a/arch/x86/Makefile_64 b/arch/x86/Makefile_64 index 57e714a47af..a804860022e 100644 --- a/arch/x86/Makefile_64 +++ b/arch/x86/Makefile_64 @@ -127,7 +127,7 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) define archhelp - echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)' + echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' echo ' install - Install kernel using' echo ' (your) ~/bin/installkernel or' echo ' (distribution) /sbin/installkernel or' @@ -137,8 +137,8 @@ define archhelp echo ' isoimage - Create a boot CD-ROM image' endef -CLEAN_FILES += arch/$(ARCH)/boot/fdimage \ - arch/$(ARCH)/boot/image.iso \ - arch/$(ARCH)/boot/mtools.conf +CLEAN_FILES += arch/x86/boot/fdimage \ + arch/x86/boot/image.iso \ + arch/x86/boot/mtools.conf diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 89dbf970e05..7a3116ccf38 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -49,10 +49,10 @@ HOSTCFLAGS_build.o := $(LINUXINCLUDE) # How to compile the 16-bit code. Note we always compile for -march=i386, # that way we can complain to the user if the CPU is insufficient. -cflags-i386 := -cflags-x86_64 := -m32 +cflags-$(CONFIG_X86_32) := +cflags-$(CONFIG_X86_64) := -m32 KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ - $(cflags-$(ARCH)) \ + $(cflags-y) \ -Wall -Wstrict-prototypes \ -march=i386 -mregparm=3 \ -include $(srctree)/$(src)/code16gcc.h \ diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index b9d67982030..a7bc93c2766 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -3,6 +3,7 @@ # extra-y := head_32.o init_task.o vmlinux.lds +CPPFLAGS_vmlinux.lds += -Ui386 obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ @@ -60,7 +61,7 @@ quiet_cmd_syscall = SYSCALL $@ cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@ -export CPPFLAGS_vsyscall_32.lds += -P -C -U$(ARCH) +export CPPFLAGS_vsyscall_32.lds += -P -C -Ui386 vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ $(call ld-option, -Wl$(comma)--hash-style=sysv) diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 24671c3838b..5a88890d8ee 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -3,7 +3,9 @@ # extra-y := head_64.o head64.o init_task.o vmlinux.lds +CPPFLAGS_vmlinux.lds += -Ux86_64 EXTRA_AFLAGS := -traditional + obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 7a2ba458393..e7bff0fbac2 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -20,7 +20,7 @@ quiet_cmd_syscall = SYSCALL $@ cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@ -export CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +export CPPFLAGS_vdso.lds += -P -C vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \ $(call ld-option, -Wl$(comma)--hash-style=sysv) \ -- cgit v1.2.3 From daa93fab824f2b8c35bd11670c7fab2f32b2de5f Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 12 Nov 2007 20:54:30 +0100 Subject: x86: enable "make ARCH=x86" After unification of the Kconfig files and introducing K64BIT support in kconfig it required only trivial changes to enable "make ARCH=x86". With this patch you can build for x86_64 in several ways: 1) make ARCH=x86_64 2) make ARCH=x86 K64BIT=y 3) make ARCH=x86 menuconfig => select 64-bit Likewise for i386 with the addition that i386 is default is you say ARCH=x86. Signed-off-by: Sam Ravnborg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- Makefile | 4 +++- arch/x86/Kconfig | 18 +++++++++++++++--- arch/x86/Kconfig.i386 | 18 ------------------ arch/x86/Kconfig.x86_64 | 20 -------------------- scripts/kconfig/Makefile | 7 +------ 5 files changed, 19 insertions(+), 48 deletions(-) delete mode 100644 arch/x86/Kconfig.i386 delete mode 100644 arch/x86/Kconfig.x86_64 diff --git a/Makefile b/Makefile index b32963adfb9..9c9c4bfb0e5 100644 --- a/Makefile +++ b/Makefile @@ -200,9 +200,11 @@ SRCARCH := $(ARCH) # Additional ARCH settings for x86 ifeq ($(ARCH),i386) SRCARCH := x86 + K64BIT := n endif ifeq ($(ARCH),x86_64) SRCARCH := x86 + K64BIT := y endif KCONFIG_CONFIG ?= .config @@ -339,7 +341,7 @@ KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION -export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC +export ARCH SRCARCH K64BIT CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 34517bf14ba..1eb59971af5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1,13 +1,25 @@ # x86 configuration +mainmenu "Linux Kernel Configuration for x86" + +# Select 32 or 64 bit +config 64BIT + bool "64-bit kernel" + default n + help + Say yes to build a 64-bit kernel - formerly known as x86_64 + Say no to build a 32-bit kernel - formerly known as i386 + +config X86_32 + def_bool !64BIT + +config X86_64 + def_bool 64BIT ### Arch settings config X86 bool default y -config 64BIT - def_bool X86_64 - config GENERIC_TIME bool default y diff --git a/arch/x86/Kconfig.i386 b/arch/x86/Kconfig.i386 deleted file mode 100644 index 7b8dc2604d5..00000000000 --- a/arch/x86/Kconfig.i386 +++ /dev/null @@ -1,18 +0,0 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. -# - -mainmenu "Linux Kernel Configuration" - -config X86_32 - bool - default y - help - This is Linux's home port. Linux was originally native to the Intel - 386, and runs on all the later x86 processors including the Intel - 486, 586, Pentiums, and various instruction-set-compatible chips by - AMD, Cyrix, and others. - - -source "arch/x86/Kconfig" diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64 deleted file mode 100644 index b262aaec67c..00000000000 --- a/arch/x86/Kconfig.x86_64 +++ /dev/null @@ -1,20 +0,0 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. -# -# Note: ISA is disabled and will hopefully never be enabled. -# If you managed to buy an ISA x86-64 box you'll have to fix all the -# ISA drivers you need yourself. -# - -mainmenu "Linux Kernel Configuration" - -config X86_64 - bool - default y - help - Port to the x86-64 architecture. x86-64 is a 64-bit extension to the - classical 32-bit x86 architecture. For details see - . - -source "arch/x86/Kconfig" diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 59594126e8b..1ad6f7fc490 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -4,12 +4,7 @@ PHONY += oldconfig xconfig gconfig menuconfig config silentoldconfig update-po-config -# If a arch/$(SRCARCH)/Kconfig.$(ARCH) file exist use it -ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/Kconfig.$(ARCH)),) - Kconfig := arch/$(SRCARCH)/Kconfig.$(ARCH) -else - Kconfig := arch/$(SRCARCH)/Kconfig -endif +Kconfig := arch/$(SRCARCH)/Kconfig xconfig: $(obj)/qconf $< $(Kconfig) -- cgit v1.2.3 From 6183289cd4356b790c5eaa619020fb887ec0fa44 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 13 Nov 2007 22:09:14 +0100 Subject: cmd64x: don't clear the other channel's interrupt Make sure to not clear the other IDE channel's interrupt when clearing an IDE interrupt via the MRDMODE register. Thanks to Bart for finding a coding mistake. Bart: This fixes regression from commit 66602c83dcb6a5d82772d88ae7a32cd4a1213528 ("cmd64x: use interrupt status from MRDMODE register (take 2)"). Extra thanks to Martin for reporting and bisecting the issue. From: Sergei Shtylyov Tested-by: Martin Rogge Tested-by: Milan Kocian Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/pci/cmd64x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c index ea0143ef5fe..51fca441c29 100644 --- a/drivers/ide/pci/cmd64x.c +++ b/drivers/ide/pci/cmd64x.c @@ -1,5 +1,5 @@ /* - * linux/drivers/ide/pci/cmd64x.c Version 1.50 May 10, 2007 + * linux/drivers/ide/pci/cmd64x.c Version 1.51 Nov 8, 2007 * * cmd64x.c: Enable interrupts at initialization time on Ultra/PCI machines. * Due to massive hardware bugs, UltraDMA is only supported @@ -339,7 +339,8 @@ static int cmd648_ide_dma_end (ide_drive_t *drive) u8 mrdmode = inb(hwif->dma_master + 0x01); /* clear the interrupt bit */ - outb(mrdmode | irq_mask, hwif->dma_master + 0x01); + outb((mrdmode & ~(MRDMODE_INTR_CH0 | MRDMODE_INTR_CH1)) | irq_mask, + hwif->dma_master + 0x01); return err; } -- cgit v1.2.3 From 2ad1e0558f369f11d180b7448d97164a0c5f07e2 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 13 Nov 2007 22:09:14 +0100 Subject: ide: BLK_DEV_IDECD help: remove outdated note LILO version 16 was released on 26-02-1995 (sic), so telling people to not use older versions no longer has any value. Signed-off-by: Adrian Bunk Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/Kconfig | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index d1e8df18722..e445fe6e4ba 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -203,10 +203,6 @@ config BLK_DEV_IDECD CD-ROM drive, you can say N to all other CD-ROM options, but be sure to say Y or M to "ISO 9660 CD-ROM file system support". - Note that older versions of LILO (LInux LOader) cannot properly deal - with IDE/ATAPI CD-ROMs, so install LILO 16 or higher, available from - . - To compile this driver as a module, choose M here: the module will be called ide-cd. -- cgit v1.2.3 From 03644cd497e27c3d274f39e58ddc577e9d73bb39 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 13 Nov 2007 22:09:15 +0100 Subject: ide-pmac: skip conservative PIO "downgrade" We can skip conservative PIO "downgrade" (PIO3 becomes PIO2 etc.) on PMAC. Problem reported by Mikael. Cc: Mikael Pettersson Acked-by: Benjamin Herrenschmidt Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ppc/pmac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 816b5311dad..5afdfef7264 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1138,6 +1138,7 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif) hwif->drives[0].autotune = IDE_TUNE_AUTO; hwif->drives[1].autotune = IDE_TUNE_AUTO; hwif->host_flags = IDE_HFLAG_SET_PIO_MODE_KEEP_DMA | + IDE_HFLAG_PIO_NO_DOWNGRADE | IDE_HFLAG_POST_SET_MODE; hwif->pio_mask = ATA_PIO4; hwif->set_pio_mode = pmac_ide_set_pio_mode; -- cgit v1.2.3 From 12eda14f8930ccad0d8b75fecab87b90eecba5fb Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 13 Nov 2007 22:09:15 +0100 Subject: ide: add missing HOB bit clearing to ide_dump_ata_status() Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index af86433baed..1609b8604f5 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -514,6 +514,7 @@ static u8 ide_dump_ata_status(ide_drive_t *drive, const char *msg, u8 stat) if (drive->addressing == 1) { __u64 sectors = 0; u32 low = 0, high = 0; + hwif->OUTB(drive->ctl&~0x80, IDE_CONTROL_REG); low = ide_read_24(drive); hwif->OUTB(drive->ctl|0x80, IDE_CONTROL_REG); high = ide_read_24(drive); -- cgit v1.2.3 From c1f50cbb06363b36700c0a679a5bd3ddef0a97b6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 13 Nov 2007 22:09:15 +0100 Subject: ide: use drive->select.all for REQ_TYPE_ATA_TASK in execute_drive_cmd() Use drive->select.all for REQ_TYPE_ATA_TASK requests in execute_drive_cmd() (the obsolete bits 7 and 5 of the Device register need to be set). Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-io.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 755011827af..db22d1ff4e5 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -885,7 +885,6 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, return do_rw_taskfile(drive, args); } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) { u8 *args = rq->buffer; - u8 sel; if (!args) goto done; @@ -903,10 +902,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, hwif->OUTB(args[3], IDE_SECTOR_REG); hwif->OUTB(args[4], IDE_LCYL_REG); hwif->OUTB(args[5], IDE_HCYL_REG); - sel = (args[6] & ~0x10); - if (drive->select.b.unit) - sel |= 0x10; - hwif->OUTB(sel, IDE_SELECT_REG); + hwif->OUTB((args[6] & 0xEF)|drive->select.all, IDE_SELECT_REG); ide_cmd(drive, args[0], args[2], &drive_cmd_intr); return ide_started; } else if (rq->cmd_type == REQ_TYPE_ATA_CMD) { -- cgit v1.2.3 From 34c69b601b2ec8fc8ff6657a547ce3865d58e220 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 13 Nov 2007 22:09:15 +0100 Subject: ide: don't BUG() on unsupported transfer modes Fix ide-cris, cs5530, sc1200 and sis5513 host drivers to just return instead of OOPS-ing for unsupported modes in ->set_dma_mode methods. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/cris/ide-cris.c | 3 +-- drivers/ide/pci/cs5530.c | 3 +-- drivers/ide/pci/sc1200.c | 3 +-- drivers/ide/pci/sis5513.c | 1 - 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c index e196aefa207..7f5bc2ee6c7 100644 --- a/drivers/ide/cris/ide-cris.c +++ b/drivers/ide/cris/ide-cris.c @@ -748,8 +748,7 @@ static void cris_set_dma_mode(ide_drive_t *drive, const u8 speed) hold = ATA_DMA2_HOLD; break; default: - BUG(); - break; + return; } if (speed >= XFER_UDMA_0) diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c index 599408952bd..547690395ee 100644 --- a/drivers/ide/pci/cs5530.c +++ b/drivers/ide/pci/cs5530.c @@ -117,8 +117,7 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode) case XFER_MW_DMA_1: timings = 0x00012121; break; case XFER_MW_DMA_2: timings = 0x00002020; break; default: - BUG(); - break; + return; } basereg = CS5530_BASEREG(drive->hwif); reg = inl(basereg + 4); /* get drive0 config register */ diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c index 0a7b3202066..707d5ff66b0 100644 --- a/drivers/ide/pci/sc1200.c +++ b/drivers/ide/pci/sc1200.c @@ -186,8 +186,7 @@ static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode) } break; default: - BUG(); - break; + return; } if (unit == 0) { /* are we configuring drive0? */ diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c index 6b7bb53acef..f6e2ab3dd16 100644 --- a/drivers/ide/pci/sis5513.c +++ b/drivers/ide/pci/sis5513.c @@ -356,7 +356,6 @@ static void sis_set_dma_mode(ide_drive_t *drive, const u8 speed) sis_program_timings(drive, speed); break; default: - BUG(); break; } } -- cgit v1.2.3 From 24ffbd62583024f85bdba72cd373d050aa1a1b15 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 13 Nov 2007 22:09:16 +0100 Subject: it821x/jmicron: fix return value of {it821x,jmicron}_init_one() Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/pci/it821x.c | 3 +-- drivers/ide/pci/jmicron.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c index 5c997543531..99b7d763b6c 100644 --- a/drivers/ide/pci/it821x.c +++ b/drivers/ide/pci/it821x.c @@ -653,8 +653,7 @@ static const struct ide_port_info it821x_chipsets[] __devinitdata = { static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_device_id *id) { - ide_setup_pci_device(dev, &it821x_chipsets[id->driver_data]); - return 0; + return ide_setup_pci_device(dev, &it821x_chipsets[id->driver_data]); } static const struct pci_device_id it821x_pci_tbl[] = { diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c index bdf64d99770..0083eaf89c7 100644 --- a/drivers/ide/pci/jmicron.c +++ b/drivers/ide/pci/jmicron.c @@ -139,8 +139,7 @@ static const struct ide_port_info jmicron_chipset __devinitdata = { static int __devinit jmicron_init_one(struct pci_dev *dev, const struct pci_device_id *id) { - ide_setup_pci_device(dev, &jmicron_chipset); - return 0; + return ide_setup_pci_device(dev, &jmicron_chipset); } /* All JMB PATA controllers have and will continue to have the same -- cgit v1.2.3 From 0fd4980fa75acc78c747b1f43d1204f6572a4845 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 13 Nov 2007 22:09:16 +0100 Subject: ide: remove stale/incorrect comment from setup-pci.c Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/setup-pci.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 02d14bf85ab..25fd0905322 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -7,11 +7,6 @@ * May be copied or modified under the terms of the GNU General Public License */ -/* - * This module provides support for automatic detection and - * configuration of all PCI IDE interfaces present in a system. - */ - #include #include #include -- cgit v1.2.3 From 3bba11e5c47dfc1d381a1ece26464fb7eea2d79c Mon Sep 17 00:00:00 2001 From: Ali Ayoub Date: Tue, 13 Nov 2007 15:26:57 -0800 Subject: mlx4_core: Fix possible bad free in mlx4_buf_free() When mlx4_buf_free() is called from the error path of mlx4_buf_alloc(), it may be passed a buffer structure that does not have all pages filled in. Add a check for NULL to mlx4_buf_free() so we avoid passing NULL to dma_free_coherent() (which will crash). Signed-off-by: Ali Ayoub Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/mlx4/alloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index f8d63d39f59..b226e019bc8 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -171,9 +171,10 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) buf->u.direct.map); else { for (i = 0; i < buf->nbufs; ++i) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - buf->u.page_list[i].buf, - buf->u.page_list[i].map); + if (buf->u.page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->u.page_list[i].buf, + buf->u.page_list[i].map); kfree(buf->u.page_list); } } -- cgit v1.2.3 From a6e7550d8f73d6b75c20afff321f0f06fe144775 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Wed, 24 Oct 2007 15:49:39 -0700 Subject: IB/ipath: Fix memory leak in ipath_resize_cq() if copy_to_user() fails Signed-off-by: Ralph Campbell Signed-off-by: Patrick Marchand Latifi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_cq.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 645ed71fd79..08d8ae148cd 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -404,7 +404,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (ret) - goto bail; + goto bail_free; } spin_lock_irq(&cq->lock); @@ -424,10 +424,8 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) else n = head - tail; if (unlikely((u32)cqe < n)) { - spin_unlock_irq(&cq->lock); - vfree(wc); ret = -EOVERFLOW; - goto bail; + goto bail_unlock; } for (n = 0; tail != head; n++) { if (cq->ip) @@ -459,7 +457,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) } ret = 0; + goto bail; +bail_unlock: + spin_unlock_irq(&cq->lock); +bail_free: + vfree(wc); bail: return ret; } -- cgit v1.2.3 From f4ad1bcc4425a772ea584e1f24abadc64c2b839f Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 26 Oct 2007 08:02:39 -0700 Subject: IB/ipath: Fix race with ACK retry timeout list management When an ACK is received, the QP is removed from the timeout list and then if there are still pending send WQEs, the QP is put back on the timeout list. It is possible that another post send has put the QP on the timeout list thus, a check needs to be made before trying to do it again or the list is corrupted. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5c29b2bfea1..120a61b03bc 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -959,8 +959,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, /* If this is a partial ACK, reset the retransmit timer. */ if (qp->s_last != qp->s_tail) { spin_lock(&dev->pending_lock); - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); /* * If we get a partial ACK for a resent operation, -- cgit v1.2.3 From 40ebb5615e2e069d3b8936b894ceff436c914003 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Fri, 2 Nov 2007 15:33:51 +0200 Subject: IB/ehca: Return physical link information in query_port() Newer firmware versions return physical port information to the partition, so hand that information to the consumer if it's present. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_hca.c | 20 ++++++++++++++------ drivers/infiniband/hw/ehca/hipz_hw.h | 6 +++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 15806d14046..5bd7b591987 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -151,7 +151,6 @@ int ehca_query_port(struct ib_device *ibdev, } memset(props, 0, sizeof(struct ib_port_attr)); - props->state = rblock->state; switch (rblock->max_mtu) { case 0x1: @@ -188,11 +187,20 @@ int ehca_query_port(struct ib_device *ibdev, props->subnet_timeout = rblock->subnet_timeout; props->init_type_reply = rblock->init_type_reply; - props->active_width = IB_WIDTH_12X; - props->active_speed = 0x1; - - /* at the moment (logical) link state is always LINK_UP */ - props->phys_state = 0x5; + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = 0x1; + } query_port1: ehca_free_fw_ctrlblock(rblock); diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index d9739e55451..485b8400359 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -402,7 +402,11 @@ struct hipz_query_port { u64 max_msg_sz; u32 max_mtu; u32 vl_cap; - u8 reserved2[1900]; + u32 phys_pstate; + u32 phys_state; + u32 phys_speed; + u32 phys_width; + u8 reserved2[1884]; u64 guid_entries[255]; } __attribute__ ((packed)); -- cgit v1.2.3 From 51aaa54eb9e9f01878aa5d62277fd156e458dfe1 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Fri, 2 Nov 2007 15:41:49 +0200 Subject: IB/ehca: Fix static rate calculation The IPD (inter-packet delay) formula was a little off and assumed a fixed physical link rate; fix the formula and query the actual physical link rate, now that we can get it. Also, refactor the calculation into a common function ehca_calc_ipd() and use that instead of duplicating code. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_av.c | 48 +++++++++++++++++++++++++------ drivers/infiniband/hw/ehca/ehca_classes.h | 1 - drivers/infiniband/hw/ehca/ehca_iverbs.h | 3 ++ drivers/infiniband/hw/ehca/ehca_main.c | 3 -- drivers/infiniband/hw/ehca/ehca_qp.c | 29 ++++++++----------- 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 97d108634c5..453eb995c1d 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -50,6 +50,38 @@ static struct kmem_cache *av_cache; +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { int ret; @@ -69,15 +101,13 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.slid_path_bits = ah_attr->src_path_bits; if (ehca_static_rate < 0) { - int ah_mult = ib_rate_to_mult(ah_attr->static_rate); - int ehca_mult = - ib_rate_to_mult(shca->sport[ah_attr->port_num].rate ); - - if (ah_mult >= ehca_mult) - av->av.ipd = 0; - else - av->av.ipd = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; } else av->av.ipd = ehca_static_rate; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 2d660ae189e..87f12d4312a 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -95,7 +95,6 @@ struct ehca_sma_attr { struct ehca_sport { struct ib_cq *ibcq_aqp1; struct ib_qp *ibqp_aqp1; - enum ib_rate rate; enum ib_port_state port_state; struct ehca_sma_attr saved_attr; }; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index dce503bb7d6..5485799cdc8 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -189,6 +189,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void ehca_poll_eqs(unsigned long data); +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c6cd38c5321..90d4334179b 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -327,9 +327,6 @@ static int ehca_sense_attributes(struct ehca_shca *shca) shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - shca->sport[0].rate = IB_RATE_30_GBPS; - shca->sport[1].rate = IB_RATE_30_GBPS; - shca->hca_cap = rblock->hca_cap_indicators; ehca_gen_dbg(" ... HCA capabilities:"); for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index de182648b28..2e3e6547cb7 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1196,10 +1196,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); } if (attr_mask & IB_QP_AV) { - int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult(shca->sport[my_qp-> - init_attr.port_num].rate); - mqpcb->dlid = attr->ah_attr.dlid; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); mqpcb->source_path_bits = attr->ah_attr.src_path_bits; @@ -1207,11 +1203,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->service_level = attr->ah_attr.sl; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - if (ah_mult < ehca_mult) - mqpcb->max_static_rate = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; - else - mqpcb->max_static_rate = 0; + if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); /* @@ -1280,10 +1277,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); } if (attr_mask & IB_QP_ALT_PATH) { - int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult( - shca->sport[my_qp->init_attr.port_num].rate); - if (attr->alt_port_num < 1 || attr->alt_port_num > shca->num_ports) { ret = -EINVAL; @@ -1309,10 +1302,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; mqpcb->service_level_al = attr->alt_ah_attr.sl; - if (ah_mult > 0 && ah_mult < ehca_mult) - mqpcb->max_static_rate_al = (ehca_mult - 1) / ah_mult; - else - mqpcb->max_static_rate_al = 0; + if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + attr->alt_ah_attr.static_rate, + &mqpcb->max_static_rate_al)) { + ret = -EINVAL; + goto modify_qp_exit2; + } /* OpenIB doesn't support alternate retry counts - copy them */ mqpcb->retry_count_al = mqpcb->retry_count; -- cgit v1.2.3 From 9a7666494bac60b99d2bd7d904bd22e8c9b1e3f7 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 9 Nov 2007 09:21:58 -0600 Subject: RDMA/cxgb3: Set the max_qp_init_rd_atom attribute in query_device The device attribute max_qp_init_rd_atom is not getting set in cxgb3's query_device method. Version 1.0.4 of librdmacm now validates the user's requested initiator and responder resources against the max supported by the device. Since iw_cxgb3 wasn't setting this attribute (and it defaulted to 0), all rdma_connect()s fail if there are initiator resources requested by the app. Fix this by setting the correct value in iwch_query_device(). Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index f0c77758937..b5436ca92e6 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1000,6 +1000,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->max_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_cq = dev->attr.max_cqs; props->max_cqe = dev->attr.max_cqes_per_cq; props->max_mr = dev->attr.max_mem_regs; -- cgit v1.2.3 From 5f1a485d5905aa641f33009019b3699076666a4c Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Tue, 13 Nov 2007 20:40:55 -0800 Subject: [PKT_SCHED]: Check subqueue status before calling hard_start_xmit The only qdiscs that check subqueue state before dequeue'ing are PRIO and RR. The other qdiscs, including the default pfifo_fast qdisc, will allow traffic bound for subqueue 0 through to hard_start_xmit. The check for netif_queue_stopped() is done above in pkt_sched.h, so it is unnecessary for qdisc_restart(). However, if the underlying driver is multiqueue capable, and only sets queue states on subqueues, this will allow packets to enter the driver when it's currently unable to process packets, resulting in expensive requeues and driver entries. This patch re-adds the check for the subqueue status before calling hard_start_xmit, so we can try and avoid the driver entry when the queues are stopped. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fa1a6f45dc4..e595e6570ce 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -134,7 +134,7 @@ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; - int ret; + int ret = NETDEV_TX_BUSY; /* Dequeue packet */ if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) @@ -145,7 +145,8 @@ static inline int qdisc_restart(struct net_device *dev) spin_unlock(&dev->queue_lock); HARD_TX_LOCK(dev, smp_processor_id()); - ret = dev_hard_start_xmit(skb, dev); + if (!netif_subqueue_stopped(dev, skb)) + ret = dev_hard_start_xmit(skb, dev); HARD_TX_UNLOCK(dev); spin_lock(&dev->queue_lock); -- cgit v1.2.3 From 4e04b84ea542a3e3f2fa308b3a7e7ed149f9a3a2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Nov 2007 20:46:09 -0800 Subject: [EP93xx_ETH]: Build fix after 2.6.24 NAPI changes. Reported by rmk from kautobuild output: drivers/net/arm/ep93xx_eth.c:420: error: implicit declaration of function '__netif_rx_schedule_prep' Signed-off-by: David S. Miller --- drivers/net/arm/ep93xx_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c index 7f016f3d5bf..91a6590d107 100644 --- a/drivers/net/arm/ep93xx_eth.c +++ b/drivers/net/arm/ep93xx_eth.c @@ -417,7 +417,7 @@ static irqreturn_t ep93xx_irq(int irq, void *dev_id) if (status & REG_INTSTS_RX) { spin_lock(&ep->rx_lock); - if (likely(__netif_rx_schedule_prep(dev, &ep->napi))) { + if (likely(netif_rx_schedule_prep(dev, &ep->napi))) { wrl(ep, REG_INTEN, REG_INTEN_TX); __netif_rx_schedule(dev, &ep->napi); } -- cgit v1.2.3 From e2ac455a18806b31c2d0da0a51d8740af5010b7a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Nov 2007 20:47:35 -0800 Subject: [NETX]: Fix build failure added by 2.6.24 statistics cleanup. Reported by rmk from kautobuild output: drivers/net/netx-eth.c: In function 'netx_eth_hard_start_xmit': drivers/net/netx-eth.c:131: error: 'dev' undeclared (first use in this function) drivers/net/netx-eth.c:131: error: (Each undeclared identifier is reported only once drivers/net/netx-eth.c:131: error: for each function it appears in.) drivers/net/netx-eth.c: In function 'netx_eth_receive': drivers/net/netx-eth.c:158: error: 'dev' undeclared (first use in this function) Signed-off-by: David S. Miller --- drivers/net/netx-eth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/netx-eth.c b/drivers/net/netx-eth.c index eb0aff787df..5267e031daa 100644 --- a/drivers/net/netx-eth.c +++ b/drivers/net/netx-eth.c @@ -128,8 +128,8 @@ netx_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) FIFO_PTR_FRAMELEN(len)); ndev->trans_start = jiffies; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + ndev->stats.tx_packets++; + ndev->stats.tx_bytes += skb->len; netif_stop_queue(ndev); spin_unlock_irq(&priv->lock); @@ -155,7 +155,7 @@ static void netx_eth_receive(struct net_device *ndev) if (unlikely(skb == NULL)) { printk(KERN_NOTICE "%s: Low memory, packet dropped.\n", ndev->name); - dev->stats.rx_dropped++; + ndev->stats.rx_dropped++; return; } -- cgit v1.2.3 From f0163ac45b40bd032b877c747796146d52d4e800 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 13 Nov 2007 21:00:09 -0800 Subject: [E1000]: Fix schedule while atomic when called from mii-tool. mii-tool can cause the driver to call msleep during nway reset, bugzilla.kernel.org bug 8430. Fix by simply calling reinit_locked outside of the spinlock, which is safe from ethtool, so it should be safe from here. Signed-off-by: Jesse Brandeburg Signed-off-by: Auke Kok Signed-off-by: David S. Miller --- drivers/net/e1000/e1000_main.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 72deff0d4d9..cf39473ef90 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -4804,6 +4804,7 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) spin_unlock_irqrestore(&adapter->stats_lock, flags); return -EIO; } + spin_unlock_irqrestore(&adapter->stats_lock, flags); if (adapter->hw.media_type == e1000_media_type_copper) { switch (data->reg_num) { case PHY_CTRL: @@ -4824,12 +4825,8 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) DUPLEX_HALF; retval = e1000_set_spd_dplx(adapter, spddplx); - if (retval) { - spin_unlock_irqrestore( - &adapter->stats_lock, - flags); + if (retval) return retval; - } } if (netif_running(adapter->netdev)) e1000_reinit_locked(adapter); @@ -4838,11 +4835,8 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) break; case M88E1000_PHY_SPEC_CTRL: case M88E1000_EXT_PHY_SPEC_CTRL: - if (e1000_phy_reset(&adapter->hw)) { - spin_unlock_irqrestore( - &adapter->stats_lock, flags); + if (e1000_phy_reset(&adapter->hw)) return -EIO; - } break; } } else { @@ -4857,7 +4851,6 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) break; } } - spin_unlock_irqrestore(&adapter->stats_lock, flags); break; default: return -EOPNOTSUPP; -- cgit v1.2.3 From 746aa32d280084dbd520249170852e4616799928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Tue, 13 Nov 2007 21:01:23 -0800 Subject: [TCP] FRTO: Limit snd_cwnd if TCP was application limited MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise TCP might violate packet ordering principles that FRTO is based on. If conventional recovery path is chosen, this won't be significant at all. In practice, any small enough value will be sufficient to provide proper operation for FRTO, yet other users of snd_cwnd might benefit from a "close enough" value. FRTO's formula is now equal to what tcp_enter_cwr() uses. FRTO used to check application limitedness a bit differently but I changed that in commit 575ee7140dabe9b9c4f66f4f867039b97e548867 and as a result checking for application limitedness became completely non-existing. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 20c9440ab85..b59da5308ac 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1669,6 +1669,9 @@ void tcp_enter_frto(struct sock *sk) } tcp_verify_left_out(tp); + /* Too bad if TCP was application limited */ + tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); + /* Earlier loss recovery underway (see RFC4138; Appendix B). * The last condition is necessary at least in tp->frto_counter case. */ -- cgit v1.2.3 From 23aeeec365dcf8bc87fae44c533e50d0bb4f23cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Tue, 13 Nov 2007 21:03:13 -0800 Subject: [TCP] FRTO: Plug potential LOST-bit leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It might be possible that, in some extreme scenario that I just cannot now construct in my mind, end_seq <= frto_highmark check does not match causing the lost_out and LOST bits become out-of-sync due to clearing and recounting in the loop. This may fix LOST-bit leak reported by Chazarain Guillaume . Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b59da5308ac..12ae9a68cda 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1704,6 +1704,8 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; + + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; /* * Count the retransmission made on RTO correctly (only when * waiting for the first ACK and did not get it)... @@ -1717,7 +1719,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) } else { if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; } /* Don't lost mark skbs that were fwd transmitted after RTO */ -- cgit v1.2.3 From cb4da1a34de3840ce49dc7292a063e1ef7f127af Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 13 Nov 2007 21:10:32 -0800 Subject: [IWLWIFI]: Not correctly dealing with hotunplug. It makes no sense to enable interrupts if a device has been unplugged. In addition if in doubt IRQ_HANDLED should be returned. Signed-off-by: Oliver Neukum Acked-by: Zhu Yi Signed-off-by: David S. Miller --- drivers/net/wireless/iwlwifi/iwl3945-base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index be7c9f42a34..ee752f1e21d 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -4850,7 +4850,7 @@ static irqreturn_t iwl_isr(int irq, void *data) if ((inta == 0xFFFFFFFF) || ((inta & 0xFFFFFFF0) == 0xa5a5a5a0)) { /* Hardware disappeared */ IWL_WARNING("HARDWARE GONE?? INTA == 0x%080x\n", inta); - goto none; + goto unplugged; } IWL_DEBUG_ISR("ISR inta 0x%08x, enabled 0x%08x, fh 0x%08x\n", @@ -4858,6 +4858,7 @@ static irqreturn_t iwl_isr(int irq, void *data) /* iwl_irq_tasklet() will service interrupts and re-enable them */ tasklet_schedule(&priv->irq_tasklet); +unplugged: spin_unlock(&priv->lock); return IRQ_HANDLED; -- cgit v1.2.3 From 6dd10a62353a50b30b30e0c18653650975b29c71 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Nov 2007 21:12:14 -0800 Subject: [NET] random : secure_tcp_sequence_number should not assume CONFIG_KTIME_SCALAR All 32 bits machines but i386 dont have CONFIG_KTIME_SCALAR. On these machines, ktime.tv64 is more than 4 times the (correct) result given by ktime_to_ns() Again on these machines, using ktime_get_real().tv64 >> 6 give a 32bits rollover every 64 seconds, which is not wanted (less than the 120 s MSL) Using ktime_to_ns() is the portable way to get nsecs from a ktime, and have correct code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/char/random.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1756b1f7cb7..5fee0566182 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1494,7 +1494,7 @@ __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK; seq += keyptr->count; - seq += ktime_get_real().tv64; + seq += ktime_to_ns(ktime_get_real()); return seq; } @@ -1556,7 +1556,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, * overlaps less than one time per MSL (2 minutes). * Choosing a clock of 64 ns period is OK. (period of 274 s) */ - seq += ktime_get_real().tv64 >> 6; + seq += ktime_to_ns(ktime_get_real()) >> 6; #if 0 printk("init_seq(%lx, %lx, %d, %d) = %d\n", saddr, daddr, sport, dport, seq); @@ -1616,7 +1616,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, seq = half_md4_transform(hash, keyptr->secret); seq |= ((u64)keyptr->count) << (32 - HASH_BITS); - seq += ktime_get_real().tv64; + seq += ktime_to_ns(ktime_get_real()); seq &= (1ull << 48) - 1; #if 0 printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n", -- cgit v1.2.3 From ce1d18e0064d55106a7042c07cfca97cad66f407 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 13 Nov 2007 21:15:24 -0800 Subject: [SYSCTL]: Fix warning for token-ring from sysctl checker As seen when booting ppc64_defconfig: sysctl table check failed: /net/token-ring .3.14 procname does not match binary path procname Signed-off-by: Olof Johansson Signed-off-by: David S. Miller --- kernel/sysctl_check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 5a2f2b2bf88..4abc6d2306f 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -738,7 +738,7 @@ static struct trans_ctl_table trans_net_table[] = { { NET_ROSE, "rose", trans_net_rose_table }, { NET_IPV6, "ipv6", trans_net_ipv6_table }, { NET_X25, "x25", trans_net_x25_table }, - { NET_TR, "tr", trans_net_tr_table }, + { NET_TR, "token-ring", trans_net_tr_table }, { NET_DECNET, "decnet", trans_net_decnet_table }, /* NET_ECONET not used */ { NET_SCTP, "sctp", trans_net_sctp_table }, -- cgit v1.2.3 From 8cbdeec637c1ce87bf329c5c19a9964e36bdf9fb Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Tue, 13 Nov 2007 21:16:29 -0800 Subject: [BONDING]: Fix resource use after free Fix bond_destroy and bond_free_all to not reference the struct net_device after calling unregister_netdevice. Bug and offending change reported by Moni Shoua Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a198404a3e3..423298c84a1 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1847,9 +1847,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) */ void bond_destroy(struct bonding *bond) { - unregister_netdevice(bond->dev); bond_deinit(bond->dev); bond_destroy_sysfs_entry(bond); + unregister_netdevice(bond->dev); } /* @@ -4475,8 +4475,8 @@ static void bond_free_all(void) bond_mc_list_destroy(bond); /* Release the bonded slaves */ bond_release_all(bond_dev); - unregister_netdevice(bond_dev); bond_deinit(bond_dev); + unregister_netdevice(bond_dev); } #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 18b2b7bd09811779309592a10080fe9ffb93144d Mon Sep 17 00:00:00 2001 From: Sreenivasa Honnur Date: Wed, 14 Nov 2007 01:41:06 -0800 Subject: [S2IO]: Fixed memory leak when MSI-X vector allocation fails - Fixed memory leak by freeing MSI-X local entry memories when vector allocation fails in s2io_add_isr. - Added two utility functions remove_msix_isr and remove_inta_isr to eliminate code duplication. - Incorporated following review comments from Jeff - Removed redundant stats->mem_freed and synchronize_irq call - do_rem_msix_isr is renamed as remove_msix_isr - do_rem_inta_isr is renamed as remove_inta_isr Signed-off-by: Sreenivasa Honnur Signed-off-by: David S. Miller --- drivers/net/s2io.c | 110 +++++++++++++++++++++++++---------------------------- 1 file changed, 51 insertions(+), 59 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index b8c0e7b4ca1..63266670624 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -84,7 +84,7 @@ #include "s2io.h" #include "s2io-regs.h" -#define DRV_VERSION "2.0.26.5" +#define DRV_VERSION "2.0.26.6" /* S2io Driver name & version. */ static char s2io_driver_name[] = "Neterion"; @@ -3775,6 +3775,40 @@ static int __devinit s2io_test_msi(struct s2io_nic *sp) return err; } + +static void remove_msix_isr(struct s2io_nic *sp) +{ + int i; + u16 msi_control; + + for (i = 0; i < MAX_REQUESTED_MSI_X; i++) { + if (sp->s2io_entries[i].in_use == + MSIX_REGISTERED_SUCCESS) { + int vector = sp->entries[i].vector; + void *arg = sp->s2io_entries[i].arg; + free_irq(vector, arg); + } + } + + kfree(sp->entries); + kfree(sp->s2io_entries); + sp->entries = NULL; + sp->s2io_entries = NULL; + + pci_read_config_word(sp->pdev, 0x42, &msi_control); + msi_control &= 0xFFFE; /* Disable MSI */ + pci_write_config_word(sp->pdev, 0x42, msi_control); + + pci_disable_msix(sp->pdev); +} + +static void remove_inta_isr(struct s2io_nic *sp) +{ + struct net_device *dev = sp->dev; + + free_irq(sp->pdev->irq, dev); +} + /* ********************************************************* * * Functions defined below concern the OS part of the driver * * ********************************************************* */ @@ -3809,28 +3843,9 @@ static int s2io_open(struct net_device *dev) int ret = s2io_enable_msi_x(sp); if (!ret) { - u16 msi_control; - ret = s2io_test_msi(sp); - /* rollback MSI-X, will re-enable during add_isr() */ - kfree(sp->entries); - sp->mac_control.stats_info->sw_stat.mem_freed += - (MAX_REQUESTED_MSI_X * - sizeof(struct msix_entry)); - kfree(sp->s2io_entries); - sp->mac_control.stats_info->sw_stat.mem_freed += - (MAX_REQUESTED_MSI_X * - sizeof(struct s2io_msix_entry)); - sp->entries = NULL; - sp->s2io_entries = NULL; - - pci_read_config_word(sp->pdev, 0x42, &msi_control); - msi_control &= 0xFFFE; /* Disable MSI */ - pci_write_config_word(sp->pdev, 0x42, msi_control); - - pci_disable_msix(sp->pdev); - + remove_msix_isr(sp); } if (ret) { @@ -6719,15 +6734,22 @@ static int s2io_add_isr(struct s2io_nic * sp) } } if (err) { + remove_msix_isr(sp); DBG_PRINT(ERR_DBG,"%s:MSI-X-%d registration " "failed\n", dev->name, i); - DBG_PRINT(ERR_DBG, "Returned: %d\n", err); - return -1; + DBG_PRINT(ERR_DBG, "%s: defaulting to INTA\n", + dev->name); + sp->config.intr_type = INTA; + break; } sp->s2io_entries[i].in_use = MSIX_REGISTERED_SUCCESS; } - printk("MSI-X-TX %d entries enabled\n",msix_tx_cnt); - printk("MSI-X-RX %d entries enabled\n",msix_rx_cnt); + if (!err) { + printk(KERN_INFO "MSI-X-TX %d entries enabled\n", + msix_tx_cnt); + printk(KERN_INFO "MSI-X-RX %d entries enabled\n", + msix_rx_cnt); + } } if (sp->config.intr_type == INTA) { err = request_irq((int) sp->pdev->irq, s2io_isr, IRQF_SHARED, @@ -6742,40 +6764,10 @@ static int s2io_add_isr(struct s2io_nic * sp) } static void s2io_rem_isr(struct s2io_nic * sp) { - struct net_device *dev = sp->dev; - struct swStat *stats = &sp->mac_control.stats_info->sw_stat; - - if (sp->config.intr_type == MSI_X) { - int i; - u16 msi_control; - - for (i=1; (sp->s2io_entries[i].in_use == - MSIX_REGISTERED_SUCCESS); i++) { - int vector = sp->entries[i].vector; - void *arg = sp->s2io_entries[i].arg; - - synchronize_irq(vector); - free_irq(vector, arg); - } - - kfree(sp->entries); - stats->mem_freed += - (MAX_REQUESTED_MSI_X * sizeof(struct msix_entry)); - kfree(sp->s2io_entries); - stats->mem_freed += - (MAX_REQUESTED_MSI_X * sizeof(struct s2io_msix_entry)); - sp->entries = NULL; - sp->s2io_entries = NULL; - - pci_read_config_word(sp->pdev, 0x42, &msi_control); - msi_control &= 0xFFFE; /* Disable MSI */ - pci_write_config_word(sp->pdev, 0x42, msi_control); - - pci_disable_msix(sp->pdev); - } else { - synchronize_irq(sp->pdev->irq); - free_irq(sp->pdev->irq, dev); - } + if (sp->config.intr_type == MSI_X) + remove_msix_isr(sp); + else + remove_inta_isr(sp); } static void do_s2io_card_down(struct s2io_nic * sp, int do_io) -- cgit v1.2.3 From e383d19e90cfbbf8e00512d44194ce175b3f60a2 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 14 Nov 2007 16:33:27 +0200 Subject: mlx4_core: Fix thinko in QP destroy (incorrect bitmap_free) Fix thinko in commit eaf559bf ("mlx4_core: Don't free special QPs in QP number bitmap"). The old commit had the logic exactly backwards and ended up freeing *only* special QPs, which not only left the original bug in place but also introduced the problem that the QP number bitmap would get full after a while. Found by Dotan Barak of Mellanox. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index cc4b1be1821..42b47639c81 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -240,7 +240,7 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn); mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); - if (qp->qpn < dev->caps.sqp_start + 8) + if (qp->qpn >= dev->caps.sqp_start + 8) mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); } EXPORT_SYMBOL_GPL(mlx4_qp_free); -- cgit v1.2.3 From fb93134dfc2a6e6fbedc7c270a31da03fce88db9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Nov 2007 15:45:21 -0800 Subject: [TCP]: Fix size calculation in sk_stream_alloc_pskb We round up the header size in sk_stream_alloc_pskb so that TSO packets get zero tail room. Unfortunately this rounding up is not coordinated with the select_size() function used by TCP to calculate the second parameter of sk_stream_alloc_pskb. As a result, we may allocate more than a page of data in the non-TSO case when exactly one page is desired. In fact, rounding up the head room is detrimental in the non-TSO case because it makes memory that would otherwise be available to the payload head room. TSO doesn't need this either, all it wants is the guarantee that there is no tail room. So this patch fixes this by adjusting the skb_reserve call so that exactly the requested amount (which all callers have calculated in a precise way) is made available as tail room. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 5504fb9fa88..567e468d749 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1235,14 +1235,16 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, gfp_t gfp) { struct sk_buff *skb; - int hdr_len; - hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); - skb = alloc_skb_fclone(size + hdr_len, gfp); + skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (skb) { skb->truesize += mem; if (sk_stream_wmem_schedule(sk, skb->truesize)) { - skb_reserve(skb, hdr_len); + /* + * Make sure that we have exactly size bytes + * available to the caller, no more, no less. + */ + skb_reserve(skb, skb_tailroom(skb) - size); return skb; } __kfree_skb(skb); -- cgit v1.2.3 From 96a2d41a3e495734b63bff4e5dd0112741b93b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Wed, 14 Nov 2007 15:47:18 -0800 Subject: [TCP]: Make sure write_queue_from does not begin with NULL ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NULL ptr can be returned from tcp_write_queue_head to cached_skb and then assigned to skb if packets_out was zero. Without this, system is vulnerable to a carefully crafted ACKs which obviously is remotely triggerable. Besides, there's very little that needs to be done in sacktag if there weren't any packets outstanding, just skipping the rest doesn't hurt. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12ae9a68cda..3f126ece8eb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1269,6 +1269,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window)) return 0; + if (!tp->packets_out) + goto out; + /* SACK fastpath: * if the only SACK change is the increase of the end_seq of * the first block then only apply that SACK block @@ -1515,6 +1518,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) tcp_update_reordering(sk, tp->fackets_out - reord, 0); +out: + #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); BUG_TRAP((int)tp->lost_out >= 0); -- cgit v1.2.3 From 8a856397f1b023b53907d13e742c216d90dd5034 Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Wed, 14 Nov 2007 15:51:01 -0800 Subject: [FS_ENET]: Fix module build. If fs_enet is build as module, on PPC_CPM_NEW_BINDING platforms mii-fec/mii-bitbang should be build as module, as well. On other platforms, mii-fec/mii-bitbang must be included into the main module. Otherwise some symbols remain undefined. Additionally, fs_enet uses libphy, so add a select PHYLIB. Building modules, stage 2. MODPOST 5 modules ERROR: "fs_scc_ops" [drivers/net/fs_enet/fs_enet.ko] undefined! make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 Signed-off-by: Jochen Friedrich Signed-off-by: David S. Miller --- drivers/net/fs_enet/Kconfig | 11 ++++++++++- drivers/net/fs_enet/Makefile | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/fs_enet/Kconfig b/drivers/net/fs_enet/Kconfig index 2765e49e07d..562ea68ed99 100644 --- a/drivers/net/fs_enet/Kconfig +++ b/drivers/net/fs_enet/Kconfig @@ -2,6 +2,7 @@ config FS_ENET tristate "Freescale Ethernet Driver" depends on CPM1 || CPM2 select MII + select PHYLIB config FS_ENET_HAS_SCC bool "Chip has an SCC usable for ethernet" @@ -11,11 +12,19 @@ config FS_ENET_HAS_SCC config FS_ENET_HAS_FCC bool "Chip has an FCC usable for ethernet" depends on FS_ENET && CPM2 - select MDIO_BITBANG default y config FS_ENET_HAS_FEC bool "Chip has an FEC usable for ethernet" depends on FS_ENET && CPM1 + select FS_ENET_MDIO_FEC default y +config FS_ENET_MDIO_FEC + tristate "MDIO driver for FEC" + depends on FS_ENET && CPM1 + +config FS_ENET_MDIO_FCC + tristate "MDIO driver for FCC" + depends on FS_ENET && CPM2 + select MDIO_BITBANG diff --git a/drivers/net/fs_enet/Makefile b/drivers/net/fs_enet/Makefile index 02d4dc18ba6..1ffbe0756a0 100644 --- a/drivers/net/fs_enet/Makefile +++ b/drivers/net/fs_enet/Makefile @@ -4,7 +4,16 @@ obj-$(CONFIG_FS_ENET) += fs_enet.o -obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o mii-fec.o -obj-$(CONFIG_CPM2) += mac-fcc.o mii-bitbang.o +fs_enet-$(CONFIG_FS_ENET_HAS_SCC) += mac-scc.o +fs_enet-$(CONFIG_FS_ENET_HAS_FEC) += mac-fec.o +fs_enet-$(CONFIG_FS_ENET_HAS_FCC) += mac-fcc.o -fs_enet-objs := fs_enet-main.o +ifeq ($(CONFIG_PPC_CPM_NEW_BINDING),y) +obj-$(CONFIG_FS_ENET_MDIO_FEC) += mii-fec.o +obj-$(CONFIG_FS_ENET_MDIO_FCC) += mii-bitbang.o +else +fs_enet-$(CONFIG_FS_ENET_MDIO_FEC) += mii-fec.o +fs_enet-$(CONFIG_FS_ENET_MDIO_FCC) += mii-bitbang.o +endif + +fs_enet-objs := fs_enet-main.o $(fs_enet-m) -- cgit v1.2.3 From c67625a1ecd7caf4c0490fc5278d6bb736a5297f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 14 Nov 2007 15:53:16 -0800 Subject: [NET]: Remove notifier block from chain when register_netdevice_notifier fails Commit fcc5a03ac42564e9e255c1134dda47442289e466: [NET]: Allow netdev REGISTER/CHANGENAME events to fail makes the register_netdevice_notifier() handle the error from the NETDEV_REGISTER event, sent to the registering block. The bad news is that in this case the notifier block is not removed from the list, but the error is returned to the caller. In case the caller is in module init function and handles this error this can abort the module loading. The notifier block will be then removed from the kernel, but will be left in the list. Oops :( I think that the notifier block should be removed from the chain in case of error, regardless whether this error is handled by the caller or not. In the worst case (the error is _not_ handled) module will not receive the events any longer. Signed-off-by: Pavel Emelyanov Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index dd40b35bb00..86d62611f2f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1171,6 +1171,8 @@ rollback: nb->notifier_call(nb, NETDEV_UNREGISTER, dev); } } + + raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } -- cgit v1.2.3 From e1cd8f78f8cbfa314a095dbf704707217c8ee197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Wed, 14 Nov 2007 15:55:09 -0800 Subject: [TCP] FRTO: Clear frto_highmark only after process_frto that uses it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I broke this in commit 3de96471bd7fb76406e975ef6387abe3a0698149: [TCP]: Wrap-safed reordering detection FRTO check tcp_process_frto should always see a valid frto_highmark. An invalid frto_highmark (zero) is very likely what ultimately caused a seqno compare in tcp_frto_enter_loss to do the wrong leading to the LOST-bit leak. Having LOST-bits integry ensured like done after commit 23aeeec365dcf8bc87fae44c533e50d0bb4f23cc: [TCP] FRTO: Plug potential LOST-bit leak won't hurt. It may still be useful in some other, possibly legimate, scenario. Reported by Chazarain Guillaume . Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f126ece8eb..0f0c1c9829a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3113,11 +3113,11 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets); + if (tp->frto_counter) + frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ if (before(tp->frto_highmark, tp->snd_una)) tp->frto_highmark = 0; - if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, flag); if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ -- cgit v1.2.3 From 186fd777a8c63c28acdbcb0e9aefa7ebc858641e Mon Sep 17 00:00:00 2001 From: Frank Lichtenheld Date: Wed, 14 Nov 2007 15:57:38 -0800 Subject: [ISDN] sc: Fix sndpkt to have the correct number of arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit isdn_if.writebuf_skb has an additional ack flag argument which was missing from sndpkt leading to the following warning: CC [M] drivers/isdn/sc/init.o drivers/isdn/sc/init.c: In function ‘sc_init’: drivers/isdn/sc/init.c:281: warning: assignment from incompatible pointer type Note that this doesn't actually do anything with the flag, it just fixes the warning (and probably accessing the last argument). Signed-off-by: Frank Lichtenheld Signed-off-by: David S. Miller --- drivers/isdn/sc/card.h | 2 +- drivers/isdn/sc/packet.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/sc/card.h b/drivers/isdn/sc/card.h index 5992f63c383..0120bcf8831 100644 --- a/drivers/isdn/sc/card.h +++ b/drivers/isdn/sc/card.h @@ -109,7 +109,7 @@ void memcpy_fromshmem(int card, void *dest, const void *src, size_t n); int get_card_from_id(int driver); int indicate_status(int card, int event, ulong Channel, char *Data); irqreturn_t interrupt_handler(int interrupt, void *cardptr); -int sndpkt(int devId, int channel, struct sk_buff *data); +int sndpkt(int devId, int channel, int ack, struct sk_buff *data); void rcvpkt(int card, RspMessage *rcvmsg); int command(isdn_ctrl *cmd); int reset(int card); diff --git a/drivers/isdn/sc/packet.c b/drivers/isdn/sc/packet.c index 92016a2608e..5ff6ae86844 100644 --- a/drivers/isdn/sc/packet.c +++ b/drivers/isdn/sc/packet.c @@ -20,7 +20,7 @@ #include "message.h" #include "card.h" -int sndpkt(int devId, int channel, struct sk_buff *data) +int sndpkt(int devId, int channel, int ack, struct sk_buff *data) { LLData ReqLnkWrite; int status; -- cgit v1.2.3 From 66ba886254edbbd9442d30f1eef6f6fb0145027d Mon Sep 17 00:00:00 2001 From: Frank Lichtenheld Date: Wed, 14 Nov 2007 15:59:43 -0800 Subject: [ISDN] sc: Really, really fix warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CC [M] drivers/isdn/sc/shmem.o drivers/isdn/sc/shmem.c: In function ‘memcpy_toshmem’: drivers/isdn/sc/shmem.c:53: warning: passing argument 1 of ‘memcpy_toio’ makes pointer from integer without a cast Commit 9317d4313e0cd51b2256ea9a9316f2d8561e37a8: ISDN/sc: fix longstanding warning claimed to fix it, but it didn't. [ Changed the "void *" to be "void __iomem *" -DaveM ] Signed-off-by: Frank Lichtenheld Acked-by:Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/sc/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/sc/shmem.c b/drivers/isdn/sc/shmem.c index e0331e0094f..712220cef13 100644 --- a/drivers/isdn/sc/shmem.c +++ b/drivers/isdn/sc/shmem.c @@ -50,7 +50,7 @@ void memcpy_toshmem(int card, void *dest, const void *src, size_t n) outb(((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE) >> 14) | 0x80, sc_adapter[card]->ioport[sc_adapter[card]->shmem_pgport]); - memcpy_toio(sc_adapter[card]->rambase + dest_rem, src, n); + memcpy_toio((void __iomem *)(sc_adapter[card]->rambase + dest_rem), src, n); spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); pr_debug("%s: set page to %#x\n",sc_adapter[card]->devicename, ((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE)>>14)|0x80); -- cgit v1.2.3 From d90bf5a976793edfa88d3bb2393f0231eb8ce1e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Nov 2007 16:14:05 -0800 Subject: [NET]: rt_check_expire() can take a long time, add a cond_resched() On commit 39c90ece7565f5c47110c2fa77409d7a9478bd5b: [IPV4]: Convert rt_check_expire() from softirq processing to workqueue. we converted rt_check_expire() from softirq to workqueue, allowing the function to perform all work it was supposed to do. When the IP route cache is big, rt_check_expire() can take a long time to run. (default settings : 20% of the hash table is scanned at each invocation) Adding cond_resched() helps giving cpu to higher priority tasks if necessary. Using a "if (need_resched())" test before calling "cond_resched();" is necessary to avoid spending too much time doing the resched check. (My tests gave a time reduction from 88 ms to 25 ms per rt_check_expire() run on my i686 test machine) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 45651834e1e..1bff9ed349f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -578,6 +578,9 @@ static void rt_check_expire(struct work_struct *work) i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; + if (need_resched()) + cond_resched(); + if (*rthp == NULL) continue; spin_lock_bh(rt_hash_lock_addr(i)); -- cgit v1.2.3 From a0af5f14542a2e0687f95e093a33ea7301fe8cc5 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 9 Nov 2007 16:25:08 +0100 Subject: mac80211: Fix queuing of scan containing a SSID This patch fixes scanning for specific ssid's which is broken due to the scan being queued up without respecting the ssid to scan for. Signed-off-by: Helmut Schaa Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/ieee80211_sta.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4b4ed2a5803..d575ccd67e9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -241,6 +241,8 @@ struct ieee80211_if_sta { u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; size_t ssid_len; + u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; + size_t scan_ssid_len; u16 aid; u16 ap_capab, capab; u8 *extra_ie; /* to be added to the end of AssocReq */ diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index fda0e06453e..fc6a3ff3d90 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -1998,7 +1998,10 @@ void ieee80211_sta_work(struct work_struct *work) if (ifsta->state != IEEE80211_AUTHENTICATE && ifsta->state != IEEE80211_ASSOCIATE && test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { - ieee80211_sta_start_scan(dev, NULL, 0); + if (ifsta->scan_ssid_len) + ieee80211_sta_start_scan(dev, ifsta->scan_ssid, ifsta->scan_ssid_len); + else + ieee80211_sta_start_scan(dev, NULL, 0); return; } @@ -2868,6 +2871,9 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len) return -EBUSY; } + ifsta->scan_ssid_len = ssid_len; + if (ssid_len) + memcpy(ifsta->scan_ssid, ssid, ssid_len); set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); queue_work(local->hw.workqueue, &ifsta->work); return 0; -- cgit v1.2.3 From 14577f239fe5193d556ef1471c8667dabd556418 Mon Sep 17 00:00:00 2001 From: Mohamed Abbas Date: Mon, 12 Nov 2007 11:37:42 +0800 Subject: iwl3945: place CCK rates in front of OFDM for supported rates The patch fixes association failure (reason = 18) bug by arranging CCK rates before OFDM rates. This patch will register with mac80211 the modified rate arrangement with CCK rate first. Change rate scale algorithm also to deal with rate change. Fix Txpower and rate Table commands to be constructed correctly after rearrangement. Signed-off-by: Mohamed Abbas Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 56 +++++++++++-- drivers/net/wireless/iwlwifi/iwl-3945-rs.h | 29 +++++-- drivers/net/wireless/iwlwifi/iwl-3945.c | 126 +++++++++++++++------------- drivers/net/wireless/iwlwifi/iwl3945-base.c | 4 +- 4 files changed, 143 insertions(+), 72 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index 262ab0b5582..c48b1b537d2 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -71,19 +71,19 @@ struct iwl_rate_scale_priv { }; static s32 iwl_expected_tpt_g[IWL_RATE_COUNT] = { - 0, 0, 76, 104, 130, 168, 191, 202, 7, 13, 35, 58 + 7, 13, 35, 58, 0, 0, 76, 104, 130, 168, 191, 202 }; static s32 iwl_expected_tpt_g_prot[IWL_RATE_COUNT] = { - 0, 0, 0, 80, 93, 113, 123, 125, 7, 13, 35, 58 + 7, 13, 35, 58, 0, 0, 0, 80, 93, 113, 123, 125 }; static s32 iwl_expected_tpt_a[IWL_RATE_COUNT] = { - 40, 57, 72, 98, 121, 154, 177, 186, 0, 0, 0, 0 + 0, 0, 0, 0, 40, 57, 72, 98, 121, 154, 177, 186 }; static s32 iwl_expected_tpt_b[IWL_RATE_COUNT] = { - 0, 0, 0, 0, 0, 0, 0, 0, 7, 13, 35, 58 + 7, 13, 35, 58, 0, 0, 0, 0, 0, 0, 0, 0 }; struct iwl_tpt_entry { @@ -350,6 +350,10 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, sta->last_txrate = sta->txrate; + /* For MODE_IEEE80211A mode it start at IWL_FIRST_OFDM_RATE */ + if (local->hw.conf.phymode == MODE_IEEE80211A) + sta->last_txrate += IWL_FIRST_OFDM_RATE; + IWL_DEBUG_RATE("leave\n"); } @@ -417,6 +421,33 @@ static void rs_free_sta(void *priv, void *priv_sta) IWL_DEBUG_RATE("leave\n"); } + +/* + * get ieee prev rate from rate scale table. + * for A and B mode we need to overright prev + * value + */ +static int rs_adjust_next_rate(struct iwl_priv *priv, int rate) +{ + int next_rate = iwl_get_prev_ieee_rate(rate); + + switch (priv->phymode) { + case MODE_IEEE80211A: + if (rate == IWL_RATE_12M_INDEX) + next_rate = IWL_RATE_9M_INDEX; + else if (rate == IWL_RATE_6M_INDEX) + next_rate = IWL_RATE_6M_INDEX; + break; + case MODE_IEEE80211B: + if (rate == IWL_RATE_11M_INDEX_TABLE) + next_rate = IWL_RATE_5M_INDEX_TABLE; + break; + default: + break; + } + + return next_rate; +} /** * rs_tx_status - Update rate control values based on Tx results * @@ -479,7 +510,8 @@ static void rs_tx_status(void *priv_rate, last_index = scale_rate_index; } else { current_count = priv->retry_rate; - last_index = iwl_get_prev_ieee_rate(scale_rate_index); + last_index = rs_adjust_next_rate(priv, + scale_rate_index); } /* Update this rate accounting for as many retries @@ -494,9 +526,10 @@ static void rs_tx_status(void *priv_rate, if (retries) scale_rate_index = - iwl_get_prev_ieee_rate(scale_rate_index); + rs_adjust_next_rate(priv, scale_rate_index); } + /* Update the last index window with success/failure based on ACK */ IWL_DEBUG_RATE("Update rate %d with %s.\n", last_index, @@ -672,7 +705,10 @@ static struct ieee80211_rate *rs_get_rate(void *priv_rate, } rate_mask = sta->supp_rates; - index = min(sta->txrate & 0xffff, IWL_RATE_COUNT - 1); + index = min(sta->last_txrate & 0xffff, IWL_RATE_COUNT - 1); + + if (priv->phymode == (u8) MODE_IEEE80211A) + rate_mask = rate_mask << IWL_FIRST_OFDM_RATE; rs_priv = (void *)sta->rate_ctrl_priv; @@ -801,7 +837,11 @@ static struct ieee80211_rate *rs_get_rate(void *priv_rate, out: sta->last_txrate = index; - sta->txrate = sta->last_txrate; + if (priv->phymode == (u8) MODE_IEEE80211A) + sta->txrate = sta->last_txrate - IWL_FIRST_OFDM_RATE; + else + sta->txrate = sta->last_txrate; + sta_info_put(sta); IWL_DEBUG_RATE("leave: %d\n", index); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.h b/drivers/net/wireless/iwlwifi/iwl-3945-rs.h index b926738e0ea..bec4d3ffca1 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.h @@ -36,10 +36,17 @@ struct iwl_rate_info { u8 next_rs; /* next rate used in rs algo */ u8 prev_rs_tgg; /* previous rate used in TGG rs algo */ u8 next_rs_tgg; /* next rate used in TGG rs algo */ + u8 table_rs_index; /* index in rate scale table cmd */ + u8 prev_table_rs; /* prev in rate table cmd */ + }; enum { - IWL_RATE_6M_INDEX = 0, + IWL_RATE_1M_INDEX = 0, + IWL_RATE_2M_INDEX, + IWL_RATE_5M_INDEX, + IWL_RATE_11M_INDEX, + IWL_RATE_6M_INDEX, IWL_RATE_9M_INDEX, IWL_RATE_12M_INDEX, IWL_RATE_18M_INDEX, @@ -47,15 +54,27 @@ enum { IWL_RATE_36M_INDEX, IWL_RATE_48M_INDEX, IWL_RATE_54M_INDEX, - IWL_RATE_1M_INDEX, - IWL_RATE_2M_INDEX, - IWL_RATE_5M_INDEX, - IWL_RATE_11M_INDEX, IWL_RATE_COUNT, IWL_RATE_INVM_INDEX, IWL_RATE_INVALID = IWL_RATE_INVM_INDEX }; +enum { + IWL_RATE_6M_INDEX_TABLE = 0, + IWL_RATE_9M_INDEX_TABLE, + IWL_RATE_12M_INDEX_TABLE, + IWL_RATE_18M_INDEX_TABLE, + IWL_RATE_24M_INDEX_TABLE, + IWL_RATE_36M_INDEX_TABLE, + IWL_RATE_48M_INDEX_TABLE, + IWL_RATE_54M_INDEX_TABLE, + IWL_RATE_1M_INDEX_TABLE, + IWL_RATE_2M_INDEX_TABLE, + IWL_RATE_5M_INDEX_TABLE, + IWL_RATE_11M_INDEX_TABLE, + IWL_RATE_INVM_INDEX_TABLE = IWL_RATE_INVM_INDEX, +}; + enum { IWL_FIRST_OFDM_RATE = IWL_RATE_6M_INDEX, IWL_LAST_OFDM_RATE = IWL_RATE_54M_INDEX, diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index 19bcb01e278..3a45fe99a83 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -54,7 +54,9 @@ IWL_RATE_##rp##M_INDEX, \ IWL_RATE_##rn##M_INDEX, \ IWL_RATE_##pp##M_INDEX, \ - IWL_RATE_##np##M_INDEX } + IWL_RATE_##np##M_INDEX, \ + IWL_RATE_##r##M_INDEX_TABLE, \ + IWL_RATE_##ip##M_INDEX_TABLE } /* * Parameter order: @@ -65,6 +67,10 @@ * */ const struct iwl_rate_info iwl_rates[IWL_RATE_COUNT] = { + IWL_DECLARE_RATE_INFO(1, INV, 2, INV, 2, INV, 2), /* 1mbps */ + IWL_DECLARE_RATE_INFO(2, 1, 5, 1, 5, 1, 5), /* 2mbps */ + IWL_DECLARE_RATE_INFO(5, 2, 6, 2, 11, 2, 11), /*5.5mbps */ + IWL_DECLARE_RATE_INFO(11, 9, 12, 5, 12, 5, 18), /* 11mbps */ IWL_DECLARE_RATE_INFO(6, 5, 9, 5, 11, 5, 11), /* 6mbps */ IWL_DECLARE_RATE_INFO(9, 6, 11, 5, 11, 5, 11), /* 9mbps */ IWL_DECLARE_RATE_INFO(12, 11, 18, 11, 18, 11, 18), /* 12mbps */ @@ -73,10 +79,6 @@ const struct iwl_rate_info iwl_rates[IWL_RATE_COUNT] = { IWL_DECLARE_RATE_INFO(36, 24, 48, 24, 48, 24, 48), /* 36mbps */ IWL_DECLARE_RATE_INFO(48, 36, 54, 36, 54, 36, 54), /* 48mbps */ IWL_DECLARE_RATE_INFO(54, 48, INV, 48, INV, 48, INV),/* 54mbps */ - IWL_DECLARE_RATE_INFO(1, INV, 2, INV, 2, INV, 2), /* 1mbps */ - IWL_DECLARE_RATE_INFO(2, 1, 5, 1, 5, 1, 5), /* 2mbps */ - IWL_DECLARE_RATE_INFO(5, 2, 6, 2, 11, 2, 11), /*5.5mbps */ - IWL_DECLARE_RATE_INFO(11, 9, 12, 5, 12, 5, 18), /* 11mbps */ }; /* 1 = enable the iwl_disable_events() function */ @@ -662,10 +664,11 @@ void iwl_hw_build_tx_cmd_rate(struct iwl_priv *priv, cmd->cmd.tx.tx_flags = tx_flags; /* OFDM */ - cmd->cmd.tx.supp_rates[0] = rate_mask & IWL_OFDM_RATES_MASK; + cmd->cmd.tx.supp_rates[0] = + ((rate_mask & IWL_OFDM_RATES_MASK) >> IWL_FIRST_OFDM_RATE) & 0xFF; /* CCK */ - cmd->cmd.tx.supp_rates[1] = (rate_mask >> 8) & 0xF; + cmd->cmd.tx.supp_rates[1] = (rate_mask & 0xF); IWL_DEBUG_RATE("Tx sta id: %d, rate: %d (plcp), flags: 0x%4X " "cck/ofdm mask: 0x%x/0x%x\n", sta_id, @@ -1432,7 +1435,7 @@ static void iwl_hw_reg_set_scan_power(struct iwl_priv *priv, u32 scan_tbl_index, /* use this channel group's 6Mbit clipping/saturation pwr, * but cap at regulatory scan power restriction (set during init * based on eeprom channel data) for this channel. */ - power = min(ch_info->scan_power, clip_pwrs[IWL_RATE_6M_INDEX]); + power = min(ch_info->scan_power, clip_pwrs[IWL_RATE_6M_INDEX_TABLE]); /* further limit to user's max power preference. * FIXME: Other spectrum management power limitations do not @@ -1447,7 +1450,7 @@ static void iwl_hw_reg_set_scan_power(struct iwl_priv *priv, u32 scan_tbl_index, * *index*. */ power_index = ch_info->power_info[rate_index].power_table_index - (power - ch_info->power_info - [IWL_RATE_6M_INDEX].requested_power) * 2; + [IWL_RATE_6M_INDEX_TABLE].requested_power) * 2; /* store reference index that we use when adjusting *all* scan * powers. So we can accommodate user (all channel) or spectrum @@ -1476,7 +1479,7 @@ static void iwl_hw_reg_set_scan_power(struct iwl_priv *priv, u32 scan_tbl_index, */ int iwl_hw_reg_send_txpower(struct iwl_priv *priv) { - int rate_idx; + int rate_idx, i; const struct iwl_channel_info *ch_info = NULL; struct iwl_txpowertable_cmd txpower = { .channel = priv->active_rxon.channel, @@ -1500,20 +1503,36 @@ int iwl_hw_reg_send_txpower(struct iwl_priv *priv) } /* fill cmd with power settings for all rates for current channel */ - for (rate_idx = 0; rate_idx < IWL_RATE_COUNT; rate_idx++) { - txpower.power[rate_idx].tpc = ch_info->power_info[rate_idx].tpc; - txpower.power[rate_idx].rate = iwl_rates[rate_idx].plcp; + /* Fill OFDM rate */ + for (rate_idx = IWL_FIRST_OFDM_RATE, i = 0; + rate_idx <= IWL_LAST_OFDM_RATE; rate_idx++, i++) { + + txpower.power[i].tpc = ch_info->power_info[i].tpc; + txpower.power[i].rate = iwl_rates[rate_idx].plcp; IWL_DEBUG_POWER("ch %d:%d rf %d dsp %3d rate code 0x%02x\n", le16_to_cpu(txpower.channel), txpower.band, - txpower.power[rate_idx].tpc.tx_gain, - txpower.power[rate_idx].tpc.dsp_atten, - txpower.power[rate_idx].rate); + txpower.power[i].tpc.tx_gain, + txpower.power[i].tpc.dsp_atten, + txpower.power[i].rate); + } + /* Fill CCK rates */ + for (rate_idx = IWL_FIRST_CCK_RATE; + rate_idx <= IWL_LAST_CCK_RATE; rate_idx++, i++) { + txpower.power[i].tpc = ch_info->power_info[i].tpc; + txpower.power[i].rate = iwl_rates[rate_idx].plcp; + + IWL_DEBUG_POWER("ch %d:%d rf %d dsp %3d rate code 0x%02x\n", + le16_to_cpu(txpower.channel), + txpower.band, + txpower.power[i].tpc.tx_gain, + txpower.power[i].tpc.dsp_atten, + txpower.power[i].rate); } return iwl_send_cmd_pdu(priv, REPLY_TX_PWR_TABLE_CMD, - sizeof(struct iwl_txpowertable_cmd), &txpower); + sizeof(struct iwl_txpowertable_cmd), &txpower); } @@ -1549,7 +1568,7 @@ static int iwl_hw_reg_set_new_power(struct iwl_priv *priv, power_info = ch_info->power_info; /* update OFDM Txpower settings */ - for (i = IWL_FIRST_OFDM_RATE; i <= IWL_LAST_OFDM_RATE; + for (i = IWL_RATE_6M_INDEX_TABLE; i <= IWL_RATE_54M_INDEX_TABLE; i++, ++power_info) { int delta_idx; @@ -1573,14 +1592,14 @@ static int iwl_hw_reg_set_new_power(struct iwl_priv *priv, * ... all CCK power settings for a given channel are the *same*. */ if (power_changed) { power = - ch_info->power_info[IWL_RATE_12M_INDEX]. + ch_info->power_info[IWL_RATE_12M_INDEX_TABLE]. requested_power + IWL_CCK_FROM_OFDM_POWER_DIFF; /* do all CCK rates' iwl_channel_power_info structures */ - for (i = IWL_FIRST_CCK_RATE; i <= IWL_LAST_CCK_RATE; i++) { + for (i = IWL_RATE_1M_INDEX_TABLE; i <= IWL_RATE_11M_INDEX_TABLE; i++) { power_info->requested_power = power; power_info->base_power_index = - ch_info->power_info[IWL_RATE_12M_INDEX]. + ch_info->power_info[IWL_RATE_12M_INDEX_TABLE]. base_power_index + IWL_CCK_FROM_OFDM_INDEX_DIFF; ++power_info; } @@ -1674,7 +1693,7 @@ static int iwl_hw_reg_comp_txpower_temp(struct iwl_priv *priv) for (scan_tbl_index = 0; scan_tbl_index < IWL_NUM_SCAN_RATES; scan_tbl_index++) { s32 actual_index = (scan_tbl_index == 0) ? - IWL_RATE_1M_INDEX : IWL_RATE_6M_INDEX; + IWL_RATE_1M_INDEX_TABLE : IWL_RATE_6M_INDEX_TABLE; iwl_hw_reg_set_scan_power(priv, scan_tbl_index, actual_index, clip_pwrs, ch_info, a_band); @@ -1905,19 +1924,19 @@ static void iwl_hw_reg_init_channel_groups(struct iwl_priv *priv) for (rate_index = 0; rate_index < IWL_RATE_COUNT; rate_index++, clip_pwrs++) { switch (rate_index) { - case IWL_RATE_36M_INDEX: + case IWL_RATE_36M_INDEX_TABLE: if (i == 0) /* B/G */ *clip_pwrs = satur_pwr; else /* A */ *clip_pwrs = satur_pwr - 5; break; - case IWL_RATE_48M_INDEX: + case IWL_RATE_48M_INDEX_TABLE: if (i == 0) *clip_pwrs = satur_pwr - 7; else *clip_pwrs = satur_pwr - 10; break; - case IWL_RATE_54M_INDEX: + case IWL_RATE_54M_INDEX_TABLE: if (i == 0) *clip_pwrs = satur_pwr - 9; else @@ -2031,7 +2050,7 @@ int iwl3945_txpower_set_from_eeprom(struct iwl_priv *priv) } /* set tx power for CCK rates, based on OFDM 12 Mbit settings*/ - pwr_info = &ch_info->power_info[IWL_RATE_12M_INDEX]; + pwr_info = &ch_info->power_info[IWL_RATE_12M_INDEX_TABLE]; power = pwr_info->requested_power + IWL_CCK_FROM_OFDM_POWER_DIFF; pwr_index = pwr_info->power_table_index + @@ -2047,9 +2066,9 @@ int iwl3945_txpower_set_from_eeprom(struct iwl_priv *priv) /* fill each CCK rate's iwl_channel_power_info structure * NOTE: All CCK-rate Txpwrs are the same for a given chnl! * NOTE: CCK rates start at end of OFDM rates! */ - for (rate_index = IWL_OFDM_RATES; - rate_index < IWL_RATE_COUNT; rate_index++) { - pwr_info = &ch_info->power_info[rate_index]; + for (rate_index = 0; + rate_index < IWL_CCK_RATES; rate_index++) { + pwr_info = &ch_info->power_info[rate_index+IWL_OFDM_RATES]; pwr_info->requested_power = power; pwr_info->power_table_index = pwr_index; pwr_info->base_power_index = base_pwr_index; @@ -2061,7 +2080,7 @@ int iwl3945_txpower_set_from_eeprom(struct iwl_priv *priv) for (scan_tbl_index = 0; scan_tbl_index < IWL_NUM_SCAN_RATES; scan_tbl_index++) { s32 actual_index = (scan_tbl_index == 0) ? - IWL_RATE_1M_INDEX : IWL_RATE_6M_INDEX; + IWL_RATE_1M_INDEX_TABLE : IWL_RATE_6M_INDEX_TABLE; iwl_hw_reg_set_scan_power(priv, scan_tbl_index, actual_index, clip_pwrs, ch_info, a_band); } @@ -2139,17 +2158,20 @@ int iwl_hw_get_rx_read(struct iwl_priv *priv) */ int iwl3945_init_hw_rate_table(struct iwl_priv *priv) { - int rc, i; + int rc, i, index, prev_index; struct iwl_rate_scaling_cmd rate_cmd = { .reserved = {0, 0, 0}, }; struct iwl_rate_scaling_info *table = rate_cmd.table; for (i = 0; i < ARRAY_SIZE(iwl_rates); i++) { - table[i].rate_n_flags = + index = iwl_rates[i].table_rs_index; + + table[index].rate_n_flags = iwl_hw_set_rate_n_flags(iwl_rates[i].plcp, 0); - table[i].try_cnt = priv->retry_rate; - table[i].next_rate_index = iwl_get_prev_ieee_rate(i); + table[index].try_cnt = priv->retry_rate; + prev_index = iwl_get_prev_ieee_rate(i); + table[index].next_rate_index = iwl_rates[prev_index].table_rs_index; } switch (priv->phymode) { @@ -2157,26 +2179,26 @@ int iwl3945_init_hw_rate_table(struct iwl_priv *priv) IWL_DEBUG_RATE("Select A mode rate scale\n"); /* If one of the following CCK rates is used, * have it fall back to the 6M OFDM rate */ - for (i = IWL_FIRST_CCK_RATE; i <= IWL_LAST_CCK_RATE; i++) - table[i].next_rate_index = IWL_FIRST_OFDM_RATE; + for (i = IWL_RATE_1M_INDEX_TABLE; i <= IWL_RATE_11M_INDEX_TABLE; i++) + table[i].next_rate_index = iwl_rates[IWL_FIRST_OFDM_RATE].table_rs_index; /* Don't fall back to CCK rates */ - table[IWL_RATE_12M_INDEX].next_rate_index = IWL_RATE_9M_INDEX; + table[IWL_RATE_12M_INDEX_TABLE].next_rate_index = IWL_RATE_9M_INDEX_TABLE; /* Don't drop out of OFDM rates */ - table[IWL_FIRST_OFDM_RATE].next_rate_index = - IWL_FIRST_OFDM_RATE; + table[IWL_RATE_6M_INDEX_TABLE].next_rate_index = + iwl_rates[IWL_FIRST_OFDM_RATE].table_rs_index; break; case MODE_IEEE80211B: IWL_DEBUG_RATE("Select B mode rate scale\n"); /* If an OFDM rate is used, have it fall back to the * 1M CCK rates */ - for (i = IWL_FIRST_OFDM_RATE; i <= IWL_LAST_OFDM_RATE; i++) - table[i].next_rate_index = IWL_FIRST_CCK_RATE; + for (i = IWL_RATE_6M_INDEX_TABLE; i <= IWL_RATE_54M_INDEX_TABLE; i++) + table[i].next_rate_index = iwl_rates[IWL_FIRST_CCK_RATE].table_rs_index; /* CCK shouldn't fall back to OFDM... */ - table[IWL_RATE_11M_INDEX].next_rate_index = IWL_RATE_5M_INDEX; + table[IWL_RATE_11M_INDEX_TABLE].next_rate_index = IWL_RATE_5M_INDEX_TABLE; break; default: @@ -2248,22 +2270,12 @@ unsigned int iwl_hw_get_beacon_cmd(struct iwl_priv *priv, tx_beacon_cmd->tx.tx_flags = (TX_CMD_FLG_SEQ_CTL_MSK | TX_CMD_FLG_TSF_MSK); - /* supp_rates[0] == OFDM */ - tx_beacon_cmd->tx.supp_rates[0] = IWL_OFDM_BASIC_RATES_MASK; - - /* supp_rates[1] == CCK - * - * NOTE: IWL_*_RATES_MASK are not in the order that supp_rates - * expects so we have to shift them around. - * - * supp_rates expects: - * CCK rates are bit0..3 - * - * However IWL_*_RATES_MASK has: - * CCK rates are bit8..11 - */ + /* supp_rates[0] == OFDM start at IWL_FIRST_OFDM_RATE*/ + tx_beacon_cmd->tx.supp_rates[0] = + (IWL_OFDM_BASIC_RATES_MASK >> IWL_FIRST_OFDM_RATE) & 0xFF; + tx_beacon_cmd->tx.supp_rates[1] = - (IWL_CCK_BASIC_RATES_MASK >> 8) & 0xF; + (IWL_CCK_BASIC_RATES_MASK & 0xF); return (sizeof(struct iwl_tx_beacon_cmd) + frame_size); } diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 4f22a7174ca..e4ddbc9ac24 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -5331,13 +5331,13 @@ static int iwl_init_geos(struct iwl_priv *priv) /* 5.2GHz channels start after the 2.4GHz channels */ modes[A].mode = MODE_IEEE80211A; modes[A].channels = &channels[ARRAY_SIZE(iwl_eeprom_band_1)]; - modes[A].rates = rates; + modes[A].rates = &rates[4]; modes[A].num_rates = 8; /* just OFDM */ modes[A].num_channels = 0; modes[B].mode = MODE_IEEE80211B; modes[B].channels = channels; - modes[B].rates = &rates[8]; + modes[B].rates = rates; modes[B].num_rates = 4; /* just CCK */ modes[B].num_channels = 0; -- cgit v1.2.3 From 755a957d407c3fcac58360d9309b1664078ac15d Mon Sep 17 00:00:00 2001 From: Ivo van Doorn Date: Mon, 12 Nov 2007 15:02:22 +0100 Subject: rt2x00: Fix chipset revision validation The validation of the chipset revision was broken since for rt2500usb and rt73usb different registers should be read. When rt2500usb was loaded for a rt73 device it would false think the chipset was correct because the wrong register was read and validated. This has been fixed by expanding the check to also see if the first 4 bits of the revision is not-0 (When reading the wrong register offset the returned value is usually 0 which can be interpreted as invalid) Signed-off-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2500pci.c | 4 ++-- drivers/net/wireless/rt2x00/rt2500usb.c | 4 ++-- drivers/net/wireless/rt2x00/rt2x00.h | 8 +++++--- drivers/net/wireless/rt2x00/rt73usb.c | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index ff2d63267b1..702321c3016 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c @@ -620,7 +620,7 @@ static void rt2500pci_link_tuner(struct rt2x00_dev *rt2x00dev) * up to version C the link tuning should halt after 20 * seconds. */ - if (rt2x00_get_rev(&rt2x00dev->chip) < RT2560_VERSION_D && + if (rt2x00_rev(&rt2x00dev->chip) < RT2560_VERSION_D && rt2x00dev->link.count > 20) return; @@ -630,7 +630,7 @@ static void rt2500pci_link_tuner(struct rt2x00_dev *rt2x00dev) * Chipset versions C and lower should directly continue * to the dynamic CCA tuning. */ - if (rt2x00_get_rev(&rt2x00dev->chip) < RT2560_VERSION_D) + if (rt2x00_rev(&rt2x00dev->chip) < RT2560_VERSION_D) goto dynamic_cca_tune; /* diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 7cdc80a122b..277a020b35e 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -753,7 +753,7 @@ static int rt2500usb_init_registers(struct rt2x00_dev *rt2x00dev) rt2x00_set_field16(®, MAC_CSR1_HOST_READY, 1); rt2500usb_register_write(rt2x00dev, MAC_CSR1, reg); - if (rt2x00_get_rev(&rt2x00dev->chip) >= RT2570_VERSION_C) { + if (rt2x00_rev(&rt2x00dev->chip) >= RT2570_VERSION_C) { rt2500usb_register_read(rt2x00dev, PHY_CSR2, ®); reg &= ~0x0002; } else { @@ -1257,7 +1257,7 @@ static int rt2500usb_init_eeprom(struct rt2x00_dev *rt2x00dev) rt2500usb_register_read(rt2x00dev, MAC_CSR0, ®); rt2x00_set_chip(rt2x00dev, RT2570, value, reg); - if (rt2x00_rev(&rt2x00dev->chip, 0xffff0)) { + if (!rt2x00_check_rev(&rt2x00dev->chip, 0)) { ERROR(rt2x00dev, "Invalid RT chipset detected.\n"); return -ENODEV; } diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 9845e584b73..d1ad5251a77 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -751,14 +751,16 @@ static inline char rt2x00_rf(const struct rt2x00_chip *chipset, const u16 chip) return (chipset->rf == chip); } -static inline u16 rt2x00_get_rev(const struct rt2x00_chip *chipset) +static inline u16 rt2x00_rev(const struct rt2x00_chip *chipset) { return chipset->rev; } -static inline u16 rt2x00_rev(const struct rt2x00_chip *chipset, const u32 mask) +static inline u16 rt2x00_check_rev(const struct rt2x00_chip *chipset, + const u32 rev) { - return chipset->rev & mask; + return (((chipset->rev & 0xffff0) == rev) && + !!(chipset->rev & 0x0000f)); } /* diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 46c8c0840a6..dc640bf6b5e 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -1486,7 +1486,7 @@ static int rt73usb_init_eeprom(struct rt2x00_dev *rt2x00dev) rt73usb_register_read(rt2x00dev, MAC_CSR0, ®); rt2x00_set_chip(rt2x00dev, RT2571, value, reg); - if (!rt2x00_rev(&rt2x00dev->chip, 0x25730)) { + if (!rt2x00_check_rev(&rt2x00dev->chip, 0x25730)) { ERROR(rt2x00dev, "Invalid RT chipset detected.\n"); return -ENODEV; } -- cgit v1.2.3 From 66fbb541a5d2d58fdae21c1e7b558a75bfbd483f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 15 Nov 2007 09:31:10 +0800 Subject: iwl4965: fix not correctly dealing with hotunplug The interrupt handler returns IRQ_NONE if it detects that the device is gone. That's incorrect because the device may have raised the interrupt. Not acknowledging it may trigger the spurious interrupt detection and kill drivers sharing the interrupt line. Signed-off-by: Oliver Neukum Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl4965-base.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c index d60adcb9bd4..d5107bb413c 100644 --- a/drivers/net/wireless/iwlwifi/iwl4965-base.c +++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c @@ -5156,9 +5156,10 @@ static irqreturn_t iwl_isr(int irq, void *data) } if ((inta == 0xFFFFFFFF) || ((inta & 0xFFFFFFF0) == 0xa5a5a5a0)) { - /* Hardware disappeared */ + /* Hardware disappeared. It might have already raised + * an interrupt */ IWL_WARNING("HARDWARE GONE?? INTA == 0x%080x\n", inta); - goto none; + goto unplugged; } IWL_DEBUG_ISR("ISR inta 0x%08x, enabled 0x%08x, fh 0x%08x\n", @@ -5166,8 +5167,9 @@ static irqreturn_t iwl_isr(int irq, void *data) /* iwl_irq_tasklet() will service interrupts and re-enable them */ tasklet_schedule(&priv->irq_tasklet); - spin_unlock(&priv->lock); + unplugged: + spin_unlock(&priv->lock); return IRQ_HANDLED; none: -- cgit v1.2.3 From 8a146a2b0d6e97941a5c2dc5d8a3ea1e6c3ab997 Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Wed, 14 Nov 2007 16:58:27 -0800 Subject: eCryptfs: cast page->index to loff_t instead of off_t page->index should be cast to loff_t instead of off_t. Signed-off-by: Michael Halcrow Reported-by: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/read_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 2150edf9a58..6b7474a4336 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -87,7 +87,7 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, loff_t offset; int rc; - offset = ((((off_t)page_for_lower->index) << PAGE_CACHE_SHIFT) + offset = ((((loff_t)page_for_lower->index) << PAGE_CACHE_SHIFT) + offset_in_page); virt = kmap(page_for_lower); rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); -- cgit v1.2.3 From 1299342bacbe9038bef473d9b5b3cbebad112d4c Mon Sep 17 00:00:00 2001 From: Andrey Borzenkov Date: Wed, 14 Nov 2007 16:58:28 -0800 Subject: Fix Oops in toshiba_acpi error return path When backlight_device_register() fails, return after undo initialization, do not try to use pointer that just was reset to NULL This fixes this oops: [ 1595.177672] [] show_trace_log_lvl+0x1a/0x30 [ 1595.177706] [] show_trace+0x12/0x20 [ 1595.177718] [] dump_stack+0x15/0x20 [ 1595.177728] [] kobject_shadow_add+0x125/0x1c0 [ 1595.177754] [] kobject_add+0xa/0x10 [ 1595.177764] [] device_add+0x97/0x5d0 [ 1595.177776] [] device_register+0x12/0x20 [ 1595.177786] [] backlight_device_register+0x9f/0x110 [backlight] [ 1595.177814] [] toshiba_acpi_init+0x117/0x15e [toshiba_acpi] [ 1595.177834] [] sys_init_module+0xfd/0x14e0 [ 1595.177871] [] sysenter_past_esp+0x5f/0x99 [ 1595.177883] ======================= [ 1595.177890] Could not register toshiba backlight device [ 1595.177985] BUG: unable to handle kernel NULL pointer dereference at virtual address 00000004 ... [ 1595.394097] EIP: 0060:[] Not tainted VLI [ 1595.394101] EFLAGS: 00010282 (2.6.23-rc9-1avb #24) [ 1595.480081] EIP is at toshiba_acpi_init+0x143/0x15e [toshiba_acpi] Signed-off-by: Andrey Borzenkov Cc: John Belmonte Acked-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/toshiba_acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/toshiba_acpi.c b/drivers/acpi/toshiba_acpi.c index a736ef7bdee..9e8c20c6a0b 100644 --- a/drivers/acpi/toshiba_acpi.c +++ b/drivers/acpi/toshiba_acpi.c @@ -591,9 +591,12 @@ static int __init toshiba_acpi_init(void) NULL, &toshiba_backlight_data); if (IS_ERR(toshiba_backlight_device)) { + int ret = PTR_ERR(toshiba_backlight_device); + printk(KERN_ERR "Could not register toshiba backlight device\n"); toshiba_backlight_device = NULL; toshiba_acpi_exit(); + return ret; } toshiba_backlight_device->props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1; -- cgit v1.2.3 From 779d20892f8e716677194dc879eea2b5f1e75678 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 14 Nov 2007 16:58:29 -0800 Subject: rtc_hctosys expects RTCs in UTC (doc) The RTC "hctosys" mechanism expects that RTC clock will use UTC, not local time (e.g. PST). Say so in Kconfig and in the kernel message. (Strictly speaking, the RTC clock should be tracking the POSIX epoch. That's not worth going into here. Goofing timezones means clocks are wrong by many hours; the POSIX-v-UTC differences just cost seconds.) Signed-off-by: David Brownell Acked-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/Kconfig | 4 +++- drivers/rtc/hctosys.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index cbde770eb12..7958635f12c 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -36,7 +36,9 @@ config RTC_HCTOSYS_DEVICE help The RTC device that will be used to (re)initialize the system clock, usually rtc0. Initialization is done when the system - starts up, and when it resumes from a low power state. + starts up, and when it resumes from a low power state. This + device should record time in UTC, since the kernel won't do + timezone correction. The driver for this RTC device must be loaded before late_initcall functions run, so it must usually be statically linked. diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c index 178527252c6..33c0e98243e 100644 --- a/drivers/rtc/hctosys.c +++ b/drivers/rtc/hctosys.c @@ -47,8 +47,8 @@ static int __init rtc_hctosys(void) do_settimeofday(&tv); dev_info(rtc->dev.parent, - "setting the system clock to " - "%d-%02d-%02d %02d:%02d:%02d (%u)\n", + "setting system clock to " + "%d-%02d-%02d %02d:%02d:%02d UTC (%u)\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, (unsigned int) tv.tv_sec); -- cgit v1.2.3 From a4b1d50e6158ecaa8fdb6a716389149bace35b52 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 14 Nov 2007 16:58:30 -0800 Subject: RTCs: handle NVRAM better Several of the RTC drivers are exporting binary "nvram" files in sysfs. Such NVRAM (or on many systems, EEPROM) data is often initialized during system manufacture to hold data about identity (serial numbers, Ethernet addresses, etc), configuration, calibration, and so forth. This patch improves integrity and security of those files: - Correctly initializes the size in one of the two cases where that was not yet being done. - Improves system security/integrity by making this state not be world-writable by default. Letting arbitrary userspace code mangle such state by default is at least Not A Good Thing; and it could sometimes be worse, depending on the particular data that might be corrupted. (I disregard the paranoiac "don't let anyone read it either" approach. Anyone storing passwords in such memory doesn't really care about security.) Signed-off-by: David Brownell Acked-by: Atsushi Nemoto Cc: Torsten Ertbjerg Rasmussen Cc: Mark Zhan Cc: Thomas Hommel Acked-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-ds1553.c | 2 +- drivers/rtc/rtc-ds1742.c | 5 ++++- drivers/rtc/rtc-m48t59.c | 3 ++- drivers/rtc/rtc-stk17ta8.c | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index bb53c09bad1..d9e848dcd45 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -291,7 +291,7 @@ static ssize_t ds1553_nvram_write(struct kobject *kobj, static struct bin_attribute ds1553_nvram_attr = { .attr = { .name = "nvram", - .mode = S_IRUGO | S_IWUGO, + .mode = S_IRUGO | S_IWUSR, }, .size = RTC_OFFSET, .read = ds1553_nvram_read, diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index c535b78698e..2e73f0b183b 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -160,10 +160,13 @@ static ssize_t ds1742_nvram_write(struct kobject *kobj, static struct bin_attribute ds1742_nvram_attr = { .attr = { .name = "nvram", - .mode = S_IRUGO | S_IWUGO, + .mode = S_IRUGO | S_IWUSR, }, .read = ds1742_nvram_read, .write = ds1742_nvram_write, + /* REVISIT: size in sysfs won't match actual size... if it's + * not a constant, each RTC should have its own attribute. + */ }; static int __devinit ds1742_rtc_probe(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c index 2bad1637330..cd0bbc0e803 100644 --- a/drivers/rtc/rtc-m48t59.c +++ b/drivers/rtc/rtc-m48t59.c @@ -353,11 +353,12 @@ static ssize_t m48t59_nvram_write(struct kobject *kobj, static struct bin_attribute m48t59_nvram_attr = { .attr = { .name = "nvram", - .mode = S_IRUGO | S_IWUGO, + .mode = S_IRUGO | S_IWUSR, .owner = THIS_MODULE, }, .read = m48t59_nvram_read, .write = m48t59_nvram_write, + .size = M48T59_NVRAM_SIZE, }; static int __devinit m48t59_rtc_probe(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c index 8288b6b2bf2..a265da7c6ff 100644 --- a/drivers/rtc/rtc-stk17ta8.c +++ b/drivers/rtc/rtc-stk17ta8.c @@ -291,7 +291,7 @@ static ssize_t stk17ta8_nvram_write(struct kobject *kobj, static struct bin_attribute stk17ta8_nvram_attr = { .attr = { .name = "nvram", - .mode = S_IRUGO | S_IWUGO, + .mode = S_IRUGO | S_IWUSR, .owner = THIS_MODULE, }, .size = RTC_OFFSET, -- cgit v1.2.3 From 682d73f685536fdb09322dde8caad339480e7bad Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 14 Nov 2007 16:58:32 -0800 Subject: rtc-ds1307 exports NVRAM Export the NVRAM on DS1307 and DS1338 chips, like several of the other drivers do for such combination RTC-and-NVRAM chips. Signed-off-by: David Brownell Acked-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/Kconfig | 4 +-- drivers/rtc/rtc-ds1307.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 7958635f12c..e5cdc0294aa 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -135,8 +135,8 @@ config RTC_DRV_DS1307 The first seven registers on these chips hold an RTC, and other registers may add features such as NVRAM, a trickle charger for - the RTC/NVRAM backup power, and alarms. This driver may not - expose all those available chip features. + the RTC/NVRAM backup power, and alarms. NVRAM is visible in + sysfs, but other chip features may not be available. This driver can also be built as a module. If so, the module will be called rtc-ds1307. diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index db6f3f0d898..bc1c7fe94ad 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -89,6 +89,7 @@ enum ds_type { struct ds1307 { u8 reg_addr; + bool has_nvram; u8 regs[8]; enum ds_type type; struct i2c_msg msg[2]; @@ -242,6 +243,87 @@ static const struct rtc_class_ops ds13xx_rtc_ops = { .set_time = ds1307_set_time, }; +/*----------------------------------------------------------------------*/ + +#define NVRAM_SIZE 56 + +static ssize_t +ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client; + struct ds1307 *ds1307; + struct i2c_msg msg[2]; + int result; + + client = to_i2c_client(container_of(kobj, struct device, kobj)); + ds1307 = i2c_get_clientdata(client); + + if (unlikely(off >= NVRAM_SIZE)) + return 0; + if ((off + count) > NVRAM_SIZE) + count = NVRAM_SIZE - off; + if (unlikely(!count)) + return count; + + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].len = 1; + msg[0].buf = buf; + + buf[0] = 8 + off; + + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].len = count; + msg[1].buf = buf; + + result = i2c_transfer(to_i2c_adapter(client->dev.parent), msg, 2); + if (result != 2) { + dev_err(&client->dev, "%s error %d\n", "nvram read", result); + return -EIO; + } + return count; +} + +static ssize_t +ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client; + u8 buffer[NVRAM_SIZE + 1]; + int ret; + + client = to_i2c_client(container_of(kobj, struct device, kobj)); + + if (unlikely(off >= NVRAM_SIZE)) + return -EFBIG; + if ((off + count) > NVRAM_SIZE) + count = NVRAM_SIZE - off; + if (unlikely(!count)) + return count; + + buffer[0] = 8 + off; + memcpy(buffer + 1, buf, count); + + ret = i2c_master_send(client, buffer, count + 1); + return (ret < 0) ? ret : (ret - 1); +} + +static struct bin_attribute nvram = { + .attr = { + .name = "nvram", + .mode = S_IRUGO | S_IWUSR, + .owner = THIS_MODULE, + }, + + .read = ds1307_nvram_read, + .write = ds1307_nvram_write, + .size = NVRAM_SIZE, +}; + +/*----------------------------------------------------------------------*/ + static struct i2c_driver ds1307_driver; static int __devinit ds1307_probe(struct i2c_client *client) @@ -413,6 +495,14 @@ read_rtc: goto exit_free; } + if (chip->nvram56) { + err = sysfs_create_bin_file(&client->dev.kobj, &nvram); + if (err == 0) { + ds1307->has_nvram = true; + dev_info(&client->dev, "56 bytes nvram\n"); + } + } + return 0; exit_bad: @@ -432,6 +522,9 @@ static int __devexit ds1307_remove(struct i2c_client *client) { struct ds1307 *ds1307 = i2c_get_clientdata(client); + if (ds1307->has_nvram) + sysfs_remove_bin_file(&client->dev.kobj, &nvram); + rtc_device_unregister(ds1307->rtc); kfree(ds1307); return 0; -- cgit v1.2.3 From 3cc2c17700c98b0af778566b0af6292b23b01430 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 14 Nov 2007 16:58:33 -0800 Subject: drivers/video/ps3fb: fix memset size error The size passed to memset is wrong. Signed-off-by Li Zefan Acked-by: Geert Uytterhoeven Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/ps3fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/ps3fb.c b/drivers/video/ps3fb.c index b3463ddcfd6..75836aa8319 100644 --- a/drivers/video/ps3fb.c +++ b/drivers/video/ps3fb.c @@ -727,7 +727,7 @@ static int ps3fb_blank(int blank, struct fb_info *info) static int ps3fb_get_vblank(struct fb_vblank *vblank) { - memset(vblank, 0, sizeof(&vblank)); + memset(vblank, 0, sizeof(*vblank)); vblank->flags = FB_VBLANK_HAVE_VSYNC; return 0; } -- cgit v1.2.3 From e9b5a495dc23f58ecaa9517f1ff4dd9ac724935f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 14 Nov 2007 16:58:34 -0800 Subject: W1: fix memset size error The size argument passed to memset is wrong. Signed-off-by Li Zefan Acked-by: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/w1/masters/ds2490.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c index 299e274d241..b63b5e044a4 100644 --- a/drivers/w1/masters/ds2490.c +++ b/drivers/w1/masters/ds2490.c @@ -233,7 +233,7 @@ static int ds_recv_status_nodump(struct ds_device *dev, struct ds_status *st, { int count, err; - memset(st, 0, sizeof(st)); + memset(st, 0, sizeof(*st)); count = 0; err = usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, dev->ep[EP_STATUS]), buf, size, &count, 100); -- cgit v1.2.3 From cc550defe9790b495c96fafabc5a6528dc586f24 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 14 Nov 2007 16:58:35 -0800 Subject: slab: fix typo in allocation failure handling This patch fixes wrong array index in allocation failure handling. Cc: Pekka Enberg Acked-by: Christoph Lameter Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index cfa6be4e378..c31cd3682a0 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1043,7 +1043,7 @@ static struct array_cache **alloc_alien_cache(int node, int limit) } ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); if (!ac_ptr[i]) { - for (i--; i <= 0; i--) + for (i--; i >= 0; i--) kfree(ac_ptr[i]); kfree(ac_ptr); return NULL; -- cgit v1.2.3 From dbd0cf48842700c3a694dcd32b29e63e27f37acc Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 14 Nov 2007 16:58:36 -0800 Subject: serial: add PNP ID for Davicom ISA 33.6K modem This should resolve these bug reports of the modem not working: http://bugzilla.kernel.org/show_bug.cgi?id=4355 http://www.linuxquestions.org/questions/linux-newbie-8/connect-script-failed-on-ppp-go-123975/ I don't have hardware to test this, but the initial report in the kernel bugzilla indicates that this change fixed the problem. Signed-off-by: Bjorn Helgaas Cc: Dmitry Vavilov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/8250_pnp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index 926f58a674a..dafc6ff5a05 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -69,6 +69,8 @@ static const struct pnp_device_id pnp_dev_table[] = { { "CTL3001", 0 }, /* Creative Labs Modem Blaster 28.8 DSVD PnP Voice */ { "CTL3011", 0 }, + /* Davicom ISA 33.6K Modem */ + { "DAV0336", 0 }, /* Creative */ /* Creative Modem Blaster Flash56 DI5601-1 */ { "DMB1032", 0 }, -- cgit v1.2.3 From 6fc48af82cef55546d640778698943b6227b7fb0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 14 Nov 2007 16:58:38 -0800 Subject: sysctl: check length at deprecated_sysctl_warning Original patch assumed args->nlen < CTL_MAXNAME, but it can be false. Signed-off-by: Tetsuo Handa Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3a1744fed2b..0deed82a615 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2620,6 +2620,10 @@ static int deprecated_sysctl_warning(struct __sysctl_args *args) int name[CTL_MAXNAME]; int i; + /* Check args->nlen. */ + if (args->nlen < 0 || args->nlen > CTL_MAXNAME) + return -ENOTDIR; + /* Read in the sysctl name for better debug message logging */ for (i = 0; i < args->nlen; i++) if (get_user(name[i], args->name + i)) -- cgit v1.2.3 From ddd73611b7bddbc0a9079f27a1471f635100aaab Mon Sep 17 00:00:00 2001 From: Pascal Terjan Date: Wed, 14 Nov 2007 16:58:39 -0800 Subject: cm40x0_cs.c: fix debug macros When PCMCIA_DEBUG is set, cm40x0_cs.c and cm4000_cs.c don't build because the definition of reader_to_dev uses a non-existent handle field of the struct pcmcia_device in the call to handle_to_dev. As handle_to_dev works on struct pcmcia_device, the fix is quite trivial. Signed-off-by: Pascal Terjan Cc: Harald Welte Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/pcmcia/cm4000_cs.c | 2 +- drivers/char/pcmcia/cm4040_cs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index cc5d77797de..02518da6a38 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -47,7 +47,7 @@ /* #define ATR_CSUM */ #ifdef PCMCIA_DEBUG -#define reader_to_dev(x) (&handle_to_dev(x->p_dev->handle)) +#define reader_to_dev(x) (&handle_to_dev(x->p_dev)) static int pc_debug = PCMCIA_DEBUG; module_param(pc_debug, int, 0600); #define DEBUGP(n, rdr, x, args...) do { \ diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index a0b9c8728d5..5f291bf739a 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -41,7 +41,7 @@ #ifdef PCMCIA_DEBUG -#define reader_to_dev(x) (&handle_to_dev(x->p_dev->handle)) +#define reader_to_dev(x) (&handle_to_dev(x->p_dev)) static int pc_debug = PCMCIA_DEBUG; module_param(pc_debug, int, 0600); #define DEBUGP(n, rdr, x, args...) do { \ -- cgit v1.2.3 From 5a6dca7c15623626943786977aca738520ea5ba3 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 14 Nov 2007 16:58:41 -0800 Subject: lib: move bitmap.o from lib-y to obj-y. mac80211 has a reference to __bitmap_empty() via bitmap_empty(). In lib/bitmap.c this is flagged with an EXPORT_SYMBOL(), but this is ultimately ineffective due to bitmap.o being linked in lib-y, resulting in: ERROR: "__bitmap_empty" [net/mac80211/mac80211.ko] undefined! Moving bitmap.o to obj-y fixes this up. Signed-off-by: Paul Mundt Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 3a0983b7741..b6793ed28d8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -4,7 +4,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ - idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \ + idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o @@ -14,7 +14,7 @@ lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o kref.o klist.o obj-y += div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ - bust_spinlocks.o hexdump.o kasprintf.o + bust_spinlocks.o hexdump.o kasprintf.o bitmap.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG -- cgit v1.2.3 From fcbaa088fc8625381bd1096bc2eedc4f58a1572c Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 14 Nov 2007 16:58:42 -0800 Subject: uml: fix symlink loops symlinks to directories in the non-O= case were lacking -n, which meant that, when the link already existed, a new link pointing at itself was created in the target directory. Signed-off-by: Jeff Dike Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 768a5d14b75..31999bc1c8a 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -168,7 +168,7 @@ ifneq ($(KBUILD_SRC),) $(Q)mkdir -p $(objtree)/include/asm-um $(Q)ln -fsn $(srctree)/include/asm-$(HEADER_ARCH) include/asm-um/arch else - $(Q)cd $(TOPDIR)/include/asm-um && ln -sf ../asm-$(HEADER_ARCH) arch + $(Q)cd $(TOPDIR)/include/asm-um && ln -fsn ../asm-$(SUBARCH) arch endif $(objtree)/$(ARCH_DIR)/include: @@ -180,7 +180,7 @@ $(ARCH_DIR)/include/sysdep: $(objtree)/$(ARCH_DIR)/include ifneq ($(KBUILD_SRC),) $(Q)ln -fsn $(srctree)/$(ARCH_DIR)/include/sysdep-$(SUBARCH) $(ARCH_DIR)/include/sysdep else - $(Q)cd $(ARCH_DIR)/include && ln -sf sysdep-$(SUBARCH) sysdep + $(Q)cd $(ARCH_DIR)/include && ln -fsn sysdep-$(SUBARCH) sysdep endif $(ARCH_DIR)/os: @@ -188,7 +188,7 @@ $(ARCH_DIR)/os: ifneq ($(KBUILD_SRC),) $(Q)ln -fsn $(srctree)/$(ARCH_DIR)/os-$(OS) $(ARCH_DIR)/os else - $(Q)cd $(ARCH_DIR) && ln -sf os-$(OS) os + $(Q)cd $(ARCH_DIR) && ln -fsn os-$(OS) os endif # Generated files -- cgit v1.2.3 From 108b4c3638be251d6b42eaa633e8a6b852b960f0 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 14 Nov 2007 16:58:43 -0800 Subject: rtc: tweak driver documentation for rtc periodic The max_user_freq member is not really meant for RTC drivers to modify, so update the rtc documentation so drivers writers know what is expected of them when handling periodic events. Signed-off-by: Mike Frysinger Acked-by: Alessandro Zummo Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/rtc.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt index c931d613f64..e20b19c1b60 100644 --- a/Documentation/rtc.txt +++ b/Documentation/rtc.txt @@ -180,9 +180,10 @@ driver returns ENOIOCTLCMD. Some common examples: * RTC_IRQP_SET, RTC_IRQP_READ: the irq_set_freq function will be called to set the frequency while the framework will handle the read for you since the frequency is stored in the irq_freq member of the rtc_device - structure. Also make sure you set the max_user_freq member in your - initialization routines so the framework can sanity check the user - input for you. + structure. Your driver needs to initialize the irq_freq member during + init. Make sure you check the requested frequency is in range of your + hardware in the irq_set_freq function. If you cannot actually change + the frequency, just return -ENOTTY. If all else fails, check out the rtc-test.c driver! -- cgit v1.2.3 From 0f8c0234f275c8198cbb68f16e035fa46254e372 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 14 Nov 2007 16:58:45 -0800 Subject: chipsfb: uses/depends on PCI chipsfb uses PCI interfaces and should depend on PCI. CC drivers/video/chipsfb.o drivers/video/chipsfb.c: In function 'chipsfb_pci_init': drivers/video/chipsfb.c:378: error: implicit declaration of function 'pci_request_region' drivers/video/chipsfb.c:435: error: implicit declaration of function 'pci_release_region' make[2]: *** [drivers/video/chipsfb.o] Error 1 make[1]: *** [drivers/video] Error 2 make: *** [drivers] Error 2 !CONFIG_PCI causes the build to fail. Signed-off-by: Randy Dunlap Cc: Kamalesh Babulal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index cc4b60f899c..7d86e9eae91 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -503,7 +503,7 @@ config FB_VALKYRIE config FB_CT65550 bool "Chips 65550 display support" - depends on (FB = y) && PPC32 + depends on (FB = y) && PPC32 && PCI select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT -- cgit v1.2.3 From 03ad369ac900116f35da7505f768ebbd481d09a4 Mon Sep 17 00:00:00 2001 From: Frank Lichtenheld Date: Wed, 14 Nov 2007 16:58:47 -0800 Subject: uvesafb: fix warnings about unused variables on non-x86 Variables that are only used in #ifdef CONFIG_X86 should also only be declared there. Signed-off-by: Frank Lichtenheld Cc: Michal Januszewski Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/uvesafb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index b983d262ab7..d1d6c0facd5 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -926,8 +926,10 @@ static int uvesafb_setpalette(struct uvesafb_pal_entry *entries, int count, int start, struct fb_info *info) { struct uvesafb_ktask *task; +#ifdef CONFIG_X86 struct uvesafb_par *par = info->par; int i = par->mode_idx; +#endif int err = 0; /* @@ -1103,11 +1105,11 @@ static int uvesafb_pan_display(struct fb_var_screeninfo *var, static int uvesafb_blank(int blank, struct fb_info *info) { - struct uvesafb_par *par = info->par; struct uvesafb_ktask *task; int err = 1; - #ifdef CONFIG_X86 + struct uvesafb_par *par = info->par; + if (par->vbe_ib.capabilities & VBE_CAP_VGACOMPAT) { int loop = 10000; u8 seq = 0, crtc17 = 0; -- cgit v1.2.3 From df9d177aa28d50e64bae6fbd6b263833079e3571 Mon Sep 17 00:00:00 2001 From: Philippe Elie Date: Wed, 14 Nov 2007 16:58:48 -0800 Subject: oProfile: oops when profile_pc() returns ~0LU Instruction pointer returned by profile_pc() can be a random value. This break the assumption than we can safely set struct op_sample.eip field to a magic value to signal to the per-cpu buffer reader side special event like task switch ending up in a segfault in get_task_mm() when profile_pc() return ~0UL. Fixed by sanitizing the sampled eip and reject/log invalid eip. Problem reported by Sami Farin, patch tested by him. Signed-off-by: Philippe Elie Tested-by: Sami Farin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/oprofile/cpu_buffer.c | 7 +++++++ drivers/oprofile/cpu_buffer.h | 1 + drivers/oprofile/oprofile_stats.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index a83c3db7d18..c93d3d2640a 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -64,6 +64,8 @@ int alloc_cpu_buffers(void) b->head_pos = 0; b->sample_received = 0; b->sample_lost_overflow = 0; + b->backtrace_aborted = 0; + b->sample_invalid_eip = 0; b->cpu = i; INIT_DELAYED_WORK(&b->work, wq_sync_buffer); } @@ -175,6 +177,11 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, cpu_buf->sample_received++; + if (pc == ESCAPE_CODE) { + cpu_buf->sample_invalid_eip++; + return 0; + } + if (nr_available_slots(cpu_buf) < 3) { cpu_buf->sample_lost_overflow++; return 0; diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index 49900d9e323..c66c025abe7 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -42,6 +42,7 @@ struct oprofile_cpu_buffer { unsigned long sample_received; unsigned long sample_lost_overflow; unsigned long backtrace_aborted; + unsigned long sample_invalid_eip; int cpu; struct delayed_work work; } ____cacheline_aligned; diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c index f0acb661c25..d1f6d776e9e 100644 --- a/drivers/oprofile/oprofile_stats.c +++ b/drivers/oprofile/oprofile_stats.c @@ -26,6 +26,8 @@ void oprofile_reset_stats(void) cpu_buf = &cpu_buffer[i]; cpu_buf->sample_received = 0; cpu_buf->sample_lost_overflow = 0; + cpu_buf->backtrace_aborted = 0; + cpu_buf->sample_invalid_eip = 0; } atomic_set(&oprofile_stats.sample_lost_no_mm, 0); @@ -61,6 +63,8 @@ void oprofile_create_stats_files(struct super_block * sb, struct dentry * root) &cpu_buf->sample_lost_overflow); oprofilefs_create_ro_ulong(sb, cpudir, "backtrace_aborted", &cpu_buf->backtrace_aborted); + oprofilefs_create_ro_ulong(sb, cpudir, "sample_invalid_eip", + &cpu_buf->sample_invalid_eip); } oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mm", -- cgit v1.2.3 From d4d5d205b653fe68c898d42e7a27a7363a4fb3ba Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 14 Nov 2007 16:58:51 -0800 Subject: uml: fix recvmsg return value checking Stupid bug - we need to compare the return value of recvmsg to the value of iov_len, not its size. This caused port_helper processes not to be killed on shutdown on x86_64 because the pids weren't being passed out properly. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/os-Linux/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index b542a3a021b..f8346275862 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -496,8 +496,7 @@ int os_rcv_fd(int fd, int *helper_pid_out) n = recvmsg(fd, &msg, 0); if(n < 0) return -errno; - - else if(n != sizeof(iov.iov_len)) + else if(n != iov.iov_len) *helper_pid_out = -1; cmsg = CMSG_FIRSTHDR(&msg); -- cgit v1.2.3 From 8cd8fa557f439f23baef2158b271667d0c579482 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 14 Nov 2007 16:58:53 -0800 Subject: uml: update address space affected by pud_clear pud_clear wasn't setting the _PAGE_NEWPAGE bit, fooling tlb_flush into thinking that this area of the address space was up-to-date and not unmapping whatever was covered by the pud. This manifested itself as ldconfig on x86_64 complaining about the first library it looked at not being a valid ELF file. A config file is mapped at 0x4000000, as the only thing mapped under its pud, and unmapped. The unmapping caused a pud_clear, which, due to this bug, didn't actually unmap the config file data on the host. The first library is then mapped at the same location, but is not actually mapped on the host because accesses to it cause no page faults. As a result, ldconfig sees the old config file data. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-um/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h index aa82b88db80..3ebafbaacb2 100644 --- a/include/asm-um/pgtable-3level.h +++ b/include/asm-um/pgtable-3level.h @@ -71,7 +71,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline void pud_clear (pud_t *pud) { - set_pud(pud, __pud(0)); + set_pud(pud, __pud(_PAGE_NEWPAGE)); } #define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) -- cgit v1.2.3 From cfe36bde59bc1ae868e775ad82386c3acaabb738 Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Wed, 14 Nov 2007 16:58:54 -0800 Subject: Improve cgroup printks When I boot with the 'quiet' parameter, I see on the screen: [ 0.000000] Initializing cgroup subsys cpuset [ 0.000000] Initializing cgroup subsys cpu [ 39.036026] Initializing cgroup subsys cpuacct [ 39.036080] Initializing cgroup subsys debug [ 39.036118] Initializing cgroup subsys ns This patch lowers the priority of those messages, adds a "cgroup: " prefix to another couple of printks and kills the useless reference to the source file. Signed-off-by: Diego Calleja Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3fe21e19c96..1a3c23936d4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1,6 +1,4 @@ /* - * kernel/cgroup.c - * * Generic process-grouping system. * * Based originally on the cpuset system, extracted by Paul Menage @@ -2200,7 +2198,8 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; struct list_head *l; - printk(KERN_ERR "Initializing cgroup subsys %s\n", ss->name); + + printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); /* Create the top cgroup state for this subsystem */ ss->root = &rootnode; @@ -2273,7 +2272,7 @@ int __init cgroup_init_early(void) BUG_ON(!ss->create); BUG_ON(!ss->destroy); if (ss->subsys_id != i) { - printk(KERN_ERR "Subsys %s id == %d\n", + printk(KERN_ERR "cgroup: Subsys %s id == %d\n", ss->name, ss->subsys_id); BUG(); } @@ -2605,7 +2604,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename)); if (IS_ERR(dentry)) { printk(KERN_INFO - "Couldn't allocate dentry for %s: %ld\n", nodename, + "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename, PTR_ERR(dentry)); ret = PTR_ERR(dentry); goto out_release; -- cgit v1.2.3 From 28822f22e18fc3c422f64b5bf0bb1e6c306af634 Mon Sep 17 00:00:00 2001 From: Stanislav Brabec Date: Wed, 14 Nov 2007 16:58:55 -0800 Subject: drivers/video/s1d13xxxfb.c: fix build as module with dbg Attached patch fixes two compilation problems of s1d13xxxfb.c: - Fixes outdated dbg() message to fix compilation error with debugging enabled. - Do not read kernel command line options when compiled as module. Signed-off-by: Stanislav Brabec Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/s1d13xxxfb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/s1d13xxxfb.c b/drivers/video/s1d13xxxfb.c index a5333c19078..b829dc7c5ed 100644 --- a/drivers/video/s1d13xxxfb.c +++ b/drivers/video/s1d13xxxfb.c @@ -540,7 +540,7 @@ s1d13xxxfb_probe(struct platform_device *pdev) int ret = 0; u8 revision; - dbg("probe called: device is %p\n", dev); + dbg("probe called: device is %p\n", pdev); printk(KERN_INFO "Epson S1D13XXX FB Driver\n"); @@ -753,8 +753,11 @@ static struct platform_driver s1d13xxxfb_driver = { static int __init s1d13xxxfb_init(void) { + +#ifndef MODULE if (fb_get_options("s1d13xxxfb", NULL)) return -ENODEV; +#endif return platform_driver_register(&s1d13xxxfb_driver); } -- cgit v1.2.3 From e47776a0a41a14a5634633c96e590827f552c4b5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Nov 2007 16:58:56 -0800 Subject: Forbid user to change file flags on quota files Forbid user from changing file flags on quota files. User has no bussiness in playing with these flags when quota is on. Furthermore there is a remote possibility of deadlock due to a lock inversion between quota file's i_mutex and transaction's start (i_mutex for quota file is locked only when trasaction is started in quota operations) in ext3 and ext4. Signed-off-by: Jan Kara Cc: LIOU Payphone Cc: Acked-by: Dave Kleikamp Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/ioctl.c | 5 +++++ fs/ext3/ioctl.c | 5 +++++ fs/ext4/ioctl.c | 5 +++++ fs/jfs/ioctl.c | 3 +++ fs/reiserfs/ioctl.c | 3 +++ 5 files changed, 21 insertions(+) diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index c2324d5fe4a..320b2cb3d4d 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -47,6 +47,11 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, flags &= ~EXT2_DIRSYNC_FL; mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } oldflags = ei->i_flags; /* diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 4a2a02c95bf..023a070f55f 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -51,6 +51,11 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, flags &= ~EXT3_DIRSYNC_FL; mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c04c7ccba9e..e7f894bdb42 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -51,6 +51,11 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, flags &= ~EXT4_DIRSYNC_FL; mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 3c8663bea98..dfda12a073e 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -79,6 +79,9 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, if (!S_ISDIR(inode->i_mode)) flags &= ~JFS_DIRSYNC_FL; + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) + return -EPERM; jfs_get_inode_flags(jfs_inode); oldflags = jfs_inode->mode2; diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index c438a8f83f2..e0f0f098a52 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -57,6 +57,9 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (get_user(flags, (int __user *)arg)) return -EFAULT; + /* Is it quota file? Do not allow user to mess with it. */ + if (IS_NOQUOTA(inode)) + return -EPERM; if (((flags ^ REISERFS_I(inode)-> i_attrs) & (REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) -- cgit v1.2.3 From 62ec56524f0eeaa1aa4f7281425fa34d400cdacc Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Wed, 14 Nov 2007 16:58:58 -0800 Subject: LXFB: use the correct MSR number for panel support A relatively recent version of the Geode LX datasheet listed the wrong address for one of the MSRs that controls TFT panels, resulting in breakage. This patch corrects the MSR address. Signed-off-by: Jordan Crouse Cc: "Antonino A. Daplas" Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/geode/lxfb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/geode/lxfb.h b/drivers/video/geode/lxfb.h index 6c227f9592a..ca13c48d19b 100644 --- a/drivers/video/geode/lxfb.h +++ b/drivers/video/geode/lxfb.h @@ -33,7 +33,7 @@ void lx_set_palette_reg(struct fb_info *, unsigned int, unsigned int, #define MSR_LX_GLD_CONFIG 0x48002001 #define MSR_LX_GLCP_DOTPLL 0x4c000015 -#define MSR_LX_DF_PADSEL 0x48000011 +#define MSR_LX_DF_PADSEL 0x48002011 #define MSR_LX_DC_SPARE 0x80000011 #define MSR_LX_DF_GLCONFIG 0x48002001 -- cgit v1.2.3 From 43054412db5e5b3eda1eff6c2245ff4257560340 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 16:59:00 -0800 Subject: lguest_user.c: fix memory leak This patch fixes a memory leak spotted by the Coverity checker. Signed-off-by: Adrian Bunk Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/lguest/lguest_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 9d716fa42ca..3b92a61ba8d 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -184,7 +184,7 @@ static int initialize(struct file *file, const unsigned long __user *input) free_regs: free_page(lg->regs_page); release_guest: - memset(lg, 0, sizeof(*lg)); + kfree(lg); unlock: mutex_unlock(&lguest_lock); return err; -- cgit v1.2.3 From a9e60e5c3c4721dd216047e4c58c4eb89789f519 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 16:59:02 -0800 Subject: video/sis/: fix negative array index This patch fixes the possible usage of a negative value as an array index spotted by the Coverity checker. sisfb_validate_mode() could return a negative error code and we must check for that prior to using its return value as an array index. Signed-off-by: Adrian Bunk Cc: Thomas Winischhofer Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/sis/sis_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c index bc7d2368373..37bd24b8d83 100644 --- a/drivers/video/sis/sis_main.c +++ b/drivers/video/sis/sis_main.c @@ -1248,7 +1248,6 @@ sisfb_do_set_var(struct fb_var_screeninfo *var, int isactive, struct fb_info *in if(found_mode) { ivideo->sisfb_mode_idx = sisfb_validate_mode(ivideo, ivideo->sisfb_mode_idx, ivideo->currentvbflags); - ivideo->mode_no = sisbios_mode[ivideo->sisfb_mode_idx].mode_no[ivideo->mni]; } else { ivideo->sisfb_mode_idx = -1; } @@ -1260,6 +1259,8 @@ sisfb_do_set_var(struct fb_var_screeninfo *var, int isactive, struct fb_info *in return -EINVAL; } + ivideo->mode_no = sisbios_mode[ivideo->sisfb_mode_idx].mode_no[ivideo->mni]; + if(sisfb_search_refresh_rate(ivideo, ivideo->refresh_rate, ivideo->sisfb_mode_idx) == 0) { ivideo->rate_idx = sisbios_mode[ivideo->sisfb_mode_idx].rate_idx; ivideo->refresh_rate = 60; -- cgit v1.2.3 From 7105458563213b6f6fb523065474cfe1d6c22a67 Mon Sep 17 00:00:00 2001 From: Damian Jurd Date: Wed, 14 Nov 2007 16:59:04 -0800 Subject: 8250_pnp: add support for "LG C1 EXPRESS DUAL" machines The following is an extra entry to enable the touch screen on the new LG C1 EXPRESS DUAL machine. Cc: Russell King Cc: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/8250_pnp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index dafc6ff5a05..ad0755919bc 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -347,6 +347,11 @@ static const struct pnp_device_id pnp_dev_table[] = { /* Fujitsu Wacom Tablet PC devices */ { "FUJ02E5", 0 }, { "FUJ02E6", 0 }, + /* + * LG C1 EXPRESS DUAL (C1-PB11A3) touch screen (actually a FUJ02E6 in + * disguise) + */ + { "LTS0001", 0 }, /* Rockwell's (PORALiNK) 33600 INT PNP */ { "WCI0003", 0 }, /* Unkown PnP modems */ -- cgit v1.2.3 From e1a1c997afe907e6ec4799e4be0f38cffd8b418c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 14 Nov 2007 16:59:08 -0800 Subject: proc: fix proc_kill_inodes to kill dentries on all proc superblocks It appears we overlooked support for removing generic proc files when we added support for multiple proc super blocks. Handle that now. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Eric W. Biederman Acked-by: Pavel Emelyanov Cc: Alexey Dobriyan Acked-by: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 39 ++++++++++++++++++++++----------------- fs/proc/internal.h | 2 ++ fs/proc/root.c | 2 +- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1bdb6243575..a9806bc21ec 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -561,28 +561,33 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp static void proc_kill_inodes(struct proc_dir_entry *de) { struct list_head *p; - struct super_block *sb = proc_mnt->mnt_sb; + struct super_block *sb; /* * Actually it's a partial revoke(). */ - file_list_lock(); - list_for_each(p, &sb->s_files) { - struct file * filp = list_entry(p, struct file, f_u.fu_list); - struct dentry * dentry = filp->f_path.dentry; - struct inode * inode; - const struct file_operations *fops; - - if (dentry->d_op != &proc_dentry_operations) - continue; - inode = dentry->d_inode; - if (PDE(inode) != de) - continue; - fops = filp->f_op; - filp->f_op = NULL; - fops_put(fops); + spin_lock(&sb_lock); + list_for_each_entry(sb, &proc_fs_type.fs_supers, s_instances) { + file_list_lock(); + list_for_each(p, &sb->s_files) { + struct file *filp = list_entry(p, struct file, + f_u.fu_list); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode; + const struct file_operations *fops; + + if (dentry->d_op != &proc_dentry_operations) + continue; + inode = dentry->d_inode; + if (PDE(inode) != de) + continue; + fops = filp->f_op; + filp->f_op = NULL; + fops_put(fops); + } + file_list_unlock(); } - file_list_unlock(); + spin_unlock(&sb_lock); } static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1820eb2ef76..1b2b6c6bb47 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -78,3 +78,5 @@ static inline int proc_fd(struct inode *inode) { return PROC_I(inode)->fd; } + +extern struct file_system_type proc_fs_type; diff --git a/fs/proc/root.c b/fs/proc/root.c index ec9cb3b6c93..1f86bb860e0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -98,7 +98,7 @@ static void proc_kill_sb(struct super_block *sb) put_pid_ns(ns); } -static struct file_system_type proc_fs_type = { +struct file_system_type proc_fs_type = { .name = "proc", .get_sb = proc_get_sb, .kill_sb = proc_kill_sb, -- cgit v1.2.3 From 3ad33b2436b545cbe8b28e53f3710432cad457ab Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Wed, 14 Nov 2007 16:59:10 -0800 Subject: Migration: find correct vma in new_vma_page() We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas. For anon-regions, we just fail to migrate any pages beyond the 1st vma in the range. This occurs because do_mbind() collects a list of pages to migrate by calling check_range(). check_range() walks the task's mm, spanning vmas as necessary, to collect the migratable pages into a list. Then, do_mbind() calls migrate_pages() passing the list of pages, a function to allocate new pages based on vma policy [new_vma_page()], and a pointer to the first vma of the range. For each page in the list, new_vma_page() calls page_address_in_vma() passing the page and the vma [first in range] to obtain the address to get for alloc_page_vma(). The page address is needed to get interleaving policy correct. If the pages in the list come from multiple vmas, eventually, new_page_address() will pass that page to page_address_in_vma() with the incorrect vma. For !PageAnon pages, this will result in a bug check in rmap.c:vma_address(). For anon pages, vma_address() will just return EFAULT and fail the migration. This patch modifies new_vma_page() to check the return value from page_address_in_vma(). If the return value is EFAULT, new_vma_page() searchs forward via vm_next for the vma that maps the page--i.e., that does not return EFAULT. This assumes that the pages in the list handed to migrate_pages() is in address order. This is currently case. The patch documents this assumption in a new comment block for new_vma_page(). If new_vma_page() cannot locate the vma mapping the page in a forward search in the mm, it will pass a NULL vma to alloc_page_vma(). This will result in the allocation using the task policy, if any, else system default policy. This situation is unlikely, but the patch documents this behavior with a comment. Note, this patch results in restarting from the first vma in a multi-vma range each time new_vma_page() is called. If this is not acceptable, we can make the vma argument a pointer, both in new_vma_page() and it's caller unmap_and_move() so that the value held by the loop in migrate_pages() always passes down the last vma in which a page was found. This will require changes to all new_page_t functions passed to migrate_pages(). Is this necessary? For this patch to work, we can't bug check in vma_address() for pages outside the argument vma. This patch removes the BUG_ON(). All other callers [besides new_vma_page()] already check the return status. Tested on x86_64, 4 node NUMA platform. Signed-off-by: Lee Schermerhorn Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 21 +++++++++++++++++++-- mm/rmap.c | 7 ++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c1592a94582..83c69f8a64c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -722,12 +722,29 @@ out: } +/* + * Allocate a new page for page migration based on vma policy. + * Start assuming that page is mapped by vma pointed to by @private. + * Search forward from there, if not. N.B., this assumes that the + * list of pages handed to migrate_pages()--which is how we get here-- + * is in virtual address order. + */ static struct page *new_vma_page(struct page *page, unsigned long private, int **x) { struct vm_area_struct *vma = (struct vm_area_struct *)private; + unsigned long uninitialized_var(address); - return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, - page_address_in_vma(page, vma)); + while (vma) { + address = page_address_in_vma(page, vma); + if (address != -EFAULT) + break; + vma = vma->vm_next; + } + + /* + * if !vma, alloc_page_vma() will use task or system default policy + */ + return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); } #else diff --git a/mm/rmap.c b/mm/rmap.c index 8990f909492..dc3be5f5b0d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -183,7 +183,9 @@ static void page_unlock_anon_vma(struct anon_vma *anon_vma) } /* - * At what user virtual address is page expected in vma? + * At what user virtual address is page expected in @vma? + * Returns virtual address or -EFAULT if page's index/offset is not + * within the range mapped the @vma. */ static inline unsigned long vma_address(struct page *page, struct vm_area_struct *vma) @@ -193,8 +195,7 @@ vma_address(struct page *page, struct vm_area_struct *vma) address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { - /* page should be within any vma from prio_tree_next */ - BUG_ON(!PageAnon(page)); + /* page should be within @vma mapping range */ return -EFAULT; } return address; -- cgit v1.2.3 From dbc0e4cefd003834440fe7ac5464616c5235cb94 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 14 Nov 2007 16:59:12 -0800 Subject: memory hotremove: unset migrate type "ISOLATE" after removal We should unset migrate type "ISOLATE" when we successfully removed memory. But current code has BUG and cannot works well. This patch also includes bugfix? to change get_pageblock_flags to get_pageblock_migratetype(). Thanks to Badari Pulavarty for finding this. Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 ++-- mm/page_isolation.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3a47871a29d..987abe6375e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -574,8 +574,8 @@ repeat: /* Ok, all of our target is islaoted. We cannot do rollback at this point. */ offline_isolated_pages(start_pfn, end_pfn); - /* reset pagetype flags */ - start_isolate_page_range(start_pfn, end_pfn); + /* reset pagetype flags and makes migrate type to be MOVABLE */ + undo_isolate_page_range(start_pfn, end_pfn); /* removal success */ zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 8f92a29695c..3444b58033c 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -55,7 +55,7 @@ start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) return 0; undo: for (pfn = start_pfn; - pfn <= undo_pfn; + pfn < undo_pfn; pfn += pageblock_nr_pages) unset_migratetype_isolate(pfn_to_page(pfn)); @@ -76,7 +76,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (!page || get_pageblock_flags(page) != MIGRATE_ISOLATE) + if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) continue; unset_migratetype_isolate(page); } @@ -126,7 +126,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) */ for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (page && get_pageblock_flags(page) != MIGRATE_ISOLATE) + if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) break; } if (pfn < end_pfn) -- cgit v1.2.3 From 546040dc4872f807d40b69bed86605636082564c Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 14 Nov 2007 16:59:14 -0800 Subject: make getdelays cgroupstats aware Update the getdelays utility to become cgroupstats aware. A new -C option has been added. It takes in a control group path and prints out a summary of the states of tasks in the control group Signed-off-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/getdelays.c | 43 +++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index ab82b7f5331..d6cb1a86fd6 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -25,6 +25,7 @@ #include #include +#include /* * Generic macros for dealing with netlink sockets. Might be duplicated @@ -78,6 +79,7 @@ static void usage(void) fprintf(stderr, " -i: print IO accounting (works only with -p)\n"); fprintf(stderr, " -l: listen forever\n"); fprintf(stderr, " -v: debug on\n"); + fprintf(stderr, " -C: container path\n"); } /* @@ -212,6 +214,14 @@ void task_context_switch_counts(struct taskstats *t) t->nvcsw, t->nivcsw); } +void print_cgroupstats(struct cgroupstats *c) +{ + printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, " + "uninterruptible %llu\n", c->nr_sleeping, c->nr_io_wait, + c->nr_running, c->nr_stopped, c->nr_uninterruptible); +} + + void print_ioacct(struct taskstats *t) { printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n", @@ -239,11 +249,14 @@ int main(int argc, char *argv[]) int maskset = 0; char *logfile = NULL; int loop = 0; + int containerset = 0; + char containerpath[1024]; + int cfd = 0; struct msgtemplate msg; while (1) { - c = getopt(argc, argv, "qdiw:r:m:t:p:vl"); + c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:"); if (c < 0) break; @@ -260,6 +273,10 @@ int main(int argc, char *argv[]) printf("printing task/process context switch rates\n"); print_task_context_switch_counts = 1; break; + case 'C': + containerset = 1; + strncpy(containerpath, optarg, strlen(optarg) + 1); + break; case 'w': logfile = strdup(optarg); printf("write to file %s\n", logfile); @@ -334,6 +351,11 @@ int main(int argc, char *argv[]) } } + if (tid && containerset) { + fprintf(stderr, "Select either -t or -C, not both\n"); + goto err; + } + if (tid) { rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, cmd_type, &tid, sizeof(__u32)); @@ -344,6 +366,20 @@ int main(int argc, char *argv[]) } } + if (containerset) { + cfd = open(containerpath, O_RDONLY); + if (cfd < 0) { + perror("error opening container file"); + goto err; + } + rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET, + CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)); + if (rc < 0) { + perror("error sending cgroupstats command"); + goto err; + } + } + do { int i; @@ -422,6 +458,9 @@ int main(int argc, char *argv[]) } break; + case CGROUPSTATS_TYPE_CGROUP_STATS: + print_cgroupstats(NLA_DATA(na)); + break; default: fprintf(stderr, "Unknown nla_type %d\n", na->nla_type); @@ -443,5 +482,7 @@ err: close(nl_sd); if (fd) close(fd); + if (cfd) + close(cfd); return 0; } -- cgit v1.2.3 From 5fce25a9df4865bdd5e3dc4853b269dc1677a02a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Nov 2007 16:59:15 -0800 Subject: mm: speed up writeback ramp-up on clean systems We allow violation of bdi limits if there is a lot of room on the system. Once we hit half the total limit we start enforcing bdi limits and bdi ramp-up should happen. Doing it this way avoids many small writeouts on an otherwise idle system and should also speed up the ramp-up. Signed-off-by: Peter Zijlstra Reviewed-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 838a5e31394..81a91e6f1f9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -355,8 +355,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, */ static void balance_dirty_pages(struct address_space *mapping) { - long bdi_nr_reclaimable; - long bdi_nr_writeback; + long nr_reclaimable, bdi_nr_reclaimable; + long nr_writeback, bdi_nr_writeback; long background_thresh; long dirty_thresh; long bdi_thresh; @@ -376,11 +376,26 @@ static void balance_dirty_pages(struct address_space *mapping) get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); + + nr_reclaimable = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); + nr_writeback = global_page_state(NR_WRITEBACK); + bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); + if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) break; + /* + * Throttle it only when the background writeback cannot + * catch-up. This avoids (excessively) small writeouts + * when the bdi limits are ramping up. + */ + if (nr_reclaimable + nr_writeback < + (background_thresh + dirty_thresh) / 2) + break; + if (!bdi->dirty_exceeded) bdi->dirty_exceeded = 1; -- cgit v1.2.3 From 887c3cb18865a4f9e0786e5a5b3ef47ff469b956 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Wed, 14 Nov 2007 16:59:20 -0800 Subject: Add IORESOUCE_BUSY flag for System RAM i386 and x86-64 registers System RAM as IORESOURCE_MEM | IORESOURCE_BUSY. But ia64 registers it as IORESOURCE_MEM only. In addition, memory hotplug code registers new memory as IORESOURCE_MEM too. This difference causes a failure of memory unplug of x86-64. This patch fixes it. This patch adds IORESOURCE_BUSY to avoid potential overlap mapping by PCI device. Signed-off-by: Yasunori Goto Signed-off-by: Badari Pulavarty Cc: Luck, Tony" Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/efi.c | 6 ++---- kernel/resource.c | 2 +- mm/memory_hotplug.c | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 5181bf551f3..8e8f8b6193e 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1113,7 +1113,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, if (md->num_pages == 0) /* should not happen */ continue; - flags = IORESOURCE_MEM; + flags = IORESOURCE_MEM | IORESOURCE_BUSY; switch (md->type) { case EFI_MEMORY_MAPPED_IO: @@ -1135,12 +1135,11 @@ efi_initialize_iomem_resources(struct resource *code_resource, case EFI_ACPI_MEMORY_NVS: name = "ACPI Non-volatile Storage"; - flags |= IORESOURCE_BUSY; break; case EFI_UNUSABLE_MEMORY: name = "reserved"; - flags |= IORESOURCE_BUSY | IORESOURCE_DISABLED; + flags |= IORESOURCE_DISABLED; break; case EFI_RESERVED_TYPE: @@ -1149,7 +1148,6 @@ efi_initialize_iomem_resources(struct resource *code_resource, case EFI_ACPI_RECLAIM_MEMORY: default: name = "reserved"; - flags |= IORESOURCE_BUSY; break; } diff --git a/kernel/resource.c b/kernel/resource.c index a358142ff48..2eb553d9b51 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -277,7 +277,7 @@ walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg, int ret = -1; res.start = (u64) start_pfn << PAGE_SHIFT; res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1; - res.flags = IORESOURCE_MEM; + res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) { pfn = (unsigned long)(res.start >> PAGE_SHIFT); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 987abe6375e..9512a544d04 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -39,7 +39,7 @@ static struct resource *register_memory_resource(u64 start, u64 size) res->name = "System RAM"; res->start = start; res->end = start + size - 1; - res->flags = IORESOURCE_MEM; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, res) < 0) { printk("System RAM resource %llx - %llx cannot be added\n", (unsigned long long)res->start, (unsigned long long)res->end); -- cgit v1.2.3 From cbff2fbf55c21f50298b1aef1263b11bf510e35f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Nov 2007 16:59:21 -0800 Subject: acpi: make ACPI_PROCFS default to y Zillions of people are getting my-battery-monitor-doesnt-work problems (including me). Lessen the damage by making ACPI_PROCFS default to on. Cc: Len Brown Cc: "Rafael J. Wysocki" Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index ce9dead0f49..087a7028ae8 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -50,6 +50,7 @@ config ACPI_SLEEP config ACPI_PROCFS bool "Deprecated /proc/acpi files" depends on PROC_FS + default y ---help--- For backwards compatibility, this option allows deprecated /proc/acpi/ files to exist, even when -- cgit v1.2.3 From 350d0076c5763ca2b88ca05e3889bfa7c1905f21 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Wed, 14 Nov 2007 16:59:22 -0800 Subject: spi: fix double-free on spi_unregister_master After 49dce689ad4ef0fd1f970ef762168e4bd46f69a3, device_for_each_child iteration hits the master device itself. Do not call spi_unregister_device() for the master device. Signed-off-by: Atsushi Nemoto Acked-by: David Brownell Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/spi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 89769ce16f8..b31f4431849 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -457,10 +457,11 @@ done: EXPORT_SYMBOL_GPL(spi_register_master); -static int __unregister(struct device *dev, void *unused) +static int __unregister(struct device *dev, void *master_dev) { /* note: before about 2.6.14-rc1 this would corrupt memory: */ - spi_unregister_device(to_spi_device(dev)); + if (dev != master_dev) + spi_unregister_device(to_spi_device(dev)); return 0; } @@ -478,7 +479,8 @@ void spi_unregister_master(struct spi_master *master) { int dummy; - dummy = device_for_each_child(master->dev.parent, NULL, __unregister); + dummy = device_for_each_child(master->dev.parent, &master->dev, + __unregister); device_unregister(&master->dev); } EXPORT_SYMBOL_GPL(spi_unregister_master); -- cgit v1.2.3 From ba0a7f39ce8cd54a1b2f3adb03509ff251a91bde Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Wed, 14 Nov 2007 16:59:23 -0800 Subject: spi: fix error paths on txx9spi_probe Some error paths in txx9spi_probe wrongly return 0. This patch fixes them by using the devres interfaces. Signed-off-by: Atsushi Nemoto Acked-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/spi_txx9.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/spi/spi_txx9.c b/drivers/spi/spi_txx9.c index cc5094f37dd..363ac8e6882 100644 --- a/drivers/spi/spi_txx9.c +++ b/drivers/spi/spi_txx9.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -74,7 +75,6 @@ struct txx9spi { struct list_head queue; wait_queue_head_t waitq; void __iomem *membase; - int irq; int baseclk; struct clk *clk; u32 max_speed_hz, min_speed_hz; @@ -350,12 +350,12 @@ static int __init txx9spi_probe(struct platform_device *dev) struct resource *res; int ret = -ENODEV; u32 mcr; + int irq; master = spi_alloc_master(&dev->dev, sizeof(*c)); if (!master) return ret; c = spi_master_get_devdata(master); - c->irq = -1; platform_set_drvdata(dev, master); INIT_WORK(&c->work, txx9spi_work); @@ -381,32 +381,36 @@ static int __init txx9spi_probe(struct platform_device *dev) res = platform_get_resource(dev, IORESOURCE_MEM, 0); if (!res) - goto exit; - c->membase = ioremap(res->start, res->end - res->start + 1); + goto exit_busy; + if (!devm_request_mem_region(&dev->dev, + res->start, res->end - res->start + 1, + "spi_txx9")) + goto exit_busy; + c->membase = devm_ioremap(&dev->dev, + res->start, res->end - res->start + 1); if (!c->membase) - goto exit; + goto exit_busy; /* enter config mode */ mcr = txx9spi_rd(c, TXx9_SPMCR); mcr &= ~(TXx9_SPMCR_OPMODE | TXx9_SPMCR_SPSTP | TXx9_SPMCR_BCLR); txx9spi_wr(c, mcr | TXx9_SPMCR_CONFIG | TXx9_SPMCR_BCLR, TXx9_SPMCR); - c->irq = platform_get_irq(dev, 0); - if (c->irq < 0) - goto exit; - ret = request_irq(c->irq, txx9spi_interrupt, 0, dev->name, c); - if (ret) { - c->irq = -1; + irq = platform_get_irq(dev, 0); + if (irq < 0) + goto exit_busy; + ret = devm_request_irq(&dev->dev, irq, txx9spi_interrupt, 0, + "spi_txx9", c); + if (ret) goto exit; - } c->workqueue = create_singlethread_workqueue(master->dev.parent->bus_id); if (!c->workqueue) - goto exit; + goto exit_busy; c->last_chipselect = -1; dev_info(&dev->dev, "at %#llx, irq %d, %dMHz\n", - (unsigned long long)res->start, c->irq, + (unsigned long long)res->start, irq, (c->baseclk + 500000) / 1000000); master->bus_num = dev->id; @@ -418,13 +422,11 @@ static int __init txx9spi_probe(struct platform_device *dev) if (ret) goto exit; return 0; +exit_busy: + ret = -EBUSY; exit: if (c->workqueue) destroy_workqueue(c->workqueue); - if (c->irq >= 0) - free_irq(c->irq, c); - if (c->membase) - iounmap(c->membase); if (c->clk) { clk_disable(c->clk); clk_put(c->clk); @@ -442,8 +444,6 @@ static int __exit txx9spi_remove(struct platform_device *dev) spi_unregister_master(master); platform_set_drvdata(dev, NULL); destroy_workqueue(c->workqueue); - free_irq(c->irq, c); - iounmap(c->membase); clk_disable(c->clk); clk_put(c->clk); spi_master_put(master); -- cgit v1.2.3 From e62aa046e1748b8ea0354951685478030392cf56 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Wed, 14 Nov 2007 16:59:24 -0800 Subject: paride: pf driver fixes The pf driver for parallel port floppy drives seems to be broken. At least with Imation SuperDisk with EPAT chip, the driver calls pi_connect() and pi_disconnect after each transferred sector. At least with EPAT, this operation is very expensive - causes drive recalibration. Thus, transferring even a single byte (dd if=/dev/pf0 of=/dev/null bs=1 count=1) takes 20 seconds, making the driver useless. The pf_next_buf() function seems to be broken as it returns 1 always (except when pf_run is non-zero), causing the loop in do_pf_read_drq (and do_pf_write_drq) to be executed only once. The following patch fixes this problem. It also fixes swapped descriptions in pf_lock() function and removes DBMSG macro, which seems useless. Signed-off-by: Ondrej Zary Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/paride/pf.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index ceffa6034e2..e7fe6ca97dd 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -488,13 +488,11 @@ static int pf_atapi(struct pf_unit *pf, char *cmd, int dlen, char *buf, char *fu return r; } -#define DBMSG(msg) ((verbose>1)?(msg):NULL) - static void pf_lock(struct pf_unit *pf, int func) { char lo_cmd[12] = { ATAPI_LOCK, pf->lun << 5, 0, 0, func, 0, 0, 0, 0, 0, 0, 0 }; - pf_atapi(pf, lo_cmd, 0, pf_scratch, func ? "unlock" : "lock"); + pf_atapi(pf, lo_cmd, 0, pf_scratch, func ? "lock" : "unlock"); } static void pf_eject(struct pf_unit *pf) @@ -555,7 +553,7 @@ static void pf_mode_sense(struct pf_unit *pf) { ATAPI_MODE_SENSE, pf->lun << 5, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0 }; char buf[8]; - pf_atapi(pf, ms_cmd, 8, buf, DBMSG("mode sense")); + pf_atapi(pf, ms_cmd, 8, buf, "mode sense"); pf->media_status = PF_RW; if (buf[3] & 0x80) pf->media_status = PF_RO; @@ -591,7 +589,7 @@ static void pf_get_capacity(struct pf_unit *pf) char buf[8]; int bs; - if (pf_atapi(pf, rc_cmd, 8, buf, DBMSG("get capacity"))) { + if (pf_atapi(pf, rc_cmd, 8, buf, "get capacity")) { pf->media_status = PF_NM; return; } @@ -804,13 +802,18 @@ static int pf_next_buf(void) pf_buf += 512; pf_block++; if (!pf_run) - return 0; - if (!pf_count) return 1; - spin_lock_irqsave(&pf_spin_lock, saved_flags); - pf_end_request(1); - spin_unlock_irqrestore(&pf_spin_lock, saved_flags); - return 1; + if (!pf_count) { + spin_lock_irqsave(&pf_spin_lock, saved_flags); + pf_end_request(1); + pf_req = elv_next_request(pf_queue); + spin_unlock_irqrestore(&pf_spin_lock, saved_flags); + if (!pf_req) + return 1; + pf_count = pf_req->current_nr_sectors; + pf_buf = pf_req->buffer; + } + return 0; } static inline void next_request(int success) -- cgit v1.2.3 From 90d8dabf74179e6615bd4688a118e12ec29ab7aa Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 14 Nov 2007 16:59:26 -0800 Subject: drivers/misc: Move misplaced pci_dev_put's Move pci_dev_put outside the loops in which it occurs. Within the loop, pci_dev_put is done implicitly by pci_get_device. The problem was detected using the following semantic patch, and corrected by hand. @@ expression dev; expression E; @@ - pci_dev_put(dev) ... when != dev = E - pci_get_device(...,dev) Signed-off-by: Julia Lawall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/ioc4.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c index 6a5a05d1f39..05172d2613d 100644 --- a/drivers/misc/ioc4.c +++ b/drivers/misc/ioc4.c @@ -244,10 +244,11 @@ ioc4_variant(struct ioc4_driver_data *idd) idd->idd_pdev->bus->number == pdev->bus->number && 3 == PCI_SLOT(pdev->devfn)) found = 1; - pci_dev_put(pdev); } while (pdev && !found); - if (NULL != pdev) + if (NULL != pdev) { + pci_dev_put(pdev); return IOC4_VARIANT_IO9; + } /* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. */ pdev = NULL; @@ -258,10 +259,11 @@ ioc4_variant(struct ioc4_driver_data *idd) idd->idd_pdev->bus->number == pdev->bus->number && 3 == PCI_SLOT(pdev->devfn)) found = 1; - pci_dev_put(pdev); } while (pdev && !found); - if (NULL != pdev) + if (NULL != pdev) { + pci_dev_put(pdev); return IOC4_VARIANT_IO10; + } /* PCI-RT: No SCSI/SATA controller will be present */ return IOC4_VARIANT_PCI_RT; -- cgit v1.2.3 From 348badf1e825323c419dd118f65783db0f7d2ec8 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Wed, 14 Nov 2007 16:59:27 -0800 Subject: dmaengine: fix broken device refcounting When a DMA device is unregistered, its reference count is decremented twice for each channel: Once dma_class_dev_release() and once in dma_chan_cleanup(). This may result in the DMA device driver's remove() function completing before all channels have been cleaned up, causing lots of use-after-free fun. Fix it by incrementing the device's reference count twice for each channel during registration. [dan.j.williams@intel.com: kill unnecessary client refcounting] Signed-off-by: Haavard Skinnemoen Signed-off-by: Dan Williams Signed-off-by: Shannon Nelson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dma/dmaengine.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 82489923af0..d59b2f41730 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -182,10 +182,9 @@ static void dma_client_chan_alloc(struct dma_client *client) /* we are done once this client rejects * an available resource */ - if (ack == DMA_ACK) { + if (ack == DMA_ACK) dma_chan_get(chan); - kref_get(&device->refcount); - } else if (ack == DMA_NAK) + else if (ack == DMA_NAK) return; } } @@ -272,11 +271,8 @@ static void dma_clients_notify_removed(struct dma_chan *chan) /* client was holding resources for this channel so * free it */ - if (ack == DMA_ACK) { + if (ack == DMA_ACK) dma_chan_put(chan); - kref_put(&chan->device->refcount, - dma_async_device_cleanup); - } } mutex_unlock(&dma_list_mutex); @@ -316,11 +312,8 @@ void dma_async_client_unregister(struct dma_client *client) ack = client->event_callback(client, chan, DMA_RESOURCE_REMOVED); - if (ack == DMA_ACK) { + if (ack == DMA_ACK) dma_chan_put(chan); - kref_put(&chan->device->refcount, - dma_async_device_cleanup); - } } list_del(&client->global_node); @@ -397,6 +390,8 @@ int dma_async_device_register(struct dma_device *device) goto err_out; } + /* One for the channel, one of the class device */ + kref_get(&device->refcount); kref_get(&device->refcount); kref_init(&chan->refcount); chan->slow_ref = 0; -- cgit v1.2.3 From 19cd7537bdae6685c31677a01e08850612ba87f6 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 14 Nov 2007 16:59:29 -0800 Subject: atmel_serial build warnings begone Remove annoying build warnings about unused variables in atmel_serial, which afflict both AT91 and AVR32 builds. Signed-off-by: David Brownell Acked-by: Haavard Skinnemoen Cc: Andrew Victor Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/atmel_serial.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/serial/atmel_serial.c b/drivers/serial/atmel_serial.c index 4d6b3c56d20..111da57f533 100644 --- a/drivers/serial/atmel_serial.c +++ b/drivers/serial/atmel_serial.c @@ -204,8 +204,6 @@ static u_int atmel_get_mctrl(struct uart_port *port) */ static void atmel_stop_tx(struct uart_port *port) { - struct atmel_uart_port *atmel_port = (struct atmel_uart_port *) port; - UART_PUT_IDR(port, ATMEL_US_TXRDY); } @@ -214,8 +212,6 @@ static void atmel_stop_tx(struct uart_port *port) */ static void atmel_start_tx(struct uart_port *port) { - struct atmel_uart_port *atmel_port = (struct atmel_uart_port *) port; - UART_PUT_IER(port, ATMEL_US_TXRDY); } @@ -224,8 +220,6 @@ static void atmel_start_tx(struct uart_port *port) */ static void atmel_stop_rx(struct uart_port *port) { - struct atmel_uart_port *atmel_port = (struct atmel_uart_port *) port; - UART_PUT_IDR(port, ATMEL_US_RXRDY); } @@ -409,7 +403,6 @@ static irqreturn_t atmel_interrupt(int irq, void *dev_id) */ static int atmel_startup(struct uart_port *port) { - struct atmel_uart_port *atmel_port = (struct atmel_uart_port *) port; int retval; /* @@ -456,8 +449,6 @@ static int atmel_startup(struct uart_port *port) */ static void atmel_shutdown(struct uart_port *port) { - struct atmel_uart_port *atmel_port = (struct atmel_uart_port *) port; - /* * Disable all interrupts, port and break condition. */ -- cgit v1.2.3 From 5b23dbe8173c212d6a326e35347b038705603d39 Mon Sep 17 00:00:00 2001 From: Adam Litke Date: Wed, 14 Nov 2007 16:59:33 -0800 Subject: hugetlb: follow_hugetlb_page() for write access When calling get_user_pages(), a write flag is passed in by the caller to indicate if write access is required on the faulted-in pages. Currently, follow_hugetlb_page() ignores this flag and always faults pages for read-only access. This can cause data corruption because a device driver that calls get_user_pages() with write set will not expect COW faults to occur on the returned pages. This patch passes the write flag down to follow_hugetlb_page() and makes sure hugetlb_fault() is called with the right write_access parameter. [ezk@cs.sunysb.edu: build fix] Signed-off-by: Adam Litke Reviewed-by: Ken Chen Cc: David Gibson Cc: William Lee Irwin III Cc: Badari Pulavarty Signed-off-by: Erez Zadok Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ++-- mm/hugetlb.c | 5 +++-- mm/memory.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ea0f50bfbe0..e22368656b3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -19,7 +19,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); -int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); +int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); @@ -106,7 +106,7 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } -#define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; }) +#define follow_hugetlb_page(m,v,p,vs,a,b,i,w) ({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8b809ecefa3..e2c80631d36 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -868,7 +868,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, - unsigned long *position, int *length, int i) + unsigned long *position, int *length, int i, + int write) { unsigned long pfn_offset; unsigned long vaddr = *position; @@ -890,7 +891,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, int ret; spin_unlock(&mm->page_table_lock); - ret = hugetlb_fault(mm, vma, vaddr, 0); + ret = hugetlb_fault(mm, vma, vaddr, write); spin_lock(&mm->page_table_lock); if (!(ret & VM_FAULT_ERROR)) continue; diff --git a/mm/memory.c b/mm/memory.c index 9791e478684..7b0403bfc97 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1036,7 +1036,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, - &start, &len, i); + &start, &len, i, write); continue; } -- cgit v1.2.3 From 6c55be8b962f1bdc592d579e81fc27b11ea53dfc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 14 Nov 2007 16:59:35 -0800 Subject: raid5: fix unending write sequence handling stripe 7629696, state=0x14 cnt=1, pd_idx=2 ops=0:0:0 check 5: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800ffcffcc0 written 0000000000000000 check 4: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800fdd4e360 written 0000000000000000 check 3: state 0x1 toread 0000000000000000 read 0000000000000000 write 0000000000000000 written 0000000000000000 check 2: state 0x1 toread 0000000000000000 read 0000000000000000 write 0000000000000000 written 0000000000000000 check 1: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800ff517e40 written 0000000000000000 check 0: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800fd4cae60 written 0000000000000000 locked=4 uptodate=2 to_read=0 to_write=4 failed=0 failed_num=0 for sector 7629696, rmw=0 rcw=0 These blocks were prepared to be written out, but were never handled in ops_run_biodrain(), so they remain locked forever. The operations flags are all clear which means handle_stripe() thinks nothing else needs to be done. This state suggests that the STRIPE_OP_PREXOR bit was sampled 'set' when it should not have been. This patch cleans up cases where the code looks at sh->ops.pending when it should be looking at the consistent stack-based snapshot of the operations flags. Report from Joel: Resync done. Patch fix this bug. Signed-off-by: Dan Williams Tested-by: Joel Bertrand Cc: Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1cfc984cc7b..a5aad8cad84 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -688,7 +688,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) } static struct dma_async_tx_descriptor * -ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) +ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, + unsigned long pending) { int disks = sh->disks; int pd_idx = sh->pd_idx, i; @@ -696,7 +697,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) /* check if prexor is active which means only process blocks * that are part of a read-modify-write (Wantprexor) */ - int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); + int prexor = test_bit(STRIPE_OP_PREXOR, &pending); pr_debug("%s: stripe %llu\n", __FUNCTION__, (unsigned long long)sh->sector); @@ -773,7 +774,8 @@ static void ops_complete_write(void *stripe_head_ref) } static void -ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) +ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, + unsigned long pending) { /* kernel stack size limits the total number of disks */ int disks = sh->disks; @@ -781,7 +783,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest; - int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); + int prexor = test_bit(STRIPE_OP_PREXOR, &pending); unsigned long flags; dma_async_tx_callback callback; @@ -808,7 +810,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) } /* check whether this postxor is part of a write */ - callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ? + callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ? ops_complete_write : ops_complete_postxor; /* 1/ if we prexor'd then the dest is reused as a source @@ -896,12 +898,12 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending) tx = ops_run_prexor(sh, tx); if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { - tx = ops_run_biodrain(sh, tx); + tx = ops_run_biodrain(sh, tx, pending); overlap_clear++; } if (test_bit(STRIPE_OP_POSTXOR, &pending)) - ops_run_postxor(sh, tx); + ops_run_postxor(sh, tx, pending); if (test_bit(STRIPE_OP_CHECK, &pending)) ops_run_check(sh); -- cgit v1.2.3 From 348ea204cc23cda35faf962414b674c57da647d7 Mon Sep 17 00:00:00 2001 From: Adam Litke Date: Wed, 14 Nov 2007 16:59:37 -0800 Subject: hugetlb: split alloc_huge_page into private and shared components Hugetlbfs implements a quota system which can limit the amount of memory that can be used by the filesystem. Before allocating a new huge page for a file, the quota is checked and debited. The quota is then credited when truncating the file. I found a few bugs in the code for both MAP_PRIVATE and MAP_SHARED mappings. Before detailing the problems and my proposed solutions, we should agree on a definition of quotas that properly addresses both private and shared pages. Since the purpose of quotas is to limit total memory consumption on a per-filesystem basis, I argue that all pages allocated by the fs (private and shared) should be charged against quota. Private Mappings ================ The current code will debit quota for private pages sometimes, but will never credit it. At a minimum, this causes a leak in the quota accounting which renders the accounting essentially useless as it is. Shared pages have a one to one mapping with a hugetlbfs file and are easy to account by debiting on allocation and crediting on truncate. Private pages are anonymous in nature and have a many to one relationship with their hugetlbfs files (due to copy on write). Because private pages are not indexed by the mapping's radix tree, thier quota cannot be credited at file truncation time. Crediting must be done when the page is unmapped and freed. Shared Pages ============ I discovered an issue concerning the interaction between the MAP_SHARED reservation system and quotas. Since quota is not checked until page instantiation, an over-quota mmap/reservation will initially succeed. When instantiating the first over-quota page, the program will receive SIGBUS. This is inconsistent since the reservation is supposed to be a guarantee. The solution is to debit the full amount of quota at reservation time and credit the unused portion when the reservation is released. This patch series brings quotas back in line by making the following modifications: * Private pages - Debit quota in alloc_huge_page() - Credit quota in free_huge_page() * Shared pages - Debit quota for entire reservation at mmap time - Credit quota for instantiated pages in free_huge_page() - Credit quota for unused reservation at munmap time This patch: The shared page reservation and dynamic pool resizing features have made the allocation of private vs. shared huge pages quite different. By splitting out the private/shared-specific portions of the process into their own functions, readability is greatly improved. alloc_huge_page now calls the proper helper and performs common operations. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Adam Litke Cc: Ken Chen Cc: Andy Whitcroft Cc: Dave Hansen Cc: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e2c80631d36..f43b3dca12b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -353,35 +353,43 @@ void return_unused_surplus_pages(unsigned long unused_resv_pages) } } -static struct page *alloc_huge_page(struct vm_area_struct *vma, - unsigned long addr) + +static struct page *alloc_huge_page_shared(struct vm_area_struct *vma, + unsigned long addr) { - struct page *page = NULL; - int use_reserved_page = vma->vm_flags & VM_MAYSHARE; + struct page *page; spin_lock(&hugetlb_lock); - if (!use_reserved_page && (free_huge_pages <= resv_huge_pages)) - goto fail; - page = dequeue_huge_page(vma, addr); - if (!page) - goto fail; - spin_unlock(&hugetlb_lock); - set_page_refcounted(page); return page; +} -fail: - spin_unlock(&hugetlb_lock); +static struct page *alloc_huge_page_private(struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *page = NULL; - /* - * Private mappings do not use reserved huge pages so the allocation - * may have failed due to an undersized hugetlb pool. Try to grab a - * surplus huge page from the buddy allocator. - */ - if (!use_reserved_page) + spin_lock(&hugetlb_lock); + if (free_huge_pages > resv_huge_pages) + page = dequeue_huge_page(vma, addr); + spin_unlock(&hugetlb_lock); + if (!page) page = alloc_buddy_huge_page(vma, addr); + return page; +} +static struct page *alloc_huge_page(struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *page; + + if (vma->vm_flags & VM_MAYSHARE) + page = alloc_huge_page_shared(vma, addr); + else + page = alloc_huge_page_private(vma, addr); + if (page) + set_page_refcounted(page); return page; } -- cgit v1.2.3 From c79fb75e5a514a5a35f22c229042aa29f4237e3a Mon Sep 17 00:00:00 2001 From: Adam Litke Date: Wed, 14 Nov 2007 16:59:38 -0800 Subject: hugetlb: fix quota management for private mappings The hugetlbfs quota management system was never taught to handle MAP_PRIVATE mappings when that support was added. Currently, quota is debited at page instantiation and credited at file truncation. This approach works correctly for shared pages but is incomplete for private pages. In addition to hugetlb_no_page(), private pages can be instantiated by hugetlb_cow(); but this function does not respect quotas. Private huge pages are treated very much like normal, anonymous pages. They are not "backed" by the hugetlbfs file and are not stored in the mapping's radix tree. This means that private pages are invisible to truncate_hugepages() so that function will not credit the quota. This patch (based on a prototype provided by Ken Chen) moves quota crediting for all pages into free_huge_page(). page->private is used to store a pointer to the mapping to which this page belongs. This is used to credit quota on the appropriate hugetlbfs instance. Signed-off-by: Adam Litke Cc: Ken Chen Cc: Ken Chen Cc: Andy Whitcroft Cc: Dave Hansen Cc: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 1 - mm/hugetlb.c | 13 ++++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 12aca8ed605..6513f565586 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -364,7 +364,6 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) ++next; truncate_huge_page(page); unlock_page(page); - hugetlb_put_quota(mapping); freed++; } huge_pagevec_release(&pvec); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f43b3dca12b..3992bd5120e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -116,7 +116,9 @@ static void update_and_free_page(struct page *page) static void free_huge_page(struct page *page) { int nid = page_to_nid(page); + struct address_space *mapping; + mapping = (struct address_space *) page_private(page); BUG_ON(page_count(page)); INIT_LIST_HEAD(&page->lru); @@ -129,6 +131,9 @@ static void free_huge_page(struct page *page) enqueue_huge_page(page); } spin_unlock(&hugetlb_lock); + if (mapping) + hugetlb_put_quota(mapping); + set_page_private(page, 0); } /* @@ -388,8 +393,10 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, page = alloc_huge_page_shared(vma, addr); else page = alloc_huge_page_private(vma, addr); - if (page) + if (page) { set_page_refcounted(page); + set_page_private(page, (unsigned long) vma->vm_file->f_mapping); + } return page; } @@ -730,6 +737,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, set_huge_ptep_writable(vma, address, ptep); return 0; } + if (hugetlb_get_quota(vma->vm_file->f_mapping)) + return VM_FAULT_SIGBUS; page_cache_get(old_page); new_page = alloc_huge_page(vma, address); @@ -796,7 +805,6 @@ retry: err = add_to_page_cache(page, mapping, idx, GFP_KERNEL); if (err) { put_page(page); - hugetlb_put_quota(mapping); if (err == -EEXIST) goto retry; goto out; @@ -830,7 +838,6 @@ out: backout: spin_unlock(&mm->page_table_lock); - hugetlb_put_quota(mapping); unlock_page(page); put_page(page); goto out; -- cgit v1.2.3 From 2fc39cec6a9b5b41727d3386b780b69422a15152 Mon Sep 17 00:00:00 2001 From: Adam Litke Date: Wed, 14 Nov 2007 16:59:39 -0800 Subject: hugetlb: debit quota in alloc_huge_page Now that quota is credited by free_huge_page(), calls to hugetlb_get_quota() seem out of place. The alloc/free API is unbalanced because we handle the hugetlb_put_quota() but expect the caller to open-code hugetlb_get_quota(). Move the get inside alloc_huge_page to clean up this disparity. This patch has been kept apart from the previous patch because of the somewhat dodgy ERR_PTR() use herein. Moving the quota logic means that alloc_huge_page() has two failure modes. Quota failure must result in a SIGBUS while a standard allocation failure is OOM. Unfortunately, ERR_PTR() doesn't like the small positive errnos we have in VM_FAULT_* so they must be negated before they are used. Does anyone take issue with the way I am using PTR_ERR. If so, what are your thoughts on how to clean this up (without needing an if,else if,else block at each alloc_huge_page() callsite)? Signed-off-by: Adam Litke Cc: Ken Chen Cc: Andy Whitcroft Cc: Dave Hansen Cc: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3992bd5120e..bc12b0adfa8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -388,6 +388,10 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr) { struct page *page; + struct address_space *mapping = vma->vm_file->f_mapping; + + if (hugetlb_get_quota(mapping)) + return ERR_PTR(-VM_FAULT_SIGBUS); if (vma->vm_flags & VM_MAYSHARE) page = alloc_huge_page_shared(vma, addr); @@ -395,9 +399,10 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, page = alloc_huge_page_private(vma, addr); if (page) { set_page_refcounted(page); - set_page_private(page, (unsigned long) vma->vm_file->f_mapping); - } - return page; + set_page_private(page, (unsigned long) mapping); + return page; + } else + return ERR_PTR(-VM_FAULT_OOM); } static int __init hugetlb_init(void) @@ -737,15 +742,13 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, set_huge_ptep_writable(vma, address, ptep); return 0; } - if (hugetlb_get_quota(vma->vm_file->f_mapping)) - return VM_FAULT_SIGBUS; page_cache_get(old_page); new_page = alloc_huge_page(vma, address); - if (!new_page) { + if (IS_ERR(new_page)) { page_cache_release(old_page); - return VM_FAULT_OOM; + return -PTR_ERR(new_page); } spin_unlock(&mm->page_table_lock); @@ -789,12 +792,9 @@ retry: size = i_size_read(mapping->host) >> HPAGE_SHIFT; if (idx >= size) goto out; - if (hugetlb_get_quota(mapping)) - goto out; page = alloc_huge_page(vma, address); - if (!page) { - hugetlb_put_quota(mapping); - ret = VM_FAULT_OOM; + if (IS_ERR(page)) { + ret = -PTR_ERR(page); goto out; } clear_huge_page(page, address); -- cgit v1.2.3 From 9a119c056dc2a9970901954a6d561d50a95e528d Mon Sep 17 00:00:00 2001 From: Adam Litke Date: Wed, 14 Nov 2007 16:59:41 -0800 Subject: hugetlb: allow bulk updating in hugetlb_*_quota() Add a second parameter 'delta' to hugetlb_get_quota and hugetlb_put_quota to allow bulk updating of the sbinfo->free_blocks counter. This will be used by the next patch in the series. Signed-off-by: Adam Litke Cc: Ken Chen Cc: Andy Whitcroft Cc: Dave Hansen Cc: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 10 +++++----- include/linux/hugetlb.h | 4 ++-- mm/hugetlb.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6513f565586..09ee07f0266 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -858,15 +858,15 @@ out_free: return -ENOMEM; } -int hugetlb_get_quota(struct address_space *mapping) +int hugetlb_get_quota(struct address_space *mapping, long delta) { int ret = 0; struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); if (sbinfo->free_blocks > -1) { spin_lock(&sbinfo->stat_lock); - if (sbinfo->free_blocks > 0) - sbinfo->free_blocks--; + if (sbinfo->free_blocks - delta >= 0) + sbinfo->free_blocks -= delta; else ret = -ENOMEM; spin_unlock(&sbinfo->stat_lock); @@ -875,13 +875,13 @@ int hugetlb_get_quota(struct address_space *mapping) return ret; } -void hugetlb_put_quota(struct address_space *mapping) +void hugetlb_put_quota(struct address_space *mapping, long delta) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); if (sbinfo->free_blocks > -1) { spin_lock(&sbinfo->stat_lock); - sbinfo->free_blocks++; + sbinfo->free_blocks += delta; spin_unlock(&sbinfo->stat_lock); } } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e22368656b3..8104e5af75a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -165,8 +165,8 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t); -int hugetlb_get_quota(struct address_space *mapping); -void hugetlb_put_quota(struct address_space *mapping); +int hugetlb_get_quota(struct address_space *mapping, long delta); +void hugetlb_put_quota(struct address_space *mapping, long delta); static inline int is_file_hugepages(struct file *file) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bc12b0adfa8..1e317465ecd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -132,7 +132,7 @@ static void free_huge_page(struct page *page) } spin_unlock(&hugetlb_lock); if (mapping) - hugetlb_put_quota(mapping); + hugetlb_put_quota(mapping, 1); set_page_private(page, 0); } @@ -390,7 +390,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *page; struct address_space *mapping = vma->vm_file->f_mapping; - if (hugetlb_get_quota(mapping)) + if (hugetlb_get_quota(mapping, 1)) return ERR_PTR(-VM_FAULT_SIGBUS); if (vma->vm_flags & VM_MAYSHARE) -- cgit v1.2.3 From 90d8b7e6129e8f4e5b3bc1a2cfbe585372ce8646 Mon Sep 17 00:00:00 2001 From: Adam Litke Date: Wed, 14 Nov 2007 16:59:42 -0800 Subject: hugetlb: enforce quotas during reservation for shared mappings When a MAP_SHARED mmap of a hugetlbfs file succeeds, huge pages are reserved to guarantee no problems will occur later when instantiating pages. If quotas are in force, page instantiation could fail due to a race with another process or an oversized (but approved) shared mapping. To prevent these scenarios, debit the quota for the full reservation amount up front and credit the unused quota when the reservation is released. Signed-off-by: Adam Litke Cc: Ken Chen Cc: Andy Whitcroft Cc: Dave Hansen Cc: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1e317465ecd..b52b6ddd6c1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -367,7 +367,7 @@ static struct page *alloc_huge_page_shared(struct vm_area_struct *vma, spin_lock(&hugetlb_lock); page = dequeue_huge_page(vma, addr); spin_unlock(&hugetlb_lock); - return page; + return page ? page : ERR_PTR(-VM_FAULT_OOM); } static struct page *alloc_huge_page_private(struct vm_area_struct *vma, @@ -375,13 +375,16 @@ static struct page *alloc_huge_page_private(struct vm_area_struct *vma, { struct page *page = NULL; + if (hugetlb_get_quota(vma->vm_file->f_mapping, 1)) + return ERR_PTR(-VM_FAULT_SIGBUS); + spin_lock(&hugetlb_lock); if (free_huge_pages > resv_huge_pages) page = dequeue_huge_page(vma, addr); spin_unlock(&hugetlb_lock); if (!page) page = alloc_buddy_huge_page(vma, addr); - return page; + return page ? page : ERR_PTR(-VM_FAULT_OOM); } static struct page *alloc_huge_page(struct vm_area_struct *vma, @@ -390,19 +393,16 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *page; struct address_space *mapping = vma->vm_file->f_mapping; - if (hugetlb_get_quota(mapping, 1)) - return ERR_PTR(-VM_FAULT_SIGBUS); - if (vma->vm_flags & VM_MAYSHARE) page = alloc_huge_page_shared(vma, addr); else page = alloc_huge_page_private(vma, addr); - if (page) { + + if (!IS_ERR(page)) { set_page_refcounted(page); set_page_private(page, (unsigned long) mapping); - return page; - } else - return ERR_PTR(-VM_FAULT_OOM); + } + return page; } static int __init hugetlb_init(void) @@ -1148,6 +1148,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to) if (chg < 0) return chg; + if (hugetlb_get_quota(inode->i_mapping, chg)) + return -ENOSPC; ret = hugetlb_acct_memory(chg); if (ret < 0) return ret; @@ -1158,5 +1160,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to) void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) { long chg = region_truncate(&inode->i_mapping->private_list, offset); - hugetlb_acct_memory(freed - chg); + hugetlb_put_quota(inode->i_mapping, (chg - freed)); + hugetlb_acct_memory(-(chg - freed)); } -- cgit v1.2.3 From 8cde045c7ee97573be6ce495b8f7c918182a2c7a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 16:59:43 -0800 Subject: mm/hugetlb.c: make a function static return_unused_surplus_pages() can become static. Signed-off-by: Adrian Bunk Acked-by: Adam Litke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b52b6ddd6c1..abe1e9f2a94 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -328,7 +328,7 @@ free: * allocated to satisfy the reservation must be explicitly freed if they were * never used. */ -void return_unused_surplus_pages(unsigned long unused_resv_pages) +static void return_unused_surplus_pages(unsigned long unused_resv_pages) { static int nid = -1; struct page *page; -- cgit v1.2.3 From 45c682a68a87251d9a01383ce076ab21ee09812e Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Wed, 14 Nov 2007 16:59:44 -0800 Subject: hugetlb: fix i_blocks accounting For administrative purpose, we want to query actual block usage for hugetlbfs file via fstat. Currently, hugetlbfs always return 0. Fix that up since kernel already has all the information to track it properly. Signed-off-by: Ken Chen Acked-by: Adam Litke Cc: Badari Pulavarty Cc: David Gibson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 ++ mm/hugetlb.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8104e5af75a..24968790bc3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -168,6 +168,8 @@ struct file *hugetlb_file_setup(const char *name, size_t); int hugetlb_get_quota(struct address_space *mapping, long delta); void hugetlb_put_quota(struct address_space *mapping, long delta); +#define BLOCKS_PER_HUGEPAGE (HPAGE_SIZE / 512) + static inline int is_file_hugepages(struct file *file) { if (file->f_op == &hugetlbfs_file_operations) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index abe1e9f2a94..6121b57bbe9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -801,6 +801,7 @@ retry: if (vma->vm_flags & VM_SHARED) { int err; + struct inode *inode = mapping->host; err = add_to_page_cache(page, mapping, idx, GFP_KERNEL); if (err) { @@ -809,6 +810,10 @@ retry: goto retry; goto out; } + + spin_lock(&inode->i_lock); + inode->i_blocks += BLOCKS_PER_HUGEPAGE; + spin_unlock(&inode->i_lock); } else lock_page(page); } @@ -1160,6 +1165,11 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to) void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) { long chg = region_truncate(&inode->i_mapping->private_list, offset); + + spin_lock(&inode->i_lock); + inode->i_blocks -= BLOCKS_PER_HUGEPAGE * freed; + spin_unlock(&inode->i_lock); + hugetlb_put_quota(inode->i_mapping, (chg - freed)); hugetlb_acct_memory(-(chg - freed)); } -- cgit v1.2.3 From cfb5285660aad4931b2ebbfa902ea48a37dfffa1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Nov 2007 16:59:45 -0800 Subject: revert "Task Control Groups: example CPU accounting subsystem" Revert 62d0df64065e7c135d0002f069444fbdfc64768f. This was originally intended as a simple initial example of how to create a control groups subsystem; it wasn't intended for mainline, but I didn't make this clear enough to Andrew. The CFS cgroup subsystem now has better functionality for the per-cgroup usage accounting (based directly on CFS stats) than the "usage" status file in this patch, and the "load" status file is rather simplistic - although having a per-cgroup load average report would be a useful feature, I don't believe this patch actually provides it. If it gets into the final 2.6.24 we'd probably have to support this interface for ever. Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup_subsys.h | 6 -- include/linux/cpu_acct.h | 14 ---- init/Kconfig | 7 -- kernel/Makefile | 1 - kernel/cpu_acct.c | 186 ------------------------------------------ kernel/sched.c | 14 +--- 6 files changed, 3 insertions(+), 225 deletions(-) delete mode 100644 include/linux/cpu_acct.h delete mode 100644 kernel/cpu_acct.c diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 0b9bfbde816..d62fcee9a08 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -13,12 +13,6 @@ SUBSYS(cpuset) /* */ -#ifdef CONFIG_CGROUP_CPUACCT -SUBSYS(cpuacct) -#endif - -/* */ - #ifdef CONFIG_CGROUP_DEBUG SUBSYS(debug) #endif diff --git a/include/linux/cpu_acct.h b/include/linux/cpu_acct.h deleted file mode 100644 index 6b5fd8a66c8..00000000000 --- a/include/linux/cpu_acct.h +++ /dev/null @@ -1,14 +0,0 @@ - -#ifndef _LINUX_CPU_ACCT_H -#define _LINUX_CPU_ACCT_H - -#include -#include - -#ifdef CONFIG_CGROUP_CPUACCT -extern void cpuacct_charge(struct task_struct *, cputime_t cputime); -#else -static void inline cpuacct_charge(struct task_struct *p, cputime_t cputime) {} -#endif - -#endif diff --git a/init/Kconfig b/init/Kconfig index 8b88d0bedcb..5b92e3aa136 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -301,13 +301,6 @@ config CGROUP_NS for instance virtual servers and checkpoint/restart jobs. -config CGROUP_CPUACCT - bool "Simple CPU accounting cgroup subsystem" - depends on CGROUPS - help - Provides a simple Resource Controller for monitoring the - total CPU consumed by the tasks in a cgroup - config CPUSETS bool "Cpuset support" depends on SMP && CGROUPS diff --git a/kernel/Makefile b/kernel/Makefile index f60afe74259..dfa96956dae 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -40,7 +40,6 @@ obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o obj-$(CONFIG_CPUSETS) += cpuset.o -obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c deleted file mode 100644 index 731e47e7f16..00000000000 --- a/kernel/cpu_acct.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * kernel/cpu_acct.c - CPU accounting cgroup subsystem - * - * Copyright (C) Google Inc, 2006 - * - * Developed by Paul Menage (menage@google.com) and Balbir Singh - * (balbir@in.ibm.com) - * - */ - -/* - * Example cgroup subsystem for reporting total CPU usage of tasks in a - * cgroup, along with percentage load over a time interval - */ - -#include -#include -#include -#include - -#include - -struct cpuacct { - struct cgroup_subsys_state css; - spinlock_t lock; - /* total time used by this class */ - cputime64_t time; - - /* time when next load calculation occurs */ - u64 next_interval_check; - - /* time used in current period */ - cputime64_t current_interval_time; - - /* time used in last period */ - cputime64_t last_interval_time; -}; - -struct cgroup_subsys cpuacct_subsys; - -static inline struct cpuacct *cgroup_ca(struct cgroup *cont) -{ - return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id), - struct cpuacct, css); -} - -static inline struct cpuacct *task_ca(struct task_struct *task) -{ - return container_of(task_subsys_state(task, cpuacct_subsys_id), - struct cpuacct, css); -} - -#define INTERVAL (HZ * 10) - -static inline u64 next_interval_boundary(u64 now) -{ - /* calculate the next interval boundary beyond the - * current time */ - do_div(now, INTERVAL); - return (now + 1) * INTERVAL; -} - -static struct cgroup_subsys_state *cpuacct_create( - struct cgroup_subsys *ss, struct cgroup *cont) -{ - struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); - - if (!ca) - return ERR_PTR(-ENOMEM); - spin_lock_init(&ca->lock); - ca->next_interval_check = next_interval_boundary(get_jiffies_64()); - return &ca->css; -} - -static void cpuacct_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) -{ - kfree(cgroup_ca(cont)); -} - -/* Lazily update the load calculation if necessary. Called with ca locked */ -static void cpuusage_update(struct cpuacct *ca) -{ - u64 now = get_jiffies_64(); - - /* If we're not due for an update, return */ - if (ca->next_interval_check > now) - return; - - if (ca->next_interval_check <= (now - INTERVAL)) { - /* If it's been more than an interval since the last - * check, then catch up - the last interval must have - * been zero load */ - ca->last_interval_time = 0; - ca->next_interval_check = next_interval_boundary(now); - } else { - /* If a steal takes the last interval time negative, - * then we just ignore it */ - if ((s64)ca->current_interval_time > 0) - ca->last_interval_time = ca->current_interval_time; - else - ca->last_interval_time = 0; - ca->next_interval_check += INTERVAL; - } - ca->current_interval_time = 0; -} - -static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft) -{ - struct cpuacct *ca = cgroup_ca(cont); - u64 time; - - spin_lock_irq(&ca->lock); - cpuusage_update(ca); - time = cputime64_to_jiffies64(ca->time); - spin_unlock_irq(&ca->lock); - - /* Convert 64-bit jiffies to seconds */ - time *= 1000; - do_div(time, HZ); - return time; -} - -static u64 load_read(struct cgroup *cont, struct cftype *cft) -{ - struct cpuacct *ca = cgroup_ca(cont); - u64 time; - - /* Find the time used in the previous interval */ - spin_lock_irq(&ca->lock); - cpuusage_update(ca); - time = cputime64_to_jiffies64(ca->last_interval_time); - spin_unlock_irq(&ca->lock); - - /* Convert time to a percentage, to give the load in the - * previous period */ - time *= 100; - do_div(time, INTERVAL); - - return time; -} - -static struct cftype files[] = { - { - .name = "usage", - .read_uint = cpuusage_read, - }, - { - .name = "load", - .read_uint = load_read, - } -}; - -static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont) -{ - return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); -} - -void cpuacct_charge(struct task_struct *task, cputime_t cputime) -{ - - struct cpuacct *ca; - unsigned long flags; - - if (!cpuacct_subsys.active) - return; - rcu_read_lock(); - ca = task_ca(task); - if (ca) { - spin_lock_irqsave(&ca->lock, flags); - cpuusage_update(ca); - ca->time = cputime64_add(ca->time, cputime); - ca->current_interval_time = - cputime64_add(ca->current_interval_time, cputime); - spin_unlock_irqrestore(&ca->lock, flags); - } - rcu_read_unlock(); -} - -struct cgroup_subsys cpuacct_subsys = { - .name = "cpuacct", - .create = cpuacct_create, - .destroy = cpuacct_destroy, - .populate = cpuacct_populate, - .subsys_id = cpuacct_subsys_id, -}; diff --git a/kernel/sched.c b/kernel/sched.c index b18f231a487..4fb3532dd7e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #include #include @@ -3338,13 +3337,9 @@ void account_user_time(struct task_struct *p, cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; cputime64_t tmp; - struct rq *rq = this_rq(); p->utime = cputime_add(p->utime, cputime); - if (p != rq->idle) - cpuacct_charge(p, cputime); - /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); if (TASK_NICE(p) > 0) @@ -3408,10 +3403,9 @@ void account_system_time(struct task_struct *p, int hardirq_offset, cpustat->irq = cputime64_add(cpustat->irq, tmp); else if (softirq_count()) cpustat->softirq = cputime64_add(cpustat->softirq, tmp); - else if (p != rq->idle) { + else if (p != rq->idle) cpustat->system = cputime64_add(cpustat->system, tmp); - cpuacct_charge(p, cputime); - } else if (atomic_read(&rq->nr_iowait) > 0) + else if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else cpustat->idle = cputime64_add(cpustat->idle, tmp); @@ -3447,10 +3441,8 @@ void account_steal_time(struct task_struct *p, cputime_t steal) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else cpustat->idle = cputime64_add(cpustat->idle, tmp); - } else { + } else cpustat->steal = cputime64_add(cpustat->steal, tmp); - cpuacct_charge(p, -tmp); - } } /* -- cgit v1.2.3 From f433dc56344cb72cc3de5ba0819021cec3aef807 Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Wed, 14 Nov 2007 16:59:47 -0800 Subject: Fixes to the BFS filesystem driver I found a few bugs in the BFS driver. Detailed description of the bugs as well as the steps to reproduce the errors are given in the kernel bugzilla. Please follow these links for more information: http://bugzilla.kernel.org/show_bug.cgi?id=9363 http://bugzilla.kernel.org/show_bug.cgi?id=9364 http://bugzilla.kernel.org/show_bug.cgi?id=9365 http://bugzilla.kernel.org/show_bug.cgi?id=9366 This patch fixes the bugs described above. Besides, the patch introduces coding style changes to make the BFS driver conform to the requirements specified for Linux kernel code. Finally, I made a few cosmetic changes such as removal of trivial debug output. Also, the patch removes the fields `si_lf_ioff' and `si_lf_sblk' of the in-core superblock structure. These fields are initialized but never actually used. If you are wondering why I need BFS, here is the answer: I am using this driver in the context of Linux kernel classes I am teaching in the Moscow State University and in the International Institute of Information Technology in Pune, India. Signed-off-by: Dmitri Vorobiev Cc: Tigran Aivazian Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/bfs/bfs.h | 4 +- fs/bfs/dir.c | 146 ++++++++++++++++++++++++++++++--------------------------- fs/bfs/file.c | 62 +++++++++++++++--------- fs/bfs/inode.c | 127 ++++++++++++++++++++++++------------------------- 4 files changed, 184 insertions(+), 155 deletions(-) diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 130f6c66c5b..ac7a8b1d6c3 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -14,8 +14,6 @@ struct bfs_sb_info { unsigned long si_blocks; unsigned long si_freeb; unsigned long si_freei; - unsigned long si_lf_ioff; - unsigned long si_lf_sblk; unsigned long si_lf_eblk; unsigned long si_lasti; unsigned long * si_imap; @@ -39,7 +37,7 @@ static inline struct bfs_sb_info *BFS_SB(struct super_block *sb) static inline struct bfs_inode_info *BFS_I(struct inode *inode) { - return list_entry(inode, struct bfs_inode_info, vfs_inode); + return container_of(inode, struct bfs_inode_info, vfs_inode); } diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 097f1497f74..1fd056d0fc3 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -21,29 +21,32 @@ #define dprintf(x...) #endif -static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino); -static struct buffer_head * bfs_find_entry(struct inode * dir, - const unsigned char * name, int namelen, struct bfs_dirent ** res_dir); +static int bfs_add_entry(struct inode *dir, const unsigned char *name, + int namelen, int ino); +static struct buffer_head *bfs_find_entry(struct inode *dir, + const unsigned char *name, int namelen, + struct bfs_dirent **res_dir); -static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) +static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) { - struct inode * dir = f->f_path.dentry->d_inode; - struct buffer_head * bh; - struct bfs_dirent * de; + struct inode *dir = f->f_path.dentry->d_inode; + struct buffer_head *bh; + struct bfs_dirent *de; unsigned int offset; int block; lock_kernel(); - if (f->f_pos & (BFS_DIRENT_SIZE-1)) { - printf("Bad f_pos=%08lx for %s:%08lx\n", (unsigned long)f->f_pos, - dir->i_sb->s_id, dir->i_ino); + if (f->f_pos & (BFS_DIRENT_SIZE - 1)) { + printf("Bad f_pos=%08lx for %s:%08lx\n", + (unsigned long)f->f_pos, + dir->i_sb->s_id, dir->i_ino); unlock_kernel(); return -EBADF; } while (f->f_pos < dir->i_size) { - offset = f->f_pos & (BFS_BSIZE-1); + offset = f->f_pos & (BFS_BSIZE - 1); block = BFS_I(dir)->i_sblock + (f->f_pos >> BFS_BSIZE_BITS); bh = sb_bread(dir->i_sb, block); if (!bh) { @@ -54,7 +57,9 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) de = (struct bfs_dirent *)(bh->b_data + offset); if (de->ino) { int size = strnlen(de->name, BFS_NAMELEN); - if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) { + if (filldir(dirent, de->name, size, f->f_pos, + le16_to_cpu(de->ino), + DT_UNKNOWN) < 0) { brelse(bh); unlock_kernel(); return 0; @@ -62,7 +67,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) } offset += BFS_DIRENT_SIZE; f->f_pos += BFS_DIRENT_SIZE; - } while (offset < BFS_BSIZE && f->f_pos < dir->i_size); + } while ((offset < BFS_BSIZE) && (f->f_pos < dir->i_size)); brelse(bh); } @@ -78,13 +83,13 @@ const struct file_operations bfs_dir_operations = { extern void dump_imap(const char *, struct super_block *); -static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, - struct nameidata *nd) +static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { int err; - struct inode * inode; - struct super_block * s = dir->i_sb; - struct bfs_sb_info * info = BFS_SB(s); + struct inode *inode; + struct super_block *s = dir->i_sb; + struct bfs_sb_info *info = BFS_SB(s); unsigned long ino; inode = new_inode(s); @@ -97,7 +102,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, iput(inode); return -ENOSPC; } - set_bit(ino, info->si_imap); + set_bit(ino, info->si_imap); info->si_freei--; inode->i_uid = current->fsuid; inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; @@ -113,9 +118,10 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, BFS_I(inode)->i_eblock = 0; insert_inode_hash(inode); mark_inode_dirty(inode); - dump_imap("create",s); + dump_imap("create", s); - err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, inode->i_ino); + err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, + inode->i_ino); if (err) { inode_dec_link_count(inode); iput(inode); @@ -127,11 +133,12 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, return 0; } -static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - struct inode * inode = NULL; - struct buffer_head * bh; - struct bfs_dirent * de; + struct inode *inode = NULL; + struct buffer_head *bh; + struct bfs_dirent *de; if (dentry->d_name.len > BFS_NAMELEN) return ERR_PTR(-ENAMETOOLONG); @@ -152,13 +159,15 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st return NULL; } -static int bfs_link(struct dentry * old, struct inode * dir, struct dentry * new) +static int bfs_link(struct dentry *old, struct inode *dir, + struct dentry *new) { - struct inode * inode = old->d_inode; + struct inode *inode = old->d_inode; int err; lock_kernel(); - err = bfs_add_entry(dir, new->d_name.name, new->d_name.len, inode->i_ino); + err = bfs_add_entry(dir, new->d_name.name, new->d_name.len, + inode->i_ino); if (err) { unlock_kernel(); return err; @@ -172,23 +181,23 @@ static int bfs_link(struct dentry * old, struct inode * dir, struct dentry * new return 0; } - -static int bfs_unlink(struct inode * dir, struct dentry * dentry) +static int bfs_unlink(struct inode *dir, struct dentry *dentry) { int error = -ENOENT; - struct inode * inode; - struct buffer_head * bh; - struct bfs_dirent * de; + struct inode *inode; + struct buffer_head *bh; + struct bfs_dirent *de; inode = dentry->d_inode; lock_kernel(); bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); - if (!bh || le16_to_cpu(de->ino) != inode->i_ino) + if (!bh || (le16_to_cpu(de->ino) != inode->i_ino)) goto out_brelse; if (!inode->i_nlink) { - printf("unlinking non-existent file %s:%lu (nlink=%d)\n", inode->i_sb->s_id, - inode->i_ino, inode->i_nlink); + printf("unlinking non-existent file %s:%lu (nlink=%d)\n", + inode->i_sb->s_id, inode->i_ino, + inode->i_nlink); inode->i_nlink = 1; } de->ino = 0; @@ -205,12 +214,12 @@ out_brelse: return error; } -static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, - struct inode * new_dir, struct dentry * new_dentry) +static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { - struct inode * old_inode, * new_inode; - struct buffer_head * old_bh, * new_bh; - struct bfs_dirent * old_de, * new_de; + struct inode *old_inode, *new_inode; + struct buffer_head *old_bh, *new_bh; + struct bfs_dirent *old_de, *new_de; int error = -ENOENT; old_bh = new_bh = NULL; @@ -223,7 +232,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, old_dentry->d_name.name, old_dentry->d_name.len, &old_de); - if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino) + if (!old_bh || (le16_to_cpu(old_de->ino) != old_inode->i_ino)) goto end_rename; error = -EPERM; @@ -239,7 +248,8 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, if (!new_bh) { error = bfs_add_entry(new_dir, new_dentry->d_name.name, - new_dentry->d_name.len, old_inode->i_ino); + new_dentry->d_name.len, + old_inode->i_ino); if (error) goto end_rename; } @@ -268,11 +278,12 @@ const struct inode_operations bfs_dir_inops = { .rename = bfs_rename, }; -static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino) +static int bfs_add_entry(struct inode *dir, const unsigned char *name, + int namelen, int ino) { - struct buffer_head * bh; - struct bfs_dirent * de; - int block, sblock, eblock, off, eoff; + struct buffer_head *bh; + struct bfs_dirent *de; + int block, sblock, eblock, off, pos; int i; dprintf("name=%s, namelen=%d\n", name, namelen); @@ -284,27 +295,24 @@ static int bfs_add_entry(struct inode * dir, const unsigned char * name, int nam sblock = BFS_I(dir)->i_sblock; eblock = BFS_I(dir)->i_eblock; - eoff = dir->i_size % BFS_BSIZE; - for (block=sblock; block<=eblock; block++) { + for (block = sblock; block <= eblock; block++) { bh = sb_bread(dir->i_sb, block); - if(!bh) + if (!bh) return -ENOSPC; - for (off=0; offb_data + off); - if (block==eblock && off>=eoff) { - /* Do not read/interpret the garbage in the end of eblock. */ - de->ino = 0; - } if (!de->ino) { - if ((block-sblock)*BFS_BSIZE + off >= dir->i_size) { + pos = (block - sblock) * BFS_BSIZE + off; + if (pos >= dir->i_size) { dir->i_size += BFS_DIRENT_SIZE; dir->i_ctime = CURRENT_TIME_SEC; } dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); de->ino = cpu_to_le16((u16)ino); - for (i=0; iname[i] = (i < namelen) ? name[i] : 0; + for (i = 0; i < BFS_NAMELEN; i++) + de->name[i] = + (i < namelen) ? name[i] : 0; mark_buffer_dirty(bh); brelse(bh); return 0; @@ -315,25 +323,26 @@ static int bfs_add_entry(struct inode * dir, const unsigned char * name, int nam return -ENOSPC; } -static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer) +static inline int bfs_namecmp(int len, const unsigned char *name, + const char *buffer) { - if (len < BFS_NAMELEN && buffer[len]) + if ((len < BFS_NAMELEN) && buffer[len]) return 0; return !memcmp(name, buffer, len); } -static struct buffer_head * bfs_find_entry(struct inode * dir, - const unsigned char * name, int namelen, struct bfs_dirent ** res_dir) +static struct buffer_head *bfs_find_entry(struct inode *dir, + const unsigned char *name, int namelen, + struct bfs_dirent **res_dir) { - unsigned long block, offset; - struct buffer_head * bh; - struct bfs_dirent * de; + unsigned long block = 0, offset = 0; + struct buffer_head *bh = NULL; + struct bfs_dirent *de; *res_dir = NULL; if (namelen > BFS_NAMELEN) return NULL; - bh = NULL; - block = offset = 0; + while (block * BFS_BSIZE + offset < dir->i_size) { if (!bh) { bh = sb_bread(dir->i_sb, BFS_I(dir)->i_sblock + block); @@ -344,7 +353,8 @@ static struct buffer_head * bfs_find_entry(struct inode * dir, } de = (struct bfs_dirent *)(bh->b_data + offset); offset += BFS_DIRENT_SIZE; - if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) { + if (le16_to_cpu(de->ino) && + bfs_namecmp(namelen, name, de->name)) { *res_dir = de; return bh; } diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 911b4ccf470..b11e63e8fbc 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -2,6 +2,11 @@ * fs/bfs/file.c * BFS file operations. * Copyright (C) 1999,2000 Tigran Aivazian + * + * Make the file block allocation algorithm understand the size + * of the underlying block device. + * Copyright (C) 2007 Dmitri Vorobiev + * */ #include @@ -27,7 +32,8 @@ const struct file_operations bfs_file_operations = { .splice_read = generic_file_splice_read, }; -static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb) +static int bfs_move_block(unsigned long from, unsigned long to, + struct super_block *sb) { struct buffer_head *bh, *new; @@ -43,21 +49,22 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo } static int bfs_move_blocks(struct super_block *sb, unsigned long start, - unsigned long end, unsigned long where) + unsigned long end, unsigned long where) { unsigned long i; dprintf("%08lx-%08lx->%08lx\n", start, end, where); for (i = start; i <= end; i++) if(bfs_move_block(i, where + i, sb)) { - dprintf("failed to move block %08lx -> %08lx\n", i, where + i); + dprintf("failed to move block %08lx -> %08lx\n", i, + where + i); return -EIO; } return 0; } -static int bfs_get_block(struct inode * inode, sector_t block, - struct buffer_head * bh_result, int create) +static int bfs_get_block(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create) { unsigned long phys; int err; @@ -66,9 +73,6 @@ static int bfs_get_block(struct inode * inode, sector_t block, struct bfs_inode_info *bi = BFS_I(inode); struct buffer_head *sbh = info->si_sbh; - if (block > info->si_blocks) - return -EIO; - phys = bi->i_sblock + block; if (!create) { if (phys <= bi->i_eblock) { @@ -79,21 +83,29 @@ static int bfs_get_block(struct inode * inode, sector_t block, return 0; } - /* if the file is not empty and the requested block is within the range - of blocks allocated for this file, we can grant it */ - if (inode->i_size && phys <= bi->i_eblock) { + /* + * If the file is not empty and the requested block is within the + * range of blocks allocated for this file, we can grant it. + */ + if (bi->i_sblock && (phys <= bi->i_eblock)) { dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", create, (unsigned long)block, phys); map_bh(bh_result, sb, phys); return 0; } - /* the rest has to be protected against itself */ + /* The file will be extended, so let's see if there is enough space. */ + if (phys >= info->si_blocks) + return -ENOSPC; + + /* The rest has to be protected against itself. */ lock_kernel(); - /* if the last data block for this file is the last allocated - block, we can extend the file trivially, without moving it - anywhere */ + /* + * If the last data block for this file is the last allocated + * block, we can extend the file trivially, without moving it + * anywhere. + */ if (bi->i_eblock == info->si_lf_eblk) { dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", create, (unsigned long)block, phys); @@ -106,13 +118,19 @@ static int bfs_get_block(struct inode * inode, sector_t block, goto out; } - /* Ok, we have to move this entire file to the next free block */ + /* Ok, we have to move this entire file to the next free block. */ phys = info->si_lf_eblk + 1; - if (bi->i_sblock) { /* if data starts on block 0 then there is no data */ + if (phys + block >= info->si_blocks) { + err = -ENOSPC; + goto out; + } + + if (bi->i_sblock) { err = bfs_move_blocks(inode->i_sb, bi->i_sblock, - bi->i_eblock, phys); + bi->i_eblock, phys); if (err) { - dprintf("failed to move ino=%08lx -> fs corruption\n", inode->i_ino); + dprintf("failed to move ino=%08lx -> fs corruption\n", + inode->i_ino); goto out; } } else @@ -124,8 +142,10 @@ static int bfs_get_block(struct inode * inode, sector_t block, phys += block; info->si_lf_eblk = bi->i_eblock = phys; - /* this assumes nothing can write the inode back while we are here - * and thus update inode->i_blocks! (XXX)*/ + /* + * This assumes nothing can write the inode back while we are here + * and thus update inode->i_blocks! (XXX) + */ info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks; mark_inode_dirty(inode); mark_buffer_dirty(sbh); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 7bd9c2bbe6e..294c41baef6 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -30,25 +30,26 @@ MODULE_LICENSE("GPL"); #define dprintf(x...) #endif -void dump_imap(const char *prefix, struct super_block * s); +void dump_imap(const char *prefix, struct super_block *s); -static void bfs_read_inode(struct inode * inode) +static void bfs_read_inode(struct inode *inode) { unsigned long ino = inode->i_ino; - struct bfs_inode * di; - struct buffer_head * bh; + struct bfs_inode *di; + struct buffer_head *bh; int block, off; - if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { + if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); make_bad_inode(inode); return; } - block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; bh = sb_bread(inode->i_sb, block); if (!bh) { - printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); + printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, + ino); make_bad_inode(inode); return; } @@ -56,7 +57,7 @@ static void bfs_read_inode(struct inode * inode) off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; - inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode); + inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode); if (le32_to_cpu(di->i_vtype) == BFS_VDIR) { inode->i_mode |= S_IFDIR; inode->i_op = &bfs_dir_inops; @@ -70,48 +71,48 @@ static void bfs_read_inode(struct inode * inode) BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); + BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); inode->i_uid = le32_to_cpu(di->i_uid); inode->i_gid = le32_to_cpu(di->i_gid); inode->i_nlink = le32_to_cpu(di->i_nlink); inode->i_size = BFS_FILESIZE(di); inode->i_blocks = BFS_FILEBLOCKS(di); - if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks); inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */ brelse(bh); } -static int bfs_write_inode(struct inode * inode, int unused) +static int bfs_write_inode(struct inode *inode, int unused) { unsigned int ino = (u16)inode->i_ino; unsigned long i_sblock; - struct bfs_inode * di; - struct buffer_head * bh; + struct bfs_inode *di; + struct buffer_head *bh; int block, off; dprintf("ino=%08x\n", ino); - if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { + if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); return -EIO; } lock_kernel(); - block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; bh = sb_bread(inode->i_sb, block); if (!bh) { - printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino); + printf("Unable to read inode %s:%08x\n", + inode->i_sb->s_id, ino); unlock_kernel(); return -EIO; } - off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; + off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; if (ino == BFS_ROOT_INO) @@ -133,27 +134,26 @@ static int bfs_write_inode(struct inode * inode, int unused) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); - dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off); brelse(bh); unlock_kernel(); return 0; } -static void bfs_delete_inode(struct inode * inode) +static void bfs_delete_inode(struct inode *inode) { unsigned long ino = inode->i_ino; - struct bfs_inode * di; - struct buffer_head * bh; + struct bfs_inode *di; + struct buffer_head *bh; int block, off; - struct super_block * s = inode->i_sb; - struct bfs_sb_info * info = BFS_SB(s); - struct bfs_inode_info * bi = BFS_I(inode); + struct super_block *s = inode->i_sb; + struct bfs_sb_info *info = BFS_SB(s); + struct bfs_inode_info *bi = BFS_I(inode); dprintf("ino=%08lx\n", ino); truncate_inode_pages(&inode->i_data, 0); - if (ino < BFS_ROOT_INO || ino > info->si_lasti) { + if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) { printf("invalid ino=%08lx\n", ino); return; } @@ -162,31 +162,35 @@ static void bfs_delete_inode(struct inode * inode) inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; lock_kernel(); mark_inode_dirty(inode); - block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + + block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; bh = sb_bread(s, block); if (!bh) { - printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); + printf("Unable to read inode %s:%08lx\n", + inode->i_sb->s_id, ino); unlock_kernel(); return; } - off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; - di = (struct bfs_inode *) bh->b_data + off; + off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; + di = (struct bfs_inode *)bh->b_data + off; + memset((void *)di, 0, sizeof(struct bfs_inode)); + mark_buffer_dirty(bh); + brelse(bh); + if (bi->i_dsk_ino) { - info->si_freeb += 1 + bi->i_eblock - bi->i_sblock; + info->si_freeb += BFS_FILEBLOCKS(bi); info->si_freei++; clear_bit(ino, info->si_imap); dump_imap("delete_inode", s); } - di->i_ino = 0; - di->i_sblock = 0; - mark_buffer_dirty(bh); - brelse(bh); - /* if this was the last file, make the previous - block "last files last block" even if there is no real file there, - saves us 1 gap */ - if (info->si_lf_eblk == BFS_I(inode)->i_eblock) { - info->si_lf_eblk = BFS_I(inode)->i_sblock - 1; + /* + * If this was the last file, make the previous block + * "last block of the last file" even if there is no + * real file there, saves us 1 gap. + */ + if (info->si_lf_eblk == bi->i_eblock) { + info->si_lf_eblk = bi->i_sblock - 1; mark_buffer_dirty(info->si_sbh); } unlock_kernel(); @@ -228,7 +232,7 @@ static void bfs_write_super(struct super_block *s) unlock_kernel(); } -static struct kmem_cache * bfs_inode_cachep; +static struct kmem_cache *bfs_inode_cachep; static struct inode *bfs_alloc_inode(struct super_block *sb) { @@ -279,7 +283,7 @@ static const struct super_operations bfs_sops = { .statfs = bfs_statfs, }; -void dump_imap(const char *prefix, struct super_block * s) +void dump_imap(const char *prefix, struct super_block *s) { #ifdef DEBUG int i; @@ -287,25 +291,26 @@ void dump_imap(const char *prefix, struct super_block * s) if (!tmpbuf) return; - for (i=BFS_SB(s)->si_lasti; i>=0; i--) { - if (i > PAGE_SIZE-100) break; + for (i = BFS_SB(s)->si_lasti; i >= 0; i--) { + if (i > PAGE_SIZE - 100) break; if (test_bit(i, BFS_SB(s)->si_imap)) strcat(tmpbuf, "1"); else strcat(tmpbuf, "0"); } - printk(KERN_ERR "BFS-fs: %s: lasti=%08lx <%s>\n", prefix, BFS_SB(s)->si_lasti, tmpbuf); + printf("BFS-fs: %s: lasti=%08lx <%s>\n", + prefix, BFS_SB(s)->si_lasti, tmpbuf); free_page((unsigned long)tmpbuf); #endif } static int bfs_fill_super(struct super_block *s, void *data, int silent) { - struct buffer_head * bh; - struct bfs_super_block * bfs_sb; - struct inode * inode; + struct buffer_head *bh; + struct bfs_super_block *bfs_sb; + struct inode *inode; unsigned i, imap_len; - struct bfs_sb_info * info; + struct bfs_sb_info *info; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -329,14 +334,14 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = BFS_MAGIC; info->si_sbh = bh; - info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode) - + BFS_ROOT_INO - 1; - - imap_len = info->si_lasti/8 + 1; + info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / + sizeof(struct bfs_inode) + + BFS_ROOT_INO - 1; + imap_len = (info->si_lasti / 8) + 1; info->si_imap = kzalloc(imap_len, GFP_KERNEL); if (!info->si_imap) goto out; - for (i=0; isi_imap); s->s_op = &bfs_sops; @@ -352,16 +357,15 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) goto out; } - info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ - info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start))>>BFS_BSIZE_BITS; + info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS; + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 + - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; - info->si_lf_sblk = 0; - info->si_lf_ioff = 0; bh = NULL; - for (i=BFS_ROOT_INO; i<=info->si_lasti; i++) { + for (i = BFS_ROOT_INO; i <= info->si_lasti; i++) { struct bfs_inode *di; - int block = (i - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; unsigned long sblock, eblock; @@ -384,11 +388,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) sblock = le32_to_cpu(di->i_sblock); eblock = le32_to_cpu(di->i_eblock); - if (eblock > info->si_lf_eblk) { + if (eblock > info->si_lf_eblk) info->si_lf_eblk = eblock; - info->si_lf_sblk = sblock; - info->si_lf_ioff = BFS_INO2OFF(i); - } } brelse(bh); if (!(s->s_flags & MS_RDONLY)) { -- cgit v1.2.3 From 314de8a9e17f70243eacc80d2dd22a5d74b09fce Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 14 Nov 2007 16:59:48 -0800 Subject: Linux Kernel Markers: fix marker mutex not taken upon module load Upon module load, we must take the markers mutex. It implies that the marker mutex must be nested inside the module mutex. It implies changing the nesting order : now the marker mutex nests inside the module mutex. Make the necessary changes to reverse the order in which the mutexes are taken. Includes some cleanup from Dave Hansen . Signed-off-by: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/marker.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/kernel/marker.c b/kernel/marker.c index ccb48d9a365..5323cfaedbc 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -28,7 +28,7 @@ extern struct marker __start___markers[]; extern struct marker __stop___markers[]; /* - * module_mutex nests inside markers_mutex. Markers mutex protects the builtin + * markers_mutex nests inside module_mutex. Markers mutex protects the builtin * and module markers, the hash table and deferred_sync. */ static DEFINE_MUTEX(markers_mutex); @@ -257,7 +257,6 @@ static void disable_marker(struct marker *elem) * @refcount: number of references left to the given probe_module (out) * * Updates the probe callback corresponding to a range of markers. - * Must be called with markers_mutex held. */ void marker_update_probe_range(struct marker *begin, struct marker *end, struct module *probe_module, @@ -266,6 +265,7 @@ void marker_update_probe_range(struct marker *begin, struct marker *iter; struct marker_entry *mark_entry; + mutex_lock(&markers_mutex); for (iter = begin; iter < end; iter++) { mark_entry = get_marker(iter->name); if (mark_entry && mark_entry->refcount) { @@ -281,6 +281,7 @@ void marker_update_probe_range(struct marker *begin, disable_marker(iter); } } + mutex_unlock(&markers_mutex); } /* @@ -293,7 +294,6 @@ static void marker_update_probes(struct module *probe_module) { int refcount = 0; - mutex_lock(&markers_mutex); /* Core kernel markers */ marker_update_probe_range(__start___markers, __stop___markers, probe_module, &refcount); @@ -303,7 +303,6 @@ static void marker_update_probes(struct module *probe_module) synchronize_sched(); deferred_sync = 0; } - mutex_unlock(&markers_mutex); } /** @@ -320,7 +319,7 @@ int marker_probe_register(const char *name, const char *format, marker_probe_func *probe, void *private) { struct marker_entry *entry; - int ret = 0, need_update = 0; + int ret = 0; mutex_lock(&markers_mutex); entry = get_marker(name); @@ -335,11 +334,11 @@ int marker_probe_register(const char *name, const char *format, ret = add_marker(name, format, probe, private); if (ret) goto end; - need_update = 1; + mutex_unlock(&markers_mutex); + marker_update_probes(NULL); + return ret; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(NULL); return ret; } EXPORT_SYMBOL_GPL(marker_probe_register); @@ -355,7 +354,6 @@ void *marker_probe_unregister(const char *name) struct module *probe_module; struct marker_entry *entry; void *private; - int need_update = 0; mutex_lock(&markers_mutex); entry = get_marker(name); @@ -368,11 +366,11 @@ void *marker_probe_unregister(const char *name) probe_module = __module_text_address((unsigned long)entry->probe); private = remove_marker(name); deferred_sync = 1; - need_update = 1; + mutex_unlock(&markers_mutex); + marker_update_probes(probe_module); + return private; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(probe_module); return private; } EXPORT_SYMBOL_GPL(marker_probe_unregister); @@ -392,7 +390,6 @@ void *marker_probe_unregister_private_data(void *private) struct marker_entry *entry; int found = 0; unsigned int i; - int need_update = 0; mutex_lock(&markers_mutex); for (i = 0; i < MARKER_TABLE_SIZE; i++) { @@ -414,11 +411,11 @@ iter_end: probe_module = __module_text_address((unsigned long)entry->probe); private = remove_marker(entry->name); deferred_sync = 1; - need_update = 1; + mutex_unlock(&markers_mutex); + marker_update_probes(probe_module); + return private; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(probe_module); return private; } EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); @@ -434,7 +431,7 @@ EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); int marker_arm(const char *name) { struct marker_entry *entry; - int ret = 0, need_update = 0; + int ret = 0; mutex_lock(&markers_mutex); entry = get_marker(name); @@ -447,11 +444,9 @@ int marker_arm(const char *name) */ if (entry->refcount++) goto end; - need_update = 1; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(NULL); + marker_update_probes(NULL); return ret; } EXPORT_SYMBOL_GPL(marker_arm); @@ -467,7 +462,7 @@ EXPORT_SYMBOL_GPL(marker_arm); int marker_disarm(const char *name) { struct marker_entry *entry; - int ret = 0, need_update = 0; + int ret = 0; mutex_lock(&markers_mutex); entry = get_marker(name); @@ -486,11 +481,9 @@ int marker_disarm(const char *name) ret = -EPERM; goto end; } - need_update = 1; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(NULL); + marker_update_probes(NULL); return ret; } EXPORT_SYMBOL_GPL(marker_disarm); -- cgit v1.2.3 From 5f9468cebfdb7b809139e7682d388f9c31297936 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 14 Nov 2007 16:59:49 -0800 Subject: Linux Kernel Markers: document format string Describes the format string standard further: Use of field names before the type specifiers.. Signed-off-by: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/markers.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/markers.txt b/Documentation/markers.txt index 295a71bc301..d9f50a19fa0 100644 --- a/Documentation/markers.txt +++ b/Documentation/markers.txt @@ -35,12 +35,14 @@ In order to use the macro trace_mark, you should include linux/marker.h. And, -trace_mark(subsystem_event, "%d %s", someint, somestring); +trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring); Where : - subsystem_event is an identifier unique to your event - subsystem is the name of your subsystem. - event is the name of the event to mark. -- "%d %s" is the formatted string for the serializer. +- "myint %d mystring %s" is the formatted string for the serializer. "myint" and + "mystring" are repectively the field names associated with the first and + second parameter. - someint is an integer. - somestring is a char pointer. -- cgit v1.2.3 From cc9f2f8f68efcc73d8793a4df2c4c50196e90080 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 14 Nov 2007 16:59:50 -0800 Subject: Linux Kernel Markers: fix samples to follow format string standard Add the field names to marker example format string. Signed-off-by: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- samples/markers/marker-example.c | 3 ++- samples/markers/probe-example.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c index e787c6d16dd..05e438f8b4e 100644 --- a/samples/markers/marker-example.c +++ b/samples/markers/marker-example.c @@ -19,7 +19,8 @@ static int my_open(struct inode *inode, struct file *file) { int i; - trace_mark(subsystem_event, "%d %s", 123, "example string"); + trace_mark(subsystem_event, "integer %d string %s", 123, + "example string"); for (i = 0; i < 10; i++) trace_mark(subsystem_eventb, MARK_NOARGS); return -EPERM; diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c index 238b2e384fc..a3679753561 100644 --- a/samples/markers/probe-example.c +++ b/samples/markers/probe-example.c @@ -53,7 +53,7 @@ void probe_subsystem_eventb(const struct marker *mdata, void *private, static struct probe_data probe_array[] = { { .name = "subsystem_event", - .format = "%d %s", + .format = "integer %d string %s", .probe_func = probe_subsystem_event }, { .name = "subsystem_eventb", .format = MARK_NOARGS, -- cgit v1.2.3 From 7bb67c14fd3778504fb77da30ce11582336dfced Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 14 Nov 2007 16:59:51 -0800 Subject: I/OAT: Add support for version 2 of ioatdma device Add support for version 2 of the ioatdma device. This device handles the descriptor chain and DCA services slightly differently: - Instead of moving the dma descriptors between a busy and an idle chain, this new version uses a single circular chain so that we don't have rewrite the next_descriptor pointers as we add new requests, and the device doesn't need to re-read the last descriptor. - The new device has the DCA tags defined internally instead of needing them defined statically. Signed-off-by: Shannon Nelson Cc: "Williams, Dan J" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dma/ioat.c | 11 + drivers/dma/ioat_dca.c | 164 ++++++++++++ drivers/dma/ioat_dma.c | 578 +++++++++++++++++++++++++++++++++------- drivers/dma/ioatdma.h | 32 +-- drivers/dma/ioatdma_hw.h | 33 ++- drivers/dma/ioatdma_registers.h | 106 ++++++-- include/linux/pci_ids.h | 1 + 7 files changed, 780 insertions(+), 145 deletions(-) diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c index f204c39fb41..16e0fd8facf 100644 --- a/drivers/dma/ioat.c +++ b/drivers/dma/ioat.c @@ -39,10 +39,14 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Intel Corporation"); static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v1 platforms */ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + + /* I/OAT v2 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, { 0, } }; @@ -74,10 +78,17 @@ static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase) if (device->dma && ioat_dca_enabled) device->dca = ioat_dca_init(pdev, iobase); break; + case IOAT_VER_2_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat2_dca_init(pdev, iobase); + break; default: err = -ENODEV; break; } + if (!device->dma) + err = -ENODEV; return err; } diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c index ba985715b80..0fa8a98051a 100644 --- a/drivers/dma/ioat_dca.c +++ b/drivers/dma/ioat_dca.c @@ -261,3 +261,167 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) return dca; } + +static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat2_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu) +{ + u8 tag; + + tag = ioat_dca_get_tag(dca, cpu); + tag = (~tag) & 0x1F; + return tag; +} + +static struct dca_ops ioat2_dca_ops = { + .add_requester = ioat2_dca_add_requester, + .remove_requester = ioat2_dca_remove_requester, + .get_tag = ioat2_dca_get_tag, +}; + +static int ioat2_dca_count_dca_slots(void *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u32 tag_map; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat2_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat2_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { + csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { + pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); + for (i = 0; i < 5; i++) { + bit = (tag_map >> (4 * i)) & 0x0f; + if (bit < 8) + ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; + else + ioatdca->tag_map[i] = 0; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index 7e4a785c2df..c1c2dcc6fc2 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -36,18 +36,24 @@ #include "ioatdma_registers.h" #include "ioatdma_hw.h" -#define INITIAL_IOAT_DESC_COUNT 128 - #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) +static int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); + /* internal functions */ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); + +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); static struct ioat_desc_sw * -ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( struct ioatdma_device *device, @@ -130,6 +136,12 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) ioat_chan->device = device; ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); ioat_chan->xfercap = xfercap; + ioat_chan->desccount = 0; + if (ioat_chan->device->version != IOAT_VER_1_2) { + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE + | IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } spin_lock_init(&ioat_chan->cleanup_lock); spin_lock_init(&ioat_chan->desc_lock); INIT_LIST_HEAD(&ioat_chan->free_desc); @@ -161,13 +173,17 @@ static void ioat_set_dest(dma_addr_t addr, tx_to_ioat_desc(tx)->dst = addr; } -static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx) +static inline void __ioat1_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan); +static inline void __ioat2_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan); + +static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); struct ioat_desc_sw *first = tx_to_ioat_desc(tx); struct ioat_desc_sw *prev, *new; struct ioat_dma_descriptor *hw; - int append = 0; dma_cookie_t cookie; LIST_HEAD(new_chain); u32 copy; @@ -209,7 +225,7 @@ static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx) list_add_tail(&new->node, &new_chain); desc_count++; prev = new; - } while (len && (new = ioat_dma_get_next_descriptor(ioat_chan))); + } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; if (new->async_tx.callback) { @@ -246,20 +262,98 @@ static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx) first->async_tx.phys; __list_splice(&new_chain, ioat_chan->used_desc.prev); + ioat_chan->dmacount += desc_count; ioat_chan->pending += desc_count; - if (ioat_chan->pending >= 4) { - append = 1; - ioat_chan->pending = 0; - } + if (ioat_chan->pending >= ioat_pending_level) + __ioat1_dma_memcpy_issue_pending(ioat_chan); spin_unlock_bh(&ioat_chan->desc_lock); - if (append) - writeb(IOAT_CHANCMD_APPEND, - ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); + return cookie; +} + +static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct ioat_desc_sw *first = tx_to_ioat_desc(tx); + struct ioat_desc_sw *new; + struct ioat_dma_descriptor *hw; + dma_cookie_t cookie; + u32 copy; + size_t len; + dma_addr_t src, dst; + int orig_ack; + unsigned int desc_count = 0; + + /* src and dest and len are stored in the initial descriptor */ + len = first->len; + src = first->src; + dst = first->dst; + orig_ack = first->async_tx.ack; + new = first; + + /* ioat_chan->desc_lock is still in force in version 2 path */ + + do { + copy = min((u32) len, ioat_chan->xfercap); + + new->async_tx.ack = 1; + + hw = new->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + desc_count++; + } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); + + hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + if (new->async_tx.callback) { + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; + if (first != new) { + /* move callback into to last desc */ + new->async_tx.callback = first->async_tx.callback; + new->async_tx.callback_param + = first->async_tx.callback_param; + first->async_tx.callback = NULL; + first->async_tx.callback_param = NULL; + } + } + + new->tx_cnt = desc_count; + new->async_tx.ack = orig_ack; /* client is in control of this ack */ + + /* store the original values for use in later cleanup */ + if (new != first) { + new->src = first->src; + new->dst = first->dst; + new->len = first->len; + } + + /* cookie incr and addition to used_list must be atomic */ + cookie = ioat_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + ioat_chan->common.cookie = new->async_tx.cookie = cookie; + + ioat_chan->dmacount += desc_count; + ioat_chan->pending += desc_count; + if (ioat_chan->pending >= ioat_pending_level) + __ioat2_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); return cookie; } +/** + * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair + * @ioat_chan: the channel supplying the memory pool for the descriptors + * @flags: allocation flags + */ static struct ioat_desc_sw *ioat_dma_alloc_descriptor( struct ioat_dma_chan *ioat_chan, gfp_t flags) @@ -284,15 +378,57 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor( dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); desc_sw->async_tx.tx_set_src = ioat_set_src; desc_sw->async_tx.tx_set_dest = ioat_set_dest; - desc_sw->async_tx.tx_submit = ioat_tx_submit; + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + desc_sw->async_tx.tx_submit = ioat1_tx_submit; + break; + case IOAT_VER_2_0: + desc_sw->async_tx.tx_submit = ioat2_tx_submit; + break; + } INIT_LIST_HEAD(&desc_sw->async_tx.tx_list); + desc_sw->hw = desc; desc_sw->async_tx.phys = phys; return desc_sw; } -/* returns the actual number of allocated descriptors */ +static int ioat_initial_desc_count = 256; +module_param(ioat_initial_desc_count, int, 0644); +MODULE_PARM_DESC(ioat_initial_desc_count, + "initial descriptors per channel (default: 256)"); + +/** + * ioat2_dma_massage_chan_desc - link the descriptors into a circle + * @ioat_chan: the channel to be massaged + */ +static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *desc, *_desc; + + /* setup used_desc */ + ioat_chan->used_desc.next = ioat_chan->free_desc.next; + ioat_chan->used_desc.prev = NULL; + + /* pull free_desc out of the circle so that every node is a hw + * descriptor, but leave it pointing to the list + */ + ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; + ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; + + /* circle link the hw descriptors */ + desc = to_ioat_desc(ioat_chan->free_desc.next); + desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { + desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + } +} + +/** + * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors + * @chan: the channel to be filled out + */ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); @@ -304,7 +440,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) /* have we already been set up? */ if (!list_empty(&ioat_chan->free_desc)) - return INITIAL_IOAT_DESC_COUNT; + return ioat_chan->desccount; /* Setup register to interrupt and write completion status on error */ chanctrl = IOAT_CHANCTRL_ERR_INT_EN | @@ -320,7 +456,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) } /* Allocate descriptors */ - for (i = 0; i < INITIAL_IOAT_DESC_COUNT; i++) { + for (i = 0; i < ioat_initial_desc_count; i++) { desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); if (!desc) { dev_err(&ioat_chan->device->pdev->dev, @@ -330,7 +466,10 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) list_add_tail(&desc->node, &tmp_list); } spin_lock_bh(&ioat_chan->desc_lock); + ioat_chan->desccount = i; list_splice(&tmp_list, &ioat_chan->free_desc); + if (ioat_chan->device->version != IOAT_VER_1_2) + ioat2_dma_massage_chan_desc(ioat_chan); spin_unlock_bh(&ioat_chan->desc_lock); /* allocate a completion writeback area */ @@ -347,10 +486,14 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); tasklet_enable(&ioat_chan->cleanup_task); - ioat_dma_start_null_desc(ioat_chan); - return i; + ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ + return ioat_chan->desccount; } +/** + * ioat_dma_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ static void ioat_dma_free_chan_resources(struct dma_chan *chan) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); @@ -364,22 +507,45 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. */ - writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); mdelay(100); spin_lock_bh(&ioat_chan->desc_lock); - list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) { - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) { - list_del(&desc->node); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + list_for_each_entry_safe(desc, _desc, + &ioat_chan->used_desc, node) { + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat_chan->free_desc, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + break; + case IOAT_VER_2_0: + list_for_each_entry_safe(desc, _desc, + ioat_chan->free_desc.next, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + desc = to_ioat_desc(ioat_chan->free_desc.next); pci_pool_free(ioatdma_device->dma_pool, desc->hw, desc->async_tx.phys); kfree(desc); + INIT_LIST_HEAD(&ioat_chan->free_desc); + INIT_LIST_HEAD(&ioat_chan->used_desc); + break; } spin_unlock_bh(&ioat_chan->desc_lock); @@ -395,6 +561,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) ioat_chan->last_completion = ioat_chan->completion_addr = 0; ioat_chan->pending = 0; + ioat_chan->dmacount = 0; } /** @@ -406,7 +573,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) * has run out. */ static struct ioat_desc_sw * -ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) { struct ioat_desc_sw *new = NULL; @@ -425,7 +592,82 @@ ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) return new; } -static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy( +static struct ioat_desc_sw * +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *new = NULL; + + /* + * used.prev points to where to start processing + * used.next points to next free descriptor + * if used.prev == NULL, there are none waiting to be processed + * if used.next == used.prev.prev, there is only one free descriptor, + * and we need to use it to as a noop descriptor before + * linking in a new set of descriptors, since the device + * has probably already read the pointer to it + */ + if (ioat_chan->used_desc.prev && + ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { + + struct ioat_desc_sw *desc = NULL; + struct ioat_desc_sw *noop_desc = NULL; + int i; + + /* set up the noop descriptor */ + noop_desc = to_ioat_desc(ioat_chan->used_desc.next); + noop_desc->hw->size = 0; + noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; + noop_desc->hw->src_addr = 0; + noop_desc->hw->dst_addr = 0; + + ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; + ioat_chan->pending++; + ioat_chan->dmacount++; + + /* get a few more descriptors */ + for (i = 16; i; i--) { + desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); + BUG_ON(!desc); + list_add_tail(&desc->node, ioat_chan->used_desc.next); + + desc->hw->next + = to_ioat_desc(desc->node.next)->async_tx.phys; + to_ioat_desc(desc->node.prev)->hw->next + = desc->async_tx.phys; + ioat_chan->desccount++; + } + + ioat_chan->used_desc.next = noop_desc->node.next; + } + new = to_ioat_desc(ioat_chan->used_desc.next); + prefetch(new); + ioat_chan->used_desc.next = new->node.next; + + if (ioat_chan->used_desc.prev == NULL) + ioat_chan->used_desc.prev = &new->node; + + prefetch(new->hw); + return new; +} + +static struct ioat_desc_sw *ioat_dma_get_next_descriptor( + struct ioat_dma_chan *ioat_chan) +{ + if (!ioat_chan) + return NULL; + + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + return ioat1_dma_get_next_descriptor(ioat_chan); + break; + case IOAT_VER_2_0: + return ioat2_dma_get_next_descriptor(ioat_chan); + break; + } + return NULL; +} + +static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( struct dma_chan *chan, size_t len, int int_en) @@ -441,19 +683,62 @@ static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy( return new ? &new->async_tx : NULL; } +static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( + struct dma_chan *chan, + size_t len, + int int_en) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *new; + + spin_lock_bh(&ioat_chan->desc_lock); + new = ioat2_dma_get_next_descriptor(ioat_chan); + new->len = len; + + /* leave ioat_chan->desc_lock set in version 2 path */ + return new ? &new->async_tx : NULL; +} + + /** * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended * descriptors to hw * @chan: DMA channel handle */ -static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan) +static inline void __ioat1_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan) +{ + ioat_chan->pending = 0; + writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); +} + +static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); if (ioat_chan->pending != 0) { - ioat_chan->pending = 0; - writeb(IOAT_CHANCMD_APPEND, - ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); + spin_lock_bh(&ioat_chan->desc_lock); + __ioat1_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + } +} + +static inline void __ioat2_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan) +{ + ioat_chan->pending = 0; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); +} + +static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + + if (ioat_chan->pending != 0) { + spin_lock_bh(&ioat_chan->desc_lock); + __ioat2_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); } } @@ -465,11 +750,17 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) chan->reg_base + IOAT_CHANCTRL_OFFSET); } +/** + * ioat_dma_memcpy_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + */ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) { unsigned long phys_complete; struct ioat_desc_sw *desc, *_desc; dma_cookie_t cookie = 0; + unsigned long desc_phys; + struct ioat_desc_sw *latest_desc; prefetch(ioat_chan->completion_virt); @@ -507,56 +798,115 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) cookie = 0; spin_lock_bh(&ioat_chan->desc_lock); - list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) { - - /* - * Incoming DMA requests may use multiple descriptors, due to - * exceeding xfercap, perhaps. If so, only the last one will - * have a cookie, and require unmapping. - */ - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + list_for_each_entry_safe(desc, _desc, + &ioat_chan->used_desc, node) { /* - * yes we are unmapping both _page and _single alloc'd - * regions with unmap_page. Is this *really* that bad? + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. */ - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - if (desc->async_tx.callback) { - desc->async_tx.callback( - desc->async_tx.callback_param); - desc->async_tx.callback = NULL; + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; + + /* + * yes we are unmapping both _page and _single + * alloc'd regions with unmap_page. Is this + * *really* that bad? + */ + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + + if (desc->async_tx.callback) { + desc->async_tx.callback(desc->async_tx.callback_param); + desc->async_tx.callback = NULL; + } } - } - if (desc->async_tx.phys != phys_complete) { - /* - * a completed entry, but not the last, so cleanup - * if the client is done with the descriptor - */ - if (desc->async_tx.ack) { - list_del(&desc->node); - list_add_tail(&desc->node, - &ioat_chan->free_desc); - } else + if (desc->async_tx.phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (desc->async_tx.ack) { + list_del(&desc->node); + list_add_tail(&desc->node, + &ioat_chan->free_desc); + } else + desc->async_tx.cookie = 0; + } else { + /* + * last used desc. Do not remove, so we can + * append from it, but don't look at it next + * time, either + */ desc->async_tx.cookie = 0; - } else { - /* - * last used desc. Do not remove, so we can append from - * it, but don't look at it next time, either - */ - desc->async_tx.cookie = 0; - /* TODO check status bits? */ + /* TODO check status bits? */ + break; + } + } + break; + case IOAT_VER_2_0: + /* has some other thread has already cleaned up? */ + if (ioat_chan->used_desc.prev == NULL) break; + + /* work backwards to find latest finished desc */ + desc = to_ioat_desc(ioat_chan->used_desc.next); + latest_desc = NULL; + do { + desc = to_ioat_desc(desc->node.prev); + desc_phys = (unsigned long)desc->async_tx.phys + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; + if (desc_phys == phys_complete) { + latest_desc = desc; + break; + } + } while (&desc->node != ioat_chan->used_desc.prev); + + if (latest_desc != NULL) { + + /* work forwards to clear finished descriptors */ + for (desc = to_ioat_desc(ioat_chan->used_desc.prev); + &desc->node != latest_desc->node.next && + &desc->node != ioat_chan->used_desc.next; + desc = to_ioat_desc(desc->node.next)) { + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + + if (desc->async_tx.callback) { + desc->async_tx.callback(desc->async_tx.callback_param); + desc->async_tx.callback = NULL; + } + } + } + + /* move used.prev up beyond those that are finished */ + if (&desc->node == ioat_chan->used_desc.next) + ioat_chan->used_desc.prev = NULL; + else + ioat_chan->used_desc.prev = &desc->node; } + break; } spin_unlock_bh(&ioat_chan->desc_lock); @@ -621,8 +971,6 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, return dma_async_is_complete(cookie, last_complete, last_used); } -/* PCI API */ - static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) { struct ioat_desc_sw *desc; @@ -633,21 +981,34 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL | IOAT_DMA_DESCRIPTOR_CTL_INT_GN | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - desc->hw->next = 0; desc->hw->size = 0; desc->hw->src_addr = 0; desc->hw->dst_addr = 0; desc->async_tx.ack = 1; - - list_add_tail(&desc->node, &ioat_chan->used_desc); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + desc->hw->next = 0; + list_add_tail(&desc->node, &ioat_chan->used_desc); + + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + break; + case IOAT_VER_2_0: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + + ioat_chan->dmacount++; + __ioat2_dma_memcpy_issue_pending(ioat_chan); + break; + } spin_unlock_bh(&ioat_chan->desc_lock); - - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); } /* @@ -693,14 +1054,14 @@ static int ioat_dma_self_test(struct ioatdma_device *device) dma_chan = container_of(device->common.channels.next, struct dma_chan, device_node); - if (ioat_dma_alloc_chan_resources(dma_chan) < 1) { + if (device->common.device_alloc_chan_resources(dma_chan) < 1) { dev_err(&device->pdev->dev, "selftest cannot allocate chan resource\n"); err = -ENODEV; goto out; } - tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0); + tx = device->common.device_prep_dma_memcpy(dma_chan, IOAT_TEST_SIZE, 0); if (!tx) { dev_err(&device->pdev->dev, "Self-test prep failed, disabling\n"); @@ -710,24 +1071,25 @@ static int ioat_dma_self_test(struct ioatdma_device *device) async_tx_ack(tx); addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, - DMA_TO_DEVICE); - ioat_set_src(addr, tx, 0); + DMA_TO_DEVICE); + tx->tx_set_src(addr, tx, 0); addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, - DMA_FROM_DEVICE); - ioat_set_dest(addr, tx, 0); + DMA_FROM_DEVICE); + tx->tx_set_dest(addr, tx, 0); tx->callback = ioat_dma_test_callback; tx->callback_param = (void *)0x8086; - cookie = ioat_tx_submit(tx); + cookie = tx->tx_submit(tx); if (cookie < 0) { dev_err(&device->pdev->dev, "Self-test setup failed, disabling\n"); err = -ENODEV; goto free_resources; } - ioat_dma_memcpy_issue_pending(dma_chan); + device->common.device_issue_pending(dma_chan); msleep(1); - if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) + != DMA_SUCCESS) { dev_err(&device->pdev->dev, "Self-test copy timed out, disabling\n"); err = -ENODEV; @@ -741,7 +1103,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device) } free_resources: - ioat_dma_free_chan_resources(dma_chan); + device->common.device_free_chan_resources(dma_chan); out: kfree(src); kfree(dest); @@ -941,16 +1303,28 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, INIT_LIST_HEAD(&device->common.channels); ioat_dma_enumerate_channels(device); - dma_cap_set(DMA_MEMCPY, device->common.cap_mask); device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources; device->common.device_free_chan_resources = ioat_dma_free_chan_resources; - device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy; + device->common.dev = &pdev->dev; + + dma_cap_set(DMA_MEMCPY, device->common.cap_mask); device->common.device_is_tx_complete = ioat_dma_is_complete; - device->common.device_issue_pending = ioat_dma_memcpy_issue_pending; device->common.device_dependency_added = ioat_dma_dependency_added; - device->common.dev = &pdev->dev; + switch (device->version) { + case IOAT_VER_1_2: + device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + device->common.device_issue_pending = + ioat1_dma_memcpy_issue_pending; + break; + case IOAT_VER_2_0: + device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + device->common.device_issue_pending = + ioat2_dma_memcpy_issue_pending; + break; + } + dev_err(&device->pdev->dev, "Intel(R) I/OAT DMA Engine found," " %d channels, device version 0x%02x, driver version %s\n", diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index 5f9881e7b0e..b668234ef65 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -28,7 +28,7 @@ #include #include -#define IOAT_DMA_VERSION "1.26" +#define IOAT_DMA_VERSION "2.04" enum ioat_interrupt { none = 0, @@ -39,6 +39,8 @@ enum ioat_interrupt { }; #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 +#define IOAT_DMA_DCA_ANY_CPU ~0 + /** * struct ioatdma_device - internal representation of a IOAT device @@ -47,6 +49,9 @@ enum ioat_interrupt { * @dma_pool: for allocating DMA descriptors * @common: embedded struct dma_device * @version: version of ioatdma device + * @irq_mode: which style irq to use + * @msix_entries: irq handlers + * @idx: per channel data */ struct ioatdma_device { @@ -63,23 +68,7 @@ struct ioatdma_device { /** * struct ioat_dma_chan - internal representation of a DMA channel - * @device: - * @reg_base: - * @sw_in_use: - * @completion: - * @completion_low: - * @completion_high: - * @completed_cookie: last cookie seen completed on cleanup - * @cookie: value of last cookie given to client - * @last_completion: - * @xfercap: - * @desc_lock: - * @free_desc: - * @used_desc: - * @resource: - * @device_node: */ - struct ioat_dma_chan { void __iomem *reg_base; @@ -95,6 +84,8 @@ struct ioat_dma_chan { struct list_head used_desc; int pending; + int dmacount; + int desccount; struct ioatdma_device *device; struct dma_chan common; @@ -134,12 +125,13 @@ struct ioat_desc_sw { struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase); void ioat_dma_remove(struct ioatdma_device *device); -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, - void __iomem *iobase); +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); #else #define ioat_dma_probe(pdev, iobase) NULL #define ioat_dma_remove(device) do { } while (0) #define ioat_dca_init(pdev, iobase) NULL +#define ioat2_dca_init(pdev, iobase) NULL #endif #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h index 9e7434e1551..dd470fa91d8 100644 --- a/drivers/dma/ioatdma_hw.h +++ b/drivers/dma/ioatdma_hw.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -22,12 +22,19 @@ #define _IOAT_HW_H_ /* PCI Configuration Space Values */ -#define IOAT_PCI_VID 0x8086 -#define IOAT_PCI_DID 0x1A38 -#define IOAT_PCI_RID 0x00 -#define IOAT_PCI_SVID 0x8086 -#define IOAT_PCI_SID 0x8086 -#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_PCI_VID 0x8086 + +/* CB device ID's */ +#define IOAT_PCI_DID_5000 0x1A38 +#define IOAT_PCI_DID_CNB 0x360B +#define IOAT_PCI_DID_SCNB 0x65FF +#define IOAT_PCI_DID_SNB 0x402F + +#define IOAT_PCI_RID 0x00 +#define IOAT_PCI_SVID 0x8086 +#define IOAT_PCI_SID 0x8086 +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ struct ioat_dma_descriptor { uint32_t size; @@ -47,6 +54,16 @@ struct ioat_dma_descriptor { #define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008 #define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010 #define IOAT_DMA_DESCRIPTOR_NUL 0x00000020 -#define IOAT_DMA_DESCRIPTOR_OPCODE 0xFF000000 +#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040 +#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080 +#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100 +#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200 +#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400 + +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000 +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000 + +#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001 +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000 #endif diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h index baaab5ea146..9832d7ebd93 100644 --- a/drivers/dma/ioatdma_registers.h +++ b/drivers/dma/ioatdma_registers.h @@ -42,26 +42,25 @@ #define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */ #define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */ #define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */ -#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ +#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ #define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */ #define IOAT_VER_OFFSET 0x08 /* 8-bit */ #define IOAT_VER_MAJOR_MASK 0xF0 #define IOAT_VER_MINOR_MASK 0x0F -#define GET_IOAT_VER_MAJOR(x) ((x) & IOAT_VER_MAJOR_MASK) +#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4) #define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK) #define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ #define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ #define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */ -#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalesing Supported */ +#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ #define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 - #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ /* DMA Channel Registers */ @@ -74,25 +73,101 @@ #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 #define IOAT_CHANCTRL_INT_DISABLE 0x0001 -#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatability */ -#define IOAT_DMA_COMP_V1 0x0001 /* Compatability with DMA version 1 */ - -#define IOAT_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */ -#define IOAT_CHANSTS_OFFSET_LOW 0x04 -#define IOAT_CHANSTS_OFFSET_HIGH 0x08 -#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR 0xFFFFFFFFFFFFFFC0UL +#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ +#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ +#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */ + + +#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */ +#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */ +#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET) +#define IOAT1_CHANSTS_OFFSET_LOW 0x04 +#define IOAT2_CHANSTS_OFFSET_LOW 0x08 +#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW) +#define IOAT1_CHANSTS_OFFSET_HIGH 0x08 +#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C +#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F #define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 -#define IOAT_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ -#define IOAT_CHAINADDR_OFFSET_LOW 0x0C -#define IOAT_CHAINADDR_OFFSET_HIGH 0x10 -#define IOAT_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ + +#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */ + +#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */ +#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000 +#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */ + +/* CB DCA Memory Space Registers */ +#define IOAT_DCAOFFSET_OFFSET 0x14 +/* CB_BAR + IOAT_DCAOFFSET value */ +#define IOAT_DCA_VER_OFFSET 0x00 +#define IOAT_DCA_VER_MAJOR_MASK 0xF0 +#define IOAT_DCA_VER_MINOR_MASK 0x0F + +#define IOAT_DCA_COMP_OFFSET 0x02 +#define IOAT_DCA_COMP_V1 0x1 + +#define IOAT_FSB_CAPABILITY_OFFSET 0x04 +#define IOAT_FSB_CAPABILITY_PREFETCH 0x1 + +#define IOAT_PCI_CAPABILITY_OFFSET 0x06 +#define IOAT_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08 +#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1 + +#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A +#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1 + +#define IOAT_APICID_TAG_MAP_OFFSET 0x0C +#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F +#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0 +#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0 +#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4 +#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00 +#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8 +#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000 +#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12 +#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000 +#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16 +#define IOAT_APICID_TAG_CB2_VALID 0x8080808080 + +#define IOAT_DCA_GREQID_OFFSET 0x10 +#define IOAT_DCA_GREQID_SIZE 0x04 +#define IOAT_DCA_GREQID_MASK 0xFFFF +#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000 +#define IOAT_DCA_GREQID_VALID 0x20000000 +#define IOAT_DCA_GREQID_LASTID 0x80000000 + + + +#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ +#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ +#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET) +#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C +#define IOAT2_CHAINADDR_OFFSET_LOW 0x10 +#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW) +#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10 +#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14 +#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH) + +#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ +#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */ +#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET) #define IOAT_CHANCMD_RESET 0x20 #define IOAT_CHANCMD_RESUME 0x10 #define IOAT_CHANCMD_ABORT 0x08 @@ -124,6 +199,7 @@ #define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 #define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 #define IOAT_CHANERR_SOFT_ERR 0x4000 +#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 #define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index cd6cdb3cd7a..1ee009e8fec 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2332,6 +2332,7 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b +#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff #define PCI_DEVICE_ID_INTEL_TOLAPAI_0 0x5031 #define PCI_DEVICE_ID_INTEL_TOLAPAI_1 0x5032 -- cgit v1.2.3 From c06a018fa5362fa9ed0768bd747c0fab26bc8849 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Wed, 14 Nov 2007 16:59:54 -0800 Subject: reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file This is not a new problem in 2.6.23-git17. 2.6.22/2.6.23 is buggy in the same way. Reiserfs could accumulate dirty sub-page-size files until umount time. They cannot be synced to disk by pdflush routines or explicit `sync' commands. Only `umount' can do the trick. The direct cause is: the dirty page's PG_dirty is wrongly _cleared_. Call trace: [] cancel_dirty_page+0xd0/0xf0 [] :reiserfs:reiserfs_cut_from_item+0x660/0x710 [] :reiserfs:reiserfs_do_truncate+0x271/0x530 [] :reiserfs:reiserfs_truncate_file+0xfd/0x3b0 [] :reiserfs:reiserfs_file_release+0x1e0/0x340 [] __fput+0xcc/0x1b0 [] fput+0x16/0x20 [] filp_close+0x56/0x90 [] sys_close+0xad/0x110 [] system_call+0x7e/0x83 Fix the bug by removing the cancel_dirty_page() call. Tests show that it causes no bad behaviors on various write sizes. === for the patient === Here are more detailed demonstrations of the problem. 1) the page has both PG_dirty(D)/PAGECACHE_TAG_DIRTY(d) after being written to; and then only PAGECACHE_TAG_DIRTY(d) remains after the file is closed. ------------------------------ screen 0 ------------------------------ [T0] root /home/wfg# cat > /test/tiny [T1] hi [T2] root /home/wfg# ------------------------------ screen 1 ------------------------------ [T1] root /home/wfg# echo /test/tiny > /proc/filecache [T1] root /home/wfg# cat /proc/filecache # file /test/tiny # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback # idx len state refcnt 0 1 ___UD__Bd_ 2 [T2] root /home/wfg# cat /proc/filecache # file /test/tiny # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback # idx len state refcnt 0 1 ___U___Bd_ 2 2) note the non-zero 'cancelled_write_bytes' after /tmp/hi is copied. ------------------------------ screen 0 ------------------------------ [T0] root /home/wfg# echo hi > /tmp/hi [T1] root /home/wfg# cp /tmp/hi /dev/stdin /test [T2] hi [T3] root /home/wfg# ------------------------------ screen 1 ------------------------------ [T1] root /proc/4397# cd /proc/`pidof cp` [T1] root /proc/4713# cat io rchar: 8396 wchar: 3 syscr: 20 syscw: 1 read_bytes: 0 write_bytes: 20480 cancelled_write_bytes: 4096 [T2] root /proc/4713# cat io rchar: 8399 wchar: 6 syscr: 21 syscw: 2 read_bytes: 0 write_bytes: 24576 cancelled_write_bytes: 4096 //Question: the 'write_bytes' is a bit more than expected ;-) Tested-by: Maxim Levitsky Cc: Peter Zijlstra Cc: Jeff Mahoney Signed-off-by: Fengguang Wu Reviewed-by: Chris Mason Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/stree.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index ca41567d789..d2db2417b2b 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1458,9 +1458,6 @@ static void unmap_buffers(struct page *page, loff_t pos) } bh = next; } while (bh != head); - if (PAGE_SIZE == bh->b_size) { - cancel_dirty_page(page, PAGE_CACHE_SIZE); - } } } } -- cgit v1.2.3 From 4c06be10c790008aa2b2d19df2872ff39990b7bd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 14 Nov 2007 16:59:56 -0800 Subject: rtc: release correct region in error path The misc_register() error path always released an I/O port region, even if the region was memory-mapped (only mips uses memory-mapped RTC, as far as I can see). Signed-off-by: Bjorn Helgaas Cc: Alessandro Zummo Cc: David Brownell Acked-by: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/rtc.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index ec6b65ec69e..a162e1b3d5d 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -918,6 +918,14 @@ static const struct file_operations rtc_proc_fops = { }; #endif +static void rtc_release_region(void) +{ + if (RTC_IOMAPPED) + release_region(RTC_PORT(0), RTC_IO_EXTENT); + else + release_mem_region(RTC_PORT(0), RTC_IO_EXTENT); +} + static int __init rtc_init(void) { #ifdef CONFIG_PROC_FS @@ -992,10 +1000,7 @@ no_irq: /* Yeah right, seeing as irq 8 doesn't even hit the bus. */ rtc_has_irq = 0; printk(KERN_ERR "rtc: IRQ %d is not free.\n", RTC_IRQ); - if (RTC_IOMAPPED) - release_region(RTC_PORT(0), RTC_IO_EXTENT); - else - release_mem_region(RTC_PORT(0), RTC_IO_EXTENT); + rtc_release_region(); return -EIO; } hpet_rtc_timer_init(); @@ -1009,7 +1014,7 @@ no_irq: free_irq(RTC_IRQ, NULL); rtc_has_irq = 0; #endif - release_region(RTC_PORT(0), RTC_IO_EXTENT); + rtc_release_region(); return -ENODEV; } @@ -1091,10 +1096,7 @@ static void __exit rtc_exit (void) if (rtc_has_irq) free_irq (rtc_irq, &rtc_port); #else - if (RTC_IOMAPPED) - release_region(RTC_PORT(0), RTC_IO_EXTENT); - else - release_mem_region(RTC_PORT(0), RTC_IO_EXTENT); + rtc_release_region(); #ifdef RTC_IRQ if (rtc_has_irq) free_irq (RTC_IRQ, NULL); -- cgit v1.2.3 From 9626f1f117be21b6e4b7a1cb49814fc065dd3d2d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 14 Nov 2007 16:59:57 -0800 Subject: rtc: fall back to requesting only the ports we actually use Firmware like PNPBIOS or ACPI can report the address space consumed by the RTC. The actual space consumed may be less than the size (RTC_IO_EXTENT) assumed by the RTC driver. The PNP core doesn't request resources yet, but I'd like to make it do so. If/when it does, the RTC_IO_EXTENT request may fail, which prevents the RTC driver from loading. Since we only use the RTC index and data registers at RTC_PORT(0) and RTC_PORT(1), we can fall back to requesting just enough space for those. If the PNP core requests resources, this results in typical I/O port usage like this: 0070-0073 : 00:06 <-- PNP device 00:06 responds to 70-73 0070-0071 : rtc <-- RTC driver uses only 70-71 instead of the current: 0070-0077 : rtc <-- RTC_IO_EXTENT == 8 Signed-off-by: Bjorn Helgaas Cc: Alessandro Zummo Cc: David Brownell Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/rtc.c | 36 ++++++++++++++++++++++++++++++------ include/linux/mc146818rtc.h | 3 +++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index a162e1b3d5d..0c66b802736 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -918,12 +918,29 @@ static const struct file_operations rtc_proc_fops = { }; #endif +static resource_size_t rtc_size; + +static struct resource * __init rtc_request_region(resource_size_t size) +{ + struct resource *r; + + if (RTC_IOMAPPED) + r = request_region(RTC_PORT(0), size, "rtc"); + else + r = request_mem_region(RTC_PORT(0), size, "rtc"); + + if (r) + rtc_size = size; + + return r; +} + static void rtc_release_region(void) { if (RTC_IOMAPPED) - release_region(RTC_PORT(0), RTC_IO_EXTENT); + release_region(RTC_PORT(0), rtc_size); else - release_mem_region(RTC_PORT(0), RTC_IO_EXTENT); + release_mem_region(RTC_PORT(0), rtc_size); } static int __init rtc_init(void) @@ -976,10 +993,17 @@ found: } no_irq: #else - if (RTC_IOMAPPED) - r = request_region(RTC_PORT(0), RTC_IO_EXTENT, "rtc"); - else - r = request_mem_region(RTC_PORT(0), RTC_IO_EXTENT, "rtc"); + r = rtc_request_region(RTC_IO_EXTENT); + + /* + * If we've already requested a smaller range (for example, because + * PNPBIOS or ACPI told us how the device is configured), the request + * above might fail because it's too big. + * + * If so, request just the range we actually use. + */ + if (!r) + r = rtc_request_region(RTC_IO_EXTENT_USED); if (!r) { #ifdef RTC_IRQ rtc_has_irq = 0; diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 580b3f4956e..2f4e957af65 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -109,8 +109,11 @@ struct cmos_rtc_board_info { #ifndef ARCH_RTC_LOCATION /* Override by ? */ #define RTC_IO_EXTENT 0x8 +#define RTC_IO_EXTENT_USED 0x2 #define RTC_IOMAPPED 1 /* Default to I/O mapping. */ +#else +#define RTC_IO_EXTENT_USED RTC_IO_EXTENT #endif /* ARCH_RTC_LOCATION */ #endif /* _MC146818RTC_H */ -- cgit v1.2.3 From 57510c2f934a05c53232814761a058399b2ca282 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Nov 2007 16:59:58 -0800 Subject: i5000_edac: no need to __stringify() KBUILD_BASENAME The i5000_edac driver's PCI registration structure has the name ""i5000_edac"" (with extra set of double-quotes) which is probably not intentional. Get rid of __stringify. Signed-off-by: Darrick J. Wong Cc: Doug Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/edac/i5000_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 96f7e63e399..a1f24c42d5f 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1462,7 +1462,7 @@ MODULE_DEVICE_TABLE(pci, i5000_pci_tbl); * */ static struct pci_driver i5000_driver = { - .name = __stringify(KBUILD_BASENAME), + .name = KBUILD_BASENAME, .probe = i5000_init_one, .remove = __devexit_p(i5000_remove_one), .id_table = i5000_pci_tbl, -- cgit v1.2.3 From e02f5f52cafbea013817d81f1acc4baf50d6324b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 14 Nov 2007 16:59:59 -0800 Subject: serial: only use PNP IRQ if it's valid "Luming Yu" says: There is a "ttyS1 irq is -1" problem observed on tiger4 which cause the serial port broken. It is because that there is __no__ ACPI IRQ resource assigned for the serial port. So the value of the IRQ for the port is never changed since it got initialized to -1. If PNP supplies a valid IRQ, use it. Otherwise, leave port.irq == 0, which means "no IRQ" to the serial core. Signed-off-by: Bjorn Helgaas Cc: Yu Luming Acked-by: Matthew Wilcox Cc: Alan Cox Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/8250_pnp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index ad0755919bc..1de098e7549 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -439,7 +439,8 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) } memset(&port, 0, sizeof(struct uart_port)); - port.irq = pnp_irq(dev, 0); + if (pnp_irq_valid(dev, 0)) + port.irq = pnp_irq(dev, 0); if (pnp_port_valid(dev, 0)) { port.iobase = pnp_port_start(dev, 0); port.iotype = UPIO_PORT; -- cgit v1.2.3 From d5cd97872dca9b79c31224ca014bcea7ca01f5f1 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 17:00:00 -0800 Subject: sunrpc/xprtrdma/transport.c: fix use-after-free Fix an obvious use-after-free spotted by the Coverity checker. Signed-off-by: Adrian Bunk Cc: Trond Myklebust Cc: "J. Bruce Fields" Cc: Neil Brown Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sunrpc/xprtrdma/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index dc55cc974c9..1afeb3eb8e4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -320,9 +320,9 @@ xprt_setup_rdma(struct xprt_create *args) xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL); if (xprt->slot == NULL) { - kfree(xprt); dprintk("RPC: %s: couldn't allocate %d slots\n", __func__, xprt->max_reqs); + kfree(xprt); return ERR_PTR(-ENOMEM); } -- cgit v1.2.3 From be21f0ab0d8f10c90265066603a8d95b6037a6fa Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 17:00:01 -0800 Subject: fix mm/util.c:krealloc() Commit ef8b4520bd9f8294ffce9abd6158085bde5dc902 added one NULL check for "p" in krealloc(), but that doesn't seem to be enough since there doesn't seem to be any guarantee that memcpy(ret, NULL, 0) works (spotted by the Coverity checker). For making it clearer what happens this patch also removes the pointless min(). Signed-off-by: Adrian Bunk Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index 5f64026cbb4..8f18683825b 100644 --- a/mm/util.c +++ b/mm/util.c @@ -95,8 +95,8 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) return (void *)p; ret = kmalloc_track_caller(new_size, flags); - if (ret) { - memcpy(ret, p, min(new_size, ks)); + if (ret && p) { + memcpy(ret, p, ks); kfree(p); } return ret; -- cgit v1.2.3 From 8744969a819de4ee5158f4cdb30104601cc015d4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 17:00:02 -0800 Subject: fuse_file_alloc(): fix NULL dereferences Fix obvious NULL dereferences spotted by the Coverity checker. Signed-off-by: Adrian Bunk Acked-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0fcdba9d47c..535b3739900 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -55,9 +55,10 @@ struct fuse_file *fuse_file_alloc(void) if (!ff->reserved_req) { kfree(ff); ff = NULL; + } else { + INIT_LIST_HEAD(&ff->write_entry); + atomic_set(&ff->count, 0); } - INIT_LIST_HEAD(&ff->write_entry); - atomic_set(&ff->count, 0); } return ff; } -- cgit v1.2.3 From 822bd5aa2b8e8fa1d328f03bf5b9c75701481bf0 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 14 Nov 2007 17:00:04 -0800 Subject: tle62x0 driver stops ignoring read errors The tle62x0 driver was ignoring all read errors. This patch makes it pass such errors up the stack, instead of returning bogus data. Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/tle62x0.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/tle62x0.c b/drivers/spi/tle62x0.c index 6da58ca48b3..455991fbe28 100644 --- a/drivers/spi/tle62x0.c +++ b/drivers/spi/tle62x0.c @@ -107,8 +107,11 @@ static ssize_t tle62x0_status_show(struct device *dev, mutex_lock(&st->lock); ret = tle62x0_read(st); - dev_dbg(dev, "tle62x0_read() returned %d\n", ret); + if (ret < 0) { + mutex_unlock(&st->lock); + return ret; + } for (ptr = 0; ptr < (st->nr_gpio * 2)/8; ptr += 1) { fault <<= 8; -- cgit v1.2.3 From 5d0360ee96a5ef953dbea45873c2a8c87e77d59b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 14 Nov 2007 17:00:05 -0800 Subject: rd: fix data corruption on memory pressure We have seen ramdisk based install systems, where some pages of mapped libraries and programs were suddendly zeroed under memory pressure. This should not happen, as the ramdisk avoids freeing its pages by keeping them dirty all the time. It turns out that there is a case, where the VM makes a ramdisk page clean, without telling the ramdisk driver. On memory pressure shrink_zone runs and it starts to run shrink_active_list. There is a check for buffer_heads_over_limit, and if true, pagevec_strip is called. pagevec_strip calls try_to_release_page. If the mapping has no releasepage callback, try_to_free_buffers is called. try_to_free_buffers has now a special logic for some file systems to make a dirty page clean, if all buffers are clean. Thats what happened in our test case. The simplest solution is to provide a noop-releasepage callback for the ramdisk driver. This avoids try_to_free_buffers for ramdisk pages. Signed-off-by: Christian Borntraeger Acked-by: Nick Piggin Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/rd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 47f8ac6cce5..82f4eecc869 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -189,6 +189,18 @@ static int ramdisk_set_page_dirty(struct page *page) return 0; } +/* + * releasepage is called by pagevec_strip/try_to_release_page if + * buffers_heads_over_limit is true. Without a releasepage function + * try_to_free_buffers is called instead. That can unset the dirty + * bit of our ram disk pages, which will be eventually freed, even + * if the page is still in use. + */ +static int ramdisk_releasepage(struct page *page, gfp_t dummy) +{ + return 0; +} + static const struct address_space_operations ramdisk_aops = { .readpage = ramdisk_readpage, .prepare_write = ramdisk_prepare_write, @@ -196,6 +208,7 @@ static const struct address_space_operations ramdisk_aops = { .writepage = ramdisk_writepage, .set_page_dirty = ramdisk_set_page_dirty, .writepages = ramdisk_writepages, + .releasepage = ramdisk_releasepage, }; static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, -- cgit v1.2.3 From 5c6ff79d0908df0d281c05b5b693eaba55b06e0f Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Wed, 14 Nov 2007 17:00:06 -0800 Subject: cris gpio: undo locks before returning Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/drivers/gpio.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/cris/arch-v10/drivers/gpio.c b/arch/cris/arch-v10/drivers/gpio.c index f389ed6998f..0d347a70583 100644 --- a/arch/cris/arch-v10/drivers/gpio.c +++ b/arch/cris/arch-v10/drivers/gpio.c @@ -297,8 +297,10 @@ gpio_poll(struct file *file, data = *R_PORT_PB_DATA; else if (priv->minor == GPIO_MINOR_G) data = *R_PORT_G_DATA; - else + else { + spin_unlock(&gpio_lock); return 0; + } if ((data & priv->highalarm) || (~data & priv->lowalarm)) { @@ -381,18 +383,21 @@ static ssize_t gpio_write(struct file * file, const char * buf, size_t count, ssize_t retval = count; if (priv->minor !=GPIO_MINOR_A && priv->minor != GPIO_MINOR_B) { - return -EFAULT; + retval = -EFAULT; + goto out; } if (!access_ok(VERIFY_READ, buf, count)) { - return -EFAULT; + retval = -EFAULT; + goto out; } clk_mask = priv->clk_mask; data_mask = priv->data_mask; /* It must have been configured using the IO_CFG_WRITE_MODE */ /* Perhaps a better error code? */ if (clk_mask == 0 || data_mask == 0) { - return -EPERM; + retval = -EPERM; + goto out; } write_msb = priv->write_msb; D(printk("gpio_write: %lu to data 0x%02X clk 0x%02X msb: %i\n",count, data_mask, clk_mask, write_msb)); @@ -425,6 +430,7 @@ static ssize_t gpio_write(struct file * file, const char * buf, size_t count, } } } +out: spin_unlock(&gpio_lock); return retval; } @@ -506,6 +512,7 @@ gpio_release(struct inode *inode, struct file *filp) while (p) { if (p->highalarm | p->lowalarm) { gpio_some_alarms = 1; + spin_unlock(&gpio_lock); return 0; } p = p->next; -- cgit v1.2.3 From c0f2a9d75aed1a4be40c3975b94fd39066bd11bb Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Wed, 14 Nov 2007 17:00:06 -0800 Subject: mips: undo locking on error path returns [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/irixsig.c | 1 + arch/mips/vr41xx/common/icu.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c index 33506ff2591..5b10ac133ec 100644 --- a/arch/mips/kernel/irixsig.c +++ b/arch/mips/kernel/irixsig.c @@ -430,6 +430,7 @@ asmlinkage int irix_sigprocmask(int how, irix_sigset_t __user *new, break; default: + spin_unlock_irq(¤t->sighand->siglock); return -EINVAL; } recalc_sigpending(); diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 1899601e586..3f23d9fda66 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -525,6 +525,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) intassign1 |= (uint16_t)assign << 9; break; default: + spin_unlock_irq(&desc->lock); return -EINVAL; } @@ -592,6 +593,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) intassign3 |= (uint16_t)assign << 12; break; default: + spin_unlock_irq(&desc->lock); return -EINVAL; } -- cgit v1.2.3 From 9fcc2d15b14894aa53e5e8b7fd5d6e3ca558e5df Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 14 Nov 2007 17:00:07 -0800 Subject: proc: simplify and correct proc_flush_task Currently we special case when we have only the initial pid namespace. Unfortunately in doing so the copied case for the other namespaces was broken so we don't properly flush the thread directories :( So this patch removes the unnecessary special case (removing a usage of proc_mnt) and corrects the flushing of the thread directories. Signed-off-by: Eric W. Biederman Cc: Al Viro Cc: Pavel Emelyanov Cc: Sukadev Bhattiprolu Cc: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index aeaf0d0f2f5..a17c2685907 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2328,21 +2328,18 @@ out: void proc_flush_task(struct task_struct *task) { - int i, leader; - struct pid *pid, *tgid; + int i; + struct pid *pid, *tgid = NULL; struct upid *upid; - leader = thread_group_leader(task); - proc_flush_task_mnt(proc_mnt, task->pid, leader ? task->tgid : 0); pid = task_pid(task); - if (pid->level == 0) - return; + if (thread_group_leader(task)) + tgid = task_tgid(task); - tgid = task_tgid(task); - for (i = 1; i <= pid->level; i++) { + for (i = 0; i <= pid->level; i++) { upid = &pid->numbers[i]; proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, - leader ? 0 : tgid->numbers[i].nr); + tgid ? tgid->numbers[i].nr : 0); } upid = &pid->numbers[pid->level]; -- cgit v1.2.3 From 22800a2830ec07e7cc5c837999890ac47cc7f5de Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 14 Nov 2007 17:00:08 -0800 Subject: fix param_sysfs_builtin name length check Commit faf8c714f4508207a9c81cc94dafc76ed6680b44 caused a regression: parameter names longer than MAX_KBUILD_MODNAME will now be rejected, although we just need to keep the module name part that short. This patch restores the old behaviour while still avoiding that memchr is called with its length parameter larger than the total string length. Signed-off-by: Jan Kiszka Cc: Dave Young Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 16f269e9ddc..2a4c51487e7 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -592,19 +592,16 @@ static void __init param_sysfs_builtin(void) for (i=0; i < __stop___param - __start___param; i++) { char *dot; - size_t kplen; + size_t max_name_len; kp = &__start___param[i]; - kplen = strlen(kp->name); + max_name_len = + min_t(size_t, MAX_KBUILD_MODNAME, strlen(kp->name)); - /* We do not handle args without periods. */ - if (kplen > MAX_KBUILD_MODNAME) { - DEBUGP("kernel parameter name is too long: %s\n", kp->name); - continue; - } - dot = memchr(kp->name, '.', kplen); + dot = memchr(kp->name, '.', max_name_len); if (!dot) { - DEBUGP("couldn't find period in %s\n", kp->name); + DEBUGP("couldn't find period in first %d characters " + "of %s\n", MAX_KBUILD_MODNAME, kp->name); continue; } name_len = dot - kp->name; -- cgit v1.2.3 From cb51f973bce7aef46452b0c6faea8f791885f5b8 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 14 Nov 2007 17:00:10 -0800 Subject: mark sys_open/sys_read exports unused sys_open / sys_read were used in the early 1.2 days to load firmware from disk inside drivers. Since 2.0 or so this was deprecated behavior, but several drivers still were using this. Since a few years we have a request_firmware() API that implements this in a nice, consistent way. Only some old ISA sound drivers (pre-ALSA) still straggled along for some time.... however with commit c2b1239a9f22f19c53543b460b24507d0e21ea0c the last user is now gone. This is a good thing, since using sys_open / sys_read etc for firmware is a very buggy to dangerous thing to do; these operations put an fd in the process file descriptor table.... which then can be tampered with from other threads for example. For those who don't want the firmware loader, filp_open()/vfs_read are the better APIs to use, without this security issue. The patch below marks sys_open and sys_read unused now that they're really not used anymore, and for deletion in the 2.6.25 timeframe. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/open.c | 2 +- fs/read_write.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/open.c b/fs/open.c index 3b69c53e183..4932b4d1da0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1061,7 +1061,7 @@ asmlinkage long sys_open(const char __user *filename, int flags, int mode) prevent_tail_call(ret); return ret; } -EXPORT_SYMBOL_GPL(sys_open); +EXPORT_UNUSED_SYMBOL_GPL(sys_open); /* To be deleted for 2.6.25 */ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, int mode) diff --git a/fs/read_write.c b/fs/read_write.c index 124693e8d3f..ea1f94cc722 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -370,7 +370,7 @@ asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) return ret; } -EXPORT_SYMBOL_GPL(sys_read); +EXPORT_UNUSED_SYMBOL_GPL(sys_read); /* to be deleted for 2.6.25 */ asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) { -- cgit v1.2.3 From 579d6d93ca531fba3e29ddf39fefe5184012068b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 14 Nov 2007 17:00:11 -0800 Subject: gbefb: fix section mismatch warnings Make 'default_mode' and 'default_var' be __initdata. Fixes these section warnings: WARNING: vmlinux.o(.data+0x128e0): Section mismatch: reference to .init.data:default_mode_CRT (between 'default_mode' and 'default_var') WARNING: vmlinux.o(.data+0x128e4): Section mismatch: reference to .init.data:default_var_CRT (between 'default_var' and 'dev_attr_size') Signed-off-by: Randy Dunlap Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/gbefb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index b9b572b293d..2e552d5bbb5 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -183,8 +183,8 @@ static struct fb_videomode default_mode_LCD __initdata = { .vmode = FB_VMODE_NONINTERLACED, }; -struct fb_videomode *default_mode = &default_mode_CRT; -struct fb_var_screeninfo *default_var = &default_var_CRT; +struct fb_videomode *default_mode __initdata = &default_mode_CRT; +struct fb_var_screeninfo *default_var __initdata = &default_var_CRT; static int flat_panel_enabled = 0; -- cgit v1.2.3 From 42614fcde7bfdcbe43a7b17035c167dfebc354dd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 14 Nov 2007 17:00:12 -0800 Subject: vmstat: fix section mismatch warning Mark start_cpu_timer() as __cpuinit instead of __devinit. Fixes this section warning: WARNING: vmlinux.o(.text+0x60e53): Section mismatch: reference to .init.text:start_cpu_timer (between 'vmstat_cpuup_callback' and 'vmstat_show') Signed-off-by: Randy Dunlap Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 4651bf153f3..e8d846f5777 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -803,7 +803,7 @@ static void vmstat_update(struct work_struct *w) sysctl_stat_interval); } -static void __devinit start_cpu_timer(int cpu) +static void __cpuinit start_cpu_timer(int cpu) { struct delayed_work *vmstat_work = &per_cpu(vmstat_work, cpu); -- cgit v1.2.3 From 57d5f66b86079efac5c9a7843cce2a9bcbe58fb8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 14 Nov 2007 17:00:13 -0800 Subject: pidns: Place under CONFIG_EXPERIMENTAL This is my trivial patch to swat innumerable little bugs with a single blow. After some intensive review (my apologies for not having gotten to this sooner) what we have looks like a good base to build on with the current pid namespace code but it is not complete, and it is still much to simple to find issues where the kernel does the wrong thing outside of the initial pid namespace. Until the dust settles and we are certain we have the ABI and the implementation is as correct as humanly possible let's keep process ID namespaces behind CONFIG_EXPERIMENTAL. Allowing us the option of fixing any ABI or other bugs we find as long as they are minor. Allowing users of the kernel to avoid those bugs simply by ensuring their kernel does not have support for multiple pid namespaces. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Eric W. Biederman Cc: Cedric Le Goater Cc: Adrian Bunk Cc: Jeremy Fitzhardinge Cc: Kir Kolyshkin Cc: Kirill Korotaev Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 23 +++++++++++++++++++++++ init/Kconfig | 12 ++++++++++++ kernel/pid.c | 2 ++ 3 files changed, 37 insertions(+) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 0135c76c76c..1689e28483e 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -29,6 +29,7 @@ struct pid_namespace { extern struct pid_namespace init_pid_ns; +#ifdef CONFIG_PID_NS static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) @@ -45,6 +46,28 @@ static inline void put_pid_ns(struct pid_namespace *ns) kref_put(&ns->kref, free_pid_ns); } +#else /* !CONFIG_PID_NS */ +#include + +static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) +{ + return ns; +} + +static inline struct pid_namespace * +copy_pid_ns(unsigned long flags, struct pid_namespace *ns) +{ + if (flags & CLONE_NEWPID) + ns = ERR_PTR(-EINVAL); + return ns; +} + +static inline void put_pid_ns(struct pid_namespace *ns) +{ +} + +#endif /* CONFIG_PID_NS */ + static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) { return tsk->nsproxy->pid_ns; diff --git a/init/Kconfig b/init/Kconfig index 5b92e3aa136..c5b354b1409 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -215,6 +215,18 @@ config USER_NS vservers, to use user namespaces to provide different user info for different servers. If unsure, say N. +config PID_NS + bool "PID Namespaces (EXPERIMENTAL)" + default n + depends on EXPERIMENTAL + help + Suport process id namespaces. This allows having multiple + process with the same pid as long as they are in different + pid namespaces. This is a building block of containers. + + Unless you want to work with an experimental feature + say N here. + config AUDIT bool "Auditing support" depends on NET diff --git a/kernel/pid.c b/kernel/pid.c index d1db36b9467..f815455431b 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -537,6 +537,7 @@ err_alloc: return NULL; } +#ifdef CONFIG_PID_NS static struct pid_namespace *create_pid_namespace(int level) { struct pid_namespace *ns; @@ -621,6 +622,7 @@ void free_pid_ns(struct kref *kref) if (parent != NULL) put_pid_ns(parent); } +#endif /* CONFIG_PID_NS */ void zap_pid_ns_processes(struct pid_namespace *pid_ns) { -- cgit v1.2.3 From c642b8391cf8efc3622cc97329a0f46e7cbb70b8 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Wed, 14 Nov 2007 17:00:15 -0800 Subject: __do_IRQ does not check IRQ_DISABLED when IRQ_PER_CPU is set In __do_IRQ(), the normal case is that IRQ_DISABLED is checked and if set the handler (handle_IRQ_event()) is not called. Earlier in __do_IRQ(), if IRQ_PER_CPU is set the code does not check IRQ_DISABLED and calls the handler even though IRQ_DISABLED is set. This behavior seems unintentional. One user encountering this behavior is the CPE handler (in arch/ia64/kernel/mca.c). When the CPE handler encounters too many CPEs (such as a solid single bit error), it sets up a polling timer and disables the CPE interrupt (to avoid excessive overhead logging the stream of single bit errors). disable_irq_nosync() is called which sets IRQ_DISABLED. The IRQ_PER_CPU flag was previously set (in ia64_mca_late_init()). The net result is the CPE handler gets called even though it is marked disabled. If the behavior of not checking IRQ_DISABLED when IRQ_PER_CPU is set is intentional, it would be worthy of a comment describing the intended behavior. disable_irq_nosync() does call chip->disable() to provide a chipset specifiec interface for disabling the interrupt, which avoids this issue when used. Signed-off-by: Russ Anderson Cc: "Luck, Tony" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/irq/handle.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index e391cbb1f56..dc335ad2752 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -178,9 +178,11 @@ fastcall unsigned int __do_IRQ(unsigned int irq) */ if (desc->chip->ack) desc->chip->ack(irq); - action_ret = handle_IRQ_event(irq, desc->action); - if (!noirqdebug) - note_interrupt(irq, desc, action_ret); + if (likely(!(desc->status & IRQ_DISABLED))) { + action_ret = handle_IRQ_event(irq, desc->action); + if (!noirqdebug) + note_interrupt(irq, desc, action_ret); + } desc->chip->end(irq); return 1; } -- cgit v1.2.3 From 60a0d23386eab0559ad32ae50b200cc58545f327 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Nov 2007 17:00:16 -0800 Subject: hibernate: fix lockdep report Lockdep reports a circular locking dependency in the hibernate code because - during system boot hibernate code (from an initcall) locks pm_mutex and then a sysfs buffer mutex via name_to_dev_t - during regular operation hibernate code locks pm_mutex under a sysfs buffer mutex because it's called from sysfs methods. The deadlock can never happen because during initcall invocation nothing can write to sysfs yet. This removes the lockdep report by marking the initcall locking as being in a different class. Signed-off-by: Johannes Berg Cc: "Rafael J. Wysocki" Cc: Alan Stern Acked-by: Peter Zijlstra Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 8b15f777010..05b64790fe8 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -456,7 +456,17 @@ static int software_resume(void) int error; unsigned int flags; - mutex_lock(&pm_mutex); + /* + * name_to_dev_t() below takes a sysfs buffer mutex when sysfs + * is configured into the kernel. Since the regular hibernate + * trigger path is via sysfs which takes a buffer mutex before + * calling hibernate functions (which take pm_mutex) this can + * cause lockdep to complain about a possible ABBA deadlock + * which cannot happen since we're in the boot code here and + * sysfs can't be invoked yet. Therefore, we use a subclass + * here to avoid lockdep complaining. + */ + mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); if (!swsusp_resume_device) { if (!strlen(resume_file)) { mutex_unlock(&pm_mutex); -- cgit v1.2.3 From dbaf4c024a657175f43b5091c4fab8b9f0e17078 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 14 Nov 2007 17:00:18 -0800 Subject: smbfs: fix debug builds Fix some warnings with SMBFS_DEBUG_* builds. This patch makes it so that builds with -Werror don't fail. Signed-off-by: Jeff Layton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/smbfs/file.c | 7 ++++--- fs/smbfs/inode.c | 2 +- fs/smbfs/proc.c | 2 +- fs/smbfs/smbiod.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index f5d14cebc75..efbe29af3d7 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -234,7 +234,7 @@ smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov, VERBOSE("before read, size=%ld, flags=%x, atime=%ld\n", (long)dentry->d_inode->i_size, - dentry->d_inode->i_flags, dentry->d_inode->i_atime); + dentry->d_inode->i_flags, dentry->d_inode->i_atime.tv_sec); status = generic_file_aio_read(iocb, iov, nr_segs, pos); out: @@ -269,7 +269,7 @@ smb_file_splice_read(struct file *file, loff_t *ppos, struct dentry *dentry = file->f_path.dentry; ssize_t status; - VERBOSE("file %s/%s, pos=%Ld, count=%d\n", + VERBOSE("file %s/%s, pos=%Ld, count=%lu\n", DENTRY_PATH(dentry), *ppos, count); status = smb_revalidate_inode(dentry); @@ -363,7 +363,8 @@ smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov, result = generic_file_aio_write(iocb, iov, nr_segs, pos); VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n", (long) file->f_pos, (long) dentry->d_inode->i_size, - dentry->d_inode->i_mtime, dentry->d_inode->i_atime); + dentry->d_inode->i_mtime.tv_sec, + dentry->d_inode->i_atime.tv_sec); } out: return result; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index ab517755ece..9416ead0c7a 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -536,7 +536,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) /* Allocate the global temp buffer and some superblock helper structs */ /* FIXME: move these to the smb_sb_info struct */ - VERBOSE("alloc chunk = %d\n", sizeof(struct smb_ops) + + VERBOSE("alloc chunk = %lu\n", sizeof(struct smb_ops) + sizeof(struct smb_mount_data_kernel)); mem = kmalloc(sizeof(struct smb_ops) + sizeof(struct smb_mount_data_kernel), GFP_KERNEL); diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index feac4605061..d517a27b7f4 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -2593,7 +2593,7 @@ smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry, fattr->f_mtime.tv_sec = date_dos2unix(server, date, time); fattr->f_mtime.tv_nsec = 0; VERBOSE("name=%s, date=%x, time=%x, mtime=%ld\n", - mask, date, time, fattr->f_mtime); + mask, date, time, fattr->f_mtime.tv_sec); fattr->f_size = DVAL(req->rq_data, 12); /* ULONG allocation size */ fattr->attr = WVAL(req->rq_data, 20); diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 283c5720c9d..fae8e85af0e 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -227,7 +227,7 @@ int smbiod_retry(struct smb_sb_info *server) printk(KERN_ERR "smb_retry: signal failed [%d]\n", result); goto out; } - VERBOSE("signalled pid %d\n", pid); + VERBOSE("signalled pid %d\n", pid_nr(pid)); /* FIXME: The retried requests should perhaps get a "time boost". */ -- cgit v1.2.3 From 7c06a8dc64a2d1884bd19b4c6353d9267ae4e3e1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Nov 2007 17:00:19 -0800 Subject: Fix 64KB blocksize in ext3 directories With 64KB blocksize, a directory entry can have size 64KB which does not fit into 16 bits we have for entry lenght. So we store 0xffff instead and convert value when read from / written to disk. The patch also converts some places to use ext3_next_entry() when we are changing them anyway. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/dir.c | 10 +++--- fs/ext3/namei.c | 92 ++++++++++++++++++++++++------------------------- include/linux/ext3_fs.h | 20 +++++++++++ 3 files changed, 70 insertions(+), 52 deletions(-) diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index c8e4ee3af1d..8ca3bfd7242 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -67,7 +67,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir, unsigned long offset) { const char * error_msg = NULL; - const int rlen = le16_to_cpu(de->rec_len); + const int rlen = ext3_rec_len_from_disk(de->rec_len); if (rlen < EXT3_DIR_REC_LEN(1)) error_msg = "rec_len is smaller than minimal"; @@ -173,10 +173,10 @@ revalidate: * least that it is non-zero. A * failure will be detected in the * dirent test below. */ - if (le16_to_cpu(de->rec_len) < + if (ext3_rec_len_from_disk(de->rec_len) < EXT3_DIR_REC_LEN(1)) break; - i += le16_to_cpu(de->rec_len); + i += ext3_rec_len_from_disk(de->rec_len); } offset = i; filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) @@ -197,7 +197,7 @@ revalidate: ret = stored; goto out; } - offset += le16_to_cpu(de->rec_len); + offset += ext3_rec_len_from_disk(de->rec_len); if (le32_to_cpu(de->inode)) { /* We might block in the next section * if the data destination is @@ -219,7 +219,7 @@ revalidate: goto revalidate; stored ++; } - filp->f_pos += le16_to_cpu(de->rec_len); + filp->f_pos += ext3_rec_len_from_disk(de->rec_len); } offset = 0; brelse (bh); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index ec8170adac5..4ab6f76e63d 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -176,6 +176,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); +/* + * p is at least 6 bytes before the end of page + */ +static inline struct ext3_dir_entry_2 * +ext3_next_entry(struct ext3_dir_entry_2 *p) +{ + return (struct ext3_dir_entry_2 *)((char *)p + + ext3_rec_len_from_disk(p->rec_len)); +} + /* * Future: use high four bits of block for coalesce-on-delete flags * Mask them off for now. @@ -280,7 +290,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent space += EXT3_DIR_REC_LEN(de->name_len); names++; } - de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + de = ext3_next_entry(de); } printk("(%i)\n", names); return (struct stats) { names, space, 1 }; @@ -546,14 +556,6 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash, } -/* - * p is at least 6 bytes before the end of page - */ -static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -{ - return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -} - /* * This function fills a red-black tree with information from a * directory block. It returns the number directory entries loaded @@ -720,7 +722,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, cond_resched(); } /* XXX: do we need to check rec_len == 0 case? -Chris */ - de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + de = ext3_next_entry(de); } return count; } @@ -822,7 +824,7 @@ static inline int search_dirblock(struct buffer_head * bh, return 1; } /* prevent looping on a bad block */ - de_len = le16_to_cpu(de->rec_len); + de_len = ext3_rec_len_from_disk(de->rec_len); if (de_len <= 0) return -1; offset += de_len; @@ -1130,7 +1132,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) rec_len = EXT3_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); ((struct ext3_dir_entry_2 *) to)->rec_len = - cpu_to_le16(rec_len); + ext3_rec_len_to_disk(rec_len); de->inode = 0; map++; to += rec_len; @@ -1149,13 +1151,12 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) prev = to = de; while ((char*)de < base + size) { - next = (struct ext3_dir_entry_2 *) ((char *) de + - le16_to_cpu(de->rec_len)); + next = ext3_next_entry(de); if (de->inode && de->name_len) { rec_len = EXT3_DIR_REC_LEN(de->name_len); if (de > to) memmove(to, de, rec_len); - to->rec_len = cpu_to_le16(rec_len); + to->rec_len = ext3_rec_len_to_disk(rec_len); prev = to; to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); } @@ -1229,8 +1230,8 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split); de = dx_pack_dirents(data1,blocksize); - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); - de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); + de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de); + de2->rec_len = ext3_rec_len_to_disk(data2 + blocksize - (char *) de2); dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); @@ -1300,7 +1301,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, return -EEXIST; } nlen = EXT3_DIR_REC_LEN(de->name_len); - rlen = le16_to_cpu(de->rec_len); + rlen = ext3_rec_len_from_disk(de->rec_len); if ((de->inode? rlen - nlen: rlen) >= reclen) break; de = (struct ext3_dir_entry_2 *)((char *)de + rlen); @@ -1319,11 +1320,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, /* By now the buffer is marked for journaling */ nlen = EXT3_DIR_REC_LEN(de->name_len); - rlen = le16_to_cpu(de->rec_len); + rlen = ext3_rec_len_from_disk(de->rec_len); if (de->inode) { struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); - de1->rec_len = cpu_to_le16(rlen - nlen); - de->rec_len = cpu_to_le16(nlen); + de1->rec_len = ext3_rec_len_to_disk(rlen - nlen); + de->rec_len = ext3_rec_len_to_disk(nlen); de = de1; } de->file_type = EXT3_FT_UNKNOWN; @@ -1400,17 +1401,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, /* The 0th block becomes the root, move the dirents out */ fde = &root->dotdot; - de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); + de = (struct ext3_dir_entry_2 *)((char *)fde + + ext3_rec_len_from_disk(fde->rec_len)); len = ((char *) root) + blocksize - (char *) de; memcpy (data1, de, len); de = (struct ext3_dir_entry_2 *) data1; top = data1 + len; - while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) + while ((char *)(de2 = ext3_next_entry(de)) < top) de = de2; - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); + de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de); /* Initialize the root; the dot dirents already exist */ de = (struct ext3_dir_entry_2 *) (&root->dotdot); - de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); + de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2)); memset (&root->info, 0, sizeof(root->info)); root->info.info_length = sizeof(root->info); root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; @@ -1490,7 +1492,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, return retval; de = (struct ext3_dir_entry_2 *) bh->b_data; de->inode = 0; - de->rec_len = cpu_to_le16(blocksize); + de->rec_len = ext3_rec_len_to_disk(blocksize); return add_dirent_to_buf(handle, dentry, inode, de, bh); } @@ -1553,7 +1555,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, goto cleanup; node2 = (struct dx_node *)(bh2->b_data); entries2 = node2->entries; - node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); + node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize); node2->fake.inode = 0; BUFFER_TRACE(frame->bh, "get_write_access"); err = ext3_journal_get_write_access(handle, frame->bh); @@ -1651,9 +1653,9 @@ static int ext3_delete_entry (handle_t *handle, BUFFER_TRACE(bh, "get_write_access"); ext3_journal_get_write_access(handle, bh); if (pde) - pde->rec_len = - cpu_to_le16(le16_to_cpu(pde->rec_len) + - le16_to_cpu(de->rec_len)); + pde->rec_len = ext3_rec_len_to_disk( + ext3_rec_len_from_disk(pde->rec_len) + + ext3_rec_len_from_disk(de->rec_len)); else de->inode = 0; dir->i_version++; @@ -1661,10 +1663,9 @@ static int ext3_delete_entry (handle_t *handle, ext3_journal_dirty_metadata(handle, bh); return 0; } - i += le16_to_cpu(de->rec_len); + i += ext3_rec_len_from_disk(de->rec_len); pde = de; - de = (struct ext3_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); + de = ext3_next_entry(de); } return -ENOENT; } @@ -1798,13 +1799,13 @@ retry: de = (struct ext3_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; - de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len)); + de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len)); strcpy (de->name, "."); ext3_set_de_type(dir->i_sb, de, S_IFDIR); - de = (struct ext3_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); + de = ext3_next_entry(de); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1)); + de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize - + EXT3_DIR_REC_LEN(1)); de->name_len = 2; strcpy (de->name, ".."); ext3_set_de_type(dir->i_sb, de, S_IFDIR); @@ -1856,8 +1857,7 @@ static int empty_dir (struct inode * inode) return 1; } de = (struct ext3_dir_entry_2 *) bh->b_data; - de1 = (struct ext3_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); + de1 = ext3_next_entry(de); if (le32_to_cpu(de->inode) != inode->i_ino || !le32_to_cpu(de1->inode) || strcmp (".", de->name) || @@ -1868,9 +1868,9 @@ static int empty_dir (struct inode * inode) brelse (bh); return 1; } - offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); - de = (struct ext3_dir_entry_2 *) - ((char *) de1 + le16_to_cpu(de1->rec_len)); + offset = ext3_rec_len_from_disk(de->rec_len) + + ext3_rec_len_from_disk(de1->rec_len); + de = ext3_next_entry(de1); while (offset < inode->i_size ) { if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { @@ -1899,9 +1899,8 @@ static int empty_dir (struct inode * inode) brelse (bh); return 0; } - offset += le16_to_cpu(de->rec_len); - de = (struct ext3_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); + offset += ext3_rec_len_from_disk(de->rec_len); + de = ext3_next_entry(de); } brelse (bh); return 1; @@ -2255,8 +2254,7 @@ retry: } #define PARENT_INO(buffer) \ - ((struct ext3_dir_entry_2 *) ((char *) buffer + \ - le16_to_cpu(((struct ext3_dir_entry_2 *) buffer)->rec_len)))->inode + (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode) /* * Anybody can rename anything with this: the permission checks are left to the diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 64134456ed8..241c01cb92b 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -656,6 +656,26 @@ struct ext3_dir_entry_2 { #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ ~EXT3_DIR_ROUND) +#define EXT3_MAX_REC_LEN ((1<<16)-1) + +static inline unsigned ext3_rec_len_from_disk(__le16 dlen) +{ + unsigned len = le16_to_cpu(dlen); + + if (len == EXT3_MAX_REC_LEN) + return 1 << 16; + return len; +} + +static inline __le16 ext3_rec_len_to_disk(unsigned len) +{ + if (len == (1 << 16)) + return cpu_to_le16(EXT3_MAX_REC_LEN); + else if (len > (1 << 16)) + BUG(); + return cpu_to_le16(len); +} + /* * Hash Tree Directory indexing * (c) Daniel Phillips, 2001 -- cgit v1.2.3 From 9ac625a3986034d79938baf9604210280fae35fa Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 14 Nov 2007 17:00:23 -0800 Subject: uml: fix spurious IRQ testing The spurious IRQ testing in request_irq is mishandled in um_request_irq, which sets the incoming file descriptors non-blocking only after request_irq succeeds. This results in the spurious irq calling read on a blocking descriptor, and a hang. Fixed by reversing the O_NONBLOCK setting and the request_irq call. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/irq.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 70c2d625b07..ba11ccd6a8a 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -347,14 +347,15 @@ int um_request_irq(unsigned int irq, int fd, int type, { int err; - err = request_irq(irq, handler, irqflags, devname, dev_id); - if (err) - return err; - - if (fd != -1) + if (fd != -1) { err = activate_fd(irq, fd, type, dev_id); - return err; + if (err) + return err; + } + + return request_irq(irq, handler, irqflags, devname, dev_id); } + EXPORT_SYMBOL(um_request_irq); EXPORT_SYMBOL(reactivate_fd); -- cgit v1.2.3 From ee1eca5d2493026affbbc91b228dd00879484687 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 14 Nov 2007 17:00:27 -0800 Subject: uml: remove last include of libc asm/page.h asm/page.h is disappearing from the libc headers and we don't need it anyway. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/clone.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index d119f4f7d89..8d07a7acb90 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -3,7 +3,6 @@ #include #include #include -#include #include "as-layout.h" #include "ptrace_user.h" #include "skas.h" -- cgit v1.2.3 From 32f862c310cbd0b430a14911c8b7e2cd415c56ea Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 14 Nov 2007 17:00:28 -0800 Subject: uml: fix build for !CONFIG_TCP Make UML build in the absence of CONFIG_INET by making the inetaddr_notifier registration depend on it. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/net_kern.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 8c01fa81a1a..73681f14f9f 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -753,6 +753,7 @@ static struct mc_device net_mc = { .remove = net_remove, }; +#ifdef CONFIG_INET static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -789,14 +790,13 @@ struct notifier_block uml_inetaddr_notifier = { .notifier_call = uml_inetaddr_event, }; -static int uml_net_init(void) +static void inet_register(void) { struct list_head *ele; struct uml_net_private *lp; struct in_device *ip; struct in_ifaddr *in; - mconsole_register_dev(&net_mc); register_inetaddr_notifier(¨_inetaddr_notifier); /* Devices may have been opened already, so the uml_inetaddr_notifier @@ -816,7 +816,17 @@ static int uml_net_init(void) } } spin_unlock(&opened_lock); +} +#else +static inline void inet_register(void) +{ +} +#endif +static int uml_net_init(void) +{ + mconsole_register_dev(&net_mc); + inet_register(); return 0; } -- cgit v1.2.3 From 9c8d6381dc107dbc2bfdbfdcaefe0d42e5b5b362 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 14 Nov 2007 17:00:31 -0800 Subject: uml: fix build for !CONFIG_PRINTK Handle the case of CONFIG_PRINTK being disabled. This requires a do-nothing stub to be present in arch/um/include/user.h so that we don't get references to printk from libc code. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/user.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/um/include/user.h b/arch/um/include/user.h index 99033ff28a7..1723fac6f40 100644 --- a/arch/um/include/user.h +++ b/arch/um/include/user.h @@ -1,11 +1,13 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #ifndef __USER_H__ #define __USER_H__ +#include "uml-config.h" + /* * The usual definition - copied here because the kernel provides its own, * fancier, type-safe, definition. Using that one would require @@ -23,8 +25,17 @@ extern void panic(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); + +#ifdef UML_CONFIG_PRINTK extern int printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +#else +static inline int printk(const char *fmt, ...) +{ + return 0; +} +#endif + extern void schedule(void); extern int in_aton(char *str); extern int open_gdb_chan(void); -- cgit v1.2.3 From 20a1022d4ac5c53f0956006fd9e30cf4846d5e58 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 14 Nov 2007 17:00:33 -0800 Subject: Swap delay accounting, include lock_page() delays The delay incurred in lock_page() should also be accounted in swap delay accounting Reported-by: Nick Piggin Signed-off-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 7b0403bfc97..4bf0b6d0eb2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2084,9 +2084,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(PGMAJFAULT); } - delayacct_clear_flag(DELAYACCT_PF_SWAPIN); mark_page_accessed(page); lock_page(page); + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); /* * Back out if somebody else already faulted in this pte. -- cgit v1.2.3 From 91ad997a34d7abca1f04e819e31eb9f3d4e20585 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 14 Nov 2007 17:00:34 -0800 Subject: file capabilities: allow sigcont within session Fix http://bugzilla.kernel.org/show_bug.cgi?id=9247 Allow sigcont to be sent to a process with greater capabilities if it is in the same session. Otherwise, a shell from which I've started a root shell and done 'suspend' can't be restarted by the parent shell. Also don't do file-capabilities signaling checks when uids for the processes don't match, since the standard check_kill_permission will have done those checks. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Serge E. Hallyn Acked-by: Andrew Morgan Cc: Chris Wright Tested-by: "Theodore Ts'o" Cc: Stephen Smalley Cc: "Rafael J. Wysocki" Cc: Chris Wright Cc: James Morris Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- security/commoncap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/security/commoncap.c b/security/commoncap.c index bf67871173e..302e8d0839a 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -526,6 +526,10 @@ int cap_task_kill(struct task_struct *p, struct siginfo *info, if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info))) return 0; + /* sigcont is permitted within same session */ + if (sig == SIGCONT && (task_session_nr(current) == task_session_nr(p))) + return 0; + if (secid) /* * Signal sent as a particular user. -- cgit v1.2.3 From 4ae44c57748ad37dcd1d998525f6929d2fe39d02 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 17:00:36 -0800 Subject: feature-removal-schedule: remove SA_* flags entry No reason to keep the feature-removal-schedule.txt entry after the code was removed. Signed-off-by: Adrian Bunk Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 6bb9be54ab7..20c4c8bac9d 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -181,15 +181,6 @@ Who: Nick Piggin --------------------------- -What: Interrupt only SA_* flags -When: September 2007 -Why: The interrupt related SA_* flags are replaced by IRQF_* to move them - out of the signal namespace. - -Who: Thomas Gleixner - ---------------------------- - What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment When: October 2008 Why: The stacking of class devices makes these values misleading and -- cgit v1.2.3 From f96159840bc5f605aca5113ab2d24308d3dc2eff Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 17:00:37 -0800 Subject: kernel/taskstats.c: fix bogus nlmsg_free() We'd better not nlmsg_free on a pointer containing an undefined value (and without having anything allocated). Spotted by the Coverity checker. Signed-off-by: Adrian Bunk Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 354e74bc17c..07e86a82807 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -398,31 +398,31 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); file = fget_light(fd, &fput_needed); - if (file) { - size = nla_total_size(sizeof(struct cgroupstats)); + if (!file) + return 0; - rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, - size); - if (rc < 0) - goto err; + size = nla_total_size(sizeof(struct cgroupstats)); - na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, - sizeof(struct cgroupstats)); - stats = nla_data(na); - memset(stats, 0, sizeof(*stats)); + rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, + size); + if (rc < 0) + goto err; - rc = cgroupstats_build(stats, file->f_dentry); - if (rc < 0) - goto err; + na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, + sizeof(struct cgroupstats)); + stats = nla_data(na); + memset(stats, 0, sizeof(*stats)); - fput_light(file, fput_needed); - return send_reply(rep_skb, info->snd_pid); + rc = cgroupstats_build(stats, file->f_dentry); + if (rc < 0) { + nlmsg_free(rep_skb); + goto err; } + rc = send_reply(rep_skb, info->snd_pid); + err: - if (file) - fput_light(file, fput_needed); - nlmsg_free(rep_skb); + fput_light(file, fput_needed); return rc; } -- cgit v1.2.3 From 8a246ee43f4b1df3fa5cbf9c4a3d3dcad0b1e08c Mon Sep 17 00:00:00 2001 From: Andrey Borzenkov Date: Wed, 14 Nov 2007 17:00:37 -0800 Subject: make /proc/acpi/ac_adapter dependent on ACPI_PROCFS Do not provide /proc/acpi/ac_adapter if ACPI_PROCFS is not defined. This eliminates duplicated power adapters in HAL and makes it consistent with battery module Signed-off-by: Andrey Borzenkov Acked-by: Alexey Starikovskiy Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/ac.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index e03de37a750..1461dc9944a 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -27,8 +27,10 @@ #include #include #include +#ifdef CONFIG_ACPI_PROCFS #include #include +#endif #include #include #include @@ -49,12 +51,14 @@ MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI AC Adapter Driver"); MODULE_LICENSE("GPL"); +#ifdef CONFIG_ACPI_PROCFS extern struct proc_dir_entry *acpi_lock_ac_dir(void); extern void *acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir); +static int acpi_ac_open_fs(struct inode *inode, struct file *file); +#endif static int acpi_ac_add(struct acpi_device *device); static int acpi_ac_remove(struct acpi_device *device, int type); -static int acpi_ac_open_fs(struct inode *inode, struct file *file); const static struct acpi_device_id ac_device_ids[] = { {"ACPI0003", 0}, @@ -80,12 +84,15 @@ struct acpi_ac { #define to_acpi_ac(x) container_of(x, struct acpi_ac, charger); +#ifdef CONFIG_ACPI_PROCFS static const struct file_operations acpi_ac_fops = { .open = acpi_ac_open_fs, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; +#endif + static int get_ac_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) @@ -127,6 +134,7 @@ static int acpi_ac_get_state(struct acpi_ac *ac) return 0; } +#ifdef CONFIG_ACPI_PROCFS /* -------------------------------------------------------------------------- FS Interface (/proc) -------------------------------------------------------------------------- */ @@ -206,6 +214,7 @@ static int acpi_ac_remove_fs(struct acpi_device *device) return 0; } +#endif /* -------------------------------------------------------------------------- Driver Model @@ -264,7 +273,9 @@ static int acpi_ac_add(struct acpi_device *device) if (result) goto end; +#ifdef CONFIG_ACPI_PROCFS result = acpi_ac_add_fs(device); +#endif if (result) goto end; ac->charger.name = acpi_device_bid(device); @@ -287,7 +298,9 @@ static int acpi_ac_add(struct acpi_device *device) end: if (result) { +#ifdef CONFIG_ACPI_PROCFS acpi_ac_remove_fs(device); +#endif kfree(ac); } @@ -309,7 +322,9 @@ static int acpi_ac_remove(struct acpi_device *device, int type) ACPI_ALL_NOTIFY, acpi_ac_notify); if (ac->charger.dev) power_supply_unregister(&ac->charger); +#ifdef CONFIG_ACPI_PROCFS acpi_ac_remove_fs(device); +#endif kfree(ac); @@ -323,13 +338,17 @@ static int __init acpi_ac_init(void) if (acpi_disabled) return -ENODEV; +#ifdef CONFIG_ACPI_PROCFS acpi_ac_dir = acpi_lock_ac_dir(); if (!acpi_ac_dir) return -ENODEV; +#endif result = acpi_bus_register_driver(&acpi_ac_driver); if (result < 0) { +#ifdef CONFIG_ACPI_PROCFS acpi_unlock_ac_dir(acpi_ac_dir); +#endif return -ENODEV; } @@ -341,7 +360,9 @@ static void __exit acpi_ac_exit(void) acpi_bus_unregister_driver(&acpi_ac_driver); +#ifdef CONFIG_ACPI_PROCFS acpi_unlock_ac_dir(acpi_ac_dir); +#endif return; } -- cgit v1.2.3 From 5bfeca3138a6031e38c566d57128ff592eb009a8 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 Nov 2007 17:00:39 -0800 Subject: ACPI: AC: Update AC state on resume Check if AC state has changed across resume and notify userspace if so. Fixes "[2.6.24-rc1 regression] AC adapter state does not change after resume" Signed-off-by: Alexey Starikovskiy Tested-by: Andrey Borzenkov Cc: Len Brown Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/ac.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 1461dc9944a..30238f6ff23 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -59,6 +59,7 @@ static int acpi_ac_open_fs(struct inode *inode, struct file *file); static int acpi_ac_add(struct acpi_device *device); static int acpi_ac_remove(struct acpi_device *device, int type); +static int acpi_ac_resume(struct acpi_device *device); const static struct acpi_device_id ac_device_ids[] = { {"ACPI0003", 0}, @@ -73,6 +74,7 @@ static struct acpi_driver acpi_ac_driver = { .ops = { .add = acpi_ac_add, .remove = acpi_ac_remove, + .resume = acpi_ac_resume, }, }; @@ -307,6 +309,21 @@ static int acpi_ac_add(struct acpi_device *device) return result; } +static int acpi_ac_resume(struct acpi_device *device) +{ + struct acpi_ac *ac; + unsigned old_state; + if (!device || !acpi_driver_data(device)) + return -EINVAL; + ac = acpi_driver_data(device); + old_state = ac->state; + if (acpi_ac_get_state(ac)) + return 0; + if (old_state != ac->state) + kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE); + return 0; +} + static int acpi_ac_remove(struct acpi_device *device, int type) { acpi_status status = AE_OK; -- cgit v1.2.3 From 7eea436433b7b18045f272562e256976f593f7c0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 14 Nov 2007 17:00:39 -0800 Subject: keyspan: init termios properly Remove redundant code leading to NULL ptr deref and let terminal config settings take place in the proper initialization path in usb_console_setup(). Signed-off-by: Borislav Petkov Cc: Cc: Greg KH Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/usb/serial/keyspan.c | 38 ++++++-------------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 6bfdba6a213..1f7ab15df36 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -1215,20 +1215,18 @@ static int keyspan_chars_in_buffer (struct usb_serial_port *port) static int keyspan_open (struct usb_serial_port *port, struct file *filp) { - struct keyspan_port_private *p_priv; - struct keyspan_serial_private *s_priv; - struct usb_serial *serial = port->serial; + struct keyspan_port_private *p_priv; + struct keyspan_serial_private *s_priv; + struct usb_serial *serial = port->serial; const struct keyspan_device_details *d_details; int i, err; - int baud_rate, device_port; struct urb *urb; - unsigned int cflag; s_priv = usb_get_serial_data(serial); p_priv = usb_get_serial_port_data(port); d_details = p_priv->device_details; - - dbg("%s - port%d.", __FUNCTION__, port->number); + + dbg("%s - port%d.", __FUNCTION__, port->number); /* Set some sane defaults */ p_priv->rts_state = 1; @@ -1249,7 +1247,7 @@ static int keyspan_open (struct usb_serial_port *port, struct file *filp) urb->dev = serial->dev; /* make sure endpoint data toggle is synchronized with the device */ - + usb_clear_halt(urb->dev, urb->pipe); if ((err = usb_submit_urb(urb, GFP_KERNEL)) != 0) { @@ -1265,30 +1263,6 @@ static int keyspan_open (struct usb_serial_port *port, struct file *filp) /* usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe), usb_pipeout(urb->pipe), 0); */ } - /* get the terminal config for the setup message now so we don't - * need to send 2 of them */ - - cflag = port->tty->termios->c_cflag; - device_port = port->number - port->serial->minor; - - /* Baud rate calculation takes baud rate as an integer - so other rates can be generated if desired. */ - baud_rate = tty_get_baud_rate(port->tty); - /* If no match or invalid, leave as default */ - if (baud_rate >= 0 - && d_details->calculate_baud_rate(baud_rate, d_details->baudclk, - NULL, NULL, NULL, device_port) == KEYSPAN_BAUD_RATE_OK) { - p_priv->baud = baud_rate; - } - - /* set CTS/RTS handshake etc. */ - p_priv->cflag = cflag; - p_priv->flow_control = (cflag & CRTSCTS)? flow_cts: flow_none; - - keyspan_send_setup(port, 1); - //mdelay(100); - //keyspan_set_termios(port, NULL); - return (0); } -- cgit v1.2.3 From 35d5d08a085c56f153458c3f5d8ce24123617faf Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Nov 2007 17:00:41 -0800 Subject: x86: disable preemption in delay_tsc() Marin Mitov points out that delay_tsc() can misbehave if it is preempted and rescheduled on a different CPU which has a skewed TSC. Fix it by disabling preemption. (I assume that the worst-case behaviour here is a stall of 2^32 cycles) Cc: Andi Kleen Cc: Marin Mitov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/lib/delay_32.c | 3 +++ arch/x86/lib/delay_64.c | 11 +++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index 952e7a89c2a..aad9d95469d 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -42,11 +43,13 @@ static void delay_tsc(unsigned long loops) { unsigned long bclock, now; + preempt_disable(); /* TSC's are per-cpu */ rdtscl(bclock); do { rep_nop(); rdtscl(now); } while ((now-bclock) < loops); + preempt_enable(); } /* diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index 0ebbfb9e7c7..45cdd3fbd91 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -10,7 +10,9 @@ #include #include +#include #include + #include #include @@ -27,14 +29,15 @@ int read_current_timer(unsigned long *timer_value) void __delay(unsigned long loops) { unsigned bclock, now; - + + preempt_disable(); /* TSC's are pre-cpu */ rdtscl(bclock); - do - { + do { rep_nop(); rdtscl(now); } - while((now-bclock) < loops); + while ((now-bclock) < loops); + preempt_enable(); } EXPORT_SYMBOL(__delay); -- cgit v1.2.3 From 77f2878b4f78c0b29c4a2580665a446c77901152 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 14 Nov 2007 17:00:42 -0800 Subject: oprofile: fix oops on x86 32-bit x86 32-bit isn't saving the stack pointer to pt_regs->esp when an interrupt occurs. Signed-off-by: Jan Blunck Tested-by: Robert Fitzsimons Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Philippe Elie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-x86/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 213c97300cb..51ddb259087 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -60,7 +60,7 @@ static inline int v8086_mode(struct pt_regs *regs) #define instruction_pointer(regs) ((regs)->eip) #define frame_pointer(regs) ((regs)->ebp) -#define stack_pointer(regs) ((regs)->esp) +#define stack_pointer(regs) ((unsigned long)(regs)) #define regs_return_value(regs) ((regs)->eax) extern unsigned long profile_pc(struct pt_regs *regs); -- cgit v1.2.3 From bae19fe033b0c5ed99b1ed27a4cce84625a24606 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 14 Nov 2007 17:00:44 -0800 Subject: x86: don't call mce_create_device on CPU_UP_PREPARE Fix regression introduced with d435d862baca3e25e5eec236762a43251b1e7ffc ("cpu hotplug: mce: fix cpu hotplug error handling"). A CPU which was not brought up during boot (using maxcpus and additional_cpus parameters) couldn't be onlined anymore. For such a CPU it seemed that MCE was not supported during CPU_UP_PREPARE-time which caused mce_cpu_callback to return NOTIFY_BAD to notifier_call_chain. To fix this we: - call mce_create_device for CPU_ONLINE event (instead of CPU_UP_PREPARE), - avoid mce_remove_device() for the CPU that is not correctly initialized by mce_create_device() failure, - make mce_cpu_callback always return NOTIFY_OK for CPU_ONLINE event. Because CPU_ONLINE callback return value is always ignored. [akinobu.mita@gmail.com: avoid mce_remove_device() for not initialized device] [akinobu.mita@gmail.com: make mce_cpu_callback always return NOTIFY_OK] Signed-off-by: Akinobu Mita Signed-off-by: Andreas Herrmann Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/mcheck/mce_64.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index b9f802e3520..447b351f1f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -802,6 +802,8 @@ static struct sysdev_attribute *mce_attributes[] = { NULL }; +static cpumask_t mce_device_initialized = CPU_MASK_NONE; + /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ static __cpuinit int mce_create_device(unsigned int cpu) { @@ -825,6 +827,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) if (err) goto error; } + cpu_set(cpu, mce_device_initialized); return 0; error: @@ -841,10 +844,14 @@ static void mce_remove_device(unsigned int cpu) { int i; + if (!cpu_isset(cpu, mce_device_initialized)) + return; + for (i = 0; mce_attributes[i]; i++) sysdev_remove_file(&per_cpu(device_mce,cpu), mce_attributes[i]); sysdev_unregister(&per_cpu(device_mce,cpu)); + cpu_clear(cpu, mce_device_initialized); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -852,21 +859,18 @@ static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - int err = 0; switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - err = mce_create_device(cpu); + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + mce_create_device(cpu); break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: mce_remove_device(cpu); break; } - return err ? NOTIFY_BAD : NOTIFY_OK; + return NOTIFY_OK; } static struct notifier_block mce_cpu_notifier = { -- cgit v1.2.3 From d297a5d576d549d97dce456ba4bd01e5a47e899c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Nov 2007 17:00:45 -0800 Subject: aic94xx_sds: rename FLASH_SIZE arm: drivers/scsi/aic94xx/aic94xx_sds.c:381:1: warning: "FLASH_SIZE" redefined In file included from include/asm/arch/irqs.h:22, from include/asm/irq.h:4, from include/asm/hardirq.h:6, from include/linux/hardirq.h:7, from include/asm-generic/local.h:5, from include/asm/local.h:1, from include/linux/module.h:19, from include/linux/device.h:21, from include/linux/pci.h:52, from drivers/scsi/aic94xx/aic94xx_sds.c:28: include/asm/arch/platform.h:444:1: warning: this is the location of the previous definition Cc: Gilbert Wu Cc: James Bottomley Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/aic94xx/aic94xx_sds.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c index 5b0932f6147..06509bff71f 100644 --- a/drivers/scsi/aic94xx/aic94xx_sds.c +++ b/drivers/scsi/aic94xx/aic94xx_sds.c @@ -377,7 +377,7 @@ out: #define FLASH_RESET 0xF0 -#define FLASH_SIZE 0x200000 +#define ASD_FLASH_SIZE 0x200000 #define FLASH_DIR_COOKIE "*** ADAPTEC FLASH DIRECTORY *** " #define FLASH_NEXT_ENTRY_OFFS 0x2000 #define FLASH_MAX_DIR_ENTRIES 32 @@ -609,7 +609,7 @@ static int asd_find_flash_dir(struct asd_ha_struct *asd_ha, struct asd_flash_dir *flash_dir) { u32 v; - for (v = 0; v < FLASH_SIZE; v += FLASH_NEXT_ENTRY_OFFS) { + for (v = 0; v < ASD_FLASH_SIZE; v += FLASH_NEXT_ENTRY_OFFS) { asd_read_flash_seg(asd_ha, flash_dir, v, sizeof(FLASH_DIR_COOKIE)-1); if (memcmp(flash_dir->cookie, FLASH_DIR_COOKIE, -- cgit v1.2.3 From b956947106c788a3f2ce0375af930b4cbf7454d5 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:47 -0800 Subject: cris build fixes: fix csum_tcpudp_magic() declaration Remove int from prototype, no longer needed and causes compile error. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-cris/checksum.h b/include/asm-cris/checksum.h index 180dbf2757b..c6c5be62c69 100644 --- a/include/asm-cris/checksum.h +++ b/include/asm-cris/checksum.h @@ -62,7 +62,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * returns a 16-bit checksum, already complemented */ -static inline __sum16 int csum_tcpudp_magic(__be32 saddr, __be32 daddr, +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, __wsum sum) -- cgit v1.2.3 From 0c3537ffcad08eabd49d43762fcb9f6da8517d9b Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:48 -0800 Subject: cris build fixes: Add missing syscalls Add missing syscalls to cris architecture. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/kernel/entry.S | 36 ++++++++++++++++++++++++++++++++++++ include/asm-cris/unistd.h | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index c5844cb70f0..bc9bed97f22 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -1135,6 +1135,42 @@ sys_call_table: .long sys_add_key .long sys_request_key .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get /* 290 */ + .long sys_inotify_init + .long sys_inotify_add_watch + .long sys_inotify_rm_watch + .long sys_migrate_pages + .long sys_openat /* 295 */ + .long sys_mkdirat + .long sys_mknodat + .long sys_fchownat + .long sys_futimesat + .long sys_fstatat64 /* 300 */ + .long sys_unlinkat + .long sys_renameat + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat /* 305 */ + .long sys_fchmodat + .long sys_faccessat + .long sys_pselect6 + .long sys_ppoll + .long sys_unshare /* 310 */ + .long sys_set_robust_list + .long sys_get_robust_list + .long sys_splice + .long sys_sync_file_range + .long sys_tee /* 315 */ + .long sys_vmsplice + .long sys_move_pages + .long sys_getcpu + .long sys_epoll_pwait + .long sys_utimensat /* 320 */ + .long sys_signalfd + .long sys_timerfd + .long sys_eventfd + .long sys_fallocate /* * NOTE!! This doesn't have to be exact - we just have diff --git a/include/asm-cris/unistd.h b/include/asm-cris/unistd.h index 7c90fa970c3..6f2d924f4fd 100644 --- a/include/asm-cris/unistd.h +++ b/include/asm-cris/unistd.h @@ -255,6 +255,7 @@ #define __NR_io_submit 248 #define __NR_io_cancel 249 #define __NR_fadvise64 250 +/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ #define __NR_exit_group 252 #define __NR_lookup_dcookie 253 #define __NR_epoll_create 254 @@ -292,10 +293,46 @@ #define __NR_add_key 286 #define __NR_request_key 287 #define __NR_keyctl 288 +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_fstatat64 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 +#define __NR_unshare 310 +#define __NR_set_robust_list 311 +#define __NR_get_robust_list 312 +#define __NR_splice 313 +#define __NR_sync_file_range 314 +#define __NR_tee 315 +#define __NR_vmsplice 316 +#define __NR_move_pages 317 +#define __NR_getcpu 318 +#define __NR_epoll_pwait 319 +#define __NR_utimensat 320 +#define __NR_signalfd 321 +#define __NR_timerfd 322 +#define __NR_eventfd 323 +#define __NR_fallocate 324 #ifdef __KERNEL__ -#define NR_syscalls 289 +#define NR_syscalls 325 #include -- cgit v1.2.3 From 80bf7a5be08cc39c1edb490925a85a5f60e51a21 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:49 -0800 Subject: cris build fixes: hardirq.h: include asm/irq.h Include asm/irq.h to avoid undefined value warning. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/hardirq.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-cris/hardirq.h b/include/asm-cris/hardirq.h index 1c13dd3faac..74178adeb1c 100644 --- a/include/asm-cris/hardirq.h +++ b/include/asm-cris/hardirq.h @@ -1,6 +1,7 @@ #ifndef __ASM_HARDIRQ_H #define __ASM_HARDIRQ_H +#include #include #include -- cgit v1.2.3 From df2b84a1f59567627891f5919efcedc7abd32d7a Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:50 -0800 Subject: cris build fixes: atomic.h needs compiler.h Include file linux/compiler.h is needed for 'likely'. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/atomic.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-cris/atomic.h b/include/asm-cris/atomic.h index 0b51a87e553..d16ab24a468 100644 --- a/include/asm-cris/atomic.h +++ b/include/asm-cris/atomic.h @@ -5,6 +5,7 @@ #include #include +#include /* * Atomic operations that C can't guarantee us. Useful for -- cgit v1.2.3 From 9587997a4a9f74901981e0a751a7ae0e46a72b94 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Nov 2007 17:00:51 -0800 Subject: cris-build-fixes-atomich-needs-compilerh-fix Cc: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/atomic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/asm-cris/atomic.h b/include/asm-cris/atomic.h index d16ab24a468..2949a945876 100644 --- a/include/asm-cris/atomic.h +++ b/include/asm-cris/atomic.h @@ -3,9 +3,10 @@ #ifndef __ASM_CRIS_ATOMIC__ #define __ASM_CRIS_ATOMIC__ +#include + #include #include -#include /* * Atomic operations that C can't guarantee us. Useful for -- cgit v1.2.3 From eb2746ddc376c9c72905fbdd3f8d1e68a81957aa Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:52 -0800 Subject: cris build fixes: irq fixes - New file include/asm-cris/irq_regs.h. - Change handling of registers for do_IRQ. - Add GENERIC_HARDIRQS to Kconfig. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/Kconfig | 4 ++++ arch/cris/kernel/irq.c | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 21900a9378b..f653772a87a 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -57,6 +57,10 @@ menu "General setup" source "fs/Kconfig.binfmt" +config GENERIC_HARDIRQS + bool + default y + config ETRAX_CMDLINE string "Kernel command line" default "root=/dev/mtdblock3" diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 5c27ff86121..2dfac8c7909 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -2,7 +2,7 @@ * * linux/arch/cris/kernel/irq.c * - * Copyright (c) 2000,2001 Axis Communications AB + * Copyright (c) 2000,2007 Axis Communications AB * * Authors: Bjorn Wesen (bjornw@axis.com) * @@ -92,14 +92,16 @@ skip: asmlinkage void do_IRQ(int irq, struct pt_regs * regs) { unsigned long sp; + struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); sp = rdsp(); if (unlikely((sp & (PAGE_SIZE - 1)) < (PAGE_SIZE/8))) { printk("do_IRQ: stack overflow: %lX\n", sp); show_stack(NULL, (unsigned long *)sp); } - __do_IRQ(irq, regs); + __do_IRQ(irq); irq_exit(); + set_irq_regs(old_regs); } void weird_irq(void) -- cgit v1.2.3 From 5978e791fed57d9d66fd80e2b908b803fa6a831d Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:53 -0800 Subject: cris build fixes: sys_cris.c needs fs.h Include linux/fs.h, needed for do_pipe. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/kernel/sys_cris.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index 514359b8122..8b9984197ed 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From eda35b64a7739691839ea997c836163ea12f123b Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:54 -0800 Subject: cris build fixes: add baud rate defines Add missing defines for (unsupported) baud rates. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/termbits.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/asm-cris/termbits.h b/include/asm-cris/termbits.h index 71c1b36269b..66e1a7492a0 100644 --- a/include/asm-cris/termbits.h +++ b/include/asm-cris/termbits.h @@ -171,6 +171,19 @@ struct ktermios { #define B115200 0010002 #define B230400 0010003 #define B460800 0010004 + +/* Unsupported rates, but needed to avoid compile error. */ +#define B500000 0010005 +#define B576000 0010006 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 + /* etrax supports these additional three baud rates */ #define B921600 0010005 #define B1843200 0010006 -- cgit v1.2.3 From bafef0ae9d3651540c442aebf242f7b68e183bff Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:55 -0800 Subject: cris build fixes: update eth_v10.c ethernet driver New (updated) version of ethernet driver for cris v10. - First steps to simplify and make the MII code more similar between the etrax100 and etraxfs ports. - Start the transmit queue before enabling tx interrupts to avoid race with the first frame. - Flip the comparition statement to stick to physical addresses to avoid phys_to_virt mapping a potential null pointer. This was not an error but the change simplifies debugging of address-space mappings. - Made myPrevRxDesc local to e100_rx since it was only used there. Fixed out of memory handling in e100_rx. If dev_alloc_skb() fails persistently the system is hosed anyway but at least it won't loop in an interrupt handler. - Correct some code formatting issues. - Add defines SET_ETH_ENABLE_LEDS, SET_ETH_DISABLE_LEDS and SET_ETH_AUTONEG used in new cris v10 ethernet driver. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/cris/eth_v10.c | 428 ++++++++++++++++++++++++++++---------------- include/asm-cris/ethernet.h | 3 + 2 files changed, 279 insertions(+), 152 deletions(-) diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c index edd6828f0a7..26ffa810e58 100644 --- a/drivers/net/cris/eth_v10.c +++ b/drivers/net/cris/eth_v10.c @@ -250,6 +250,7 @@ #include #include #include +#include //#define ETHDEBUG #define D(x) @@ -279,6 +280,9 @@ struct net_local { * by this lock as well. */ spinlock_t lock; + + spinlock_t led_lock; /* Protect LED state */ + spinlock_t transceiver_lock; /* Protect transceiver state. */ }; typedef struct etrax_eth_descr @@ -295,8 +299,6 @@ struct transceiver_ops void (*check_duplex)(struct net_device* dev); }; -struct transceiver_ops* transceiver; - /* Duplex settings */ enum duplex { @@ -307,7 +309,7 @@ enum duplex /* Dma descriptors etc. */ -#define MAX_MEDIA_DATA_SIZE 1518 +#define MAX_MEDIA_DATA_SIZE 1522 #define MIN_PACKET_LEN 46 #define ETHER_HEAD_LEN 14 @@ -332,8 +334,8 @@ enum duplex /*Intel LXT972A specific*/ #define MDIO_INT_STATUS_REG_2 0x0011 -#define MDIO_INT_FULL_DUPLEX_IND ( 1 << 9 ) -#define MDIO_INT_SPEED ( 1 << 14 ) +#define MDIO_INT_FULL_DUPLEX_IND (1 << 9) +#define MDIO_INT_SPEED (1 << 14) /* Network flash constants */ #define NET_FLASH_TIME (HZ/50) /* 20 ms */ @@ -344,8 +346,8 @@ enum duplex #define NO_NETWORK_ACTIVITY 0 #define NETWORK_ACTIVITY 1 -#define NBR_OF_RX_DESC 64 -#define NBR_OF_TX_DESC 256 +#define NBR_OF_RX_DESC 32 +#define NBR_OF_TX_DESC 16 /* Large packets are sent directly to upper layers while small packets are */ /* copied (to reduce memory waste). The following constant decides the breakpoint */ @@ -367,7 +369,6 @@ enum duplex static etrax_eth_descr *myNextRxDesc; /* Points to the next descriptor to to be processed */ static etrax_eth_descr *myLastRxDesc; /* The last processed descriptor */ -static etrax_eth_descr *myPrevRxDesc; /* The descriptor right before myNextRxDesc */ static etrax_eth_descr RxDescList[NBR_OF_RX_DESC] __attribute__ ((aligned(32))); @@ -377,7 +378,6 @@ static etrax_eth_descr* myNextTxDesc; /* Next descriptor to use */ static etrax_eth_descr TxDescList[NBR_OF_TX_DESC] __attribute__ ((aligned(32))); static unsigned int network_rec_config_shadow = 0; -static unsigned int mdio_phy_addr; /* Transciever address */ static unsigned int network_tr_ctrl_shadow = 0; @@ -411,7 +411,7 @@ static int e100_set_config(struct net_device* dev, struct ifmap* map); static void e100_tx_timeout(struct net_device *dev); static struct net_device_stats *e100_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void e100_hardware_send_packet(char *buf, int length); +static void e100_hardware_send_packet(struct net_local* np, char *buf, int length); static void update_rx_stats(struct net_device_stats *); static void update_tx_stats(struct net_device_stats *); static int e100_probe_transceiver(struct net_device* dev); @@ -434,7 +434,10 @@ static void e100_clear_network_leds(unsigned long dummy); static void e100_set_network_leds(int active); static const struct ethtool_ops e100_ethtool_ops; - +#if defined(CONFIG_ETRAX_NO_PHY) +static void dummy_check_speed(struct net_device* dev); +static void dummy_check_duplex(struct net_device* dev); +#else static void broadcom_check_speed(struct net_device* dev); static void broadcom_check_duplex(struct net_device* dev); static void tdk_check_speed(struct net_device* dev); @@ -443,16 +446,28 @@ static void intel_check_speed(struct net_device* dev); static void intel_check_duplex(struct net_device* dev); static void generic_check_speed(struct net_device* dev); static void generic_check_duplex(struct net_device* dev); +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER +static void e100_netpoll(struct net_device* dev); +#endif + +static int autoneg_normal = 1; struct transceiver_ops transceivers[] = { +#if defined(CONFIG_ETRAX_NO_PHY) + {0x0000, dummy_check_speed, dummy_check_duplex} /* Dummy */ +#else {0x1018, broadcom_check_speed, broadcom_check_duplex}, /* Broadcom */ {0xC039, tdk_check_speed, tdk_check_duplex}, /* TDK 2120 */ {0x039C, tdk_check_speed, tdk_check_duplex}, /* TDK 2120C */ {0x04de, intel_check_speed, intel_check_duplex}, /* Intel LXT972A*/ {0x0000, generic_check_speed, generic_check_duplex} /* Generic, must be last */ +#endif }; +struct transceiver_ops* transceiver = &transceivers[0]; + #define tx_done(dev) (*R_DMA_CH0_CMD == 0) /* @@ -471,14 +486,22 @@ etrax_ethernet_init(void) int i, err; printk(KERN_INFO - "ETRAX 100LX 10/100MBit ethernet v2.0 (c) 2000-2003 Axis Communications AB\n"); + "ETRAX 100LX 10/100MBit ethernet v2.0 (c) 1998-2007 Axis Communications AB\n"); - dev = alloc_etherdev(sizeof(struct net_local)); - np = dev->priv; + if (cris_request_io_interface(if_eth, cardname)) { + printk(KERN_CRIT "etrax_ethernet_init failed to get IO interface\n"); + return -EBUSY; + } + dev = alloc_etherdev(sizeof(struct net_local)); if (!dev) return -ENOMEM; + np = netdev_priv(dev); + + /* we do our own locking */ + dev->features |= NETIF_F_LLTX; + dev->base_addr = (unsigned int)R_NETWORK_SA_0; /* just to have something to show */ /* now setup our etrax specific stuff */ @@ -498,14 +521,22 @@ etrax_ethernet_init(void) dev->do_ioctl = e100_ioctl; dev->set_config = e100_set_config; dev->tx_timeout = e100_tx_timeout; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = e100_netpoll; +#endif + + spin_lock_init(&np->lock); + spin_lock_init(&np->led_lock); + spin_lock_init(&np->transceiver_lock); /* Initialise the list of Etrax DMA-descriptors */ /* Initialise receive descriptors */ for (i = 0; i < NBR_OF_RX_DESC; i++) { - /* Allocate two extra cachelines to make sure that buffer used by DMA - * does not share cacheline with any other data (to avoid cache bug) + /* Allocate two extra cachelines to make sure that buffer used + * by DMA does not share cacheline with any other data (to + * avoid cache bug) */ RxDescList[i].skb = dev_alloc_skb(MAX_MEDIA_DATA_SIZE + 2 * L1_CACHE_BYTES); if (!RxDescList[i].skb) @@ -541,7 +572,6 @@ etrax_ethernet_init(void) myNextRxDesc = &RxDescList[0]; myLastRxDesc = &RxDescList[NBR_OF_RX_DESC - 1]; - myPrevRxDesc = &RxDescList[NBR_OF_RX_DESC - 1]; myFirstTxDesc = &TxDescList[0]; myNextTxDesc = &TxDescList[0]; myLastTxDesc = &TxDescList[NBR_OF_TX_DESC - 1]; @@ -562,10 +592,11 @@ etrax_ethernet_init(void) current_speed = 10; current_speed_selection = 0; /* Auto */ speed_timer.expires = jiffies + NET_LINK_UP_CHECK_INTERVAL; - duplex_timer.data = (unsigned long)dev; + speed_timer.data = (unsigned long)dev; speed_timer.function = e100_check_speed; clear_led_timer.function = e100_clear_network_leds; + clear_led_timer.data = (unsigned long)dev; full_duplex = 0; current_duplex = autoneg; @@ -574,7 +605,6 @@ etrax_ethernet_init(void) duplex_timer.function = e100_check_duplex; /* Initialize mii interface */ - np->mii_if.phy_id = mdio_phy_addr; np->mii_if.phy_id_mask = 0x1f; np->mii_if.reg_num_mask = 0x1f; np->mii_if.dev = dev; @@ -585,6 +615,9 @@ etrax_ethernet_init(void) /* unwanted addresses are matched */ *R_NETWORK_GA_0 = 0x00000000; *R_NETWORK_GA_1 = 0x00000000; + + /* Initialize next time the led can flash */ + led_next_time = jiffies; return 0; } @@ -595,7 +628,7 @@ etrax_ethernet_init(void) static int e100_set_mac_address(struct net_device *dev, void *p) { - struct net_local *np = (struct net_local *)dev->priv; + struct net_local *np = netdev_priv(dev); struct sockaddr *addr = p; int i; @@ -686,6 +719,25 @@ e100_open(struct net_device *dev) goto grace_exit2; } + /* + * Always allocate the DMA channels after the IRQ, + * and clean up on failure. + */ + + if (cris_request_dma(NETWORK_TX_DMA_NBR, + cardname, + DMA_VERBOSE_ON_ERROR, + dma_eth)) { + goto grace_exit3; + } + + if (cris_request_dma(NETWORK_RX_DMA_NBR, + cardname, + DMA_VERBOSE_ON_ERROR, + dma_eth)) { + goto grace_exit4; + } + /* give the HW an idea of what MAC address we want */ *R_NETWORK_SA_0 = dev->dev_addr[0] | (dev->dev_addr[1] << 8) | @@ -700,6 +752,7 @@ e100_open(struct net_device *dev) *R_NETWORK_REC_CONFIG = 0xd; /* broadcast rec, individ. rec, ma0 enabled */ #else + SETS(network_rec_config_shadow, R_NETWORK_REC_CONFIG, max_size, size1522); SETS(network_rec_config_shadow, R_NETWORK_REC_CONFIG, broadcast, receive); SETS(network_rec_config_shadow, R_NETWORK_REC_CONFIG, ma0, enable); SETF(network_rec_config_shadow, R_NETWORK_REC_CONFIG, duplex, full_duplex); @@ -719,8 +772,7 @@ e100_open(struct net_device *dev) SETS(network_tr_ctrl_shadow, R_NETWORK_TR_CTRL, crc, enable); *R_NETWORK_TR_CTRL = network_tr_ctrl_shadow; - save_flags(flags); - cli(); + local_irq_save(flags); /* enable the irq's for ethernet DMA */ @@ -752,12 +804,13 @@ e100_open(struct net_device *dev) *R_DMA_CH0_FIRST = 0; *R_DMA_CH0_DESCR = virt_to_phys(myLastTxDesc); + netif_start_queue(dev); - restore_flags(flags); + local_irq_restore(flags); /* Probe for transceiver */ if (e100_probe_transceiver(dev)) - goto grace_exit3; + goto grace_exit5; /* Start duplex/speed timers */ add_timer(&speed_timer); @@ -766,10 +819,14 @@ e100_open(struct net_device *dev) /* We are now ready to accept transmit requeusts from * the queueing layer of the networking. */ - netif_start_queue(dev); + netif_carrier_on(dev); return 0; +grace_exit5: + cris_free_dma(NETWORK_RX_DMA_NBR, cardname); +grace_exit4: + cris_free_dma(NETWORK_TX_DMA_NBR, cardname); grace_exit3: free_irq(NETWORK_STATUS_IRQ_NBR, (void *)dev); grace_exit2: @@ -780,12 +837,20 @@ grace_exit0: return -EAGAIN; } - +#if defined(CONFIG_ETRAX_NO_PHY) +static void +dummy_check_speed(struct net_device* dev) +{ + current_speed = 100; +} +#else static void generic_check_speed(struct net_device* dev) { unsigned long data; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MII_ADVERTISE); + struct net_local *np = netdev_priv(dev); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_ADVERTISE); if ((data & ADVERTISE_100FULL) || (data & ADVERTISE_100HALF)) current_speed = 100; @@ -797,7 +862,10 @@ static void tdk_check_speed(struct net_device* dev) { unsigned long data; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MDIO_TDK_DIAGNOSTIC_REG); + struct net_local *np = netdev_priv(dev); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, + MDIO_TDK_DIAGNOSTIC_REG); current_speed = (data & MDIO_TDK_DIAGNOSTIC_RATE ? 100 : 10); } @@ -805,7 +873,10 @@ static void broadcom_check_speed(struct net_device* dev) { unsigned long data; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MDIO_AUX_CTRL_STATUS_REG); + struct net_local *np = netdev_priv(dev); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, + MDIO_AUX_CTRL_STATUS_REG); current_speed = (data & MDIO_BC_SPEED ? 100 : 10); } @@ -813,46 +884,62 @@ static void intel_check_speed(struct net_device* dev) { unsigned long data; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MDIO_INT_STATUS_REG_2); + struct net_local *np = netdev_priv(dev); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, + MDIO_INT_STATUS_REG_2); current_speed = (data & MDIO_INT_SPEED ? 100 : 10); } - +#endif static void e100_check_speed(unsigned long priv) { struct net_device* dev = (struct net_device*)priv; + struct net_local *np = netdev_priv(dev); static int led_initiated = 0; unsigned long data; int old_speed = current_speed; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MII_BMSR); + spin_lock(&np->transceiver_lock); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMSR); if (!(data & BMSR_LSTATUS)) { current_speed = 0; } else { transceiver->check_speed(dev); } + spin_lock(&np->led_lock); if ((old_speed != current_speed) || !led_initiated) { led_initiated = 1; e100_set_network_leds(NO_NETWORK_ACTIVITY); + if (current_speed) + netif_carrier_on(dev); + else + netif_carrier_off(dev); } + spin_unlock(&np->led_lock); /* Reinitialize the timer. */ speed_timer.expires = jiffies + NET_LINK_UP_CHECK_INTERVAL; add_timer(&speed_timer); + + spin_unlock(&np->transceiver_lock); } static void e100_negotiate(struct net_device* dev) { - unsigned short data = e100_get_mdio_reg(dev, mdio_phy_addr, MII_ADVERTISE); + struct net_local *np = netdev_priv(dev); + unsigned short data = e100_get_mdio_reg(dev, np->mii_if.phy_id, + MII_ADVERTISE); /* Discard old speed and duplex settings */ data &= ~(ADVERTISE_100HALF | ADVERTISE_100FULL | ADVERTISE_10HALF | ADVERTISE_10FULL); switch (current_speed_selection) { - case 10 : + case 10: if (current_duplex == full) data |= ADVERTISE_10FULL; else if (current_duplex == half) @@ -861,7 +948,7 @@ e100_negotiate(struct net_device* dev) data |= ADVERTISE_10HALF | ADVERTISE_10FULL; break; - case 100 : + case 100: if (current_duplex == full) data |= ADVERTISE_100FULL; else if (current_duplex == half) @@ -870,7 +957,7 @@ e100_negotiate(struct net_device* dev) data |= ADVERTISE_100HALF | ADVERTISE_100FULL; break; - case 0 : /* Auto */ + case 0: /* Auto */ if (current_duplex == full) data |= ADVERTISE_100FULL | ADVERTISE_10FULL; else if (current_duplex == half) @@ -880,35 +967,44 @@ e100_negotiate(struct net_device* dev) ADVERTISE_100HALF | ADVERTISE_100FULL; break; - default : /* assume autoneg speed and duplex */ + default: /* assume autoneg speed and duplex */ data |= ADVERTISE_10HALF | ADVERTISE_10FULL | ADVERTISE_100HALF | ADVERTISE_100FULL; + break; } - e100_set_mdio_reg(dev, mdio_phy_addr, MII_ADVERTISE, data); + e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_ADVERTISE, data); /* Renegotiate with link partner */ - data = e100_get_mdio_reg(dev, mdio_phy_addr, MII_BMCR); + if (autoneg_normal) { + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR); data |= BMCR_ANENABLE | BMCR_ANRESTART; - - e100_set_mdio_reg(dev, mdio_phy_addr, MII_BMCR, data); + } + e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR, data); } static void e100_set_speed(struct net_device* dev, unsigned long speed) { + struct net_local *np = netdev_priv(dev); + + spin_lock(&np->transceiver_lock); if (speed != current_speed_selection) { current_speed_selection = speed; e100_negotiate(dev); } + spin_unlock(&np->transceiver_lock); } static void e100_check_duplex(unsigned long priv) { struct net_device *dev = (struct net_device *)priv; - struct net_local *np = (struct net_local *)dev->priv; - int old_duplex = full_duplex; + struct net_local *np = netdev_priv(dev); + int old_duplex; + + spin_lock(&np->transceiver_lock); + old_duplex = full_duplex; transceiver->check_duplex(dev); if (old_duplex != full_duplex) { /* Duplex changed */ @@ -920,13 +1016,22 @@ e100_check_duplex(unsigned long priv) duplex_timer.expires = jiffies + NET_DUPLEX_CHECK_INTERVAL; add_timer(&duplex_timer); np->mii_if.full_duplex = full_duplex; + spin_unlock(&np->transceiver_lock); } - +#if defined(CONFIG_ETRAX_NO_PHY) +static void +dummy_check_duplex(struct net_device* dev) +{ + full_duplex = 1; +} +#else static void generic_check_duplex(struct net_device* dev) { unsigned long data; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MII_ADVERTISE); + struct net_local *np = netdev_priv(dev); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_ADVERTISE); if ((data & ADVERTISE_10FULL) || (data & ADVERTISE_100FULL)) full_duplex = 1; @@ -938,7 +1043,10 @@ static void tdk_check_duplex(struct net_device* dev) { unsigned long data; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MDIO_TDK_DIAGNOSTIC_REG); + struct net_local *np = netdev_priv(dev); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, + MDIO_TDK_DIAGNOSTIC_REG); full_duplex = (data & MDIO_TDK_DIAGNOSTIC_DPLX) ? 1 : 0; } @@ -946,7 +1054,10 @@ static void broadcom_check_duplex(struct net_device* dev) { unsigned long data; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MDIO_AUX_CTRL_STATUS_REG); + struct net_local *np = netdev_priv(dev); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, + MDIO_AUX_CTRL_STATUS_REG); full_duplex = (data & MDIO_BC_FULL_DUPLEX_IND) ? 1 : 0; } @@ -954,38 +1065,51 @@ static void intel_check_duplex(struct net_device* dev) { unsigned long data; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MDIO_INT_STATUS_REG_2); + struct net_local *np = netdev_priv(dev); + + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, + MDIO_INT_STATUS_REG_2); full_duplex = (data & MDIO_INT_FULL_DUPLEX_IND) ? 1 : 0; } - +#endif static void e100_set_duplex(struct net_device* dev, enum duplex new_duplex) { + struct net_local *np = netdev_priv(dev); + + spin_lock(&np->transceiver_lock); if (new_duplex != current_duplex) { current_duplex = new_duplex; e100_negotiate(dev); } + spin_unlock(&np->transceiver_lock); } static int e100_probe_transceiver(struct net_device* dev) { +#if !defined(CONFIG_ETRAX_NO_PHY) unsigned int phyid_high; unsigned int phyid_low; unsigned int oui; struct transceiver_ops* ops = NULL; + struct net_local *np = netdev_priv(dev); + + spin_lock(&np->transceiver_lock); /* Probe MDIO physical address */ - for (mdio_phy_addr = 0; mdio_phy_addr <= 31; mdio_phy_addr++) { - if (e100_get_mdio_reg(dev, mdio_phy_addr, MII_BMSR) != 0xffff) + for (np->mii_if.phy_id = 0; np->mii_if.phy_id <= 31; + np->mii_if.phy_id++) { + if (e100_get_mdio_reg(dev, + np->mii_if.phy_id, MII_BMSR) != 0xffff) break; } - if (mdio_phy_addr == 32) + if (np->mii_if.phy_id == 32) return -ENODEV; /* Get manufacturer */ - phyid_high = e100_get_mdio_reg(dev, mdio_phy_addr, MII_PHYSID1); - phyid_low = e100_get_mdio_reg(dev, mdio_phy_addr, MII_PHYSID2); + phyid_high = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_PHYSID1); + phyid_low = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_PHYSID2); oui = (phyid_high << 6) | (phyid_low >> 10); for (ops = &transceivers[0]; ops->oui; ops++) { @@ -994,6 +1118,8 @@ e100_probe_transceiver(struct net_device* dev) } transceiver = ops; + spin_unlock(&np->transceiver_lock); +#endif return 0; } @@ -1088,13 +1214,14 @@ e100_receive_mdio_bit() static void e100_reset_transceiver(struct net_device* dev) { + struct net_local *np = netdev_priv(dev); unsigned short cmd; unsigned short data; int bitCounter; - data = e100_get_mdio_reg(dev, mdio_phy_addr, MII_BMCR); + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR); - cmd = (MDIO_START << 14) | (MDIO_WRITE << 12) | (mdio_phy_addr << 7) | (MII_BMCR << 2); + cmd = (MDIO_START << 14) | (MDIO_WRITE << 12) | (np->mii_if.phy_id << 7) | (MII_BMCR << 2); e100_send_mdio_cmd(cmd, 1); @@ -1112,7 +1239,7 @@ e100_reset_transceiver(struct net_device* dev) static void e100_tx_timeout(struct net_device *dev) { - struct net_local *np = (struct net_local *)dev->priv; + struct net_local *np = netdev_priv(dev); unsigned long flags; spin_lock_irqsave(&np->lock, flags); @@ -1134,8 +1261,7 @@ e100_tx_timeout(struct net_device *dev) e100_reset_transceiver(dev); /* and get rid of the packets that never got an interrupt */ - while (myFirstTxDesc != myNextTxDesc) - { + while (myFirstTxDesc != myNextTxDesc) { dev_kfree_skb(myFirstTxDesc->skb); myFirstTxDesc->skb = 0; myFirstTxDesc = phys_to_virt(myFirstTxDesc->descr.next); @@ -1161,7 +1287,7 @@ e100_tx_timeout(struct net_device *dev) static int e100_send_packet(struct sk_buff *skb, struct net_device *dev) { - struct net_local *np = (struct net_local *)dev->priv; + struct net_local *np = netdev_priv(dev); unsigned char *buf = skb->data; unsigned long flags; @@ -1174,7 +1300,7 @@ e100_send_packet(struct sk_buff *skb, struct net_device *dev) dev->trans_start = jiffies; - e100_hardware_send_packet(buf, skb->len); + e100_hardware_send_packet(np, buf, skb->len); myNextTxDesc = phys_to_virt(myNextTxDesc->descr.next); @@ -1197,13 +1323,15 @@ static irqreturn_t e100rxtx_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; - struct net_local *np = (struct net_local *)dev->priv; - unsigned long irqbits = *R_IRQ_MASK2_RD; + struct net_local *np = netdev_priv(dev); + unsigned long irqbits; - /* Disable RX/TX IRQs to avoid reentrancy */ - *R_IRQ_MASK2_CLR = - IO_STATE(R_IRQ_MASK2_CLR, dma0_eop, clr) | - IO_STATE(R_IRQ_MASK2_CLR, dma1_eop, clr); + /* + * Note that both rx and tx interrupts are blocked at this point, + * regardless of which got us here. + */ + + irqbits = *R_IRQ_MASK2_RD; /* Handle received packets */ if (irqbits & IO_STATE(R_IRQ_MASK2_RD, dma1_eop, active)) { @@ -1219,7 +1347,7 @@ e100rxtx_interrupt(int irq, void *dev_id) * allocate a new buffer to put a packet in. */ e100_rx(dev); - ((struct net_local *)dev->priv)->stats.rx_packets++; + np->stats.rx_packets++; /* restart/continue on the channel, for safety */ *R_DMA_CH1_CMD = IO_STATE(R_DMA_CH1_CMD, cmd, restart); /* clear dma channel 1 eop/descr irq bits */ @@ -1233,9 +1361,8 @@ e100rxtx_interrupt(int irq, void *dev_id) } /* Report any packets that have been sent */ - while (myFirstTxDesc != phys_to_virt(*R_DMA_CH0_FIRST) && - myFirstTxDesc != myNextTxDesc) - { + while (virt_to_phys(myFirstTxDesc) != *R_DMA_CH0_FIRST && + (netif_queue_stopped(dev) || myFirstTxDesc != myNextTxDesc)) { np->stats.tx_bytes += myFirstTxDesc->skb->len; np->stats.tx_packets++; @@ -1244,19 +1371,15 @@ e100rxtx_interrupt(int irq, void *dev_id) dev_kfree_skb_irq(myFirstTxDesc->skb); myFirstTxDesc->skb = 0; myFirstTxDesc = phys_to_virt(myFirstTxDesc->descr.next); + /* Wake up queue. */ + netif_wake_queue(dev); } if (irqbits & IO_STATE(R_IRQ_MASK2_RD, dma0_eop, active)) { - /* acknowledge the eop interrupt and wake up queue */ + /* acknowledge the eop interrupt. */ *R_DMA_CH0_CLR_INTR = IO_STATE(R_DMA_CH0_CLR_INTR, clr_eop, do); - netif_wake_queue(dev); } - /* Enable RX/TX IRQs again */ - *R_IRQ_MASK2_SET = - IO_STATE(R_IRQ_MASK2_SET, dma0_eop, set) | - IO_STATE(R_IRQ_MASK2_SET, dma1_eop, set); - return IRQ_HANDLED; } @@ -1264,7 +1387,7 @@ static irqreturn_t e100nw_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; - struct net_local *np = (struct net_local *)dev->priv; + struct net_local *np = netdev_priv(dev); unsigned long irqbits = *R_IRQ_MASK0_RD; /* check for underrun irq */ @@ -1286,7 +1409,6 @@ e100nw_interrupt(int irq, void *dev_id) SETS(network_tr_ctrl_shadow, R_NETWORK_TR_CTRL, clr_error, clr); *R_NETWORK_TR_CTRL = network_tr_ctrl_shadow; SETS(network_tr_ctrl_shadow, R_NETWORK_TR_CTRL, clr_error, nop); - *R_NETWORK_TR_CTRL = IO_STATE(R_NETWORK_TR_CTRL, clr_error, clr); np->stats.tx_errors++; D(printk("ethernet excessive collisions!\n")); } @@ -1299,12 +1421,13 @@ e100_rx(struct net_device *dev) { struct sk_buff *skb; int length = 0; - struct net_local *np = (struct net_local *)dev->priv; + struct net_local *np = netdev_priv(dev); unsigned char *skb_data_ptr; #ifdef ETHDEBUG int i; #endif - + etrax_eth_descr *prevRxDesc; /* The descriptor right before myNextRxDesc */ + spin_lock(&np->led_lock); if (!led_active && time_after(jiffies, led_next_time)) { /* light the network leds depending on the current speed. */ e100_set_network_leds(NETWORK_ACTIVITY); @@ -1314,9 +1437,10 @@ e100_rx(struct net_device *dev) led_active = 1; mod_timer(&clear_led_timer, jiffies + HZ/10); } + spin_unlock(&np->led_lock); length = myNextRxDesc->descr.hw_len - 4; - ((struct net_local *)dev->priv)->stats.rx_bytes += length; + np->stats.rx_bytes += length; #ifdef ETHDEBUG printk("Got a packet of length %d:\n", length); @@ -1336,7 +1460,7 @@ e100_rx(struct net_device *dev) if (!skb) { np->stats.rx_errors++; printk(KERN_NOTICE "%s: Memory squeeze, dropping packet.\n", dev->name); - return; + goto update_nextrxdesc; } skb_put(skb, length - ETHER_HEAD_LEN); /* allocate room for the packet body */ @@ -1354,15 +1478,15 @@ e100_rx(struct net_device *dev) else { /* Large packet, send directly to upper layers and allocate new * memory (aligned to cache line boundary to avoid bug). - * Before sending the skb to upper layers we must make sure that - * skb->data points to the aligned start of the packet. + * Before sending the skb to upper layers we must make sure + * that skb->data points to the aligned start of the packet. */ int align; struct sk_buff *new_skb = dev_alloc_skb(MAX_MEDIA_DATA_SIZE + 2 * L1_CACHE_BYTES); if (!new_skb) { np->stats.rx_errors++; printk(KERN_NOTICE "%s: Memory squeeze, dropping packet.\n", dev->name); - return; + goto update_nextrxdesc; } skb = myNextRxDesc->skb; align = (int)phys_to_virt(myNextRxDesc->descr.buf) - (int)skb->data; @@ -1377,9 +1501,10 @@ e100_rx(struct net_device *dev) /* Send the packet to the upper layers */ netif_rx(skb); + update_nextrxdesc: /* Prepare for next packet */ myNextRxDesc->descr.status = 0; - myPrevRxDesc = myNextRxDesc; + prevRxDesc = myNextRxDesc; myNextRxDesc = phys_to_virt(myNextRxDesc->descr.next); rx_queue_len++; @@ -1387,9 +1512,9 @@ e100_rx(struct net_device *dev) /* Check if descriptors should be returned */ if (rx_queue_len == RX_QUEUE_THRESHOLD) { flush_etrax_cache(); - myPrevRxDesc->descr.ctrl |= d_eol; + prevRxDesc->descr.ctrl |= d_eol; myLastRxDesc->descr.ctrl &= ~d_eol; - myLastRxDesc = myPrevRxDesc; + myLastRxDesc = prevRxDesc; rx_queue_len = 0; } } @@ -1398,7 +1523,7 @@ e100_rx(struct net_device *dev) static int e100_close(struct net_device *dev) { - struct net_local *np = (struct net_local *)dev->priv; + struct net_local *np = netdev_priv(dev); printk(KERN_INFO "Closing %s.\n", dev->name); @@ -1426,6 +1551,9 @@ e100_close(struct net_device *dev) free_irq(NETWORK_DMA_TX_IRQ_NBR, (void *)dev); free_irq(NETWORK_STATUS_IRQ_NBR, (void *)dev); + cris_free_dma(NETWORK_TX_DMA_NBR, cardname); + cris_free_dma(NETWORK_RX_DMA_NBR, cardname); + /* Update the statistics here. */ update_rx_stats(&np->stats); @@ -1443,18 +1571,11 @@ e100_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mii_ioctl_data *data = if_mii(ifr); struct net_local *np = netdev_priv(dev); + int rc = 0; + int old_autoneg; spin_lock(&np->lock); /* Preempt protection */ switch (cmd) { - case SIOCGMIIPHY: /* Get PHY address */ - data->phy_id = mdio_phy_addr; - break; - case SIOCGMIIREG: /* Read MII register */ - data->val_out = e100_get_mdio_reg(dev, mdio_phy_addr, data->reg_num); - break; - case SIOCSMIIREG: /* Write MII register */ - e100_set_mdio_reg(dev, mdio_phy_addr, data->reg_num, data->val_in); - break; /* The ioctls below should be considered obsolete but are */ /* still present for compatability with old scripts/apps */ case SET_ETH_SPEED_10: /* 10 Mbps */ @@ -1463,60 +1584,47 @@ e100_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SET_ETH_SPEED_100: /* 100 Mbps */ e100_set_speed(dev, 100); break; - case SET_ETH_SPEED_AUTO: /* Auto negotiate speed */ + case SET_ETH_SPEED_AUTO: /* Auto-negotiate speed */ e100_set_speed(dev, 0); break; - case SET_ETH_DUPLEX_HALF: /* Half duplex. */ + case SET_ETH_DUPLEX_HALF: /* Half duplex */ e100_set_duplex(dev, half); break; - case SET_ETH_DUPLEX_FULL: /* Full duplex. */ + case SET_ETH_DUPLEX_FULL: /* Full duplex */ e100_set_duplex(dev, full); break; - case SET_ETH_DUPLEX_AUTO: /* Autonegotiate duplex*/ + case SET_ETH_DUPLEX_AUTO: /* Auto-negotiate duplex */ e100_set_duplex(dev, autoneg); break; + case SET_ETH_AUTONEG: + old_autoneg = autoneg_normal; + autoneg_normal = *(int*)data; + if (autoneg_normal != old_autoneg) + e100_negotiate(dev); + break; default: - return -EINVAL; + rc = generic_mii_ioctl(&np->mii_if, if_mii(ifr), + cmd, NULL); + break; } spin_unlock(&np->lock); - return 0; + return rc; } -static int e100_set_settings(struct net_device *dev, - struct ethtool_cmd *ecmd) +static int e100_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) { - ecmd->supported = SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII | - SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full; - ecmd->port = PORT_TP; - ecmd->transceiver = XCVR_EXTERNAL; - ecmd->phy_address = mdio_phy_addr; - ecmd->speed = current_speed; - ecmd->duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; - ecmd->advertising = ADVERTISED_TP; + struct net_local *np = netdev_priv(dev); + int err; - if (current_duplex == autoneg && current_speed_selection == 0) - ecmd->advertising |= ADVERTISED_Autoneg; - else { - ecmd->advertising |= - ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | - ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - if (current_speed_selection == 10) - ecmd->advertising &= ~(ADVERTISED_100baseT_Half | - ADVERTISED_100baseT_Full); - else if (current_speed_selection == 100) - ecmd->advertising &= ~(ADVERTISED_10baseT_Half | - ADVERTISED_10baseT_Full); - if (current_duplex == half) - ecmd->advertising &= ~(ADVERTISED_10baseT_Full | - ADVERTISED_100baseT_Full); - else if (current_duplex == full) - ecmd->advertising &= ~(ADVERTISED_10baseT_Half | - ADVERTISED_100baseT_Half); - } + spin_lock_irq(&np->lock); + err = mii_ethtool_gset(&np->mii_if, cmd); + spin_unlock_irq(&np->lock); - ecmd->autoneg = AUTONEG_ENABLE; - return 0; + /* The PHY may support 1000baseT, but the Etrax100 does not. */ + cmd->supported &= ~(SUPPORTED_1000baseT_Half + | SUPPORTED_1000baseT_Full); + return err; } static int e100_set_settings(struct net_device *dev, @@ -1560,7 +1668,8 @@ static const struct ethtool_ops e100_ethtool_ops = { static int e100_set_config(struct net_device *dev, struct ifmap *map) { - struct net_local *np = (struct net_local *)dev->priv; + struct net_local *np = netdev_priv(dev); + spin_lock(&np->lock); /* Preempt protection */ switch(map->port) { @@ -1612,7 +1721,6 @@ update_tx_stats(struct net_device_stats *es) es->collisions += IO_EXTRACT(R_TR_COUNTERS, single_col, r) + IO_EXTRACT(R_TR_COUNTERS, multiple_col, r); - es->tx_errors += IO_EXTRACT(R_TR_COUNTERS, deferred, r); } /* @@ -1622,8 +1730,9 @@ update_tx_stats(struct net_device_stats *es) static struct net_device_stats * e100_get_stats(struct net_device *dev) { - struct net_local *lp = (struct net_local *)dev->priv; + struct net_local *lp = netdev_priv(dev); unsigned long flags; + spin_lock_irqsave(&lp->lock, flags); update_rx_stats(&lp->stats); @@ -1643,13 +1752,13 @@ e100_get_stats(struct net_device *dev) static void set_multicast_list(struct net_device *dev) { - struct net_local *lp = (struct net_local *)dev->priv; + struct net_local *lp = netdev_priv(dev); int num_addr = dev->mc_count; unsigned long int lo_bits; unsigned long int hi_bits; + spin_lock(&lp->lock); - if (dev->flags & IFF_PROMISC) - { + if (dev->flags & IFF_PROMISC) { /* promiscuous mode */ lo_bits = 0xfffffffful; hi_bits = 0xfffffffful; @@ -1679,9 +1788,10 @@ set_multicast_list(struct net_device *dev) struct dev_mc_list *dmi = dev->mc_list; int i; char *baddr; + lo_bits = 0x00000000ul; hi_bits = 0x00000000ul; - for (i=0; i= 32) { hi_bits |= (1 << (hash_ix-32)); - } - else { + } else { lo_bits |= (1 << hash_ix); } dmi = dmi->next; @@ -1724,10 +1833,11 @@ set_multicast_list(struct net_device *dev) } void -e100_hardware_send_packet(char *buf, int length) +e100_hardware_send_packet(struct net_local *np, char *buf, int length) { D(printk("e100 send pack, buf 0x%x len %d\n", buf, length)); + spin_lock(&np->led_lock); if (!led_active && time_after(jiffies, led_next_time)) { /* light the network leds depending on the current speed. */ e100_set_network_leds(NETWORK_ACTIVITY); @@ -1737,6 +1847,7 @@ e100_hardware_send_packet(char *buf, int length) led_active = 1; mod_timer(&clear_led_timer, jiffies + HZ/10); } + spin_unlock(&np->led_lock); /* configure the tx dma descriptor */ myNextTxDesc->descr.sw_len = length; @@ -1754,6 +1865,11 @@ e100_hardware_send_packet(char *buf, int length) static void e100_clear_network_leds(unsigned long dummy) { + struct net_device *dev = (struct net_device *)dummy; + struct net_local *np = netdev_priv(dev); + + spin_lock(&np->led_lock); + if (led_active && time_after(jiffies, led_next_time)) { e100_set_network_leds(NO_NETWORK_ACTIVITY); @@ -1761,6 +1877,8 @@ e100_clear_network_leds(unsigned long dummy) led_next_time = jiffies + NET_FLASH_PAUSE; led_active = 0; } + + spin_unlock(&np->led_lock); } static void @@ -1781,19 +1899,25 @@ e100_set_network_leds(int active) #else LED_NETWORK_SET(LED_OFF); #endif - } - else if (light_leds) { + } else if (light_leds) { if (current_speed == 10) { LED_NETWORK_SET(LED_ORANGE); } else { LED_NETWORK_SET(LED_GREEN); } - } - else { + } else { LED_NETWORK_SET(LED_OFF); } } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void +e100_netpoll(struct net_device* netdev) +{ + e100rxtx_interrupt(NETWORK_DMA_TX_IRQ_NBR, netdev, NULL); +} +#endif + static int etrax_init_module(void) { diff --git a/include/asm-cris/ethernet.h b/include/asm-cris/ethernet.h index 30da58a7d00..4d58652c3a4 100644 --- a/include/asm-cris/ethernet.h +++ b/include/asm-cris/ethernet.h @@ -15,4 +15,7 @@ #define SET_ETH_DUPLEX_AUTO SIOCDEVPRIVATE+3 /* Auto neg duplex */ #define SET_ETH_DUPLEX_HALF SIOCDEVPRIVATE+4 /* Full duplex */ #define SET_ETH_DUPLEX_FULL SIOCDEVPRIVATE+5 /* Half duplex */ +#define SET_ETH_ENABLE_LEDS SIOCDEVPRIVATE+6 /* Enable net LEDs */ +#define SET_ETH_DISABLE_LEDS SIOCDEVPRIVATE+7 /* Disable net LEDs */ +#define SET_ETH_AUTONEG SIOCDEVPRIVATE+8 #endif /* _CRIS_ETHERNET_H */ -- cgit v1.2.3 From 633edf5a4fff0675851e377cc5f0c9072683a5f4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Nov 2007 17:00:58 -0800 Subject: cris-build-fixes-update-eth_v10c-ethernet-driver-fix Fix locking bug noted by Roel Kluin <12o3l@tiscali.nl>. Cc: Jeff Garzik Cc: Jesper Nilsson Cc: Mikael Starvik Cc: Roel Kluin <12o3l@tiscali.nl> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/cris/eth_v10.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c index 26ffa810e58..5dc984a3e00 100644 --- a/drivers/net/cris/eth_v10.c +++ b/drivers/net/cris/eth_v10.c @@ -1088,6 +1088,8 @@ e100_set_duplex(struct net_device* dev, enum duplex new_duplex) static int e100_probe_transceiver(struct net_device* dev) { + int ret = 0; + #if !defined(CONFIG_ETRAX_NO_PHY) unsigned int phyid_high; unsigned int phyid_low; @@ -1104,8 +1106,10 @@ e100_probe_transceiver(struct net_device* dev) np->mii_if.phy_id, MII_BMSR) != 0xffff) break; } - if (np->mii_if.phy_id == 32) - return -ENODEV; + if (np->mii_if.phy_id == 32) { + ret = -ENODEV; + goto out; + } /* Get manufacturer */ phyid_high = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_PHYSID1); @@ -1117,10 +1121,10 @@ e100_probe_transceiver(struct net_device* dev) break; } transceiver = ops; - +out: spin_unlock(&np->transceiver_lock); #endif - return 0; + return ret; } static int -- cgit v1.2.3 From 7b275523aba522aa76891861ee32ba2456e5f146 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:00:59 -0800 Subject: cris build fixes: corrected and improved NMI and IRQ handling Corrects compile errors and the following: - Remove oldset parameter from do_signal and do_notify_resume. - Modified to fit new consolidated IRQ handling code. - Reverse check order between external nmi and watchdog nmi to avoid false watchdog oops in case of a glitch on the nmi pin. - Return from an pin-generated NMI the same way as for other interrupts. - Moved blocking of ethernet rx/tx irq from ethernet interrupt handler to low-level asm interrupt handlers. Fixed in the multiple interrupt handler also. - Add space for thread local storage in thread_info struct. - Add NO_DMA to Kconfig, and include arch specific Kconfig using arch independent path. Include subsystem Kconfigs for pcmcia, usb, i2c, rtc and pci. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/Kconfig | 15 +++++++++- arch/cris/arch-v10/drivers/Kconfig | 1 + arch/cris/arch-v10/kernel/entry.S | 41 +++++++++++++------------- arch/cris/arch-v10/kernel/irq.c | 59 ++++++++++++++++++++++++++++++++++++-- arch/cris/kernel/process.c | 5 ++++ arch/cris/kernel/ptrace.c | 6 ++-- include/asm-cris/thread_info.h | 9 ++++-- 7 files changed, 107 insertions(+), 29 deletions(-) diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index f653772a87a..222da1501f4 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -13,6 +13,10 @@ config ZONE_DMA bool default y +config NO_DMA + bool + default y + config RWSEM_GENERIC_SPINLOCK bool default y @@ -153,7 +157,8 @@ source "net/Kconfig" # bring in ETRAX built-in drivers menu "Drivers for built-in interfaces" -source arch/cris/arch-v10/drivers/Kconfig +# arch/cris/arch is a symlink to correct arch (arch-v10 or arch-v32) +source arch/cris/arch/drivers/Kconfig endmenu @@ -184,6 +189,10 @@ source "drivers/isdn/Kconfig" source "drivers/telephony/Kconfig" +source "drivers/i2c/Kconfig" + +source "drivers/rtc/Kconfig" + # # input before char - char/joystick depends on it. As does USB. # @@ -198,6 +207,10 @@ source "fs/Kconfig" source "sound/Kconfig" +source "drivers/pcmcia/Kconfig" + +source "drivers/pci/Kconfig" + source "drivers/usb/Kconfig" source "kernel/Kconfig.instrumentation" diff --git a/arch/cris/arch-v10/drivers/Kconfig b/arch/cris/arch-v10/drivers/Kconfig index 03e2e68f947..e6fc8455a37 100644 --- a/arch/cris/arch-v10/drivers/Kconfig +++ b/arch/cris/arch-v10/drivers/Kconfig @@ -2,6 +2,7 @@ config ETRAX_ETHERNET bool "Ethernet support" depends on ETRAX_ARCH_V10 select NET_ETHERNET + select MII help This option enables the ETRAX 100LX built-in 10/100Mbit Ethernet controller. diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index bc9bed97f22..ec62c951fa3 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -500,9 +500,8 @@ _work_notifysig: ;; deal with pending signals and notify-resume requests move.d $r9, $r10 ; do_notify_resume syscall/irq param - moveq 0, $r11 ; oldset param - 0 in this case - move.d $sp, $r12 ; the regs param - move.d $r1, $r13 ; the thread_info_flags parameter + move.d $sp, $r11 ; the regs param + move.d $r1, $r12 ; the thread_info_flags parameter jsr do_notify_resume ba _Rexit @@ -678,13 +677,19 @@ IRQ1_interrupt: push $r10 ; push orig_r10 clear.d [$sp=$sp-4] ; frametype == 0, normal frame + ;; If there is a glitch on the NMI pin shorter than ~100ns + ;; (i.e. non-active by the time we get here) then the nmi_pin bit + ;; in R_IRQ_MASK0_RD will already be cleared. The watchdog_nmi bit + ;; is cleared by us however (when feeding the watchdog), which is why + ;; we use that bit to determine what brought us here. + move.d [R_IRQ_MASK0_RD], $r1 ; External NMI or watchdog? - and.d 0x80000000, $r1 - beq wdog + and.d (1<<30), $r1 + bne wdog move.d $sp, $r10 jsr handle_nmi setf m ; Enable NMI again - retb ; Return from NMI + ba _Rexit ; Return the standard way nop wdog: #if defined(CONFIG_ETRAX_WATCHDOG) && !defined(CONFIG_SVINTO_SIM) @@ -775,22 +780,9 @@ multiple_interrupt: push $r10 ; push orig_r10 clear.d [$sp=$sp-4] ; frametype == 0, normal frame - moveq 2, $r2 ; first bit we care about is the timer0 irq - move.d [R_VECT_MASK_RD], $r0; read the irq bits that triggered the multiple irq - move.d $r0, [R_VECT_MASK_CLR] ; Block all active IRQs -1: - btst $r2, $r0 ; check for the irq given by bit r2 - bpl 2f - move.d $r2, $r10 ; First argument to do_IRQ - move.d $sp, $r11 ; second argument to do_IRQ - jsr do_IRQ -2: - addq 1, $r2 ; next vector bit - cmp.b 32, $r2 - bne 1b ; process all irq's up to and including number 31 - moveq 0, $r9 ; make ret_from_intr realise we came from an ir + move.d $sp, $r10 + jsr do_multiple_IRQ - move.d $r0, [R_VECT_MASK_SET] ; Unblock all the IRQs jump ret_from_intr do_sigtrap: @@ -837,6 +829,13 @@ _ugdb_handle_breakpoint: ba do_sigtrap ; SIGTRAP the offending process. pop $dccr ; Restore dccr in delay slot. + .global kernel_execve +kernel_execve: + move.d __NR_execve, $r9 + break 13 + ret + nop + .data hw_bp_trigs: diff --git a/arch/cris/arch-v10/kernel/irq.c b/arch/cris/arch-v10/kernel/irq.c index 845c95f6e87..e06ab0050d3 100644 --- a/arch/cris/arch-v10/kernel/irq.c +++ b/arch/cris/arch-v10/kernel/irq.c @@ -12,10 +12,16 @@ */ #include +#include #include +#include #include #include +/* From kgdb.c. */ +extern void kgdb_init(void); +extern void breakpoint(void); + #define mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr)); #define unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr)); @@ -75,8 +81,8 @@ BUILD_IRQ(12, 0x1000) BUILD_IRQ(13, 0x2000) void mmu_bus_fault(void); /* IRQ 14 is the bus fault interrupt */ void multiple_interrupt(void); /* IRQ 15 is the multiple IRQ interrupt */ -BUILD_IRQ(16, 0x10000) -BUILD_IRQ(17, 0x20000) +BUILD_IRQ(16, 0x10000 | 0x20000) /* ethernet tx interrupt needs to block rx */ +BUILD_IRQ(17, 0x20000 | 0x10000) /* ...and vice versa */ BUILD_IRQ(18, 0x40000) BUILD_IRQ(19, 0x80000) BUILD_IRQ(20, 0x100000) @@ -147,6 +153,55 @@ void system_call(void); /* from entry.S */ void do_sigtrap(void); /* from entry.S */ void gdb_handle_breakpoint(void); /* from entry.S */ +extern void do_IRQ(int irq, struct pt_regs * regs); + +/* Handle multiple IRQs */ +void do_multiple_IRQ(struct pt_regs* regs) +{ + int bit; + unsigned masked; + unsigned mask; + unsigned ethmask = 0; + + /* Get interrupts to mask and handle */ + mask = masked = *R_VECT_MASK_RD; + + /* Never mask timer IRQ */ + mask &= ~(IO_MASK(R_VECT_MASK_RD, timer0)); + + /* + * If either ethernet interrupt (rx or tx) is active then block + * the other one too. Unblock afterwards also. + */ + if (mask & + (IO_STATE(R_VECT_MASK_RD, dma0, active) | + IO_STATE(R_VECT_MASK_RD, dma1, active))) { + ethmask = (IO_MASK(R_VECT_MASK_RD, dma0) | + IO_MASK(R_VECT_MASK_RD, dma1)); + } + + /* Block them */ + *R_VECT_MASK_CLR = (mask | ethmask); + + /* An extra irq_enter here to prevent softIRQs to run after + * each do_IRQ. This will decrease the interrupt latency. + */ + irq_enter(); + + /* Handle all IRQs */ + for (bit = 2; bit < 32; bit++) { + if (masked & (1 << bit)) { + do_IRQ(bit, regs); + } + } + + /* This irq_exit() will trigger the soft IRQs. */ + irq_exit(); + + /* Unblock the IRQs again */ + *R_VECT_MASK_SET = (masked | ethmask); +} + /* init_IRQ() is called by start_kernel and is responsible for fixing IRQ masks and setting the irq vector table. */ diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 123451c4415..9ca558fc5bc 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -195,6 +195,11 @@ EXPORT_SYMBOL(enable_hlt); */ void (*pm_idle)(void); +extern void default_idle(void); + +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c index 1085d037027..3ccd20e85dc 100644 --- a/arch/cris/kernel/ptrace.c +++ b/arch/cris/kernel/ptrace.c @@ -81,13 +81,13 @@ /* notification of userspace execution resumption * - triggered by current->work.notify_resume */ -extern int do_signal(int canrestart, sigset_t *oldset, struct pt_regs *regs); +extern int do_signal(int canrestart, struct pt_regs *regs); -void do_notify_resume(int canrestart, sigset_t *oldset, struct pt_regs *regs, +void do_notify_resume(int canrestart, struct pt_regs *regs, __u32 thread_info_flags ) { /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) - do_signal(canrestart,oldset,regs); + do_signal(canrestart,regs); } diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h index fde39f6c49c..784668ab0fa 100644 --- a/include/asm-cris/thread_info.h +++ b/include/asm-cris/thread_info.h @@ -32,6 +32,7 @@ struct thread_info { unsigned long flags; /* low level flags */ __u32 cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ + __u32 tls; /* TLS for this thread */ mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead @@ -79,14 +80,18 @@ struct thread_info { * - other flags in MSW */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 #define _TIF_SYSCALL_TRACE (1< Date: Wed, 14 Nov 2007 17:01:00 -0800 Subject: cris build fixes: fixes in arch/cris/kernel/time.c - Remove debug print. - Change #if to #ifdef to avoid compile time warning if CONFIG_PROFILING isn't set. - Number of parameters to profile_tick has changed, drop the regs parameter. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/kernel/time.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index acfd0455940..7a2cc7efbcf 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -171,10 +171,6 @@ get_cmos_time(void) mon = CMOS_READ(RTC_MONTH); year = CMOS_READ(RTC_YEAR); - printk(KERN_DEBUG - "rtc: sec 0x%x min 0x%x hour 0x%x day 0x%x mon 0x%x year 0x%x\n", - sec, min, hour, day, mon, year); - BCD_TO_BIN(sec); BCD_TO_BIN(min); BCD_TO_BIN(hour); @@ -207,12 +203,12 @@ void cris_do_profile(struct pt_regs* regs) { -#if CONFIG_SYSTEM_PROFILER +#ifdef CONFIG_SYSTEM_PROFILER cris_profile_sample(regs); #endif -#if CONFIG_PROFILING - profile_tick(CPU_PROFILING, regs); +#ifdef CONFIG_PROFILING + profile_tick(CPU_PROFILING); #endif } -- cgit v1.2.3 From a8e2a8b22712593980b524dc45baa06a36286fc0 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:01 -0800 Subject: cris build fixes: setup.c needs param.h Include linux/param.h for HZ. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/cris/arch-v10/kernel/setup.c b/arch/cris/arch-v10/kernel/setup.c index 682ef955aec..ad410afe3cc 100644 --- a/arch/cris/arch-v10/kernel/setup.c +++ b/arch/cris/arch-v10/kernel/setup.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_FS #define HAS_FPU 0x0001 -- cgit v1.2.3 From 5af26b1862a49770e6cf7a297ddd9fd8421b01fe Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:02 -0800 Subject: cris build fixes: fix crisksyms.c - Add __negdi2 to exported symbols. - Remove string functions from exported symbols. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/kernel/crisksyms.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c index 105bb5ed48f..62f0e752915 100644 --- a/arch/cris/kernel/crisksyms.c +++ b/arch/cris/kernel/crisksyms.c @@ -27,6 +27,7 @@ extern void __Mod(void); extern void __ashldi3(void); extern void __ashrdi3(void); extern void __lshrdi3(void); +extern void __negdi2(void); extern void iounmap(volatile void * __iomem); /* Platform dependent support */ @@ -34,19 +35,6 @@ EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(get_cmos_time); EXPORT_SYMBOL(loops_per_usec); -/* String functions */ -EXPORT_SYMBOL(memcmp); -EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strchr); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strncat); -EXPORT_SYMBOL(strncmp); -EXPORT_SYMBOL(strncpy); - /* Math functions */ EXPORT_SYMBOL(__Udiv); EXPORT_SYMBOL(__Umod); @@ -55,6 +43,7 @@ EXPORT_SYMBOL(__Mod); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__negdi2); /* Memory functions */ EXPORT_SYMBOL(__ioremap); @@ -84,4 +73,4 @@ EXPORT_SYMBOL(start_one_shot_timer); EXPORT_SYMBOL(del_fast_timer); EXPORT_SYMBOL(schedule_usleep); #endif - +EXPORT_SYMBOL(csum_partial); -- cgit v1.2.3 From f150f35232daaedd86e46d2581e3b7eb40e2c360 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:04 -0800 Subject: cris build fixes: defconfig updates Remove some unused config lines, and add some others that needs to be off for a succesful compile. This is the minimal set of changes for 2.6.23. Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/defconfig | 76 +++++++++++++++++++++++------------------------------ 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/arch/cris/defconfig b/arch/cris/defconfig index 142a10818af..e2d81859ee1 100644 --- a/arch/cris/defconfig +++ b/arch/cris/defconfig @@ -276,6 +276,7 @@ CONFIG_MTDRAM_ABS_POS=0x0 # CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_DEV_COW_COMMON is not set # CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_CRYPTOLOOP is not set # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y @@ -302,16 +303,14 @@ CONFIG_IOSCHED_CFQ=y # # ATA/ATAPI/MFM/RLL support # -CONFIG_IDE=y -CONFIG_BLK_DEV_IDE=y +# CONFIG_IDE is not set +# CONFIG_PARIDE is not set # # Please see Documentation/ide.txt for help/info on IDE drives # # CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=y # CONFIG_IDEDISK_MULTI_MODE is not set -CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDETAPE is not set # CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_IDE_TASK_IOCTL is not set @@ -321,7 +320,6 @@ CONFIG_BLK_DEV_IDECD=y # # CONFIG_IDE_GENERIC is not set # CONFIG_IDE_ARM is not set -CONFIG_BLK_DEV_IDEDMA=y # CONFIG_IDEDMA_AUTO is not set # CONFIG_BLK_DEV_HD is not set @@ -329,6 +327,7 @@ CONFIG_BLK_DEV_IDEDMA=y # SCSI device support # # CONFIG_SCSI is not set +# CONFIG_ISCSI_TCP is not set # # IEEE 1394 (FireWire) support @@ -414,26 +413,11 @@ CONFIG_NETFILTER=y # CONFIG_NET_POLL_CONTROLLER is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set -CONFIG_BT=y -CONFIG_BT_L2CAP=y -# CONFIG_BT_SCO is not set -CONFIG_BT_RFCOMM=y -# CONFIG_BT_RFCOMM_TTY is not set -CONFIG_BT_BNEP=y -# CONFIG_BT_BNEP_MC_FILTER is not set -# CONFIG_BT_BNEP_PROTO_FILTER is not set -# CONFIG_BT_HIDP is not set - -# -# Bluetooth device drivers -# -CONFIG_BT_HCIUSB=y -# CONFIG_BT_HCIUSB_SCO is not set -# CONFIG_BT_HCIUART is not set -# CONFIG_BT_HCIBCM203X is not set -# CONFIG_BT_HCIBPA10X is not set -# CONFIG_BT_HCIBFUSB is not set -# CONFIG_BT_HCIVHCI is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_AF_RXRPC_DEBUG is not set +# CONFIG_BT is not set +# CONFIG_I2C is not set + CONFIG_NETDEVICES=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set @@ -485,31 +469,17 @@ CONFIG_NET_ETHERNET=y # # Input device support # -CONFIG_INPUT=y - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_EVBUG is not set +# CONFIG_INPUT is not set # # Input I/O drivers # -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y CONFIG_SERIO=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set -# CONFIG_SERIO_CT82C710 is not set -CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_LIBPS2 is not set # CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set # # Input Device Drivers @@ -525,6 +495,7 @@ CONFIG_MOUSE_PS2=y # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_VSXXXAA is not set # CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set @@ -542,6 +513,8 @@ CONFIG_MOUSE_PS2=y # # Non-8250 serial port support # +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -559,6 +532,8 @@ CONFIG_LEGACY_PTY_COUNT=256 # CONFIG_GEN_RTC is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set +# CONFIG_RTC_LIB is not set +# CONFIG_RTC_CLASS is not set # # Ftape, the floppy tape device driver @@ -660,7 +635,9 @@ CONFIG_NFS_V3=y # CONFIG_NFSD is not set CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y +# CONFIG_SUNRPC_BIND34 is not set # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -685,10 +662,22 @@ CONFIG_MSDOS_PARTITION=y # # CONFIG_SOUND is not set +# +# Generic devices +# +# CONFIG_SND_MPU401_UART is not set +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + # # PCCARD (PCMCIA/CardBus) support # # CONFIG_PCCARD is not set +# CONFIG_PARPORT_PC_PCMCIA is not set +# CONFIG_NET_PCMCIA is not set # # PC-card bridges @@ -734,6 +723,7 @@ CONFIG_USB_DEVICEFS=y # USB Input Devices # # CONFIG_USB_HID is not set +# HID_SUPPORT is not set # # USB HID Boot Protocol drivers @@ -829,7 +819,7 @@ CONFIG_USB_RTL8150=y # # Hardware crypto devices -# +# CONFIG_CRYPTO_HW is not set # # Library routines -- cgit v1.2.3 From 16ad1b49104684da3ab0fede79f29b01f4c76896 Mon Sep 17 00:00:00 2001 From: Alex Unleashed Date: Wed, 14 Nov 2007 17:01:06 -0800 Subject: cris: ARRAY_SIZE() cleanup I'm converting most array size calculations under arch/ to use ARRAY_SIZE(). This is the patch for CRIS. Signed-off-by: Alejandro Martinez Ruiz Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/kernel/io_interface_mux.c | 2 +- arch/cris/arch-v10/kernel/setup.c | 4 ++-- arch/cris/arch-v32/drivers/sync_serial.c | 2 +- arch/cris/arch-v32/kernel/io.c | 2 +- arch/cris/arch-v32/kernel/setup.c | 6 ++---- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/cris/arch-v10/kernel/io_interface_mux.c b/arch/cris/arch-v10/kernel/io_interface_mux.c index 29d48ad00df..3a9114e89ed 100644 --- a/arch/cris/arch-v10/kernel/io_interface_mux.c +++ b/arch/cris/arch-v10/kernel/io_interface_mux.c @@ -304,7 +304,7 @@ static unsigned char clear_group_from_set(const unsigned char groups, struct if_ static struct if_group *get_group(const unsigned char groups) { int i; - for (i = 0; i < sizeof(if_groups)/sizeof(struct if_group); i++) { + for (i = 0; i < ARRAY_SIZE(if_groups); i++) { if (groups & if_groups[i].group) { return &if_groups[i]; } diff --git a/arch/cris/arch-v10/kernel/setup.c b/arch/cris/arch-v10/kernel/setup.c index ad410afe3cc..de27b50b72a 100644 --- a/arch/cris/arch-v10/kernel/setup.c +++ b/arch/cris/arch-v10/kernel/setup.c @@ -57,8 +57,8 @@ int show_cpuinfo(struct seq_file *m, void *v) revision = rdvr(); - if (revision >= sizeof cpu_info/sizeof *cpu_info) - info = &cpu_info[sizeof cpu_info/sizeof *cpu_info - 1]; + if (revision >= ARRAY_SIZE(cpu_info)) + info = &cpu_info[ARRAY_SIZE(cpu_info) - 1]; else info = &cpu_info[revision]; diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index df89298aafc..d581b0a92a3 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -185,7 +185,7 @@ static struct sync_port ports[]= } }; -#define NUMBER_OF_PORTS (sizeof(ports)/sizeof(sync_port)) +#define NUMBER_OF_PORTS ARRAY_SIZE(ports) static const struct file_operations sync_serial_fops = { .owner = THIS_MODULE, diff --git a/arch/cris/arch-v32/kernel/io.c b/arch/cris/arch-v32/kernel/io.c index dfbfcb8d258..a22a9e02e09 100644 --- a/arch/cris/arch-v32/kernel/io.c +++ b/arch/cris/arch-v32/kernel/io.c @@ -49,7 +49,7 @@ struct crisv32_ioport crisv32_ioports[] = } }; -#define NBR_OF_PORTS sizeof(crisv32_ioports)/sizeof(struct crisv32_ioport) +#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports) struct crisv32_iopin crisv32_led1_green; struct crisv32_iopin crisv32_led1_red; diff --git a/arch/cris/arch-v32/kernel/setup.c b/arch/cris/arch-v32/kernel/setup.c index 4662f363df6..72e9e8331f6 100644 --- a/arch/cris/arch-v32/kernel/setup.c +++ b/arch/cris/arch-v32/kernel/setup.c @@ -54,12 +54,10 @@ show_cpuinfo(struct seq_file *m, void *v) { int i; int cpu = (int)v - 1; - int entries; unsigned long revision; struct cpu_info *info; - entries = sizeof cpinfo / sizeof(struct cpu_info); - info = &cpinfo[entries - 1]; + info = &cpinfo[ARRAY_SIZE(cpinfo) - 1]; #ifdef CONFIG_SMP if (!cpu_online(cpu)) @@ -68,7 +66,7 @@ show_cpuinfo(struct seq_file *m, void *v) revision = rdvr(); - for (i = 0; i < entries; i++) { + for (i = 0; i < ARRAY_SIZE(cpinfo); i++) { if (cpinfo[i].rev == revision) { info = &cpinfo[i]; break; -- cgit v1.2.3 From 4b7e888933b0d71f9534830ae1edc1e23f0fb075 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:13 -0800 Subject: CRIS don't include bitops.h in posix_types.h In file included from include/asm/byteorder.h:23, from include/asm-generic/bitops/le.h:5, from include/asm-generic/bitops/ext2-non-atomic.h:4, from include/asm/bitops.h:163, from include/linux/bitops.h:17, from include/asm/posix_types.h:55, from include/linux/posix_types.h:47, from include/linux/types.h:11, from include/linux/capability.h:16, from include/linux/sched.h:49, from arch/cris/kernel/asm-offsets.c:1: include/linux/byteorder/little_endian.h:43: parse error before "__cpu_to_le64p" include/linux/byteorder/little_endian.h:44: warning: return type defaults to `int' include/linux/byteorder/little_endian.h: In function `__cpu_to_le64p': include/linux/byteorder/little_endian.h:45: `__le64' undeclared (first use in this function) Remove include of asm/bitops.h, not needed here, corrects compilation error (__le64 undeclared). Signed-off-by: Jesper Nilsson Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/posix_types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/asm-cris/posix_types.h b/include/asm-cris/posix_types.h index 92000d0c3f9..3a5e4c43eae 100644 --- a/include/asm-cris/posix_types.h +++ b/include/asm-cris/posix_types.h @@ -52,7 +52,6 @@ typedef struct { } __kernel_fsid_t; #ifdef __KERNEL__ -#include #undef __FD_SET #define __FD_SET(fd,fdsetp) set_bit(fd, (void *)(fdsetp)) -- cgit v1.2.3 From 77accbf505a073beecf32e60265697517e203bea Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:15 -0800 Subject: CRISv10 serial driver rewrite New and improved serial driver for CRISv10, take three, with improvements suggested by Jiri Slaby. - Call wait_event_interruptible with a _correct_ and sensible condition. - Removed superfluous test of info->flags & ASYNC_CLOSING, since that is done by wait_event_interruptible. - Moved common code for deregistering DMA and IRQ to deinit_port function. - Use setup_timer when initializing flush_timer. - Convert bit-field for uses_dma_in and uses_dma_out to regular bytes. - Removed CVS tags. - Removed defines and comments for CRIS_BUF_SIZE and TTY_THRESHOLD_THROTTLE (no longer used). - Cleaned up code to pass checkpatch. - Add crisv10.h header file. - Merge of CRISv10 from Axis internal CVS. Signed-off-by: Jesper Nilsson Reviewed-by: Jiri Slaby Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/crisv10.c | 1293 ++++++++++++++++------------------------------ drivers/serial/crisv10.h | 146 ++++++ 2 files changed, 580 insertions(+), 859 deletions(-) create mode 100644 drivers/serial/crisv10.h diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c index f523cdf4b02..a4e23cf4790 100644 --- a/drivers/serial/crisv10.c +++ b/drivers/serial/crisv10.c @@ -1,426 +1,10 @@ -/* $Id: serial.c,v 1.25 2004/09/29 10:33:49 starvik Exp $ - * +/* * Serial port driver for the ETRAX 100LX chip * - * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Axis Communications AB + * Copyright (C) 1998-2007 Axis Communications AB * * Many, many authors. Based once upon a time on serial.c for 16x50. * - * $Log: serial.c,v $ - * Revision 1.25 2004/09/29 10:33:49 starvik - * Resolved a dealock when printing debug from kernel. - * - * Revision 1.24 2004/08/27 23:25:59 johana - * rs_set_termios() must call change_speed() if c_iflag has changed or - * automatic XOFF handling will be enabled and transmitter will stop - * if 0x13 is received. - * - * Revision 1.23 2004/08/24 06:57:13 starvik - * More whitespace cleanup - * - * Revision 1.22 2004/08/24 06:12:20 starvik - * Whitespace cleanup - * - * Revision 1.20 2004/05/24 12:00:20 starvik - * Big merge of stuff from Linux 2.4 (e.g. manual mode for the serial port). - * - * Revision 1.19 2004/05/17 13:12:15 starvik - * Kernel console hook - * Big merge from Linux 2.4 still pending. - * - * Revision 1.18 2003/10/28 07:18:30 starvik - * Compiles with debug info - * - * Revision 1.17 2003/07/04 08:27:37 starvik - * Merge of Linux 2.5.74 - * - * Revision 1.16 2003/06/13 10:05:19 johana - * Help the user to avoid trouble by: - * Forcing mixed mode for status/control lines if not all pins are used. - * - * Revision 1.15 2003/06/13 09:43:01 johana - * Merged in the following changes from os/linux/arch/cris/drivers/serial.c - * + some minor changes to reduce diff. - * - * Revision 1.49 2003/05/30 11:31:54 johana - * Merged in change-branch--serial9bit that adds CMSPAR support for sticky - * parity (mark/space) - * - * Revision 1.48 2003/05/30 11:03:57 johana - * Implemented rs_send_xchar() by disabling the DMA and writing manually. - * Added e100_disable_txdma_channel() and e100_enable_txdma_channel(). - * Fixed rs_throttle() and rs_unthrottle() to properly call rs_send_xchar - * instead of setting info->x_char and check the CRTSCTS flag before - * controlling the rts pin. - * - * Revision 1.14 2003/04/09 08:12:44 pkj - * Corrected typo changes made upstream. - * - * Revision 1.13 2003/04/09 05:20:47 starvik - * Merge of Linux 2.5.67 - * - * Revision 1.11 2003/01/22 06:48:37 starvik - * Fixed warnings issued by GCC 3.2.1 - * - * Revision 1.9 2002/12/13 09:07:47 starvik - * Alert user that RX_TIMEOUT_TICKS==0 doesn't work - * - * Revision 1.8 2002/12/11 13:13:57 starvik - * Added arch/ to v10 specific includes - * Added fix from Linux 2.4 in serial.c (flush_to_flip_buffer) - * - * Revision 1.7 2002/12/06 07:13:57 starvik - * Corrected work queue stuff - * Removed CONFIG_ETRAX_SERIAL_FLUSH_DMA_FAST - * - * Revision 1.6 2002/11/21 07:17:46 starvik - * Change static inline to extern inline where otherwise outlined with gcc-3.2 - * - * Revision 1.5 2002/11/14 15:59:49 starvik - * Linux 2.5 port of the latest serial driver from 2.4. The work queue stuff - * probably doesn't work yet. - * - * Revision 1.42 2002/11/05 09:08:47 johana - * Better implementation of rs_stop() and rs_start() that uses the XOFF - * register to start/stop transmission. - * change_speed() also initilises XOFF register correctly so that - * auto_xoff is enabled when IXON flag is set by user. - * This gives fast XOFF response times. - * - * Revision 1.41 2002/11/04 18:40:57 johana - * Implemented rs_stop() and rs_start(). - * Simple tests using hwtestserial indicates that this should be enough - * to make it work. - * - * Revision 1.40 2002/10/14 05:33:18 starvik - * RS-485 uses fast timers even if SERIAL_FAST_TIMER is disabled - * - * Revision 1.39 2002/09/30 21:00:57 johana - * Support for CONFIG_ETRAX_SERx_DTR_RI_DSR_CD_MIXED where the status and - * control pins can be mixed between PA and PB. - * If no serial port uses MIXED old solution is used - * (saves a few bytes and cycles). - * control_pins struct uses masks instead of bit numbers. - * Corrected dummy values and polarity in line_info() so - * /proc/tty/driver/serial is now correct. - * (the E100_xxx_GET() macros is really active low - perhaps not obvious) - * - * Revision 1.38 2002/08/23 11:01:36 starvik - * Check that serial port is enabled in all interrupt handlers to avoid - * restarts of DMA channels not assigned to serial ports - * - * Revision 1.37 2002/08/13 13:02:37 bjornw - * Removed some warnings because of unused code - * - * Revision 1.36 2002/08/08 12:50:01 starvik - * Serial interrupt is shared with synchronous serial port driver - * - * Revision 1.35 2002/06/03 10:40:49 starvik - * Increased RS-485 RTS toggle timer to 2 characters - * - * Revision 1.34 2002/05/28 18:59:36 johana - * Whitespace and comment fixing to be more like etrax100ser.c 1.71. - * - * Revision 1.33 2002/05/28 17:55:43 johana - * RS-485 uses FAST_TIMER if enabled, and starts a short (one char time) - * timer from tranismit_chars (interrupt context). - * The timer toggles RTS in interrupt context when expired giving minimum - * latencies. - * - * Revision 1.32 2002/05/22 13:58:00 johana - * Renamed rs_write() to raw_write() and made it inline. - * New rs_write() handles RS-485 if configured and enabled - * (moved code from e100_write_rs485()). - * RS-485 ioctl's uses copy_from_user() instead of verify_area(). - * - * Revision 1.31 2002/04/22 11:20:03 johana - * Updated copyright years. - * - * Revision 1.30 2002/04/22 09:39:12 johana - * RS-485 support compiles. - * - * Revision 1.29 2002/01/14 16:10:01 pkj - * Allocate the receive buffers dynamically. The static 4kB buffer was - * too small for the peaks. This means that we can get rid of the extra - * buffer and the copying to it. It also means we require less memory - * under normal operations, but can use more when needed (there is a - * cap at 64kB for safety reasons). If there is no memory available - * we panic(), and die a horrible death... - * - * Revision 1.28 2001/12/18 15:04:53 johana - * Cleaned up write_rs485() - now it works correctly without padding extra - * char. - * Added sane default initialisation of rs485. - * Added #ifdef around dummy variables. - * - * Revision 1.27 2001/11/29 17:00:41 pkj - * 2kB seems to be too small a buffer when using 921600 bps, - * so increase it to 4kB (this was already done for the elinux - * version of the serial driver). - * - * Revision 1.26 2001/11/19 14:20:41 pkj - * Minor changes to comments and unused code. - * - * Revision 1.25 2001/11/12 20:03:43 pkj - * Fixed compiler warnings. - * - * Revision 1.24 2001/11/12 15:10:05 pkj - * Total redesign of the receiving part of the serial driver. - * Uses eight chained descriptors to write to a 4kB buffer. - * This data is then serialised into a 2kB buffer. From there it - * is copied into the TTY's flip buffers when they become available. - * A lot of copying, and the sizes of the buffers might need to be - * tweaked, but all in all it should work better than the previous - * version, without the need to modify the TTY code in any way. - * Also note that erroneous bytes are now correctly marked in the - * flag buffers (instead of always marking the first byte). - * - * Revision 1.23 2001/10/30 17:53:26 pkj - * * Set info->uses_dma to 0 when a port is closed. - * * Mark the timer1 interrupt as a fast one (SA_INTERRUPT). - * * Call start_flush_timer() in start_receive() if - * CONFIG_ETRAX_SERIAL_FLUSH_DMA_FAST is defined. - * - * Revision 1.22 2001/10/30 17:44:03 pkj - * Use %lu for received and transmitted counters in line_info(). - * - * Revision 1.21 2001/10/30 17:40:34 pkj - * Clean-up. The only change to functionality is that - * CONFIG_ETRAX_SERIAL_RX_TIMEOUT_TICKS(=5) is used instead of - * MAX_FLUSH_TIME(=8). - * - * Revision 1.20 2001/10/30 15:24:49 johana - * Added char_time stuff from 2.0 driver. - * - * Revision 1.19 2001/10/30 15:23:03 johana - * Merged with 1.13.2 branch + fixed indentation - * and changed CONFIG_ETRAX100_XYS to CONFIG_ETRAX_XYZ - * - * Revision 1.18 2001/09/24 09:27:22 pkj - * Completed ext_baud_table[] in cflag_to_baud() and cflag_to_etrax_baud(). - * - * Revision 1.17 2001/08/24 11:32:49 ronny - * More fixes for the CONFIG_ETRAX_SERIAL_PORT0 define. - * - * Revision 1.16 2001/08/24 07:56:22 ronny - * Added config ifdefs around ser0 irq requests. - * - * Revision 1.15 2001/08/16 09:10:31 bjarne - * serial.c - corrected the initialization of rs_table, the wrong defines - * where used. - * Corrected a test in timed_flush_handler. - * Changed configured to enabled. - * serial.h - Changed configured to enabled. - * - * Revision 1.14 2001/08/15 07:31:23 bjarne - * Introduced two new members to the e100_serial struct. - * configured - Will be set to 1 if the port has been configured in .config - * uses_dma - Should be set to 1 if the port uses DMA. Currently it is set - * to 1 - * when a port is opened. This is used to limit the DMA interrupt - * routines to only manipulate DMA channels actually used by the - * serial driver. - * - * Revision 1.13.2.2 2001/10/17 13:57:13 starvik - * Receiver was broken by the break fixes - * - * Revision 1.13.2.1 2001/07/20 13:57:39 ronny - * Merge with new stuff from etrax100ser.c. Works but haven't checked stuff - * like break handling. - * - * Revision 1.13 2001/05/09 12:40:31 johana - * Use DMA_NBR and IRQ_NBR defines from dma.h and irq.h - * - * Revision 1.12 2001/04/19 12:23:07 bjornw - * CONFIG_RS485 -> CONFIG_ETRAX_RS485 - * - * Revision 1.11 2001/04/05 14:29:48 markusl - * Updated according to review remarks i.e. - * -Use correct types in port structure to avoid compiler warnings - * -Try to use IO_* macros whenever possible - * -Open should never return -EBUSY - * - * Revision 1.10 2001/03/05 13:14:07 bjornw - * Another spelling fix - * - * Revision 1.9 2001/02/23 13:46:38 bjornw - * Spellling check - * - * Revision 1.8 2001/01/23 14:56:35 markusl - * Made use of ser1 optional - * Needed by USB - * - * Revision 1.7 2001/01/19 16:14:48 perf - * Added kernel options for serial ports 234. - * Changed option names from CONFIG_ETRAX100_XYZ to CONFIG_ETRAX_XYZ. - * - * Revision 1.6 2000/11/22 16:36:09 bjornw - * Please marketing by using the correct case when spelling Etrax. - * - * Revision 1.5 2000/11/21 16:43:37 bjornw - * Fixed so it compiles under CONFIG_SVINTO_SIM - * - * Revision 1.4 2000/11/15 17:34:12 bjornw - * Added a timeout timer for flushing input channels. The interrupt-based - * fast flush system should be easy to merge with this later (works the same - * way, only with an irq instead of a system timer_list) - * - * Revision 1.3 2000/11/13 17:19:57 bjornw - * * Incredibly, this almost complete rewrite of serial.c worked (at least - * for output) the first time. - * - * Items worth noticing: - * - * No Etrax100 port 1 workarounds (does only compile on 2.4 anyway now) - * RS485 is not ported (why can't it be done in userspace as on x86 ?) - * Statistics done through async_icount - if any more stats are needed, - * that's the place to put them or in an arch-dep version of it. - * timeout_interrupt and the other fast timeout stuff not ported yet - * There be dragons in this 3k+ line driver - * - * Revision 1.2 2000/11/10 16:50:28 bjornw - * First shot at a 2.4 port, does not compile totally yet - * - * Revision 1.1 2000/11/10 16:47:32 bjornw - * Added verbatim copy of rev 1.49 etrax100ser.c from elinux - * - * Revision 1.49 2000/10/30 15:47:14 tobiasa - * Changed version number. - * - * Revision 1.48 2000/10/25 11:02:43 johana - * Changed %ul to %lu in printf's - * - * Revision 1.47 2000/10/18 15:06:53 pkj - * Compile correctly with CONFIG_ETRAX_SERIAL_FLUSH_DMA_FAST and - * CONFIG_ETRAX_SERIAL_PROC_ENTRY together. - * Some clean-up of the /proc/serial file. - * - * Revision 1.46 2000/10/16 12:59:40 johana - * Added CONFIG_ETRAX_SERIAL_PROC_ENTRY for statistics and debug info. - * - * Revision 1.45 2000/10/13 17:10:59 pkj - * Do not flush DMAs while flipping TTY buffers. - * - * Revision 1.44 2000/10/13 16:34:29 pkj - * Added a delay in ser_interrupt() for 2.3ms when an error is detected. - * We do not know why this delay is required yet, but without it the - * irmaflash program does not work (this was the program that needed - * the ser_interrupt() to be needed in the first place). This should not - * affect normal use of the serial ports. - * - * Revision 1.43 2000/10/13 16:30:44 pkj - * New version of the fast flush of serial buffers code. This time - * it is localized to the serial driver and uses a fast timer to - * do the work. - * - * Revision 1.42 2000/10/13 14:54:26 bennyo - * Fix for switching RTS when using rs485 - * - * Revision 1.41 2000/10/12 11:43:44 pkj - * Cleaned up a number of comments. - * - * Revision 1.40 2000/10/10 11:58:39 johana - * Made RS485 support generic for all ports. - * Toggle rts in interrupt if no delay wanted. - * WARNING: No true transmitter empty check?? - * Set d_wait bit when sending data so interrupt is delayed until - * fifo flushed. (Fix tcdrain() problem) - * - * Revision 1.39 2000/10/04 16:08:02 bjornw - * * Use virt_to_phys etc. for DMA addresses - * * Removed CONFIG_FLUSH_DMA_FAST hacks - * * Indentation fix - * - * Revision 1.38 2000/10/02 12:27:10 mattias - * * added variable used when using fast flush on serial dma. - * (CONFIG_FLUSH_DMA_FAST) - * - * Revision 1.37 2000/09/27 09:44:24 pkj - * Uncomment definition of SERIAL_HANDLE_EARLY_ERRORS. - * - * Revision 1.36 2000/09/20 13:12:52 johana - * Support for CONFIG_ETRAX_SERIAL_RX_TIMEOUT_TICKS: - * Number of timer ticks between flush of receive fifo (1 tick = 10ms). - * Try 0-3 for low latency applications. Approx 5 for high load - * applications (e.g. PPP). Maybe this should be more adaptive some day... - * - * Revision 1.35 2000/09/20 10:36:08 johana - * Typo in get_lsr_info() - * - * Revision 1.34 2000/09/20 10:29:59 johana - * Let rs_chars_in_buffer() check fifo content as well. - * get_lsr_info() might work now (not tested). - * Easier to change the port to debug. - * - * Revision 1.33 2000/09/13 07:52:11 torbjore - * Support RS485 - * - * Revision 1.32 2000/08/31 14:45:37 bjornw - * After sending a break we need to reset the transmit DMA channel - * - * Revision 1.31 2000/06/21 12:13:29 johana - * Fixed wait for all chars sent when closing port. - * (Used to always take 1 second!) - * Added shadows for directions of status/ctrl signals. - * - * Revision 1.30 2000/05/29 16:27:55 bjornw - * Simulator ifdef moved a bit - * - * Revision 1.29 2000/05/09 09:40:30 mattias - * * Added description of dma registers used in timeout_interrupt - * * Removed old code - * - * Revision 1.28 2000/05/08 16:38:58 mattias - * * Bugfix for flushing fifo in timeout_interrupt - * Problem occurs when bluetooth stack waits for a small number of bytes - * containing an event acknowledging free buffers in bluetooth HW - * As before, data was stuck in fifo until more data came on uart and - * flushed it up to the stack. - * - * Revision 1.27 2000/05/02 09:52:28 jonasd - * Added fix for peculiar etrax behaviour when eop is forced on an empty - * fifo. This is used when flashing the IRMA chip. Disabled by default. - * - * Revision 1.26 2000/03/29 15:32:02 bjornw - * 2.0.34 updates - * - * Revision 1.25 2000/02/16 16:59:36 bjornw - * * Receive DMA directly into the flip-buffer, eliminating an intermediary - * receive buffer and a memcpy. Will avoid some overruns. - * * Error message on debug port if an overrun or flip buffer overrun occurs. - * * Just use the first byte in the flag flip buffer for errors. - * * Check for timeout on the serial ports only each 5/100 s, not 1/100. - * - * Revision 1.24 2000/02/09 18:02:28 bjornw - * * Clear serial errors (overrun, framing, parity) correctly. Before, the - * receiver would get stuck if an error occurred and we did not restart - * the input DMA. - * * Cosmetics (indentation, some code made into inlines) - * * Some more debug options - * * Actually shut down the serial port (DMA irq, DMA reset, receiver stop) - * when the last open is closed. Corresponding fixes in startup(). - * * rs_close() "tx FIFO wait" code moved into right place, bug & -> && fixed - * and make a special case out of port 1 (R_DMA_CHx_STATUS is broken for that) - * * e100_disable_rx/enable_rx just disables/enables the receiver, not RTS - * - * Revision 1.23 2000/01/24 17:46:19 johana - * Wait for flush of DMA/FIFO when closing port. - * - * Revision 1.22 2000/01/20 18:10:23 johana - * Added TIOCMGET ioctl to return modem status. - * Implemented modem status/control that works with the extra signals - * (DTR, DSR, RI,CD) as well. - * 3 different modes supported: - * ser0 on PB (Bundy), ser1 on PB (Lisa) and ser2 on PA (Bundy) - * Fixed DEF_TX value that caused the serial transmitter pin (txd) to go to 0 when - * closing the last filehandle, NASTY!. - * Added break generation, not tested though! - * Use IRQF_SHARED when request_irq() for ser2 and ser3 (shared with) par0 and par1. - * You can't use them at the same time (yet..), but you can hopefully switch - * between ser2/par0, ser3/par1 with the same kernel config. - * Replaced some magic constants with defines - * - * */ static char *serial_version = "$Revision: 1.25 $"; @@ -446,6 +30,7 @@ static char *serial_version = "$Revision: 1.25 $"; #include #include +#include #include #include @@ -454,8 +39,9 @@ static char *serial_version = "$Revision: 1.25 $"; /* non-arch dependent serial structures are in linux/serial.h */ #include /* while we keep our own stuff (struct e100_serial) in a local .h file */ -#include "serial.h" +#include "crisv10.h" #include +#include #ifdef CONFIG_ETRAX_SERIAL_FAST_TIMER #ifndef CONFIG_ETRAX_FAST_TIMER @@ -504,18 +90,6 @@ struct tty_driver *serial_driver; from eLinux */ #define SERIAL_HANDLE_EARLY_ERRORS -/* Defined and used in n_tty.c, but we need it here as well */ -#define TTY_THRESHOLD_THROTTLE 128 - -/* Due to buffersizes and threshold values, our SERIAL_DESCR_BUF_SIZE - * must not be to high or flow control won't work if we leave it to the tty - * layer so we have our own throttling in flush_to_flip - * TTY_FLIPBUF_SIZE=512, - * TTY_THRESHOLD_THROTTLE/UNTHROTTLE=128 - * BUF_SIZE can't be > 128 - */ -#define CRIS_BUF_SIZE 512 - /* Currently 16 descriptors x 128 bytes = 2048 bytes */ #define SERIAL_DESCR_BUF_SIZE 256 @@ -588,13 +162,13 @@ unsigned long timer_data_to_ns(unsigned long timer_data); static void change_speed(struct e100_serial *info); static void rs_throttle(struct tty_struct * tty); static void rs_wait_until_sent(struct tty_struct *tty, int timeout); -static int rs_write(struct tty_struct * tty, int from_user, - const unsigned char *buf, int count); +static int rs_write(struct tty_struct *tty, + const unsigned char *buf, int count); #ifdef CONFIG_ETRAX_RS485 -static int e100_write_rs485(struct tty_struct * tty, int from_user, - const unsigned char *buf, int count); +static int e100_write_rs485(struct tty_struct *tty, + const unsigned char *buf, int count); #endif -static int get_lsr_info(struct e100_serial * info, unsigned int *value); +static int get_lsr_info(struct e100_serial *info, unsigned int *value); #define DEF_BAUD 115200 /* 115.2 kbit/s */ @@ -679,20 +253,39 @@ static struct e100_serial rs_table[] = { .rx_ctrl = DEF_RX, .tx_ctrl = DEF_TX, .iseteop = 2, + .dma_owner = dma_ser0, + .io_if = if_serial_0, #ifdef CONFIG_ETRAX_SERIAL_PORT0 .enabled = 1, #ifdef CONFIG_ETRAX_SERIAL_PORT0_DMA6_OUT .dma_out_enabled = 1, + .dma_out_nbr = SER0_TX_DMA_NBR, + .dma_out_irq_nbr = SER0_DMA_TX_IRQ_NBR, + .dma_out_irq_flags = IRQF_DISABLED, + .dma_out_irq_description = "serial 0 dma tr", #else .dma_out_enabled = 0, + .dma_out_nbr = UINT_MAX, + .dma_out_irq_nbr = 0, + .dma_out_irq_flags = 0, + .dma_out_irq_description = NULL, #endif #ifdef CONFIG_ETRAX_SERIAL_PORT0_DMA7_IN .dma_in_enabled = 1, + .dma_in_nbr = SER0_RX_DMA_NBR, + .dma_in_irq_nbr = SER0_DMA_RX_IRQ_NBR, + .dma_in_irq_flags = IRQF_DISABLED, + .dma_in_irq_description = "serial 0 dma rec", #else - .dma_in_enabled = 0 + .dma_in_enabled = 0, + .dma_in_nbr = UINT_MAX, + .dma_in_irq_nbr = 0, + .dma_in_irq_flags = 0, + .dma_in_irq_description = NULL, #endif #else .enabled = 0, + .io_if_description = NULL, .dma_out_enabled = 0, .dma_in_enabled = 0 #endif @@ -714,20 +307,42 @@ static struct e100_serial rs_table[] = { .rx_ctrl = DEF_RX, .tx_ctrl = DEF_TX, .iseteop = 3, + .dma_owner = dma_ser1, + .io_if = if_serial_1, #ifdef CONFIG_ETRAX_SERIAL_PORT1 .enabled = 1, + .io_if_description = "ser1", #ifdef CONFIG_ETRAX_SERIAL_PORT1_DMA8_OUT .dma_out_enabled = 1, + .dma_out_nbr = SER1_TX_DMA_NBR, + .dma_out_irq_nbr = SER1_DMA_TX_IRQ_NBR, + .dma_out_irq_flags = IRQF_DISABLED, + .dma_out_irq_description = "serial 1 dma tr", #else .dma_out_enabled = 0, + .dma_out_nbr = UINT_MAX, + .dma_out_irq_nbr = 0, + .dma_out_irq_flags = 0, + .dma_out_irq_description = NULL, #endif #ifdef CONFIG_ETRAX_SERIAL_PORT1_DMA9_IN .dma_in_enabled = 1, + .dma_in_nbr = SER1_RX_DMA_NBR, + .dma_in_irq_nbr = SER1_DMA_RX_IRQ_NBR, + .dma_in_irq_flags = IRQF_DISABLED, + .dma_in_irq_description = "serial 1 dma rec", #else - .dma_in_enabled = 0 + .dma_in_enabled = 0, + .dma_in_enabled = 0, + .dma_in_nbr = UINT_MAX, + .dma_in_irq_nbr = 0, + .dma_in_irq_flags = 0, + .dma_in_irq_description = NULL, #endif #else .enabled = 0, + .io_if_description = NULL, + .dma_in_irq_nbr = 0, .dma_out_enabled = 0, .dma_in_enabled = 0 #endif @@ -748,20 +363,40 @@ static struct e100_serial rs_table[] = { .rx_ctrl = DEF_RX, .tx_ctrl = DEF_TX, .iseteop = 0, + .dma_owner = dma_ser2, + .io_if = if_serial_2, #ifdef CONFIG_ETRAX_SERIAL_PORT2 .enabled = 1, + .io_if_description = "ser2", #ifdef CONFIG_ETRAX_SERIAL_PORT2_DMA2_OUT .dma_out_enabled = 1, + .dma_out_nbr = SER2_TX_DMA_NBR, + .dma_out_irq_nbr = SER2_DMA_TX_IRQ_NBR, + .dma_out_irq_flags = IRQF_DISABLED, + .dma_out_irq_description = "serial 2 dma tr", #else .dma_out_enabled = 0, + .dma_out_nbr = UINT_MAX, + .dma_out_irq_nbr = 0, + .dma_out_irq_flags = 0, + .dma_out_irq_description = NULL, #endif #ifdef CONFIG_ETRAX_SERIAL_PORT2_DMA3_IN .dma_in_enabled = 1, + .dma_in_nbr = SER2_RX_DMA_NBR, + .dma_in_irq_nbr = SER2_DMA_RX_IRQ_NBR, + .dma_in_irq_flags = IRQF_DISABLED, + .dma_in_irq_description = "serial 2 dma rec", #else - .dma_in_enabled = 0 + .dma_in_enabled = 0, + .dma_in_nbr = UINT_MAX, + .dma_in_irq_nbr = 0, + .dma_in_irq_flags = 0, + .dma_in_irq_description = NULL, #endif #else .enabled = 0, + .io_if_description = NULL, .dma_out_enabled = 0, .dma_in_enabled = 0 #endif @@ -782,20 +417,40 @@ static struct e100_serial rs_table[] = { .rx_ctrl = DEF_RX, .tx_ctrl = DEF_TX, .iseteop = 1, + .dma_owner = dma_ser3, + .io_if = if_serial_3, #ifdef CONFIG_ETRAX_SERIAL_PORT3 .enabled = 1, + .io_if_description = "ser3", #ifdef CONFIG_ETRAX_SERIAL_PORT3_DMA4_OUT .dma_out_enabled = 1, + .dma_out_nbr = SER3_TX_DMA_NBR, + .dma_out_irq_nbr = SER3_DMA_TX_IRQ_NBR, + .dma_out_irq_flags = IRQF_DISABLED, + .dma_out_irq_description = "serial 3 dma tr", #else .dma_out_enabled = 0, + .dma_out_nbr = UINT_MAX, + .dma_out_irq_nbr = 0, + .dma_out_irq_flags = 0, + .dma_out_irq_description = NULL, #endif #ifdef CONFIG_ETRAX_SERIAL_PORT3_DMA5_IN .dma_in_enabled = 1, + .dma_in_nbr = SER3_RX_DMA_NBR, + .dma_in_irq_nbr = SER3_DMA_RX_IRQ_NBR, + .dma_in_irq_flags = IRQF_DISABLED, + .dma_in_irq_description = "serial 3 dma rec", #else - .dma_in_enabled = 0 + .dma_in_enabled = 0, + .dma_in_nbr = UINT_MAX, + .dma_in_irq_nbr = 0, + .dma_in_irq_flags = 0, + .dma_in_irq_description = NULL #endif #else .enabled = 0, + .io_if_description = NULL, .dma_out_enabled = 0, .dma_in_enabled = 0 #endif @@ -1416,12 +1071,11 @@ e100_dtr(struct e100_serial *info, int set) { unsigned long flags; - save_flags(flags); - cli(); + local_irq_save(flags); *e100_modem_pins[info->line].dtr_shadow &= ~mask; *e100_modem_pins[info->line].dtr_shadow |= (set ? 0 : mask); *e100_modem_pins[info->line].dtr_port = *e100_modem_pins[info->line].dtr_shadow; - restore_flags(flags); + local_irq_restore(flags); } #ifdef SERIAL_DEBUG_IO @@ -1440,12 +1094,11 @@ e100_rts(struct e100_serial *info, int set) { #ifndef CONFIG_SVINTO_SIM unsigned long flags; - save_flags(flags); - cli(); + local_irq_save(flags); info->rx_ctrl &= ~E100_RTS_MASK; info->rx_ctrl |= (set ? 0 : E100_RTS_MASK); /* RTS is active low */ info->port[REG_REC_CTRL] = info->rx_ctrl; - restore_flags(flags); + local_irq_restore(flags); #ifdef SERIAL_DEBUG_IO printk("ser%i rts %i\n", info->line, set); #endif @@ -1463,12 +1116,11 @@ e100_ri_out(struct e100_serial *info, int set) unsigned char mask = e100_modem_pins[info->line].ri_mask; unsigned long flags; - save_flags(flags); - cli(); + local_irq_save(flags); *e100_modem_pins[info->line].ri_shadow &= ~mask; *e100_modem_pins[info->line].ri_shadow |= (set ? 0 : mask); *e100_modem_pins[info->line].ri_port = *e100_modem_pins[info->line].ri_shadow; - restore_flags(flags); + local_irq_restore(flags); } #endif } @@ -1481,12 +1133,11 @@ e100_cd_out(struct e100_serial *info, int set) unsigned char mask = e100_modem_pins[info->line].cd_mask; unsigned long flags; - save_flags(flags); - cli(); + local_irq_save(flags); *e100_modem_pins[info->line].cd_shadow &= ~mask; *e100_modem_pins[info->line].cd_shadow |= (set ? 0 : mask); *e100_modem_pins[info->line].cd_port = *e100_modem_pins[info->line].cd_shadow; - restore_flags(flags); + local_irq_restore(flags); } #endif } @@ -1560,8 +1211,7 @@ static void e100_disable_txdma_channel(struct e100_serial *info) /* Disable output DMA channel for the serial port in question * ( set to something other then serialX) */ - save_flags(flags); - cli(); + local_irq_save(flags); DFLOW(DEBUG_LOG(info->line, "disable_txdma_channel %i\n", info->line)); if (info->line == 0) { if ((genconfig_shadow & IO_MASK(R_GEN_CONFIG, dma6)) == @@ -1589,7 +1239,7 @@ static void e100_disable_txdma_channel(struct e100_serial *info) } } *R_GEN_CONFIG = genconfig_shadow; - restore_flags(flags); + local_irq_restore(flags); } @@ -1597,8 +1247,7 @@ static void e100_enable_txdma_channel(struct e100_serial *info) { unsigned long flags; - save_flags(flags); - cli(); + local_irq_save(flags); DFLOW(DEBUG_LOG(info->line, "enable_txdma_channel %i\n", info->line)); /* Enable output DMA channel for the serial port in question */ if (info->line == 0) { @@ -1615,7 +1264,7 @@ static void e100_enable_txdma_channel(struct e100_serial *info) genconfig_shadow |= IO_STATE(R_GEN_CONFIG, dma4, serial3); } *R_GEN_CONFIG = genconfig_shadow; - restore_flags(flags); + local_irq_restore(flags); } static void e100_disable_rxdma_channel(struct e100_serial *info) @@ -1625,8 +1274,7 @@ static void e100_disable_rxdma_channel(struct e100_serial *info) /* Disable input DMA channel for the serial port in question * ( set to something other then serialX) */ - save_flags(flags); - cli(); + local_irq_save(flags); if (info->line == 0) { if ((genconfig_shadow & IO_MASK(R_GEN_CONFIG, dma7)) == IO_STATE(R_GEN_CONFIG, dma7, serial0)) { @@ -1653,7 +1301,7 @@ static void e100_disable_rxdma_channel(struct e100_serial *info) } } *R_GEN_CONFIG = genconfig_shadow; - restore_flags(flags); + local_irq_restore(flags); } @@ -1661,8 +1309,7 @@ static void e100_enable_rxdma_channel(struct e100_serial *info) { unsigned long flags; - save_flags(flags); - cli(); + local_irq_save(flags); /* Enable input DMA channel for the serial port in question */ if (info->line == 0) { genconfig_shadow &= ~IO_MASK(R_GEN_CONFIG, dma7); @@ -1678,7 +1325,7 @@ static void e100_enable_rxdma_channel(struct e100_serial *info) genconfig_shadow |= IO_STATE(R_GEN_CONFIG, dma5, serial3); } *R_GEN_CONFIG = genconfig_shadow; - restore_flags(flags); + local_irq_restore(flags); } #ifdef SERIAL_HANDLE_EARLY_ERRORS @@ -1785,7 +1432,7 @@ e100_enable_rs485(struct tty_struct *tty,struct rs485_control *r) } static int -e100_write_rs485(struct tty_struct *tty, int from_user, +e100_write_rs485(struct tty_struct *tty, const unsigned char *buf, int count) { struct e100_serial * info = (struct e100_serial *)tty->driver_data; @@ -1798,7 +1445,7 @@ e100_write_rs485(struct tty_struct *tty, int from_user, */ info->rs485.enabled = 1; /* rs_write now deals with RS485 if enabled */ - count = rs_write(tty, from_user, buf, count); + count = rs_write(tty, buf, count); info->rs485.enabled = old_enabled; return count; } @@ -1836,7 +1483,7 @@ rs_stop(struct tty_struct *tty) unsigned long flags; unsigned long xoff; - save_flags(flags); cli(); + local_irq_save(flags); DFLOW(DEBUG_LOG(info->line, "XOFF rs_stop xmit %i\n", CIRC_CNT(info->xmit.head, info->xmit.tail,SERIAL_XMIT_SIZE))); @@ -1848,7 +1495,7 @@ rs_stop(struct tty_struct *tty) } *((unsigned long *)&info->port[REG_XOFF]) = xoff; - restore_flags(flags); + local_irq_restore(flags); } } @@ -1860,7 +1507,7 @@ rs_start(struct tty_struct *tty) unsigned long flags; unsigned long xoff; - save_flags(flags); cli(); + local_irq_save(flags); DFLOW(DEBUG_LOG(info->line, "XOFF rs_start xmit %i\n", CIRC_CNT(info->xmit.head, info->xmit.tail,SERIAL_XMIT_SIZE))); @@ -1875,7 +1522,7 @@ rs_start(struct tty_struct *tty) info->xmit.head != info->xmit.tail && info->xmit.buf) e100_enable_serial_tx_ready_irq(info); - restore_flags(flags); + local_irq_restore(flags); } } @@ -2055,8 +1702,7 @@ static int serial_fast_timer_expired = 0; static void flush_timeout_function(unsigned long data); #define START_FLUSH_FAST_TIMER_TIME(info, string, usec) {\ unsigned long timer_flags; \ - save_flags(timer_flags); \ - cli(); \ + local_irq_save(timer_flags); \ if (fast_timers[info->line].function == NULL) { \ serial_fast_timer_started++; \ TIMERD(DEBUG_LOG(info->line, "start_timer %i ", info->line)); \ @@ -2070,7 +1716,7 @@ static void flush_timeout_function(unsigned long data); else { \ TIMERD(DEBUG_LOG(info->line, "timer %i already running\n", info->line)); \ } \ - restore_flags(timer_flags); \ + local_irq_restore(timer_flags); \ } #define START_FLUSH_FAST_TIMER(info, string) START_FLUSH_FAST_TIMER_TIME(info, string, info->flush_time_usec) @@ -2099,8 +1745,7 @@ append_recv_buffer(struct e100_serial *info, struct etrax_recv_buffer *buffer) { unsigned long flags; - save_flags(flags); - cli(); + local_irq_save(flags); if (!info->first_recv_buffer) info->first_recv_buffer = buffer; @@ -2113,7 +1758,7 @@ append_recv_buffer(struct e100_serial *info, struct etrax_recv_buffer *buffer) if (info->recv_cnt > info->max_recv_cnt) info->max_recv_cnt = info->recv_cnt; - restore_flags(flags); + local_irq_restore(flags); } static int @@ -2133,11 +1778,7 @@ add_char_and_flag(struct e100_serial *info, unsigned char data, unsigned char fl info->icount.rx++; } else { struct tty_struct *tty = info->tty; - *tty->flip.char_buf_ptr = data; - *tty->flip.flag_buf_ptr = flag; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, data, flag); info->icount.rx++; } @@ -2322,7 +1963,6 @@ start_receive(struct e100_serial *info) */ return; #endif - info->tty->flip.count = 0; if (info->uses_dma_in) { /* reset the input dma channel to be sure it works */ @@ -2484,32 +2124,20 @@ static void flush_to_flip_buffer(struct e100_serial *info) { struct tty_struct *tty; struct etrax_recv_buffer *buffer; - unsigned int length; unsigned long flags; - int max_flip_size; - - if (!info->first_recv_buffer) - return; - save_flags(flags); - cli(); + local_irq_save(flags); + tty = info->tty; - if (!(tty = info->tty)) { - restore_flags(flags); + if (!tty) { + local_irq_restore(flags); return; } while ((buffer = info->first_recv_buffer) != NULL) { unsigned int count = buffer->length; - count = tty_buffer_request_room(tty, count); - if (count == 0) /* Throttle ?? */ - break; - - if (count > 1) - tty_insert_flip_strings(tty, buffer->buffer, count - 1); - tty_insert_flip_char(tty, buffer->buffer[count-1], buffer->error); - + tty_insert_flip_string(tty, buffer->buffer, count); info->recv_cnt -= count; if (count == buffer->length) { @@ -2525,18 +2153,9 @@ static void flush_to_flip_buffer(struct e100_serial *info) if (!info->first_recv_buffer) info->last_recv_buffer = NULL; - restore_flags(flags); - - DFLIP( - if (1) { - DEBUG_LOG(info->line, "*** rxtot %i\n", info->icount.rx); - DEBUG_LOG(info->line, "ldisc %lu\n", tty->ldisc.chars_in_buffer(tty)); - DEBUG_LOG(info->line, "room %lu\n", tty->ldisc.receive_room(tty)); - } + local_irq_restore(flags); - ); - - /* this includes a check for low-latency */ + /* This includes a check for low-latency */ tty_flip_buffer_push(tty); } @@ -2679,21 +2298,7 @@ struct e100_serial * handle_ser_rx_interrupt_no_dma(struct e100_serial *info) printk("!NO TTY!\n"); return info; } - if (tty->flip.count >= CRIS_BUF_SIZE - TTY_THRESHOLD_THROTTLE) { - /* check TTY_THROTTLED first so it indicates our state */ - if (!test_and_set_bit(TTY_THROTTLED, &tty->flags)) { - DFLOW(DEBUG_LOG(info->line, "rs_throttle flip.count: %i\n", tty->flip.count)); - rs_throttle(tty); - } - } - if (tty->flip.count >= CRIS_BUF_SIZE) { - DEBUG_LOG(info->line, "force FLIP! %i\n", tty->flip.count); - tty->flip.work.func((void *) tty); - if (tty->flip.count >= CRIS_BUF_SIZE) { - DEBUG_LOG(info->line, "FLIP FULL! %i\n", tty->flip.count); - return info; /* if TTY_DONT_FLIP is set */ - } - } + /* Read data and status at the same time */ data_read = *((unsigned long *)&info->port[REG_DATA_STATUS32]); more_data: @@ -2746,27 +2351,26 @@ more_data: DEBUG_LOG(info->line, "EBRK %i\n", info->break_detected_cnt); info->errorcode = ERRCODE_INSERT_BREAK; } else { + unsigned char data = IO_EXTRACT(R_SERIAL0_READ, + data_in, data_read); + char flag = TTY_NORMAL; if (info->errorcode == ERRCODE_INSERT_BREAK) { - info->icount.brk++; - *tty->flip.char_buf_ptr = 0; - *tty->flip.flag_buf_ptr = TTY_BREAK; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + struct tty_struct *tty = info->tty; + tty_insert_flip_char(tty, 0, flag); info->icount.rx++; } - *tty->flip.char_buf_ptr = IO_EXTRACT(R_SERIAL0_READ, data_in, data_read); if (data_read & IO_MASK(R_SERIAL0_READ, par_err)) { info->icount.parity++; - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; } else if (data_read & IO_MASK(R_SERIAL0_READ, overrun)) { info->icount.overrun++; - *tty->flip.flag_buf_ptr = TTY_OVERRUN; + flag = TTY_OVERRUN; } else if (data_read & IO_MASK(R_SERIAL0_READ, framing_err)) { info->icount.frame++; - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } + tty_insert_flip_char(tty, data, flag); info->errorcode = 0; } info->break_detected_cnt = 0; @@ -2782,16 +2386,14 @@ more_data: log_int(rdpc(), 0, 0); } ); - *tty->flip.char_buf_ptr = IO_EXTRACT(R_SERIAL0_READ, data_in, data_read); - *tty->flip.flag_buf_ptr = 0; + tty_insert_flip_char(tty, + IO_EXTRACT(R_SERIAL0_READ, data_in, data_read), + TTY_NORMAL); } else { DEBUG_LOG(info->line, "ser_rx int but no data_avail %08lX\n", data_read); } - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; info->icount.rx++; data_read = *((unsigned long *)&info->port[REG_DATA_STATUS32]); if (data_read & IO_MASK(R_SERIAL0_READ, data_avail)) { @@ -2929,7 +2531,7 @@ static void handle_ser_tx_interrupt(struct e100_serial *info) if (info->x_char) { unsigned char rstat; DFLOW(DEBUG_LOG(info->line, "tx_int: xchar 0x%02X\n", info->x_char)); - save_flags(flags); cli(); + local_irq_save(flags); rstat = info->port[REG_STATUS]; DFLOW(DEBUG_LOG(info->line, "stat %x\n", rstat)); @@ -2938,7 +2540,7 @@ static void handle_ser_tx_interrupt(struct e100_serial *info) info->x_char = 0; /* We must enable since it is disabled in ser_interrupt */ e100_enable_serial_tx_ready_irq(info); - restore_flags(flags); + local_irq_restore(flags); return; } if (info->uses_dma_out) { @@ -2946,7 +2548,7 @@ static void handle_ser_tx_interrupt(struct e100_serial *info) int i; /* We only use normal tx interrupt when sending x_char */ DFLOW(DEBUG_LOG(info->line, "tx_int: xchar sent\n", 0)); - save_flags(flags); cli(); + local_irq_save(flags); rstat = info->port[REG_STATUS]; DFLOW(DEBUG_LOG(info->line, "stat %x\n", rstat)); e100_disable_serial_tx_ready_irq(info); @@ -2959,7 +2561,7 @@ static void handle_ser_tx_interrupt(struct e100_serial *info) nop(); *info->ocmdadr = IO_STATE(R_DMA_CH6_CMD, cmd, continue); - restore_flags(flags); + local_irq_restore(flags); return; } /* Normal char-by-char interrupt */ @@ -2973,7 +2575,7 @@ static void handle_ser_tx_interrupt(struct e100_serial *info) } DINTR2(DEBUG_LOG(info->line, "tx_int %c\n", info->xmit.buf[info->xmit.tail])); /* Send a byte, rs485 timing is critical so turn of ints */ - save_flags(flags); cli(); + local_irq_save(flags); info->port[REG_TR_DATA] = info->xmit.buf[info->xmit.tail]; info->xmit.tail = (info->xmit.tail + 1) & (SERIAL_XMIT_SIZE-1); info->icount.tx++; @@ -2997,7 +2599,7 @@ static void handle_ser_tx_interrupt(struct e100_serial *info) /* We must enable since it is disabled in ser_interrupt */ e100_enable_serial_tx_ready_irq(info); } - restore_flags(flags); + local_irq_restore(flags); if (CIRC_CNT(info->xmit.head, info->xmit.tail, @@ -3022,7 +2624,7 @@ ser_interrupt(int irq, void *dev_id) int handled = 0; static volatile unsigned long reentered_ready_mask = 0; - save_flags(flags); cli(); + local_irq_save(flags); irq_mask1_rd = *R_IRQ_MASK1_RD; /* First handle all rx interrupts with ints disabled */ info = rs_table; @@ -3067,7 +2669,7 @@ ser_interrupt(int irq, void *dev_id) /* Unblock the serial interrupt */ *R_VECT_MASK_SET = IO_STATE(R_VECT_MASK_SET, serial, set); - sti(); + local_irq_enable(); ready_mask = (1 << (8+1+2*0)); /* ser0 tr_ready */ info = rs_table; for (i = 0; i < NR_PORTS; i++) { @@ -3080,11 +2682,11 @@ ser_interrupt(int irq, void *dev_id) ready_mask <<= 2; } /* handle_ser_tx_interrupt enables tr_ready interrupts */ - cli(); + local_irq_disable(); /* Handle reentered TX interrupt */ irq_mask1_rd = reentered_ready_mask; } - cli(); + local_irq_disable(); tx_started = 0; } else { unsigned long ready_mask; @@ -3100,7 +2702,7 @@ ser_interrupt(int irq, void *dev_id) } } - restore_flags(flags); + local_irq_restore(flags); return IRQ_RETVAL(handled); } /* ser_interrupt */ #endif @@ -3121,11 +2723,13 @@ ser_interrupt(int irq, void *dev_id) * them using rs_sched_event(), and they get done here. */ static void -do_softint(void *private_) +do_softint(struct work_struct *work) { - struct e100_serial *info = (struct e100_serial *) private_; + struct e100_serial *info; struct tty_struct *tty; + info = container_of(work, struct e100_serial, work); + tty = info->tty; if (!tty) return; @@ -3145,13 +2749,12 @@ startup(struct e100_serial * info) if (!xmit_page) return -ENOMEM; - save_flags(flags); - cli(); + local_irq_save(flags); /* if it was already initialized, skip this */ if (info->flags & ASYNC_INITIALIZED) { - restore_flags(flags); + local_irq_restore(flags); free_page(xmit_page); return 0; } @@ -3277,7 +2880,7 @@ startup(struct e100_serial * info) info->flags |= ASYNC_INITIALIZED; - restore_flags(flags); + local_irq_restore(flags); return 0; } @@ -3328,8 +2931,7 @@ shutdown(struct e100_serial * info) info->irq); #endif - save_flags(flags); - cli(); /* Disable interrupts */ + local_irq_save(flags); if (info->xmit.buf) { free_page((unsigned long)info->xmit.buf); @@ -3353,7 +2955,7 @@ shutdown(struct e100_serial * info) set_bit(TTY_IO_ERROR, &info->tty->flags); info->flags &= ~ASYNC_INITIALIZED; - restore_flags(flags); + local_irq_restore(flags); } @@ -3411,7 +3013,6 @@ change_speed(struct e100_serial *info) DBAUD(printk("using external baudrate: %lu\n", CONFIG_ETRAX_EXTERN_PB6CLK_FREQ/8)); info->baud = CONFIG_ETRAX_EXTERN_PB6CLK_FREQ/8; } - } #endif else { @@ -3445,8 +3046,7 @@ change_speed(struct e100_serial *info) #ifndef CONFIG_SVINTO_SIM /* start with default settings and then fill in changes */ - save_flags(flags); - cli(); + local_irq_save(flags); /* 8 bit, no/even parity */ info->rx_ctrl &= ~(IO_MASK(R_SERIAL0_REC_CTRL, rec_bitnr) | IO_MASK(R_SERIAL0_REC_CTRL, rec_par_en) | @@ -3510,7 +3110,7 @@ change_speed(struct e100_serial *info) } *((unsigned long *)&info->port[REG_XOFF]) = xoff; - restore_flags(flags); + local_irq_restore(flags); #endif /* !CONFIG_SVINTO_SIM */ update_char_time(info); @@ -3538,13 +3138,12 @@ rs_flush_chars(struct tty_struct *tty) /* this protection might not exactly be necessary here */ - save_flags(flags); - cli(); + local_irq_save(flags); start_transmit(info); - restore_flags(flags); + local_irq_restore(flags); } -static int rs_raw_write(struct tty_struct * tty, int from_user, +static int rs_raw_write(struct tty_struct *tty, const unsigned char *buf, int count) { int c, ret = 0; @@ -3567,53 +3166,19 @@ static int rs_raw_write(struct tty_struct * tty, int from_user, SIMCOUT(buf, count); return count; #endif - save_flags(flags); + local_save_flags(flags); DFLOW(DEBUG_LOG(info->line, "write count %i ", count)); DFLOW(DEBUG_LOG(info->line, "ldisc %i\n", tty->ldisc.chars_in_buffer(tty))); - /* the cli/restore_flags pairs below are needed because the - * DMA interrupt handler moves the info->xmit values. the memcpy - * needs to be in the critical region unfortunately, because we - * need to read xmit values, memcpy, write xmit values in one - * atomic operation... this could perhaps be avoided by more clever - * design. + /* The local_irq_disable/restore_flags pairs below are needed + * because the DMA interrupt handler moves the info->xmit values. + * the memcpy needs to be in the critical region unfortunately, + * because we need to read xmit values, memcpy, write xmit values + * in one atomic operation... this could perhaps be avoided by + * more clever design. */ - if (from_user) { - mutex_lock(&tmp_buf_mutex); - while (1) { - int c1; - c = CIRC_SPACE_TO_END(info->xmit.head, - info->xmit.tail, - SERIAL_XMIT_SIZE); - if (count < c) - c = count; - if (c <= 0) - break; - - c -= copy_from_user(tmp_buf, buf, c); - if (!c) { - if (!ret) - ret = -EFAULT; - break; - } - cli(); - c1 = CIRC_SPACE_TO_END(info->xmit.head, - info->xmit.tail, - SERIAL_XMIT_SIZE); - if (c1 < c) - c = c1; - memcpy(info->xmit.buf + info->xmit.head, tmp_buf, c); - info->xmit.head = ((info->xmit.head + c) & - (SERIAL_XMIT_SIZE-1)); - restore_flags(flags); - buf += c; - count -= c; - ret += c; - } - mutex_unlock(&tmp_buf_mutex); - } else { - cli(); + local_irq_disable(); while (count) { c = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, @@ -3631,8 +3196,7 @@ static int rs_raw_write(struct tty_struct * tty, int from_user, count -= c; ret += c; } - restore_flags(flags); - } + local_irq_restore(flags); /* enable transmitter if not running, unless the tty is stopped * this does not need IRQ protection since if tr_running == 0 @@ -3651,7 +3215,7 @@ static int rs_raw_write(struct tty_struct * tty, int from_user, } /* raw_raw_write() */ static int -rs_write(struct tty_struct * tty, int from_user, +rs_write(struct tty_struct *tty, const unsigned char *buf, int count) { #if defined(CONFIG_ETRAX_RS485) @@ -3678,7 +3242,7 @@ rs_write(struct tty_struct * tty, int from_user, } #endif /* CONFIG_ETRAX_RS485 */ - count = rs_raw_write(tty, from_user, buf, count); + count = rs_raw_write(tty, buf, count); #if defined(CONFIG_ETRAX_RS485) if (info->rs485.enabled) @@ -3746,10 +3310,9 @@ rs_flush_buffer(struct tty_struct *tty) struct e100_serial *info = (struct e100_serial *)tty->driver_data; unsigned long flags; - save_flags(flags); - cli(); + local_irq_save(flags); info->xmit.head = info->xmit.tail = 0; - restore_flags(flags); + local_irq_restore(flags); tty_wakeup(tty); } @@ -3767,7 +3330,7 @@ static void rs_send_xchar(struct tty_struct *tty, char ch) { struct e100_serial *info = (struct e100_serial *)tty->driver_data; unsigned long flags; - save_flags(flags); cli(); + local_irq_save(flags); if (info->uses_dma_out) { /* Put the DMA on hold and disable the channel */ *info->ocmdadr = IO_STATE(R_DMA_CH6_CMD, cmd, hold); @@ -3784,7 +3347,7 @@ static void rs_send_xchar(struct tty_struct *tty, char ch) DFLOW(DEBUG_LOG(info->line, "rs_send_xchar 0x%02X\n", ch)); info->x_char = ch; e100_enable_serial_tx_ready_irq(info); - restore_flags(flags); + local_irq_restore(flags); } /* @@ -3996,21 +3559,61 @@ char *get_control_state_str(int MLines, char *s) } #endif +static void +rs_break(struct tty_struct *tty, int break_state) +{ + struct e100_serial *info = (struct e100_serial *)tty->driver_data; + unsigned long flags; + + if (!info->port) + return; + + local_irq_save(flags); + if (break_state == -1) { + /* Go to manual mode and set the txd pin to 0 */ + /* Clear bit 7 (txd) and 6 (tr_enable) */ + info->tx_ctrl &= 0x3F; + } else { + /* Set bit 7 (txd) and 6 (tr_enable) */ + info->tx_ctrl |= (0x80 | 0x40); + } + info->port[REG_TR_CTRL] = info->tx_ctrl; + local_irq_restore(flags); +} + static int -get_modem_info(struct e100_serial * info, unsigned int *value) +rs_tiocmset(struct tty_struct *tty, struct file *file, + unsigned int set, unsigned int clear) { - unsigned int result; - /* Polarity isn't verified */ -#if 0 /*def SERIAL_DEBUG_IO */ + struct e100_serial *info = (struct e100_serial *)tty->driver_data; - printk("get_modem_info: RTS: %i DTR: %i CD: %i RI: %i DSR: %i CTS: %i\n", - E100_RTS_GET(info), - E100_DTR_GET(info), - E100_CD_GET(info), - E100_RI_GET(info), - E100_DSR_GET(info), - E100_CTS_GET(info)); -#endif + if (clear & TIOCM_RTS) + e100_rts(info, 0); + if (clear & TIOCM_DTR) + e100_dtr(info, 0); + /* Handle FEMALE behaviour */ + if (clear & TIOCM_RI) + e100_ri_out(info, 0); + if (clear & TIOCM_CD) + e100_cd_out(info, 0); + + if (set & TIOCM_RTS) + e100_rts(info, 1); + if (set & TIOCM_DTR) + e100_dtr(info, 1); + /* Handle FEMALE behaviour */ + if (set & TIOCM_RI) + e100_ri_out(info, 1); + if (set & TIOCM_CD) + e100_cd_out(info, 1); + return 0; +} + +static int +rs_tiocmget(struct tty_struct *tty, struct file *file) +{ + struct e100_serial *info = (struct e100_serial *)tty->driver_data; + unsigned int result; result = (!E100_RTS_GET(info) ? TIOCM_RTS : 0) @@ -4021,95 +3624,20 @@ get_modem_info(struct e100_serial * info, unsigned int *value) | (!E100_CTS_GET(info) ? TIOCM_CTS : 0); #ifdef SERIAL_DEBUG_IO - printk("e100ser: modem state: %i 0x%08X\n", result, result); + printk(KERN_DEBUG "ser%i: modem state: %i 0x%08X\n", + info->line, result, result); { char s[100]; get_control_state_str(result, s); - printk("state: %s\n", s); + printk(KERN_DEBUG "state: %s\n", s); } #endif - if (copy_to_user(value, &result, sizeof(int))) - return -EFAULT; - return 0; -} + return result; - -static int -set_modem_info(struct e100_serial * info, unsigned int cmd, - unsigned int *value) -{ - unsigned int arg; - - if (copy_from_user(&arg, value, sizeof(int))) - return -EFAULT; - - switch (cmd) { - case TIOCMBIS: - if (arg & TIOCM_RTS) { - e100_rts(info, 1); - } - if (arg & TIOCM_DTR) { - e100_dtr(info, 1); - } - /* Handle FEMALE behaviour */ - if (arg & TIOCM_RI) { - e100_ri_out(info, 1); - } - if (arg & TIOCM_CD) { - e100_cd_out(info, 1); - } - break; - case TIOCMBIC: - if (arg & TIOCM_RTS) { - e100_rts(info, 0); - } - if (arg & TIOCM_DTR) { - e100_dtr(info, 0); - } - /* Handle FEMALE behaviour */ - if (arg & TIOCM_RI) { - e100_ri_out(info, 0); - } - if (arg & TIOCM_CD) { - e100_cd_out(info, 0); - } - break; - case TIOCMSET: - e100_rts(info, arg & TIOCM_RTS); - e100_dtr(info, arg & TIOCM_DTR); - /* Handle FEMALE behaviour */ - e100_ri_out(info, arg & TIOCM_RI); - e100_cd_out(info, arg & TIOCM_CD); - break; - default: - return -EINVAL; - } - return 0; } -static void -rs_break(struct tty_struct *tty, int break_state) -{ - struct e100_serial * info = (struct e100_serial *)tty->driver_data; - unsigned long flags; - - if (!info->port) - return; - - save_flags(flags); - cli(); - if (break_state == -1) { - /* Go to manual mode and set the txd pin to 0 */ - info->tx_ctrl &= 0x3F; /* Clear bit 7 (txd) and 6 (tr_enable) */ - } else { - info->tx_ctrl |= (0x80 | 0x40); /* Set bit 7 (txd) and 6 (tr_enable) */ - } - info->port[REG_TR_CTRL] = info->tx_ctrl; - restore_flags(flags); -} - static int rs_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg) @@ -4124,49 +3652,45 @@ rs_ioctl(struct tty_struct *tty, struct file * file, } switch (cmd) { - case TIOCMGET: - return get_modem_info(info, (unsigned int *) arg); - case TIOCMBIS: - case TIOCMBIC: - case TIOCMSET: - return set_modem_info(info, cmd, (unsigned int *) arg); - case TIOCGSERIAL: - return get_serial_info(info, - (struct serial_struct *) arg); - case TIOCSSERIAL: - return set_serial_info(info, - (struct serial_struct *) arg); - case TIOCSERGETLSR: /* Get line status register */ - return get_lsr_info(info, (unsigned int *) arg); - - case TIOCSERGSTRUCT: - if (copy_to_user((struct e100_serial *) arg, - info, sizeof(struct e100_serial))) - return -EFAULT; - return 0; + case TIOCGSERIAL: + return get_serial_info(info, + (struct serial_struct *) arg); + case TIOCSSERIAL: + return set_serial_info(info, + (struct serial_struct *) arg); + case TIOCSERGETLSR: /* Get line status register */ + return get_lsr_info(info, (unsigned int *) arg); + + case TIOCSERGSTRUCT: + if (copy_to_user((struct e100_serial *) arg, + info, sizeof(struct e100_serial))) + return -EFAULT; + return 0; #if defined(CONFIG_ETRAX_RS485) - case TIOCSERSETRS485: - { - struct rs485_control rs485ctrl; - if (copy_from_user(&rs485ctrl, (struct rs485_control*)arg, sizeof(rs485ctrl))) - return -EFAULT; + case TIOCSERSETRS485: + { + struct rs485_control rs485ctrl; + if (copy_from_user(&rs485ctrl, (struct rs485_control *)arg, + sizeof(rs485ctrl))) + return -EFAULT; - return e100_enable_rs485(tty, &rs485ctrl); - } + return e100_enable_rs485(tty, &rs485ctrl); + } - case TIOCSERWRRS485: - { - struct rs485_write rs485wr; - if (copy_from_user(&rs485wr, (struct rs485_write*)arg, sizeof(rs485wr))) - return -EFAULT; + case TIOCSERWRRS485: + { + struct rs485_write rs485wr; + if (copy_from_user(&rs485wr, (struct rs485_write *)arg, + sizeof(rs485wr))) + return -EFAULT; - return e100_write_rs485(tty, 1, rs485wr.outc, rs485wr.outc_size); - } + return e100_write_rs485(tty, rs485wr.outc, rs485wr.outc_size); + } #endif - default: - return -ENOIOCTLCMD; + default: + return -ENOIOCTLCMD; } return 0; } @@ -4191,46 +3715,6 @@ rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) } -/* In debugport.c - register a console write function that uses the normal - * serial driver - */ -typedef int (*debugport_write_function)(int i, const char *buf, unsigned int len); - -extern debugport_write_function debug_write_function; - -static int rs_debug_write_function(int i, const char *buf, unsigned int len) -{ - int cnt; - int written = 0; - struct tty_struct *tty; - static int recurse_cnt = 0; - - tty = rs_table[i].tty; - if (tty) { - unsigned long flags; - if (recurse_cnt > 5) /* We skip this debug output */ - return 1; - - local_irq_save(flags); - recurse_cnt++; - local_irq_restore(flags); - do { - cnt = rs_write(tty, 0, buf + written, len); - if (cnt >= 0) { - written += cnt; - buf += cnt; - len -= cnt; - } else - len = cnt; - } while(len > 0); - local_irq_save(flags); - recurse_cnt--; - local_irq_restore(flags); - return 1; - } - return 0; -} - /* * ------------------------------------------------------------ * rs_close() @@ -4252,11 +3736,10 @@ rs_close(struct tty_struct *tty, struct file * filp) /* interrupts are disabled for this entire function */ - save_flags(flags); - cli(); + local_irq_save(flags); if (tty_hung_up_p(filp)) { - restore_flags(flags); + local_irq_restore(flags); return; } @@ -4283,7 +3766,7 @@ rs_close(struct tty_struct *tty, struct file * filp) info->count = 0; } if (info->count) { - restore_flags(flags); + local_irq_restore(flags); return; } info->flags |= ASYNC_CLOSING; @@ -4337,7 +3820,7 @@ rs_close(struct tty_struct *tty, struct file * filp) } info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); wake_up_interruptible(&info->close_wait); - restore_flags(flags); + local_irq_restore(flags); /* port closed */ @@ -4359,6 +3842,28 @@ rs_close(struct tty_struct *tty, struct file * filp) #endif } #endif + + /* + * Release any allocated DMA irq's. + */ + if (info->dma_in_enabled) { + free_irq(info->dma_in_irq_nbr, info); + cris_free_dma(info->dma_in_nbr, info->dma_in_irq_description); + info->uses_dma_in = 0; +#ifdef SERIAL_DEBUG_OPEN + printk(KERN_DEBUG "DMA irq '%s' freed\n", + info->dma_in_irq_description); +#endif + } + if (info->dma_out_enabled) { + free_irq(info->dma_out_irq_nbr, info); + cris_free_dma(info->dma_out_nbr, info->dma_out_irq_description); + info->uses_dma_out = 0; +#ifdef SERIAL_DEBUG_OPEN + printk(KERN_DEBUG "DMA irq '%s' freed\n", + info->dma_out_irq_description); +#endif + } } /* @@ -4433,8 +3938,8 @@ block_til_ready(struct tty_struct *tty, struct file * filp, */ if (tty_hung_up_p(filp) || (info->flags & ASYNC_CLOSING)) { - if (info->flags & ASYNC_CLOSING) - interruptible_sleep_on(&info->close_wait); + wait_event_interruptible(info->close_wait, + !(info->flags & ASYNC_CLOSING)); #ifdef SERIAL_DO_RESTART if (info->flags & ASYNC_HUP_NOTIFY) return -EAGAIN; @@ -4472,21 +3977,19 @@ block_til_ready(struct tty_struct *tty, struct file * filp, printk("block_til_ready before block: ttyS%d, count = %d\n", info->line, info->count); #endif - save_flags(flags); - cli(); + local_irq_save(flags); if (!tty_hung_up_p(filp)) { extra_count++; info->count--; } - restore_flags(flags); + local_irq_restore(flags); info->blocked_open++; while (1) { - save_flags(flags); - cli(); + local_irq_save(flags); /* assert RTS and DTR */ e100_rts(info, 1); e100_dtr(info, 1); - restore_flags(flags); + local_irq_restore(flags); set_current_state(TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)) { @@ -4528,6 +4031,19 @@ block_til_ready(struct tty_struct *tty, struct file * filp, return 0; } +static void +deinit_port(struct e100_serial *info) +{ + if (info->dma_out_enabled) { + cris_free_dma(info->dma_out_nbr, info->dma_out_irq_description); + free_irq(info->dma_out_irq_nbr, info); + } + if (info->dma_in_enabled) { + cris_free_dma(info->dma_in_nbr, info->dma_in_irq_description); + free_irq(info->dma_in_irq_nbr, info); + } +} + /* * This routine is called whenever a serial port is opened. * It performs the serial-specific initialization for the tty structure. @@ -4538,9 +4054,9 @@ rs_open(struct tty_struct *tty, struct file * filp) struct e100_serial *info; int retval, line; unsigned long page; + int allocated_resources = 0; /* find which port we want to open */ - line = tty->index; if (line < 0 || line >= NR_PORTS) @@ -4580,8 +4096,8 @@ rs_open(struct tty_struct *tty, struct file * filp) */ if (tty_hung_up_p(filp) || (info->flags & ASYNC_CLOSING)) { - if (info->flags & ASYNC_CLOSING) - interruptible_sleep_on(&info->close_wait); + wait_event_interruptible(info->close_wait, + !(info->flags & ASYNC_CLOSING)); #ifdef SERIAL_DO_RESTART return ((info->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS); @@ -4590,13 +4106,86 @@ rs_open(struct tty_struct *tty, struct file * filp) #endif } + /* + * If DMA is enabled try to allocate the irq's. + */ + if (info->count == 1) { + allocated_resources = 1; + if (info->dma_in_enabled) { + if (request_irq(info->dma_in_irq_nbr, + rec_interrupt, + info->dma_in_irq_flags, + info->dma_in_irq_description, + info)) { + printk(KERN_WARNING "DMA irq '%s' busy; " + "falling back to non-DMA mode\n", + info->dma_in_irq_description); + /* Make sure we never try to use DMA in */ + /* for the port again. */ + info->dma_in_enabled = 0; + } else if (cris_request_dma(info->dma_in_nbr, + info->dma_in_irq_description, + DMA_VERBOSE_ON_ERROR, + info->dma_owner)) { + free_irq(info->dma_in_irq_nbr, info); + printk(KERN_WARNING "DMA '%s' busy; " + "falling back to non-DMA mode\n", + info->dma_in_irq_description); + /* Make sure we never try to use DMA in */ + /* for the port again. */ + info->dma_in_enabled = 0; + } +#ifdef SERIAL_DEBUG_OPEN + else + printk(KERN_DEBUG "DMA irq '%s' allocated\n", + info->dma_in_irq_description); +#endif + } + if (info->dma_out_enabled) { + if (request_irq(info->dma_out_irq_nbr, + tr_interrupt, + info->dma_out_irq_flags, + info->dma_out_irq_description, + info)) { + printk(KERN_WARNING "DMA irq '%s' busy; " + "falling back to non-DMA mode\n", + info->dma_out_irq_description); + /* Make sure we never try to use DMA out */ + /* for the port again. */ + info->dma_out_enabled = 0; + } else if (cris_request_dma(info->dma_out_nbr, + info->dma_out_irq_description, + DMA_VERBOSE_ON_ERROR, + info->dma_owner)) { + free_irq(info->dma_out_irq_nbr, info); + printk(KERN_WARNING "DMA '%s' busy; " + "falling back to non-DMA mode\n", + info->dma_out_irq_description); + /* Make sure we never try to use DMA out */ + /* for the port again. */ + info->dma_out_enabled = 0; + } +#ifdef SERIAL_DEBUG_OPEN + else + printk(KERN_DEBUG "DMA irq '%s' allocated\n", + info->dma_out_irq_description); +#endif + } + } + /* * Start up the serial port */ retval = startup(info); - if (retval) + if (retval) { + if (allocated_resources) + deinit_port(info); + + /* FIXME Decrease count info->count here too? */ return retval; + } + retval = block_til_ready(tty, filp, info); if (retval) { @@ -4604,6 +4193,9 @@ rs_open(struct tty_struct *tty, struct file * filp) printk("rs_open returning after block_til_ready with %d\n", retval); #endif + if (allocated_resources) + deinit_port(info); + return retval; } @@ -4793,6 +4385,8 @@ static const struct tty_operations rs_ops = { .send_xchar = rs_send_xchar, .wait_until_sent = rs_wait_until_sent, .read_proc = rs_read_proc, + .tiocmget = rs_tiocmget, + .tiocmset = rs_tiocmset }; static int __init @@ -4810,9 +4404,27 @@ rs_init(void) /* Setup the timed flush handler system */ #if !defined(CONFIG_ETRAX_SERIAL_FAST_TIMER) - init_timer(&flush_timer); - flush_timer.function = timed_flush_handler; - mod_timer(&flush_timer, jiffies + CONFIG_ETRAX_SERIAL_RX_TIMEOUT_TICKS); + setup_timer(&flush_timer, timed_flush_handler, 0); + mod_timer(&flush_timer, jiffies + 5); +#endif + +#if defined(CONFIG_ETRAX_RS485) +#if defined(CONFIG_ETRAX_RS485_ON_PA) + if (cris_io_interface_allocate_pins(if_ser0, 'a', rs485_pa_bit, + rs485_pa_bit)) { + printk(KERN_CRIT "ETRAX100LX serial: Could not allocate " + "RS485 pin\n"); + return -EBUSY; + } +#endif +#if defined(CONFIG_ETRAX_RS485_ON_PORT_G) + if (cris_io_interface_allocate_pins(if_ser0, 'g', rs485_pa_bit, + rs485_port_g_bit)) { + printk(KERN_CRIT "ETRAX100LX serial: Could not allocate " + "RS485 pin\n"); + return -EBUSY; + } +#endif #endif /* Initialize the tty_driver structure */ @@ -4839,6 +4451,16 @@ rs_init(void) /* do some initializing for the separate ports */ for (i = 0, info = rs_table; i < NR_PORTS; i++,info++) { + if (info->enabled) { + if (cris_request_io_interface(info->io_if, + info->io_if_description)) { + printk(KERN_CRIT "ETRAX100LX async serial: " + "Could not allocate IO pins for " + "%s, port %d\n", + info->io_if_description, i); + info->enabled = 0; + } + } info->uses_dma_in = 0; info->uses_dma_out = 0; info->line = i; @@ -4872,7 +4494,7 @@ rs_init(void) info->rs485.delay_rts_before_send = 0; info->rs485.enabled = 0; #endif - INIT_WORK(&info->work, do_softint, info); + INIT_WORK(&info->work, do_softint); if (info->enabled) { printk(KERN_INFO "%s%d at 0x%x is a builtin UART with DMA\n", @@ -4890,64 +4512,17 @@ rs_init(void) #endif #ifndef CONFIG_SVINTO_SIM +#ifndef CONFIG_ETRAX_KGDB /* Not needed in simulator. May only complicate stuff. */ /* hook the irq's for DMA channel 6 and 7, serial output and input, and some more... */ - if (request_irq(SERIAL_IRQ_NBR, ser_interrupt, IRQF_SHARED | IRQF_DISABLED, "serial ", NULL)) - panic("irq8"); - -#ifdef CONFIG_ETRAX_SERIAL_PORT0 -#ifdef CONFIG_ETRAX_SERIAL_PORT0_DMA6_OUT - if (request_irq(SER0_DMA_TX_IRQ_NBR, tr_interrupt, IRQF_DISABLED, "serial 0 dma tr", NULL)) - panic("irq22"); -#endif -#ifdef CONFIG_ETRAX_SERIAL_PORT0_DMA7_IN - if (request_irq(SER0_DMA_RX_IRQ_NBR, rec_interrupt, IRQF_DISABLED, "serial 0 dma rec", NULL)) - panic("irq23"); -#endif -#endif - -#ifdef CONFIG_ETRAX_SERIAL_PORT1 -#ifdef CONFIG_ETRAX_SERIAL_PORT1_DMA8_OUT - if (request_irq(SER1_DMA_TX_IRQ_NBR, tr_interrupt, IRQF_DISABLED, "serial 1 dma tr", NULL)) - panic("irq24"); -#endif -#ifdef CONFIG_ETRAX_SERIAL_PORT1_DMA9_IN - if (request_irq(SER1_DMA_RX_IRQ_NBR, rec_interrupt, IRQF_DISABLED, "serial 1 dma rec", NULL)) - panic("irq25"); -#endif -#endif -#ifdef CONFIG_ETRAX_SERIAL_PORT2 - /* DMA Shared with par0 (and SCSI0 and ATA) */ -#ifdef CONFIG_ETRAX_SERIAL_PORT2_DMA2_OUT - if (request_irq(SER2_DMA_TX_IRQ_NBR, tr_interrupt, IRQF_SHARED | IRQF_DISABLED, "serial 2 dma tr", NULL)) - panic("irq18"); -#endif -#ifdef CONFIG_ETRAX_SERIAL_PORT2_DMA3_IN - if (request_irq(SER2_DMA_RX_IRQ_NBR, rec_interrupt, IRQF_SHARED | IRQF_DISABLED, "serial 2 dma rec", NULL)) - panic("irq19"); -#endif -#endif -#ifdef CONFIG_ETRAX_SERIAL_PORT3 - /* DMA Shared with par1 (and SCSI1 and Extern DMA 0) */ -#ifdef CONFIG_ETRAX_SERIAL_PORT3_DMA4_OUT - if (request_irq(SER3_DMA_TX_IRQ_NBR, tr_interrupt, IRQF_SHARED | IRQF_DISABLED, "serial 3 dma tr", NULL)) - panic("irq20"); -#endif -#ifdef CONFIG_ETRAX_SERIAL_PORT3_DMA5_IN - if (request_irq(SER3_DMA_RX_IRQ_NBR, rec_interrupt, IRQF_SHARED | IRQF_DISABLED, "serial 3 dma rec", NULL)) - panic("irq21"); -#endif -#endif + if (request_irq(SERIAL_IRQ_NBR, ser_interrupt, + IRQF_SHARED | IRQF_DISABLED, "serial ", driver)) + panic("%s: Failed to request irq8", __FUNCTION__); -#ifdef CONFIG_ETRAX_SERIAL_FLUSH_DMA_FAST - if (request_irq(TIMER1_IRQ_NBR, timeout_interrupt, IRQF_SHARED | IRQF_DISABLED, - "fast serial dma timeout", NULL)) { - printk(KERN_CRIT "err: timer1 irq\n"); - } #endif #endif /* CONFIG_SVINTO_SIM */ - debug_write_function = rs_debug_write_function; + return 0; } diff --git a/drivers/serial/crisv10.h b/drivers/serial/crisv10.h new file mode 100644 index 00000000000..ccd0f32b737 --- /dev/null +++ b/drivers/serial/crisv10.h @@ -0,0 +1,146 @@ +/* + * serial.h: Arch-dep definitions for the Etrax100 serial driver. + * + * Copyright (C) 1998-2007 Axis Communications AB + */ + +#ifndef _ETRAX_SERIAL_H +#define _ETRAX_SERIAL_H + +#include +#include +#include +#include + +/* Software state per channel */ + +#ifdef __KERNEL__ +/* + * This is our internal structure for each serial port's state. + * + * Many fields are paralleled by the structure used by the serial_struct + * structure. + * + * For definitions of the flags field, see tty.h + */ + +#define SERIAL_RECV_DESCRIPTORS 8 + +struct etrax_recv_buffer { + struct etrax_recv_buffer *next; + unsigned short length; + unsigned char error; + unsigned char pad; + + unsigned char buffer[0]; +}; + +struct e100_serial { + int baud; + volatile u8 *port; /* R_SERIALx_CTRL */ + u32 irq; /* bitnr in R_IRQ_MASK2 for dmaX_descr */ + + /* Output registers */ + volatile u8 *oclrintradr; /* adr to R_DMA_CHx_CLR_INTR */ + volatile u32 *ofirstadr; /* adr to R_DMA_CHx_FIRST */ + volatile u8 *ocmdadr; /* adr to R_DMA_CHx_CMD */ + const volatile u8 *ostatusadr; /* adr to R_DMA_CHx_STATUS */ + + /* Input registers */ + volatile u8 *iclrintradr; /* adr to R_DMA_CHx_CLR_INTR */ + volatile u32 *ifirstadr; /* adr to R_DMA_CHx_FIRST */ + volatile u8 *icmdadr; /* adr to R_DMA_CHx_CMD */ + volatile u32 *idescradr; /* adr to R_DMA_CHx_DESCR */ + + int flags; /* defined in tty.h */ + + u8 rx_ctrl; /* shadow for R_SERIALx_REC_CTRL */ + u8 tx_ctrl; /* shadow for R_SERIALx_TR_CTRL */ + u8 iseteop; /* bit number for R_SET_EOP for the input dma */ + int enabled; /* Set to 1 if the port is enabled in HW config */ + + u8 dma_out_enabled; /* Set to 1 if DMA should be used */ + u8 dma_in_enabled; /* Set to 1 if DMA should be used */ + + /* end of fields defined in rs_table[] in .c-file */ + int dma_owner; + unsigned int dma_in_nbr; + unsigned int dma_out_nbr; + unsigned int dma_in_irq_nbr; + unsigned int dma_out_irq_nbr; + unsigned long dma_in_irq_flags; + unsigned long dma_out_irq_flags; + char *dma_in_irq_description; + char *dma_out_irq_description; + + enum cris_io_interface io_if; + char *io_if_description; + + u8 uses_dma_in; /* Set to 1 if DMA is used */ + u8 uses_dma_out; /* Set to 1 if DMA is used */ + u8 forced_eop; /* a fifo eop has been forced */ + int baud_base; /* For special baudrates */ + int custom_divisor; /* For special baudrates */ + struct etrax_dma_descr tr_descr; + struct etrax_dma_descr rec_descr[SERIAL_RECV_DESCRIPTORS]; + int cur_rec_descr; + + volatile int tr_running; /* 1 if output is running */ + + struct tty_struct *tty; + int read_status_mask; + int ignore_status_mask; + int x_char; /* xon/xoff character */ + int close_delay; + unsigned short closing_wait; + unsigned short closing_wait2; + unsigned long event; + unsigned long last_active; + int line; + int type; /* PORT_ETRAX */ + int count; /* # of fd on device */ + int blocked_open; /* # of blocked opens */ + struct circ_buf xmit; + struct etrax_recv_buffer *first_recv_buffer; + struct etrax_recv_buffer *last_recv_buffer; + unsigned int recv_cnt; + unsigned int max_recv_cnt; + + struct work_struct work; + struct async_icount icount; /* error-statistics etc.*/ + struct ktermios normal_termios; + struct ktermios callout_termios; + wait_queue_head_t open_wait; + wait_queue_head_t close_wait; + + unsigned long char_time_usec; /* The time for 1 char, in usecs */ + unsigned long flush_time_usec; /* How often we should flush */ + unsigned long last_tx_active_usec; /* Last tx usec in the jiffies */ + unsigned long last_tx_active; /* Last tx time in jiffies */ + unsigned long last_rx_active_usec; /* Last rx usec in the jiffies */ + unsigned long last_rx_active; /* Last rx time in jiffies */ + + int break_detected_cnt; + int errorcode; + +#ifdef CONFIG_ETRAX_RS485 + struct rs485_control rs485; /* RS-485 support */ +#endif +}; + +/* this PORT is not in the standard serial.h. it's not actually used for + * anything since we only have one type of async serial-port anyway in this + * system. + */ + +#define PORT_ETRAX 1 + +/* + * Events are used to schedule things to happen at timer-interrupt + * time, instead of at rs interrupt time. + */ +#define RS_EVENT_WRITE_WAKEUP 0 + +#endif /* __KERNEL__ */ + +#endif /* !_ETRAX_SERIAL_H */ -- cgit v1.2.3 From 1b8be1d82dd3b1ef32b15923123afa03bfeeb116 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:20 -0800 Subject: CRIS: remove MTD_AMSTD and MTD_OBSOLETE_CHIPS take two Remove MTD_AMDSTD and MTD_OBSOLETE_CHIPS from defconfig, Kconfig and code, instead we'll use MTD_CFI or MTD_JEDECPROBE. [akpm@linux-foundation.org: codingl-style cleanups] Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/defconfig | 2 -- arch/cris/arch-v10/drivers/Kconfig | 2 -- arch/cris/arch-v10/drivers/axisflashmap.c | 8 ++++---- arch/cris/arch-v32/drivers/Kconfig | 2 -- arch/cris/arch-v32/drivers/axisflashmap.c | 9 ++++----- arch/cris/defconfig | 2 -- 6 files changed, 8 insertions(+), 17 deletions(-) diff --git a/arch/cris/arch-v10/defconfig b/arch/cris/arch-v10/defconfig index 710c20ba2be..572f1192639 100644 --- a/arch/cris/arch-v10/defconfig +++ b/arch/cris/arch-v10/defconfig @@ -99,7 +99,6 @@ CONFIG_MTD=y CONFIG_MTD_CFI=y # CONFIG_MTD_CFI_INTELEXT is not set CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_AMDSTD=y CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_ETRAX_I2C=y @@ -145,7 +144,6 @@ CONFIG_MTD_CFI=y # CONFIG_MTD_CFI_GEOMETRY is not set # CONFIG_MTD_CFI_INTELEXT is not set CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_AMDSTD=y # CONFIG_MTD_SHARP is not set # CONFIG_MTD_PHYSMAP is not set # CONFIG_MTD_NORA is not set diff --git a/arch/cris/arch-v10/drivers/Kconfig b/arch/cris/arch-v10/drivers/Kconfig index e6fc8455a37..faf8b4d3ca0 100644 --- a/arch/cris/arch-v10/drivers/Kconfig +++ b/arch/cris/arch-v10/drivers/Kconfig @@ -606,8 +606,6 @@ config ETRAX_AXISFLASHMAP select MTD select MTD_CFI select MTD_CFI_AMDSTD - select MTD_OBSOLETE_CHIPS - select MTD_AMDSTD select MTD_CHAR select MTD_BLOCK select MTD_PARTITIONS diff --git a/arch/cris/arch-v10/drivers/axisflashmap.c b/arch/cris/arch-v10/drivers/axisflashmap.c index efd7b0f3a91..ea3cf2e39a1 100644 --- a/arch/cris/arch-v10/drivers/axisflashmap.c +++ b/arch/cris/arch-v10/drivers/axisflashmap.c @@ -312,12 +312,12 @@ static struct mtd_info *probe_cs(struct map_info *map_cs) "%s: Probing a 0x%08lx bytes large window at 0x%08lx.\n", map_cs->name, map_cs->size, map_cs->map_priv_1); -#ifdef CONFIG_MTD_AMDSTD - mtd_cs = do_map_probe("amd_flash", map_cs); -#endif #ifdef CONFIG_MTD_CFI + mtd_cs = do_map_probe("cfi_probe", map_cs); +#endif +#ifdef CONFIG_MTD_JEDECPROBE if (!mtd_cs) { - mtd_cs = do_map_probe("cfi_probe", map_cs); + mtd_cs = do_map_probe("jedec_probe", map_cs); } #endif diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index cc6ba542375..7f72d7c9e1c 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -362,8 +362,6 @@ config ETRAX_AXISFLASHMAP select MTD select MTD_CFI select MTD_CFI_AMDSTD - select MTD_OBSOLETE_CHIPS - select MTD_AMDSTD select MTD_CHAR select MTD_BLOCK select MTD_PARTITIONS diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c index 3ec12ea44e8..c5ff95e1826 100644 --- a/arch/cris/arch-v32/drivers/axisflashmap.c +++ b/arch/cris/arch-v32/drivers/axisflashmap.c @@ -190,13 +190,12 @@ static struct mtd_info *probe_cs(struct map_info *map_cs) "%s: Probing a 0x%08lx bytes large window at 0x%08lx.\n", map_cs->name, map_cs->size, map_cs->map_priv_1); -#ifdef CONFIG_MTD_AMDSTD - mtd_cs = do_map_probe("amd_flash", map_cs); -#endif #ifdef CONFIG_MTD_CFI - if (!mtd_cs) { mtd_cs = do_map_probe("cfi_probe", map_cs); - } +#endif +#ifdef CONFIG_MTD_JEDECPROBE + if (!mtd_cs) + mtd_cs = do_map_probe("jedec_probe", map_cs); #endif return mtd_cs; diff --git a/arch/cris/defconfig b/arch/cris/defconfig index e2d81859ee1..9c33ae65993 100644 --- a/arch/cris/defconfig +++ b/arch/cris/defconfig @@ -226,8 +226,6 @@ CONFIG_MTD_CFI_UTIL=y CONFIG_MTD_RAM=y # CONFIG_MTD_ROM is not set # CONFIG_MTD_ABSENT is not set -CONFIG_MTD_OBSOLETE_CHIPS=y -CONFIG_MTD_AMDSTD=y # CONFIG_MTD_SHARP is not set # CONFIG_MTD_JEDEC is not set -- cgit v1.2.3 From 3244c77bd3460e9701ffeecc2452d7f2ee17106c Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:21 -0800 Subject: CRISv10: fix timer interrupt parameters Fix timer interrupt parameters for CRIS v10. - irq_handler_t only takes two arguments, use get_irq_regs to get regs for profiling. - Call update_process_times. - Remove CVS tag. Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/kernel/time.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c index 575a14bb110..5976f6199c4 100644 --- a/arch/cris/arch-v10/kernel/time.c +++ b/arch/cris/arch-v10/kernel/time.c @@ -1,5 +1,4 @@ -/* $Id: time.c,v 1.5 2004/09/29 06:12:46 starvik Exp $ - * +/* * linux/arch/cris/arch-v10/kernel/time.c * * Copyright (C) 1991, 1992, 1995 Linus Torvalds @@ -20,6 +19,7 @@ #include #include #include +#include /* define this if you need to use print_timestamp */ /* it will make jiffies at 96 hz instead of 100 hz though */ @@ -201,8 +201,9 @@ static long last_rtc_update = 0; extern void cris_do_profile(struct pt_regs *regs); static inline irqreturn_t -timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +timer_interrupt(int irq, void *dev_id) { + struct pt_regs *regs = get_irq_regs(); /* acknowledge the timer irq */ #ifdef USE_CASCADE_TIMERS @@ -221,9 +222,11 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) #endif /* reset watchdog otherwise it resets us! */ - reset_watchdog(); + /* Update statistics. */ + update_process_times(user_mode(regs)); + /* call the real timer interrupt handler */ do_timer(1); -- cgit v1.2.3 From d8e5219f9f5ca7518eb820db9f3d287a1d46fcf5 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:23 -0800 Subject: CRISv10 improve and bugfix fasttimer Improve and bugfix CRIS v10 fast timers. - irq_handler_t now only takes two arguments. - Keep interrupts disabled as long as we have a reference to the fasttimer list and only enable them while doing the callback. del_fast_timer may be called from other interrupt context. - Fix bug where debug code could return without calling local_irq_restore. - Use jiffies instead of usec (change from struct timeval to fasttime_t). - Don't initialize static variables to zero. - Remove obsolete #ifndef DECLARE_WAITQUEUE code. - fast_timer_init should be __initcall. - Change status/debug variables to unsigned. - Remove CVS log and CVS id. Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/kernel/fasttimer.c | 315 ++++++++++++---------------------- include/asm-cris/fasttimer.h | 16 +- 2 files changed, 124 insertions(+), 207 deletions(-) diff --git a/arch/cris/arch-v10/kernel/fasttimer.c b/arch/cris/arch-v10/kernel/fasttimer.c index d3ea052e5ee..645d7059b40 100644 --- a/arch/cris/arch-v10/kernel/fasttimer.c +++ b/arch/cris/arch-v10/kernel/fasttimer.c @@ -1,97 +1,9 @@ -/* $Id: fasttimer.c,v 1.9 2005/03/04 08:16:16 starvik Exp $ +/* * linux/arch/cris/kernel/fasttimer.c * * Fast timers for ETRAX100/ETRAX100LX - * This may be useful in other OS than Linux so use 2 space indentation... * - * $Log: fasttimer.c,v $ - * Revision 1.9 2005/03/04 08:16:16 starvik - * Merge of Linux 2.6.11. - * - * Revision 1.8 2005/01/05 06:09:29 starvik - * cli()/sti() will be obsolete in 2.6.11. - * - * Revision 1.7 2005/01/03 13:35:46 starvik - * Removed obsolete stuff. - * Mark fast timer IRQ as not shared. - * - * Revision 1.6 2004/05/14 10:18:39 starvik - * Export fast_timer_list - * - * Revision 1.5 2004/05/14 07:58:01 starvik - * Merge of changes from 2.4 - * - * Revision 1.4 2003/07/04 08:27:41 starvik - * Merge of Linux 2.5.74 - * - * Revision 1.3 2002/12/12 08:26:32 starvik - * Don't use C-comments inside CVS comments - * - * Revision 1.2 2002/12/11 15:42:02 starvik - * Extracted v10 (ETRAX 100LX) specific stuff from arch/cris/kernel/ - * - * Revision 1.1 2002/11/18 07:58:06 starvik - * Fast timers (from Linux 2.4) - * - * Revision 1.5 2002/10/15 06:21:39 starvik - * Added call to init_waitqueue_head - * - * Revision 1.4 2002/05/28 17:47:59 johana - * Added del_fast_timer() - * - * Revision 1.3 2002/05/28 16:16:07 johana - * Handle empty fast_timer_list - * - * Revision 1.2 2002/05/27 15:38:42 johana - * Made it compile without warnings on Linux 2.4. - * (includes, wait_queue, PROC_FS and snprintf) - * - * Revision 1.1 2002/05/27 15:32:25 johana - * arch/etrax100/kernel/fasttimer.c v1.8 from the elinux tree. - * - * Revision 1.8 2001/11/27 13:50:40 pkj - * Disable interrupts while stopping the timer and while modifying the - * list of active timers in timer1_handler() as it may be interrupted - * by other interrupts (e.g., the serial interrupt) which may add fast - * timers. - * - * Revision 1.7 2001/11/22 11:50:32 pkj - * * Only store information about the last 16 timers. - * * proc_fasttimer_read() now uses an allocated buffer, since it - * requires more space than just a page even for only writing the - * last 16 timers. The buffer is only allocated on request, so - * unless /proc/fasttimer is read, it is never allocated. - * * Renamed fast_timer_started to fast_timers_started to match - * fast_timers_added and fast_timers_expired. - * * Some clean-up. - * - * Revision 1.6 2000/12/13 14:02:08 johana - * Removed volatile for fast_timer_list - * - * Revision 1.5 2000/12/13 13:55:35 johana - * Added DEBUG_LOG, added som cli() and cleanup - * - * Revision 1.4 2000/12/05 13:48:50 johana - * Added range check when writing proc file, modified timer int handling - * - * Revision 1.3 2000/11/23 10:10:20 johana - * More debug/logging possibilities. - * Moved GET_JIFFIES_USEC() to timex.h and time.c - * - * Revision 1.2 2000/11/01 13:41:04 johana - * Clean up and bugfixes. - * Created new do_gettimeofday_fast() that gets a timeval struct - * with time based on jiffies and *R_TIMER0_DATA, uses a table - * for fast conversion of timer value to microseconds. - * (Much faster the standard do_gettimeofday() and we don't really - * want to use the true time - we want the "uptime" so timers don't screw up - * when we change the time. - * TODO: Add efficient support for continuous timers as well. - * - * Revision 1.1 2000/10/26 15:49:16 johana - * Added fasttimer, highresolution timers. - * - * Copyright (C) 2000,2001 2002 Axis Communications AB, Lund, Sweden + * Copyright (C) 2000-2007 Axis Communications AB, Lund, Sweden */ #include @@ -125,7 +37,7 @@ #ifdef FAST_TIMER_SANITY_CHECKS #define SANITYCHECK(x) x -static int sanity_failed = 0; +static int sanity_failed; #else #define SANITYCHECK(x) #endif @@ -136,13 +48,13 @@ static int sanity_failed = 0; #define __INLINE__ inline -static int fast_timer_running = 0; -static int fast_timers_added = 0; -static int fast_timers_started = 0; -static int fast_timers_expired = 0; -static int fast_timers_deleted = 0; -static int fast_timer_is_init = 0; -static int fast_timer_ints = 0; +static unsigned int fast_timer_running; +static unsigned int fast_timers_added; +static unsigned int fast_timers_started; +static unsigned int fast_timers_expired; +static unsigned int fast_timers_deleted; +static unsigned int fast_timer_is_init; +static unsigned int fast_timer_ints; struct fast_timer *fast_timer_list = NULL; @@ -150,8 +62,8 @@ struct fast_timer *fast_timer_list = NULL; #define DEBUG_LOG_MAX 128 static const char * debug_log_string[DEBUG_LOG_MAX]; static unsigned long debug_log_value[DEBUG_LOG_MAX]; -static int debug_log_cnt = 0; -static int debug_log_cnt_wrapped = 0; +static unsigned int debug_log_cnt; +static unsigned int debug_log_cnt_wrapped; #define DEBUG_LOG(string, value) \ { \ @@ -206,42 +118,26 @@ int timer_freq_settings[NUM_TIMER_STATS]; int timer_delay_settings[NUM_TIMER_STATS]; /* Not true gettimeofday, only checks the jiffies (uptime) + useconds */ -void __INLINE__ do_gettimeofday_fast(struct timeval *tv) +void __INLINE__ do_gettimeofday_fast(struct fasttime_t *tv) { - unsigned long sec = jiffies; - unsigned long usec = GET_JIFFIES_USEC(); - - usec += (sec % HZ) * (1000000 / HZ); - sec = sec / HZ; - - if (usec > 1000000) - { - usec -= 1000000; - sec++; - } - tv->tv_sec = sec; - tv->tv_usec = usec; + tv->tv_jiff = jiffies; + tv->tv_usec = GET_JIFFIES_USEC(); } -int __INLINE__ timeval_cmp(struct timeval *t0, struct timeval *t1) +int __INLINE__ timeval_cmp(struct fasttime_t *t0, struct fasttime_t *t1) { - if (t0->tv_sec < t1->tv_sec) - { - return -1; - } - else if (t0->tv_sec > t1->tv_sec) - { - return 1; - } - if (t0->tv_usec < t1->tv_usec) - { - return -1; - } - else if (t0->tv_usec > t1->tv_usec) - { - return 1; - } - return 0; + /* Compare jiffies. Takes care of wrapping */ + if (time_before(t0->tv_jiff, t1->tv_jiff)) + return -1; + else if (time_after(t0->tv_jiff, t1->tv_jiff)) + return 1; + + /* Compare us */ + if (t0->tv_usec < t1->tv_usec) + return -1; + else if (t0->tv_usec > t1->tv_usec) + return 1; + return 0; } void __INLINE__ start_timer1(unsigned long delay_us) @@ -285,7 +181,7 @@ void __INLINE__ start_timer1(unsigned long delay_us) timer_freq_settings[fast_timers_started % NUM_TIMER_STATS] = freq_index; timer_delay_settings[fast_timers_started % NUM_TIMER_STATS] = delay_us; - D1(printk("start_timer1 : %d us freq: %i div: %i\n", + D1(printk(KERN_DEBUG "start_timer1 : %d us freq: %i div: %i\n", delay_us, freq_index, div)); /* Clear timer1 irq */ *R_IRQ_MASK0_CLR = IO_STATE(R_IRQ_MASK0_CLR, timer1, clr); @@ -340,7 +236,7 @@ void start_one_shot_timer(struct fast_timer *t, printk(KERN_WARNING "timer name: %s data: 0x%08lX already in list!\n", name, data); sanity_failed++; - return; + goto done; } else { @@ -356,11 +252,11 @@ void start_one_shot_timer(struct fast_timer *t, t->name = name; t->tv_expires.tv_usec = t->tv_set.tv_usec + delay_us % 1000000; - t->tv_expires.tv_sec = t->tv_set.tv_sec + delay_us / 1000000; + t->tv_expires.tv_jiff = t->tv_set.tv_jiff + delay_us / 1000000 / HZ; if (t->tv_expires.tv_usec > 1000000) { t->tv_expires.tv_usec -= 1000000; - t->tv_expires.tv_sec++; + t->tv_expires.tv_jiff += HZ; } #ifdef FAST_TIMER_LOG timer_added_log[fast_timers_added % NUM_TIMER_STATS] = *t; @@ -401,6 +297,7 @@ void start_one_shot_timer(struct fast_timer *t, D2(printk("start_one_shot_timer: %d us done\n", delay_us)); +done: local_irq_restore(flags); } /* start_one_shot_timer */ @@ -444,11 +341,18 @@ int del_fast_timer(struct fast_timer * t) /* Timer 1 interrupt handler */ static irqreturn_t -timer1_handler(int irq, void *dev_id, struct pt_regs *regs) +timer1_handler(int irq, void *dev_id) { struct fast_timer *t; unsigned long flags; + /* We keep interrupts disabled not only when we modify the + * fast timer list, but any time we hold a reference to a + * timer in the list, since del_fast_timer may be called + * from (another) interrupt context. Thus, the only time + * when interrupts are enabled is when calling the timer + * callback function. + */ local_irq_save(flags); /* Clear timer1 irq */ @@ -466,16 +370,17 @@ timer1_handler(int irq, void *dev_id, struct pt_regs *regs) fast_timer_running = 0; fast_timer_ints++; - local_irq_restore(flags); - t = fast_timer_list; while (t) { - struct timeval tv; + struct fasttime_t tv; + fast_timer_function_type *f; + unsigned long d; /* Has it really expired? */ do_gettimeofday_fast(&tv); - D1(printk("t: %is %06ius\n", tv.tv_sec, tv.tv_usec)); + D1(printk(KERN_DEBUG "t: %is %06ius\n", + tv.tv_jiff, tv.tv_usec)); if (timeval_cmp(&t->tv_expires, &tv) <= 0) { @@ -486,7 +391,6 @@ timer1_handler(int irq, void *dev_id, struct pt_regs *regs) fast_timers_expired++; /* Remove this timer before call, since it may reuse the timer */ - local_irq_save(flags); if (t->prev) { t->prev->next = t->next; @@ -501,16 +405,23 @@ timer1_handler(int irq, void *dev_id, struct pt_regs *regs) } t->prev = NULL; t->next = NULL; - local_irq_restore(flags); - if (t->function != NULL) - { - t->function(t->data); - } - else - { + /* Save function callback data before enabling + * interrupts, since the timer may be removed and + * we don't know how it was allocated + * (e.g. ->function and ->data may become overwritten + * after deletion if the timer was stack-allocated). + */ + f = t->function; + d = t->data; + + if (f != NULL) { + /* Run callback with interrupts enabled. */ + local_irq_restore(flags); + f(d); + local_irq_save(flags); + } else DEBUG_LOG("!timer1 %i function==NULL!\n", fast_timer_ints); - } } else { @@ -518,16 +429,20 @@ timer1_handler(int irq, void *dev_id, struct pt_regs *regs) D1(printk(".\n")); } - local_irq_save(flags); if ((t = fast_timer_list) != NULL) { /* Start next timer.. */ - long us; - struct timeval tv; + long us = 0; + struct fasttime_t tv; do_gettimeofday_fast(&tv); - us = ((t->tv_expires.tv_sec - tv.tv_sec) * 1000000 + - t->tv_expires.tv_usec - tv.tv_usec); + + /* time_after_eq takes care of wrapping */ + if (time_after_eq(t->tv_expires.tv_jiff, tv.tv_jiff)) + us = ((t->tv_expires.tv_jiff - tv.tv_jiff) * + 1000000 / HZ + t->tv_expires.tv_usec - + tv.tv_usec); + if (us > 0) { if (!fast_timer_running) @@ -537,7 +452,6 @@ timer1_handler(int irq, void *dev_id, struct pt_regs *regs) #endif start_timer1(us); } - local_irq_restore(flags); break; } else @@ -548,9 +462,10 @@ timer1_handler(int irq, void *dev_id, struct pt_regs *regs) D1(printk("e! %d\n", us)); } } - local_irq_restore(flags); } + local_irq_restore(flags); + if (!t) { D1(printk("t1 stop!\n")); @@ -575,28 +490,17 @@ static void wake_up_func(unsigned long data) void schedule_usleep(unsigned long us) { struct fast_timer t; -#ifdef DECLARE_WAITQUEUE wait_queue_head_t sleep_wait; init_waitqueue_head(&sleep_wait); - { - DECLARE_WAITQUEUE(wait, current); -#else - struct wait_queue *sleep_wait = NULL; - struct wait_queue wait = { current, NULL }; -#endif D1(printk("schedule_usleep(%d)\n", us)); - add_wait_queue(&sleep_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); start_one_shot_timer(&t, wake_up_func, (unsigned long)&sleep_wait, us, "usleep"); - schedule(); - set_current_state(TASK_RUNNING); - remove_wait_queue(&sleep_wait, &wait); + /* Uninterruptible sleep on the fast timer. (The condition is somewhat + * redundant since the timer is what wakes us up.) */ + wait_event(sleep_wait, !fast_timer_pending(&t)); + D1(printk("done schedule_usleep(%d)\n", us)); -#ifdef DECLARE_WAITQUEUE - } -#endif } #ifdef CONFIG_PROC_FS @@ -616,7 +520,7 @@ static int proc_fasttimer_read(char *buf, char **start, off_t offset, int len unsigned long flags; int i = 0; int num_to_show; - struct timeval tv; + struct fasttime_t tv; struct fast_timer *t, *nextt; static char *bigbuf = NULL; static unsigned long used; @@ -624,7 +528,8 @@ static int proc_fasttimer_read(char *buf, char **start, off_t offset, int len if (!bigbuf && !(bigbuf = vmalloc(BIG_BUF_SIZE))) { used = 0; - bigbuf[0] = '\0'; + if (buf) + buf[0] = '\0'; return 0; } @@ -646,7 +551,7 @@ static int proc_fasttimer_read(char *buf, char **start, off_t offset, int len used += sprintf(bigbuf + used, "Fast timer running: %s\n", fast_timer_running ? "yes" : "no"); used += sprintf(bigbuf + used, "Current time: %lu.%06lu\n", - (unsigned long)tv.tv_sec, + (unsigned long)tv.tv_jiff, (unsigned long)tv.tv_usec); #ifdef FAST_TIMER_SANITY_CHECKS used += sprintf(bigbuf + used, "Sanity failed: %i\n", @@ -696,9 +601,9 @@ static int proc_fasttimer_read(char *buf, char **start, off_t offset, int len "d: %6li us data: 0x%08lX" "\n", t->name, - (unsigned long)t->tv_set.tv_sec, + (unsigned long)t->tv_set.tv_jiff, (unsigned long)t->tv_set.tv_usec, - (unsigned long)t->tv_expires.tv_sec, + (unsigned long)t->tv_expires.tv_jiff, (unsigned long)t->tv_expires.tv_usec, t->delay_us, t->data @@ -718,9 +623,9 @@ static int proc_fasttimer_read(char *buf, char **start, off_t offset, int len "d: %6li us data: 0x%08lX" "\n", t->name, - (unsigned long)t->tv_set.tv_sec, + (unsigned long)t->tv_set.tv_jiff, (unsigned long)t->tv_set.tv_usec, - (unsigned long)t->tv_expires.tv_sec, + (unsigned long)t->tv_expires.tv_jiff, (unsigned long)t->tv_expires.tv_usec, t->delay_us, t->data @@ -738,9 +643,9 @@ static int proc_fasttimer_read(char *buf, char **start, off_t offset, int len "d: %6li us data: 0x%08lX" "\n", t->name, - (unsigned long)t->tv_set.tv_sec, + (unsigned long)t->tv_set.tv_jiff, (unsigned long)t->tv_set.tv_usec, - (unsigned long)t->tv_expires.tv_sec, + (unsigned long)t->tv_expires.tv_jiff, (unsigned long)t->tv_expires.tv_usec, t->delay_us, t->data @@ -761,15 +666,15 @@ static int proc_fasttimer_read(char *buf, char **start, off_t offset, int len /* " func: 0x%08lX" */ "\n", t->name, - (unsigned long)t->tv_set.tv_sec, + (unsigned long)t->tv_set.tv_jiff, (unsigned long)t->tv_set.tv_usec, - (unsigned long)t->tv_expires.tv_sec, + (unsigned long)t->tv_expires.tv_jiff, (unsigned long)t->tv_expires.tv_usec, t->delay_us, t->data /* , t->function */ ); - local_irq_disable(); + local_irq_save(flags); if (t->next != nextt) { printk(KERN_WARNING "timer removed!\n"); @@ -798,7 +703,7 @@ static volatile int num_test_timeout = 0; static struct fast_timer tr[10]; static int exp_num[10]; -static struct timeval tv_exp[100]; +static struct fasttime_t tv_exp[100]; static void test_timeout(unsigned long data) { @@ -836,7 +741,7 @@ static void fast_timer_test(void) int prev_num; int j; - struct timeval tv, tv0, tv1, tv2; + struct fasttime_t tv, tv0, tv1, tv2; printk("fast_timer_test() start\n"); do_gettimeofday_fast(&tv); @@ -849,7 +754,8 @@ static void fast_timer_test(void) { do_gettimeofday_fast(&tv_exp[j]); } - printk("fast_timer_test() %is %06i\n", tv.tv_sec, tv.tv_usec); + printk(KERN_DEBUG "fast_timer_test() %is %06i\n", + tv.tv_jiff, tv.tv_usec); for (j = 0; j < 1000; j++) { @@ -858,12 +764,12 @@ static void fast_timer_test(void) } for (j = 0; j < 100; j++) { - printk("%i.%i %i.%i %i.%i %i.%i %i.%i\n", - tv_exp[j].tv_sec,tv_exp[j].tv_usec, - tv_exp[j+1].tv_sec,tv_exp[j+1].tv_usec, - tv_exp[j+2].tv_sec,tv_exp[j+2].tv_usec, - tv_exp[j+3].tv_sec,tv_exp[j+3].tv_usec, - tv_exp[j+4].tv_sec,tv_exp[j+4].tv_usec); + printk(KERN_DEBUG "%i.%i %i.%i %i.%i %i.%i %i.%i\n", + tv_exp[j].tv_jiff, tv_exp[j].tv_usec, + tv_exp[j+1].tv_jiff, tv_exp[j+1].tv_usec, + tv_exp[j+2].tv_jiff, tv_exp[j+2].tv_usec, + tv_exp[j+3].tv_jiff, tv_exp[j+3].tv_usec, + tv_exp[j+4].tv_jiff, tv_exp[j+4].tv_usec); j += 4; } do_gettimeofday_fast(&tv0); @@ -895,9 +801,12 @@ static void fast_timer_test(void) } } do_gettimeofday_fast(&tv2); - printk("Timers started %is %06i\n", tv0.tv_sec, tv0.tv_usec); - printk("Timers started at %is %06i\n", tv1.tv_sec, tv1.tv_usec); - printk("Timers done %is %06i\n", tv2.tv_sec, tv2.tv_usec); + printk(KERN_DEBUG "Timers started %is %06i\n", + tv0.tv_jiff, tv0.tv_usec); + printk(KERN_DEBUG "Timers started at %is %06i\n", + tv1.tv_jiff, tv1.tv_usec); + printk(KERN_DEBUG "Timers done %is %06i\n", + tv2.tv_jiff, tv2.tv_usec); DP(printk("buf0:\n"); printk(buf0); printk("buf1:\n"); @@ -919,9 +828,9 @@ static void fast_timer_test(void) printk("%-10s set: %6is %06ius exp: %6is %06ius " "data: 0x%08X func: 0x%08X\n", t->name, - t->tv_set.tv_sec, + t->tv_set.tv_jiff, t->tv_set.tv_usec, - t->tv_expires.tv_sec, + t->tv_expires.tv_jiff, t->tv_expires.tv_usec, t->data, t->function @@ -929,10 +838,12 @@ static void fast_timer_test(void) printk(" del: %6ius did exp: %6is %06ius as #%i error: %6li\n", t->delay_us, - tv_exp[j].tv_sec, + tv_exp[j].tv_jiff, tv_exp[j].tv_usec, exp_num[j], - (tv_exp[j].tv_sec - t->tv_expires.tv_sec)*1000000 + tv_exp[j].tv_usec - t->tv_expires.tv_usec); + (tv_exp[j].tv_jiff - t->tv_expires.tv_jiff) * + 1000000 + tv_exp[j].tv_usec - + t->tv_expires.tv_usec); } proc_fasttimer_read(buf5, NULL, 0, 0, 0); printk("buf5 after all done:\n"); @@ -942,7 +853,7 @@ static void fast_timer_test(void) #endif -void fast_timer_init(void) +int fast_timer_init(void) { /* For some reason, request_irq() hangs when called froom time_init() */ if (!fast_timer_is_init) @@ -975,4 +886,6 @@ void fast_timer_init(void) fast_timer_test(); #endif } + return 0; } +__initcall(fast_timer_init); diff --git a/include/asm-cris/fasttimer.h b/include/asm-cris/fasttimer.h index a3a77132ce3..8f8a8d6c965 100644 --- a/include/asm-cris/fasttimer.h +++ b/include/asm-cris/fasttimer.h @@ -1,9 +1,8 @@ -/* $Id: fasttimer.h,v 1.3 2004/05/14 10:19:19 starvik Exp $ +/* * linux/include/asm-cris/fasttimer.h * * Fast timers for ETRAX100LX - * This may be useful in other OS than Linux so use 2 space indentation... - * Copyright (C) 2000, 2002 Axis Communications AB + * Copyright (C) 2000-2007 Axis Communications AB */ #include /* struct timeval */ #include @@ -12,11 +11,16 @@ typedef void fast_timer_function_type(unsigned long); +struct fasttime_t { + unsigned long tv_jiff; /* jiffies */ + unsigned long tv_usec; /* microseconds */ +}; + struct fast_timer{ /* Close to timer_list */ struct fast_timer *next; struct fast_timer *prev; - struct timeval tv_set; - struct timeval tv_expires; + struct fasttime_t tv_set; + struct fasttime_t tv_expires; unsigned long delay_us; fast_timer_function_type *function; unsigned long data; @@ -38,6 +42,6 @@ int del_fast_timer(struct fast_timer * t); void schedule_usleep(unsigned long us); -void fast_timer_init(void); +int fast_timer_init(void); #endif -- cgit v1.2.3 From 57c230a873b9d82ae88d6b6736127b5485024699 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:27 -0800 Subject: CRISv32: add cache flush operations These are needed due to a cache bug, and can be used to make sure that the DMA descriptors are flushed to memory and can be safely handled by DMA. flush_dma_descr - Flush one DMA descriptor. flush_dma_list - Flush a complete list of DMA descriptors. cris_flush_cache - Flush the complete cache. cris_flush_cache_range - Flush only the specified range Signed-off-by: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/kernel/cache.c | 33 ++++++++++++ arch/cris/arch-v32/kernel/cacheflush.S | 94 ++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 arch/cris/arch-v32/kernel/cache.c create mode 100644 arch/cris/arch-v32/kernel/cacheflush.S diff --git a/arch/cris/arch-v32/kernel/cache.c b/arch/cris/arch-v32/kernel/cache.c new file mode 100644 index 00000000000..80da7b88a72 --- /dev/null +++ b/arch/cris/arch-v32/kernel/cache.c @@ -0,0 +1,33 @@ +#include +#include +#include +#include + +/* This file is used to workaround a cache bug, Guinness TR 106. */ + +inline void flush_dma_descr(struct dma_descr_data *descr, int flush_buf) +{ + /* Flush descriptor to make sure we get correct in_eop and after. */ + asm volatile ("ftagd [%0]" :: "r" (descr)); + /* Flush buffer pointed out by descriptor. */ + if (flush_buf) + cris_flush_cache_range(phys_to_virt((unsigned)descr->buf), + (unsigned)(descr->after - descr->buf)); +} +EXPORT_SYMBOL(flush_dma_descr); + +void flush_dma_list(struct dma_descr_data *descr) +{ + while (1) { + flush_dma_descr(descr, 1); + if (descr->eol) + break; + descr = phys_to_virt((unsigned)descr->next); + } +} +EXPORT_SYMBOL(flush_dma_list); + +/* From cacheflush.S */ +EXPORT_SYMBOL(cris_flush_cache); +/* From cacheflush.S */ +EXPORT_SYMBOL(cris_flush_cache_range); diff --git a/arch/cris/arch-v32/kernel/cacheflush.S b/arch/cris/arch-v32/kernel/cacheflush.S new file mode 100644 index 00000000000..956e8fb82f0 --- /dev/null +++ b/arch/cris/arch-v32/kernel/cacheflush.S @@ -0,0 +1,94 @@ + .global cris_flush_cache_range +cris_flush_cache_range: + move.d 1024, $r12 + cmp.d $r11, $r12 + bhi cris_flush_1KB + nop + add.d $r10, $r11 + ftagd [$r10] +cris_flush_last: + addq 32, $r10 + cmp.d $r11, $r10 + blt cris_flush_last + ftagd [$r10] + ret + nop +cris_flush_1KB: + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ftagd [$r10] + addq 32, $r10 + ba cris_flush_cache_range + sub.d $r12, $r11 + + .global cris_flush_cache +cris_flush_cache: + moveq 0, $r10 +cris_flush_line: + move.d 16*1024, $r11 + addq 16, $r10 + cmp.d $r10, $r11 + blt cris_flush_line + fidxd [$r10] + ret + nop -- cgit v1.2.3 From ebd33e11c3bef555573510d8677d626ebaa1eccd Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:28 -0800 Subject: CRIS tlb.h should include linux/pagemap.h Include linux/pagemap.h for release_pages and page_cache_release. Fixes compilation error in arch/cris/mm/init.c when CONFIG_SWAP is unset. Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/tlb.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/asm-cris/tlb.h b/include/asm-cris/tlb.h index 6cc26debe40..7724246a260 100644 --- a/include/asm-cris/tlb.h +++ b/include/asm-cris/tlb.h @@ -1,6 +1,8 @@ #ifndef _CRIS_TLB_H #define _CRIS_TLB_H +#include + #include /* -- cgit v1.2.3 From 3eed6393644c960e2343db7dabec08c775d3738f Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:29 -0800 Subject: CRISv10 Ethernet declare mac fix Declare mac using DECLARE_MAC_BUF for use when calling print_mac(). This fixes compile error where mac was undeclared. Also, remove unused variable i. Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/cris/eth_v10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c index 5dc984a3e00..917b7b46f1a 100644 --- a/drivers/net/cris/eth_v10.c +++ b/drivers/net/cris/eth_v10.c @@ -630,7 +630,7 @@ e100_set_mac_address(struct net_device *dev, void *p) { struct net_local *np = netdev_priv(dev); struct sockaddr *addr = p; - int i; + DECLARE_MAC_BUF(mac); spin_lock(&np->lock); /* preemption protection */ -- cgit v1.2.3 From 2b05d2b3b4d1e59e8710ec9274684d0d13eee34d Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:30 -0800 Subject: CRISv10 usercopy library add lineendings to asm Removes warning when compiling arch/cris/arch-v10/lib/usercopy.c No change except adding \n\ on the end of the lines has been done. Removes warning about multi-line string literals. Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/lib/usercopy.c | 314 +++++++++++++++++++------------------- 1 file changed, 157 insertions(+), 157 deletions(-) diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c index a12c708afc9..b8e6c0430e5 100644 --- a/arch/cris/arch-v10/lib/usercopy.c +++ b/arch/cris/arch-v10/lib/usercopy.c @@ -92,58 +92,58 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ .err \n\ .endif \n\ - - ;; Save the registers we'll use in the movem process - ;; on the stack. - subq 11*4,$sp - movem $r10,[$sp] - - ;; Now we've got this: - ;; r11 - src - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 44,$r12 - -; Since the noted PC of a faulting instruction in a delay-slot of a taken -; branch, is that of the branch target, we actually point at the from-movem -; for this case. There is no ambiguity here; if there was a fault in that -; instruction (meaning a kernel oops), the faulted PC would be the address -; after *that* movem. - -0: - movem [$r11+],$r10 - subq 44,$r12 - bge 0b - movem $r10,[$r13+] -1: - addq 44,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10 -2: - .section .fixup,\"ax\" - -; To provide a correct count in r10 of bytes that failed to be copied, -; we jump back into the loop if the loop-branch was taken. There is no -; performance penalty for sany use; the program will segfault soon enough. - -3: - move.d [$sp],$r10 - addq 44,$r10 - move.d $r10,[$sp] - jump 0b -4: - movem [$sp+],$r10 - addq 44,$r10 - addq 44,$r12 - jump 2b - - .previous - .section __ex_table,\"a\" - .dword 0b,3b - .dword 1b,4b + \n\ + ;; Save the registers we'll use in the movem process \n\ + ;; on the stack. \n\ + subq 11*4,$sp \n\ + movem $r10,[$sp] \n\ + \n\ + ;; Now we've got this: \n\ + ;; r11 - src \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop \n\ + subq 44,$r12 \n\ + \n\ +; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\ +; branch, is that of the branch target, we actually point at the from-movem \n\ +; for this case. There is no ambiguity here; if there was a fault in that \n\ +; instruction (meaning a kernel oops), the faulted PC would be the address \n\ +; after *that* movem. \n\ + \n\ +0: \n\ + movem [$r11+],$r10 \n\ + subq 44,$r12 \n\ + bge 0b \n\ + movem $r10,[$r13+] \n\ +1: \n\ + addq 44,$r12 ;; compensate for last loop underflowing n \n\ + \n\ + ;; Restore registers from stack \n\ + movem [$sp+],$r10 \n\ +2: \n\ + .section .fixup,\"ax\" \n\ + \n\ +; To provide a correct count in r10 of bytes that failed to be copied, \n\ +; we jump back into the loop if the loop-branch was taken. There is no \n\ +; performance penalty for sany use; the program will segfault soon enough.\n\ + \n\ +3: \n\ + move.d [$sp],$r10 \n\ + addq 44,$r10 \n\ + move.d $r10,[$sp] \n\ + jump 0b \n\ +4: \n\ + movem [$sp+],$r10 \n\ + addq 44,$r10 \n\ + addq 44,$r12 \n\ + jump 2b \n\ + \n\ + .previous \n\ + .section __ex_table,\"a\" \n\ + .dword 0b,3b \n\ + .dword 1b,4b \n\ .previous" /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) @@ -253,59 +253,59 @@ __copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn) If you want to check that the allocation was right; then check the equalities in the first comment. It should say "r13=r13, r11=r11, r12=r12" */ - __asm__ volatile (" + __asm__ volatile ("\n\ .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ .err \n\ .endif \n\ - - ;; Save the registers we'll use in the movem process - ;; on the stack. - subq 11*4,$sp - movem $r10,[$sp] - - ;; Now we've got this: - ;; r11 - src - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 44,$r12 -0: - movem [$r11+],$r10 -1: - subq 44,$r12 - bge 0b - movem $r10,[$r13+] - - addq 44,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10 -4: - .section .fixup,\"ax\" - -;; Do not jump back into the loop if we fail. For some uses, we get a -;; page fault somewhere on the line. Without checking for page limits, -;; we don't know where, but we need to copy accurately and keep an -;; accurate count; not just clear the whole line. To do that, we fall -;; down in the code below, proceeding with smaller amounts. It should -;; be kept in mind that we have to cater to code like what at one time -;; was in fs/super.c: -;; i = size - copy_from_user((void *)page, data, size); -;; which would cause repeated faults while clearing the remainder of -;; the SIZE bytes at PAGE after the first fault. -;; A caveat here is that we must not fall through from a failing page -;; to a valid page. - -3: - movem [$sp+],$r10 - addq 44,$r12 ;; Get back count before faulting point. - subq 44,$r11 ;; Get back pointer to faulting movem-line. - jump 4b ;; Fall through, pretending the fault didn't happen. - - .previous - .section __ex_table,\"a\" - .dword 1b,3b + \n\ + ;; Save the registers we'll use in the movem process \n\ + ;; on the stack. \n\ + subq 11*4,$sp \n\ + movem $r10,[$sp] \n\ + \n\ + ;; Now we've got this: \n\ + ;; r11 - src \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop \n\ + subq 44,$r12 \n\ +0: \n\ + movem [$r11+],$r10 \n\ +1: \n\ + subq 44,$r12 \n\ + bge 0b \n\ + movem $r10,[$r13+] \n\ + \n\ + addq 44,$r12 ;; compensate for last loop underflowing n \n\ + \n\ + ;; Restore registers from stack \n\ + movem [$sp+],$r10 \n\ +4: \n\ + .section .fixup,\"ax\" \n\ + \n\ +;; Do not jump back into the loop if we fail. For some uses, we get a \n\ +;; page fault somewhere on the line. Without checking for page limits, \n\ +;; we don't know where, but we need to copy accurately and keep an \n\ +;; accurate count; not just clear the whole line. To do that, we fall \n\ +;; down in the code below, proceeding with smaller amounts. It should \n\ +;; be kept in mind that we have to cater to code like what at one time \n\ +;; was in fs/super.c: \n\ +;; i = size - copy_from_user((void *)page, data, size); \n\ +;; which would cause repeated faults while clearing the remainder of \n\ +;; the SIZE bytes at PAGE after the first fault. \n\ +;; A caveat here is that we must not fall through from a failing page \n\ +;; to a valid page. \n\ + \n\ +3: \n\ + movem [$sp+],$r10 \n\ + addq 44,$r12 ;; Get back count before faulting point. \n\ + subq 44,$r11 ;; Get back pointer to faulting movem-line. \n\ + jump 4b ;; Fall through, pretending the fault didn't happen.\n\ + \n\ + .previous \n\ + .section __ex_table,\"a\" \n\ + .dword 1b,3b \n\ .previous" /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) @@ -425,64 +425,64 @@ __do_clear_user (void __user *pto, unsigned long pn) If you want to check that the allocation was right; then check the equalities in the first comment. It should say something like "r13=r13, r11=r11, r12=r12". */ - __asm__ volatile (" + __asm__ volatile ("\n\ .ifnc %0%1%2,$r13$r12$r10 \n\ .err \n\ .endif \n\ - - ;; Save the registers we'll clobber in the movem process - ;; on the stack. Don't mention them to gcc, it will only be - ;; upset. - subq 11*4,$sp - movem $r10,[$sp] - - clear.d $r0 - clear.d $r1 - clear.d $r2 - clear.d $r3 - clear.d $r4 - clear.d $r5 - clear.d $r6 - clear.d $r7 - clear.d $r8 - clear.d $r9 - clear.d $r10 - clear.d $r11 - - ;; Now we've got this: - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 12*4,$r12 -0: - subq 12*4,$r12 - bge 0b - movem $r11,[$r13+] -1: - addq 12*4,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10 -2: - .section .fixup,\"ax\" -3: - move.d [$sp],$r10 - addq 12*4,$r10 - move.d $r10,[$sp] - clear.d $r10 - jump 0b - -4: - movem [$sp+],$r10 - addq 12*4,$r10 - addq 12*4,$r12 - jump 2b - - .previous - .section __ex_table,\"a\" - .dword 0b,3b - .dword 1b,4b + \n\ + ;; Save the registers we'll clobber in the movem process \n\ + ;; on the stack. Don't mention them to gcc, it will only be \n\ + ;; upset. \n\ + subq 11*4,$sp \n\ + movem $r10,[$sp] \n\ + \n\ + clear.d $r0 \n\ + clear.d $r1 \n\ + clear.d $r2 \n\ + clear.d $r3 \n\ + clear.d $r4 \n\ + clear.d $r5 \n\ + clear.d $r6 \n\ + clear.d $r7 \n\ + clear.d $r8 \n\ + clear.d $r9 \n\ + clear.d $r10 \n\ + clear.d $r11 \n\ + \n\ + ;; Now we've got this: \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop \n\ + subq 12*4,$r12 \n\ +0: \n\ + subq 12*4,$r12 \n\ + bge 0b \n\ + movem $r11,[$r13+] \n\ +1: \n\ + addq 12*4,$r12 ;; compensate for last loop underflowing n\n\ + \n\ + ;; Restore registers from stack \n\ + movem [$sp+],$r10 \n\ +2: \n\ + .section .fixup,\"ax\" \n\ +3: \n\ + move.d [$sp],$r10 \n\ + addq 12*4,$r10 \n\ + move.d $r10,[$sp] \n\ + clear.d $r10 \n\ + jump 0b \n\ + \n\ +4: \n\ + movem [$sp+],$r10 \n\ + addq 12*4,$r10 \n\ + addq 12*4,$r12 \n\ + jump 2b \n\ + \n\ + .previous \n\ + .section __ex_table,\"a\" \n\ + .dword 0b,3b \n\ + .dword 1b,4b \n\ .previous" /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn) -- cgit v1.2.3 From 341ac6e4bee51ed5f83275fb311f3ef014f4bcd7 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:31 -0800 Subject: CRISv10 string library add lineendings to asm Add \n\ at end of lines inside asm statement to avoid warning. No change except adding \n\ to end of line and correcting whitespace has been done. Removes warning about multi-line string literals when compiling arch/cris/arch-v10/lib/string.c Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/lib/string.c | 54 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/cris/arch-v10/lib/string.c b/arch/cris/arch-v10/lib/string.c index 15d6662b03b..7161a2bef4f 100644 --- a/arch/cris/arch-v10/lib/string.c +++ b/arch/cris/arch-v10/lib/string.c @@ -95,33 +95,33 @@ void *memcpy(void *pdst, If you want to check that the allocation was right; then check the equalities in the first comment. It should say "r13=r13, r11=r11, r12=r12" */ - __asm__ volatile (" - ;; Check that the following is true (same register names on - ;; both sides of equal sign, as in r8=r8): - ;; %0=r13, %1=r11, %2=r12 - ;; - ;; Save the registers we'll use in the movem process - ;; on the stack. - subq 11*4,$sp - movem $r10,[$sp] - - ;; Now we've got this: - ;; r11 - src - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 44,$r12 -0: - movem [$r11+],$r10 - subq 44,$r12 - bge 0b - movem $r10,[$r13+] - - addq 44,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10" + __asm__ volatile ("\n\ + ;; Check that the following is true (same register names on \n\ + ;; both sides of equal sign, as in r8=r8): \n\ + ;; %0=r13, %1=r11, %2=r12 \n\ + ;; \n\ + ;; Save the registers we'll use in the movem process \n\ + ;; on the stack. \n\ + subq 11*4,$sp \n\ + movem $r10,[$sp] \n\ + \n\ + ;; Now we've got this: \n\ + ;; r11 - src \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop \n\ + subq 44,$r12 \n\ +0: \n\ + movem [$r11+],$r10 \n\ + subq 44,$r12 \n\ + bge 0b \n\ + movem $r10,[$r13+] \n\ + \n\ + addq 44,$r12 ;; compensate for last loop underflowing n \n\ + \n\ + ;; Restore registers from stack \n\ + movem [$sp+],$r10" /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) /* Inputs */ : "0" (dst), "1" (src), "2" (n)); -- cgit v1.2.3 From 2e2cd8bad6e03ceea73495ee6d557044213d95de Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:32 -0800 Subject: CRISv10 memset library add lineendings to asm Add \n\ at end of lines inside asm statement to avoid warning. No change except adding \n\ to end of line and correcting whitespace has been done. Removes warning about multi-line string literals when compiling arch/cris/arch-v10/lib/memset.c Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/lib/memset.c | 82 ++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/arch/cris/arch-v10/lib/memset.c b/arch/cris/arch-v10/lib/memset.c index 82bb6683917..42c1101043a 100644 --- a/arch/cris/arch-v10/lib/memset.c +++ b/arch/cris/arch-v10/lib/memset.c @@ -66,7 +66,7 @@ void *memset(void *pdst, { register char *dst __asm__ ("r13") = pdst; - + /* This is NONPORTABLE, but since this whole routine is */ /* grossly nonportable that doesn't matter. */ @@ -110,52 +110,52 @@ void *memset(void *pdst, If you want to check that the allocation was right; then check the equalities in the first comment. It should say "r13=r13, r12=r12, r11=r11" */ - __asm__ volatile (" - ;; Check that the following is true (same register names on - ;; both sides of equal sign, as in r8=r8): - ;; %0=r13, %1=r12, %4=r11 - ;; - ;; Save the registers we'll clobber in the movem process - ;; on the stack. Don't mention them to gcc, it will only be - ;; upset. - subq 11*4,$sp - movem $r10,[$sp] - - move.d $r11,$r0 - move.d $r11,$r1 - move.d $r11,$r2 - move.d $r11,$r3 - move.d $r11,$r4 - move.d $r11,$r5 - move.d $r11,$r6 - move.d $r11,$r7 - move.d $r11,$r8 - move.d $r11,$r9 - move.d $r11,$r10 - - ;; Now we've got this: - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 12*4,$r12 -0: - subq 12*4,$r12 - bge 0b - movem $r11,[$r13+] - - addq 12*4,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10" + __asm__ volatile ("\n\ + ;; Check that the following is true (same register names on \n\ + ;; both sides of equal sign, as in r8=r8): \n\ + ;; %0=r13, %1=r12, %4=r11 \n\ + ;; \n\ + ;; Save the registers we'll clobber in the movem process \n\ + ;; on the stack. Don't mention them to gcc, it will only be \n\ + ;; upset. \n\ + subq 11*4,$sp \n\ + movem $r10,[$sp] \n\ + \n\ + move.d $r11,$r0 \n\ + move.d $r11,$r1 \n\ + move.d $r11,$r2 \n\ + move.d $r11,$r3 \n\ + move.d $r11,$r4 \n\ + move.d $r11,$r5 \n\ + move.d $r11,$r6 \n\ + move.d $r11,$r7 \n\ + move.d $r11,$r8 \n\ + move.d $r11,$r9 \n\ + move.d $r11,$r10 \n\ + \n\ + ;; Now we've got this: \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop \n\ + subq 12*4,$r12 \n\ +0: \n\ + subq 12*4,$r12 \n\ + bge 0b \n\ + movem $r11,[$r13+] \n\ + \n\ + addq 12*4,$r12 ;; compensate for last loop underflowing n \n\ + \n\ + ;; Restore registers from stack \n\ + movem [$sp+],$r10" /* Outputs */ : "=r" (dst), "=r" (n) /* Inputs */ : "0" (dst), "1" (n), "r" (lc)); - + } /* Either we directly starts copying, using dword copying - in a loop, or we copy as much as possible with 'movem' + in a loop, or we copy as much as possible with 'movem' and then the last block (<44 bytes) is copied here. This will work since 'movem' will have updated src,dst,n. */ -- cgit v1.2.3 From 2f1f53bdc6531696934f6ee7bbdfa2ab4f4f62a3 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Nov 2007 17:01:33 -0800 Subject: CRISv10 fasttimer: Scrap INLINE and name timeval_cmp better Scrap the local __INLINE__ macro, and rename timeval_cmp to fasttime_cmp. Inline macro was completely unnecessary since the macro was defined locally to be inline. timeval_cmp was inaccurately named since it does comparison on struct fasttimer_t and not on struct timeval. Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/kernel/fasttimer.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/cris/arch-v10/kernel/fasttimer.c b/arch/cris/arch-v10/kernel/fasttimer.c index 645d7059b40..c1a3a2100ee 100644 --- a/arch/cris/arch-v10/kernel/fasttimer.c +++ b/arch/cris/arch-v10/kernel/fasttimer.c @@ -46,8 +46,6 @@ static int sanity_failed; #define D2(x) #define DP(x) -#define __INLINE__ inline - static unsigned int fast_timer_running; static unsigned int fast_timers_added; static unsigned int fast_timers_started; @@ -118,13 +116,13 @@ int timer_freq_settings[NUM_TIMER_STATS]; int timer_delay_settings[NUM_TIMER_STATS]; /* Not true gettimeofday, only checks the jiffies (uptime) + useconds */ -void __INLINE__ do_gettimeofday_fast(struct fasttime_t *tv) +inline void do_gettimeofday_fast(struct fasttime_t *tv) { tv->tv_jiff = jiffies; tv->tv_usec = GET_JIFFIES_USEC(); } -int __INLINE__ timeval_cmp(struct fasttime_t *t0, struct fasttime_t *t1) +inline int fasttime_cmp(struct fasttime_t *t0, struct fasttime_t *t1) { /* Compare jiffies. Takes care of wrapping */ if (time_before(t0->tv_jiff, t1->tv_jiff)) @@ -140,7 +138,7 @@ int __INLINE__ timeval_cmp(struct fasttime_t *t0, struct fasttime_t *t1) return 0; } -void __INLINE__ start_timer1(unsigned long delay_us) +inline void start_timer1(unsigned long delay_us) { int freq_index = 0; /* This is the lowest resolution */ unsigned long upper_limit = MAX_DELAY_US; @@ -264,7 +262,7 @@ void start_one_shot_timer(struct fast_timer *t, fast_timers_added++; /* Check if this should timeout before anything else */ - if (tmp == NULL || timeval_cmp(&t->tv_expires, &tmp->tv_expires) < 0) + if (tmp == NULL || fasttime_cmp(&t->tv_expires, &tmp->tv_expires) < 0) { /* Put first in list and modify the timer value */ t->prev = NULL; @@ -280,8 +278,8 @@ void start_one_shot_timer(struct fast_timer *t, start_timer1(delay_us); } else { /* Put in correct place in list */ - while (tmp->next && - timeval_cmp(&t->tv_expires, &tmp->next->tv_expires) > 0) + while (tmp->next && fasttime_cmp(&t->tv_expires, + &tmp->next->tv_expires) > 0) { tmp = tmp->next; } @@ -382,7 +380,7 @@ timer1_handler(int irq, void *dev_id) D1(printk(KERN_DEBUG "t: %is %06ius\n", tv.tv_jiff, tv.tv_usec)); - if (timeval_cmp(&t->tv_expires, &tv) <= 0) + if (fasttime_cmp(&t->tv_expires, &tv) <= 0) { /* Yes it has expired */ #ifdef FAST_TIMER_LOG -- cgit v1.2.3 From bd7b3f34198071d8bec05180530c362f1800ba46 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 14 Nov 2007 19:47:27 -0800 Subject: [VIA_VELOCITY]: Don't oops on MTU change. Simple mtu change when device is down. Fix http://bugzilla.kernel.org/show_bug.cgi?id=9382. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/via-velocity.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 5c4a92de9a0..450e29d7a9f 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -1963,6 +1963,11 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } + if (!netif_running(dev)) { + dev->mtu = new_mtu; + return 0; + } + if (new_mtu != oldmtu) { spin_lock_irqsave(&vptr->lock, flags); -- cgit v1.2.3 From dab6ba36888a12f3e3edff71eeef968fc159178a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Nov 2007 02:57:06 -0800 Subject: [INET]: Fix potential kfree on vmalloc-ed area of request_sock_queue The request_sock_queue's listen_opt is either vmalloc-ed or kmalloc-ed depending on the number of table entries. Thus it is expected to be handled properly on free, which is done in the reqsk_queue_destroy(). However the error path in inet_csk_listen_start() calls the lite version of reqsk_queue_destroy, called __reqsk_queue_destroy, which calls the kfree unconditionally. Fix this and move the __reqsk_queue_destroy into a .c file as it looks too big to be inline. As David also noticed, this is an error recovery path only, so no locking is required and the lopt is known to be not NULL. reqsk_queue_yank_listen_sk is also now only used in net/core/request_sock.c so we should move it there too. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/request_sock.h | 18 +----------------- net/core/request_sock.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 7aed02ce2b6..cff4608179c 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -124,23 +124,7 @@ struct request_sock_queue { extern int reqsk_queue_alloc(struct request_sock_queue *queue, unsigned int nr_table_entries); -static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) -{ - struct listen_sock *lopt; - - write_lock_bh(&queue->syn_wait_lock); - lopt = queue->listen_opt; - queue->listen_opt = NULL; - write_unlock_bh(&queue->syn_wait_lock); - - return lopt; -} - -static inline void __reqsk_queue_destroy(struct request_sock_queue *queue) -{ - kfree(reqsk_queue_yank_listen_sk(queue)); -} - +extern void __reqsk_queue_destroy(struct request_sock_queue *queue); extern void reqsk_queue_destroy(struct request_sock_queue *queue); static inline struct request_sock * diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 5f0818d815e..45aed75cb57 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -71,6 +71,41 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, EXPORT_SYMBOL(reqsk_queue_alloc); +void __reqsk_queue_destroy(struct request_sock_queue *queue) +{ + struct listen_sock *lopt; + size_t lopt_size; + + /* + * this is an error recovery path only + * no locking needed and the lopt is not NULL + */ + + lopt = queue->listen_opt; + lopt_size = sizeof(struct listen_sock) + + lopt->nr_table_entries * sizeof(struct request_sock *); + + if (lopt_size > PAGE_SIZE) + vfree(lopt); + else + kfree(lopt); +} + +EXPORT_SYMBOL(__reqsk_queue_destroy); + +static inline struct listen_sock *reqsk_queue_yank_listen_sk( + struct request_sock_queue *queue) +{ + struct listen_sock *lopt; + + write_lock_bh(&queue->syn_wait_lock); + lopt = queue->listen_opt; + queue->listen_opt = NULL; + write_unlock_bh(&queue->syn_wait_lock); + + return lopt; +} + void reqsk_queue_destroy(struct request_sock_queue *queue) { /* make all the listen_opt local to us */ -- cgit v1.2.3 From 330a9c1df63ca5043c468698da0a1853fd6778bb Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Tue, 16 Oct 2007 11:23:51 +0200 Subject: [AVR32] pcmcia ioaddr_t should be 32 bits on AVR32 Define ioaddr_t as u_int on AVR32 just like on ARM and MIPS. Signed-off-by: Haavard Skinnemoen --- include/pcmcia/cs_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h index c1d1629fcd2..5f388035687 100644 --- a/include/pcmcia/cs_types.h +++ b/include/pcmcia/cs_types.h @@ -21,7 +21,7 @@ #include #endif -#if defined(__arm__) || defined(__mips__) +#if defined(__arm__) || defined(__mips__) || defined(__avr32__) /* This (ioaddr_t) is exposed to userspace & hence cannot be changed. */ typedef u_int ioaddr_t; #else -- cgit v1.2.3 From 9797bed20e2257f349872e00840a226f397f8d3a Mon Sep 17 00:00:00 2001 From: Hans-Christian Egtvedt Date: Tue, 30 Oct 2007 14:29:50 +0100 Subject: Extend I/O resource for wdt0 for at32ap7000 devices This patch extends the I/O resource to 0xfff000cf which will enable the watchdog driver to access the reset cause (RCAUSE) register. Making it capable of reporting boot status. Signed-off-by: Hans-Christian Egtvedt Signed-off-by: Haavard Skinnemoen --- arch/avr32/mach-at32ap/at32ap7000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c index a9d9ec081e3..293f03c5ada 100644 --- a/arch/avr32/mach-at32ap/at32ap7000.c +++ b/arch/avr32/mach-at32ap/at32ap7000.c @@ -474,7 +474,7 @@ static struct resource at32ap700x_rtc0_resource[] = { static struct resource at32_wdt0_resource[] = { { .start = 0xfff000b0, - .end = 0xfff000bf, + .end = 0xfff000cf, .flags = IORESOURCE_MEM, }, }; -- cgit v1.2.3 From 3a65a69d493c589f4225dc26c19598c00c1de0b4 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Mon, 5 Nov 2007 14:06:51 +0100 Subject: [AVR32] Turn off debugging in SMC driver Signed-off-by: Haavard Skinnemoen --- arch/avr32/mach-at32ap/hsmc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/avr32/mach-at32ap/hsmc.c b/arch/avr32/mach-at32ap/hsmc.c index 704607fbcc6..fa427ed4278 100644 --- a/arch/avr32/mach-at32ap/hsmc.c +++ b/arch/avr32/mach-at32ap/hsmc.c @@ -7,7 +7,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#define DEBUG #include #include #include -- cgit v1.2.3 From 138712218e7582e274990cdf77dc798b2aa4fa99 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 5 Nov 2007 18:06:12 +0100 Subject: [AVR32] remove UID16 option avr32 already sees the option from init/Kconfig. Signed-off-by: Adrian Bunk Signed-off-by: Haavard Skinnemoen --- arch/avr32/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index bbecbd8469b..4f402c92450 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -19,9 +19,6 @@ config AVR32 There is an AVR32 Linux project with a web page at http://avr32linux.org/. -config UID16 - bool - config GENERIC_GPIO bool default y -- cgit v1.2.3 From 80f76c54bd23c45b37f03a4d49ef4b760430dbcc Mon Sep 17 00:00:00 2001 From: "ben.nizette@iinet.net.au" Date: Wed, 7 Nov 2007 16:16:22 +0900 Subject: [AVR32] Fix duplicate clock index in at32ap machine code There's a duplicate clock index between USART0 and USART1 which may be causing system crashes when USART0 is used. Change the USART0 index to '3', indicating the clock that is actually used by USART0. Signed-off-by: Ben Nizette Signed-off-by: Haavard Skinnemoen --- arch/avr32/mach-at32ap/at32ap7000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c index 293f03c5ada..7c4388f4f17 100644 --- a/arch/avr32/mach-at32ap/at32ap7000.c +++ b/arch/avr32/mach-at32ap/at32ap7000.c @@ -690,7 +690,7 @@ static struct resource atmel_usart0_resource[] = { IRQ(6), }; DEFINE_DEV_DATA(atmel_usart, 0); -DEV_CLK(usart, atmel_usart0, pba, 4); +DEV_CLK(usart, atmel_usart0, pba, 3); static struct atmel_uart_data atmel_usart1_data = { .use_dma_tx = 1, -- cgit v1.2.3 From 8fea1ad17f1d7ba0ef6ce7b73ad9dd4562035437 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Wed, 31 Oct 2007 20:32:00 +0100 Subject: [AVR32] Add missing bit in PCCR sysreg The enable bit was missing... Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/sysreg.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/asm-avr32/sysreg.h b/include/asm-avr32/sysreg.h index c02bc8304b1..dd21182b60e 100644 --- a/include/asm-avr32/sysreg.h +++ b/include/asm-avr32/sysreg.h @@ -215,6 +215,8 @@ #define SYSREG_IRP_SIZE 6 /* Bitfields in PCCR */ +#define SYSREG_PCCR_E_OFFSET 0 +#define SYSREG_PCCR_E_SIZE 1 #define SYSREG_PCCR_R_OFFSET 1 #define SYSREG_PCCR_R_SIZE 1 #define SYSREG_PCCR_C_OFFSET 2 -- cgit v1.2.3 From 597702aeb492fcb8e1b48283e1450df40e928ba0 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Wed, 31 Oct 2007 20:34:11 +0100 Subject: [AVR32] Export intc_get_pending symbol Oprofile needs to call intc_get_pending() in order to determine whether a performance counter interrupt is pending. Also, include the header which declares intc_get_pending() and fix the definition to match the prototype. Signed-off-by: Haavard Skinnemoen --- arch/avr32/mach-at32ap/intc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c index dd5c009cf22..0b286cd5302 100644 --- a/arch/avr32/mach-at32ap/intc.c +++ b/arch/avr32/mach-at32ap/intc.c @@ -13,6 +13,7 @@ #include #include +#include #include #include "intc.h" @@ -136,7 +137,8 @@ fail: panic("Interrupt controller initialization failed!\n"); } -unsigned long intc_get_pending(int group) +unsigned long intc_get_pending(unsigned int group) { return intc_readl(&intc0, INTREQ0 + 4 * group); } +EXPORT_SYMBOL_GPL(intc_get_pending); -- cgit v1.2.3 From a3474224e6a01924be40a8255636ea5522c1023a Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 13 Nov 2007 22:11:50 -0800 Subject: wait_task_stopped: Check p->exit_state instead of TASK_TRACED The original meaning of the old test (p->state > TASK_STOPPED) was "not dead", since it was before TASK_TRACED existed and before the state/exit_state split. It was a wrong correction in commit 14bf01bb0599c89fc7f426d20353b76e12555308 to make this test for TASK_TRACED instead. It should have been changed when TASK_TRACED was introducted and again when exit_state was introduced. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Cc: Alexey Dobriyan Cc: Kees Cook Acked-by: Scott James Remnant Signed-off-by: Linus Torvalds --- kernel/exit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index f1aec27f1df..cd0f1d4137a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1386,8 +1386,7 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; exit_code = p->exit_code; - if (unlikely(!exit_code) || - unlikely(p->state & TASK_TRACED)) + if (unlikely(!exit_code) || unlikely(p->exit_state)) goto bail_ref; return wait_noreap_copyout(p, pid, uid, why, (exit_code << 8) | 0x7f, -- cgit v1.2.3 From d32ddd8f20e7d7a49c45c337c2079be03c77dc41 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 15 Nov 2007 12:32:04 +0100 Subject: slob: fix memory corruption Previously, it would be possible for prev->next to point to &free_slob_pages, and thus we would try to move a list onto itself, and bad things would happen. It seems a bit hairy to be doing list operations with the list marker as an entry, rather than a head, but... this resolves the following crash: http://bugzilla.kernel.org/show_bug.cgi?id=9379 Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar Acked-by: Matt Mackall Signed-off-by: Linus Torvalds --- mm/slob.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slob.c b/mm/slob.c index 5bc2ceb692e..08a9bd91a1a 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -321,7 +321,8 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) /* Improve fragment distribution and reduce our average * search time by starting our next search here. (see * Knuth vol 1, sec 2.5, pg 449) */ - if (free_slob_pages.next != prev->next) + if (prev != free_slob_pages.prev && + free_slob_pages.next != prev->next) list_move_tail(&free_slob_pages, prev->next); break; } -- cgit v1.2.3 From 279e1dab949d33737557babfe9f74e0b74fbe39a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Nov 2007 08:44:36 -0800 Subject: Revert "skge: fix ram buffer size calculation" This reverts commit 7fb7ac241162dc51ec0f7644d4a97b2855213c32. Heikki Orsila reports that it causes a regression: "Doing nc host port < /dev/zero on a sending machine (not skge) to an skge machine that is receiving: nc -l -p port >/dev/null with ~60 MiB/s speed, causes the interface go malfunct. A slow transfer doesn't cause a problem." See http://bugzilla.kernel.org/show_bug.cgi?id=9321 for some more information. There is a workaround (also reported by Heikki): "After some fiddling, I noticed that not changing the register write order on patch: + skge_write32(hw, RB_ADDR(q, RB_END), end); skge_write32(hw, RB_ADDR(q, RB_WP), start); skge_write32(hw, RB_ADDR(q, RB_RP), start); - skge_write32(hw, RB_ADDR(q, RB_END), end); fixes the visible effect.. Possibly not the root cause of the problem, but changing the order back fixes networking here." but that has yet to be ack'ed or tested more widely, so the whole problem-causing commit gets reverted until this is resolved properly. Bisected-and-requested-by: Heikki Orsila Cc: Stephen Hemminger Cc: Jeff Garzik Signed-off-by: Linus Torvalds --- drivers/net/skge.c | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/net/skge.c b/drivers/net/skge.c index b9961dc4760..6d62250fba0 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2512,31 +2512,32 @@ static int skge_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } -/* Assign Ram Buffer allocation to queue */ -static void skge_ramset(struct skge_hw *hw, u16 q, u32 start, u32 space) +static void skge_ramset(struct skge_hw *hw, u16 q, u32 start, size_t len) { u32 end; - /* convert from K bytes to qwords used for hw register */ - start *= 1024/8; - space *= 1024/8; - end = start + space - 1; + start /= 8; + len /= 8; + end = start + len - 1; skge_write8(hw, RB_ADDR(q, RB_CTRL), RB_RST_CLR); skge_write32(hw, RB_ADDR(q, RB_START), start); - skge_write32(hw, RB_ADDR(q, RB_END), end); skge_write32(hw, RB_ADDR(q, RB_WP), start); skge_write32(hw, RB_ADDR(q, RB_RP), start); + skge_write32(hw, RB_ADDR(q, RB_END), end); if (q == Q_R1 || q == Q_R2) { - u32 tp = space - space/4; - /* Set thresholds on receive queue's */ - skge_write32(hw, RB_ADDR(q, RB_RX_UTPP), tp); - skge_write32(hw, RB_ADDR(q, RB_RX_LTPP), space/4); - } else if (hw->chip_id != CHIP_ID_GENESIS) - /* Genesis Tx Fifo is too small for normal store/forward */ + skge_write32(hw, RB_ADDR(q, RB_RX_UTPP), + start + (2*len)/3); + skge_write32(hw, RB_ADDR(q, RB_RX_LTPP), + start + (len/3)); + } else { + /* Enable store & forward on Tx queue's because + * Tx FIFO is only 4K on Genesis and 1K on Yukon + */ skge_write8(hw, RB_ADDR(q, RB_CTRL), RB_ENA_STFWD); + } skge_write8(hw, RB_ADDR(q, RB_CTRL), RB_ENA_OP_MD); } @@ -2564,7 +2565,7 @@ static int skge_up(struct net_device *dev) struct skge_port *skge = netdev_priv(dev); struct skge_hw *hw = skge->hw; int port = skge->port; - u32 ramaddr, ramsize, rxspace; + u32 chunk, ram_addr; size_t rx_size, tx_size; int err; @@ -2619,15 +2620,14 @@ static int skge_up(struct net_device *dev) spin_unlock_bh(&hw->phy_lock); /* Configure RAMbuffers */ - ramsize = (hw->ram_size - hw->ram_offset) / hw->ports; - ramaddr = hw->ram_offset + port * ramsize; - rxspace = 8 + (2*(ramsize - 16))/3; - - skge_ramset(hw, rxqaddr[port], ramaddr, rxspace); - skge_ramset(hw, txqaddr[port], ramaddr + rxspace, ramsize - rxspace); + chunk = hw->ram_size / ((hw->ports + 1)*2); + ram_addr = hw->ram_offset + 2 * chunk * port; + skge_ramset(hw, rxqaddr[port], ram_addr, chunk); skge_qset(skge, rxqaddr[port], skge->rx_ring.to_clean); + BUG_ON(skge->tx_ring.to_use != skge->tx_ring.to_clean); + skge_ramset(hw, txqaddr[port], ram_addr+chunk, chunk); skge_qset(skge, txqaddr[port], skge->tx_ring.to_use); /* Start receiver BMU */ @@ -3591,12 +3591,15 @@ static int skge_reset(struct skge_hw *hw) if (hw->chip_id == CHIP_ID_GENESIS) { if (t8 == 3) { /* special case: 4 x 64k x 36, offset = 0x80000 */ - hw->ram_size = 1024; - hw->ram_offset = 512; + hw->ram_size = 0x100000; + hw->ram_offset = 0x80000; } else hw->ram_size = t8 * 512; - } else /* Yukon */ - hw->ram_size = t8 ? t8 * 4 : 128; + } + else if (t8 == 0) + hw->ram_size = 0x20000; + else + hw->ram_size = t8 * 4096; hw->intr_mask = IS_HW_ERR; -- cgit v1.2.3 From 907135aaa0cc120a347222c8f274ecc5ca0db641 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 15 Nov 2007 19:24:01 +0100 Subject: i2c-dev: "how does it work" comments This adds some "how does this work" comments to the i2c-dev driver, plus separators between the three main components: - The parallel list of i2c_adapters ("i2c_dev_list"), each of which gets a "struct i2c_dev" and a /dev/i2c-X character special file. - An i2cdev_driver gets adapter add/remove notifications, which are used to maintain that list of adapters. - Special file operations, which let userspace talk either directly to the adapter (for i2c_msg operations) or through cached addressing info using an anonymous i2c_client (never registered anywhere). Plus there's the usual module load/unload record keeping. After making sense of this code, I think that the anonymous i2c_client is pretty shady. But since it's never registered, using this code with a system set up for "new style" I2C drivers is no more complicated than always using the I2C_SLAVE_FORCE ioctl (instead of I2C_SLAVE). Signed-off-by: David Brownell Signed-off-by: Jean Delvare --- drivers/i2c/i2c-dev.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 5a15e50748d..7360f9c3725 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -38,6 +38,15 @@ static struct i2c_driver i2cdev_driver; +/* + * An i2c_dev represents an i2c_adapter ... an I2C or SMBus master, not a + * slave (i2c_client) with which messages will be exchanged. It's coupled + * with a character special file which is accessed by user mode drivers. + * + * The list of i2c_dev structures is parallel to the i2c_adapter lists + * maintained by the driver model, and is updated using notifications + * delivered to the i2cdev_driver. + */ struct i2c_dev { struct list_head list; struct i2c_adapter *adap; @@ -103,6 +112,25 @@ static ssize_t show_adapter_name(struct device *dev, } static DEVICE_ATTR(name, S_IRUGO, show_adapter_name, NULL); +/* ------------------------------------------------------------------------- */ + +/* + * After opening an instance of this character special file, a file + * descriptor starts out associated only with an i2c_adapter (and bus). + * + * Using the I2C_RDWR ioctl(), you can then *immediately* issue i2c_msg + * traffic to any devices on the bus used by that adapter. That's because + * the i2c_msg vectors embed all the addressing information they need, and + * are submitted directly to an i2c_adapter. However, SMBus-only adapters + * don't support that interface. + * + * To use read()/write() system calls on that file descriptor, or to use + * SMBus interfaces (and work with SMBus-only hosts!), you must first issue + * an I2C_SLAVE (or I2C_SLAVE_FORCE) ioctl. That configures an anonymous + * (never registered) i2c_client so it holds the addressing information + * needed by those system calls and by this SMBus interface. + */ + static ssize_t i2cdev_read (struct file *file, char __user *buf, size_t count, loff_t *offset) { @@ -172,6 +200,16 @@ static int i2cdev_ioctl(struct inode *inode, struct file *file, switch ( cmd ) { case I2C_SLAVE: case I2C_SLAVE_FORCE: + /* NOTE: devices set up to work with "new style" drivers + * can't use I2C_SLAVE, even when the device node is not + * bound to a driver. Only I2C_SLAVE_FORCE will work. + * + * Setting the PEC flag here won't affect kernel drivers, + * which will be using the i2c_client node registered with + * the driver model core. Likewise, when that client has + * the PEC flag already set, the i2c-dev driver won't see + * (or use) this setting. + */ if ((arg > 0x3ff) || (((client->flags & I2C_M_TEN) == 0) && arg > 0x7f)) return -EINVAL; @@ -386,6 +424,13 @@ static int i2cdev_open(struct inode *inode, struct file *file) if (!adap) return -ENODEV; + /* This creates an anonymous i2c_client, which may later be + * pointed to some address using I2C_SLAVE or I2C_SLAVE_FORCE. + * + * This client is ** NEVER REGISTERED ** with the driver model + * or I2C core code!! It just holds private copies of addressing + * information and maybe a PEC flag. + */ client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) { i2c_put_adapter(adap); @@ -394,7 +439,6 @@ static int i2cdev_open(struct inode *inode, struct file *file) snprintf(client->name, I2C_NAME_SIZE, "i2c-dev %d", adap->nr); client->driver = &i2cdev_driver; - /* registered with adapter, passed as client to user */ client->adapter = adap; file->private_data = client; @@ -422,6 +466,14 @@ static const struct file_operations i2cdev_fops = { .release = i2cdev_release, }; +/* ------------------------------------------------------------------------- */ + +/* + * The legacy "i2cdev_driver" is used primarily to get notifications when + * I2C adapters are added or removed, so that each one gets an i2c_dev + * and is thus made available to userspace driver code. + */ + static struct class *i2c_dev_class; static int i2cdev_attach_adapter(struct i2c_adapter *adap) @@ -486,6 +538,12 @@ static struct i2c_driver i2cdev_driver = { .detach_client = i2cdev_detach_client, }; +/* ------------------------------------------------------------------------- */ + +/* + * module load/unload record keeping + */ + static int __init i2c_dev_init(void) { int res; -- cgit v1.2.3 From bd4217d8c6ef48425c8d6b28d2e089a83e01af04 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 15 Nov 2007 19:24:01 +0100 Subject: i2c-dev: Unbound new-style i2c clients aren't busy Let i2c-dev deal properly with new-style i2c clients. Instead of considering them always busy, it needs to check wether a driver is bound to them or not. This is still not completely correct, as the client could become busy later, but the same problem already existed before new-style clients were introduced. We'll want to fix it someday. Signed-off-by: Jean Delvare Acked-by: David Brownell --- drivers/i2c/i2c-dev.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 7360f9c3725..c21ae20ae36 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -182,6 +182,29 @@ static ssize_t i2cdev_write (struct file *file, const char __user *buf, size_t c return ret; } +/* This address checking function differs from the one in i2c-core + in that it considers an address with a registered device, but no + bounded driver, as NOT busy. */ +static int i2cdev_check_addr(struct i2c_adapter *adapter, unsigned int addr) +{ + struct list_head *item; + struct i2c_client *client; + int res = 0; + + mutex_lock(&adapter->clist_lock); + list_for_each(item, &adapter->clients) { + client = list_entry(item, struct i2c_client, list); + if (client->addr == addr) { + if (client->driver) + res = -EBUSY; + break; + } + } + mutex_unlock(&adapter->clist_lock); + + return res; +} + static int i2cdev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -213,8 +236,9 @@ static int i2cdev_ioctl(struct inode *inode, struct file *file, if ((arg > 0x3ff) || (((client->flags & I2C_M_TEN) == 0) && arg > 0x7f)) return -EINVAL; - if ((cmd == I2C_SLAVE) && i2c_check_addr(client->adapter,arg)) + if (cmd == I2C_SLAVE && i2cdev_check_addr(client->adapter, arg)) return -EBUSY; + /* REVISIT: address could become busy later */ client->addr = arg; return 0; case I2C_TENBIT: -- cgit v1.2.3 From 5e31c2bd3c865f8f474811340182795396b99696 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 15 Nov 2007 19:24:02 +0100 Subject: i2c: Make i2c_check_addr static i2c_check_addr is only used inside i2c-core now, so we can make it static and stop exporting it. Thanks to David Brownell for noticing. Signed-off-by: Jean Delvare --- drivers/i2c/i2c-core.c | 3 +-- include/linux/i2c.h | 5 ----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 1a4e8dc03b3..b5e13e405e7 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -673,7 +673,7 @@ static int __i2c_check_addr(struct i2c_adapter *adapter, unsigned int addr) return 0; } -int i2c_check_addr(struct i2c_adapter *adapter, int addr) +static int i2c_check_addr(struct i2c_adapter *adapter, int addr) { int rval; @@ -683,7 +683,6 @@ int i2c_check_addr(struct i2c_adapter *adapter, int addr) return rval; } -EXPORT_SYMBOL(i2c_check_addr); int i2c_attach_client(struct i2c_client *client) { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 8033e6b3327..a100c9f8eb7 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -400,11 +400,6 @@ extern int i2c_release_client(struct i2c_client *); extern void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg); -/* returns -EBUSY if address has been taken, 0 if not. Note that the only - other place at which this is called is within i2c_attach_client; so - you can cheat by simply not registering. Not recommended, of course! */ -extern int i2c_check_addr (struct i2c_adapter *adapter, int addr); - /* Detect function. It iterates over all possible addresses itself. * It will only call found_proc if some client is connected at the * specific address (unless a 'force' matched); -- cgit v1.2.3 From ff23f3eabbaa4fc398e0ce109a8688db29d95d78 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 15 Nov 2007 19:24:02 +0100 Subject: i2c-pasemi: Replace obsolete "driverfs" reference with "sysfs" Signed-off-by: Robert P. J. Day Signed-off-by: Jean Delvare --- drivers/i2c/busses/i2c-pasemi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c index 58e32714afb..79c72dfa63b 100644 --- a/drivers/i2c/busses/i2c-pasemi.c +++ b/drivers/i2c/busses/i2c-pasemi.c @@ -364,7 +364,7 @@ static int __devinit pasemi_smb_probe(struct pci_dev *dev, smbus->adapter.algo = &smbus_algorithm; smbus->adapter.algo_data = smbus; - /* set up the driverfs linkage to our parent device */ + /* set up the sysfs linkage to our parent device */ smbus->adapter.dev.parent = &dev->dev; reg_write(smbus, REG_CTL, (CTL_MTR | CTL_MRR | -- cgit v1.2.3 From be8a1f7cd4501c3b4b32543577a33aee6d2193ac Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 15 Nov 2007 19:24:02 +0100 Subject: i2c-pasemi: Fix NACK detection Turns out we don't actually check the status to see if there was a device out there to talk to, just if we had a timeout when doing so. Add the proper check, so we don't falsly think there are devices on the bus that are not there, etc. Signed-off-by: Olof Johansson Signed-off-by: Jean Delvare --- drivers/i2c/busses/i2c-pasemi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c index 79c72dfa63b..ca18e0be490 100644 --- a/drivers/i2c/busses/i2c-pasemi.c +++ b/drivers/i2c/busses/i2c-pasemi.c @@ -51,6 +51,7 @@ struct pasemi_smbus { #define MRXFIFO_DATA_M 0x000000ff #define SMSTA_XEN 0x08000000 +#define SMSTA_MTN 0x00200000 #define CTL_MRR 0x00000400 #define CTL_MTR 0x00000200 @@ -98,6 +99,10 @@ static unsigned int pasemi_smb_waitready(struct pasemi_smbus *smbus) status = reg_read(smbus, REG_SMSTA); } + /* Got NACK? */ + if (status & SMSTA_MTN) + return -ENXIO; + if (timeout < 0) { dev_warn(&smbus->dev->dev, "Timeout, status 0x%08x\n", status); reg_write(smbus, REG_SMSTA, status); -- cgit v1.2.3 From 0f2cbd38aa377e30df3b7602abed69464d1970aa Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 15 Nov 2007 19:24:03 +0100 Subject: i2c/eeprom: Hide Sony Vaio serial numbers The sysfs interface to DMI data takes care to not make the system serial number and UUID world-readable, presumably due to privacy concerns. For consistency, we should not let the eeprom driver export these same strings to the world on Sony Vaio laptops. Instead, only make them readable by root, as we already do for BIOS passwords. Signed-off-by: Jean Delvare --- drivers/i2c/chips/eeprom.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c index d3da1fb05b9..ef8a754db1d 100644 --- a/drivers/i2c/chips/eeprom.c +++ b/drivers/i2c/chips/eeprom.c @@ -128,13 +128,20 @@ static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr, for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++) eeprom_update_client(client, slice); - /* Hide Vaio security settings to regular users (16 first bytes) */ - if (data->nature == VAIO && off < 16 && !capable(CAP_SYS_ADMIN)) { - size_t in_row1 = 16 - off; - in_row1 = min(in_row1, count); - memset(buf, 0, in_row1); - if (count - in_row1 > 0) - memcpy(buf + in_row1, &data->data[16], count - in_row1); + /* Hide Vaio private settings to regular users: + - BIOS passwords: bytes 0x00 to 0x0f + - UUID: bytes 0x10 to 0x1f + - Serial number: 0xc0 to 0xdf */ + if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) { + int i; + + for (i = 0; i < count; i++) { + if ((off + i <= 0x1f) || + (off + i >= 0xc0 && off + i <= 0xdf)) + buf[i] = 0; + else + buf[i] = data->data[off + i]; + } } else { memcpy(buf, &data->data[off], count); } @@ -204,7 +211,7 @@ static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind) && i2c_smbus_read_byte(new_client) == 'G' && i2c_smbus_read_byte(new_client) == '-') { dev_info(&new_client->dev, "Vaio EEPROM detected, " - "enabling password protection\n"); + "enabling privacy protection\n"); data->nature = VAIO; } } -- cgit v1.2.3 From 8b925a3dd8a4d7451092cb9aa11da727ba69e0f0 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 15 Nov 2007 19:24:03 +0100 Subject: i2c/eeprom: Recognize VGN as a valid Sony Vaio name prefix Recent (i.e. 2005 and later) Sony Vaio laptops have names beginning with VGN rather than PCG. Update the eeprom driver so that it recognizes these. Signed-off-by: Jean Delvare --- drivers/i2c/chips/eeprom.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c index ef8a754db1d..1a7eeebac50 100644 --- a/drivers/i2c/chips/eeprom.c +++ b/drivers/i2c/chips/eeprom.c @@ -204,12 +204,16 @@ static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind) goto exit_kfree; /* Detect the Vaio nature of EEPROMs. - We use the "PCG-" prefix as the signature. */ + We use the "PCG-" or "VGN-" prefix as the signature. */ if (address == 0x57) { - if (i2c_smbus_read_byte_data(new_client, 0x80) == 'P' - && i2c_smbus_read_byte(new_client) == 'C' - && i2c_smbus_read_byte(new_client) == 'G' - && i2c_smbus_read_byte(new_client) == '-') { + char name[4]; + + name[0] = i2c_smbus_read_byte_data(new_client, 0x80); + name[1] = i2c_smbus_read_byte(new_client); + name[2] = i2c_smbus_read_byte(new_client); + name[3] = i2c_smbus_read_byte(new_client); + + if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) { dev_info(&new_client->dev, "Vaio EEPROM detected, " "enabling privacy protection\n"); data->nature = VAIO; -- cgit v1.2.3 From 9778385db35a799d410039be123044a0d3e917a2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 15 Nov 2007 20:57:39 +0100 Subject: sched: fix accounting of interrupts during guest execution on s390 Currently the scheduler checks for PF_VCPU to decide if this timeslice has to be accounted as guest time. On s390 host interrupts are not disabled during guest execution. This causes theses interrupts to be accounted as guest time if CONFIG_VIRT_CPU_ACCOUNTING is set. Solution is to check if an interrupt triggered account_system_time. As the tick is timer interrupt based, we have to subtract hardirq_offset. I tested the patch on s390 with CONFIG_VIRT_CPU_ACCOUNTING and on x86_64. Seems to work. CC: Avi Kivity CC: Laurent Vivier Signed-off-by: Christian Borntraeger Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 4fb3532dd7e..908335e1781 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3390,10 +3390,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset, struct rq *rq = this_rq(); cputime64_t tmp; - if (p->flags & PF_VCPU) { - account_guest_time(p, cputime); - return; - } + if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) + return account_guest_time(p, cputime); p->stime = cputime_add(p->stime, cputime); -- cgit v1.2.3 From dae51f56204d33444f61d9e7af3ee70aef55daa4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 15 Nov 2007 20:57:40 +0100 Subject: sched: fix SCHED_FIFO tasks & FAIR_GROUP_SCHED Suppose that the SCHED_FIFO task does switch_uid(new_user); Now, p->se.cfs_rq and p->se.parent both point into the old user_struct->tg because sched_move_task() doesn't call set_task_cfs_rq() for !fair_sched_class case. Suppose that old user_struct/task_group is freed/reused, and the task does sched_setscheduler(SCHED_NORMAL); __setscheduler() sets fair_sched_class, but doesn't update ->se.cfs_rq/parent which point to the freed memory. This means that check_preempt_wakeup() doing while (!is_same_group(se, pse)) { se = parent_entity(se); pse = parent_entity(pse); } may OOPS in a similar way if rq->curr or p did something like above. Perhaps we need something like the patch below, note that __setscheduler() can't do set_task_cfs_rq(). Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched.c b/kernel/sched.c index 908335e1781..d62b759753f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7087,8 +7087,10 @@ void sched_move_task(struct task_struct *tsk) rq = task_rq_lock(tsk, &flags); - if (tsk->sched_class != &fair_sched_class) + if (tsk->sched_class != &fair_sched_class) { + set_task_cfs_rq(tsk); goto done; + } update_rq_clock(rq); -- cgit v1.2.3 From ce96b5ac742801718ae86d2adf0500c5abef3782 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Thu, 15 Nov 2007 20:57:40 +0100 Subject: sched: fix __set_task_cpu() SMP race Grant Wilson has reported rare SCHED_FAIR_USER crashes on his quad-core system, which crashes can only be explained via runqueue corruption. there is a narrow SMP race in __set_task_cpu(): after ->cpu is set up to a new value, task_rq_lock(p, ...) can be successfuly executed on another CPU. We must ensure that updates of per-task data have been completed by this moment. this bug has been hiding in the Linux scheduler for an eternity (we never had any explicit barrier for task->cpu in set_task_cpu() - so the bug was introduced in 2.5.1), but only became visible via set_task_cfs_rq() being accidentally put after the task->cpu update. It also probably needs a sufficiently out-of-order CPU to trigger. Reported-by: Grant Wilson Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- kernel/sched.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index d62b759753f..db1f71e3131 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -216,15 +216,15 @@ static inline struct task_group *task_group(struct task_struct *p) } /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ -static inline void set_task_cfs_rq(struct task_struct *p) +static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { - p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)]; - p->se.parent = task_group(p)->se[task_cpu(p)]; + p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; + p->se.parent = task_group(p)->se[cpu]; } #else -static inline void set_task_cfs_rq(struct task_struct *p) { } +static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { } #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -1022,10 +1022,16 @@ unsigned long weighted_cpuload(const int cpu) static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) { + set_task_cfs_rq(p, cpu); #ifdef CONFIG_SMP + /* + * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be + * successfuly executed on another CPU. We must ensure that updates of + * per-task data have been completed by this moment. + */ + smp_wmb(); task_thread_info(p)->cpu = cpu; #endif - set_task_cfs_rq(p); } #ifdef CONFIG_SMP @@ -7088,7 +7094,7 @@ void sched_move_task(struct task_struct *tsk) rq = task_rq_lock(tsk, &flags); if (tsk->sched_class != &fair_sched_class) { - set_task_cfs_rq(tsk); + set_task_cfs_rq(tsk, task_cpu(tsk)); goto done; } @@ -7103,7 +7109,7 @@ void sched_move_task(struct task_struct *tsk) tsk->sched_class->put_prev_task(rq, tsk); } - set_task_cfs_rq(tsk); + set_task_cfs_rq(tsk, task_cpu(tsk)); if (on_rq) { if (unlikely(running)) -- cgit v1.2.3 From 94bc9a7bd97efdda4dfbe61d0df32ce19d41c0a9 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Thu, 15 Nov 2007 20:57:40 +0100 Subject: sched: remove activate_idle_task() cpu_down() code is ok wrt sched_idle_next() placing the 'idle' task not at the beginning of the queue. So get rid of activate_idle_task() and make use of activate_task() instead. It is the same as activate_task(), except for the update_rq_clock(rq) call that is redundant. Code size goes down: text data bss dec hex filename 47853 3934 336 52123 cb9b sched.o.before 47828 3934 336 52098 cb82 sched.o.after Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- kernel/sched.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index db1f71e3131..cc8cb6f7d82 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5281,24 +5281,10 @@ static void migrate_live_tasks(int src_cpu) read_unlock(&tasklist_lock); } -/* - * activate_idle_task - move idle task to the _front_ of runqueue. - */ -static void activate_idle_task(struct task_struct *p, struct rq *rq) -{ - update_rq_clock(rq); - - if (p->state == TASK_UNINTERRUPTIBLE) - rq->nr_uninterruptible--; - - enqueue_task(rq, p, 0); - inc_nr_running(p, rq); -} - /* * Schedules idle task to be the next runnable task on current CPU. - * It does so by boosting its priority to highest possible and adding it to - * the _front_ of the runqueue. Used by CPU offline code. + * It does so by boosting its priority to highest possible. + * Used by CPU offline code. */ void sched_idle_next(void) { @@ -5318,8 +5304,8 @@ void sched_idle_next(void) __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); - /* Add idle task to the _front_ of its priority queue: */ - activate_idle_task(p, rq); + update_rq_clock(rq); + activate_task(rq, p, 0); spin_unlock_irqrestore(&rq->lock, flags); } -- cgit v1.2.3 From 518b22e990a9071bf508ca67e31b37e7590f499c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 15 Nov 2007 20:57:40 +0100 Subject: sched: make sched_nr_latency static sched_nr_latency can now become static. Signed-off-by: Adrian Bunk Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index d3c03070872..ee00da284b1 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -43,7 +43,7 @@ unsigned int sysctl_sched_min_granularity = 1000000ULL; /* * is kept at sysctl_sched_latency / sysctl_sched_min_granularity */ -unsigned int sched_nr_latency = 20; +static unsigned int sched_nr_latency = 20; /* * After fork, child runs first. (default) If set to 0 then -- cgit v1.2.3 From 9612633a21ae8424531caf977f0560f64285bf36 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Nov 2007 20:57:40 +0100 Subject: sched: reorder SCHED_FEAT_ bits reorder SCHED_FEAT_ bits so that the used ones come first. Makes tuning instructions easier. Signed-off-by: Ingo Molnar --- kernel/sched.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index cc8cb6f7d82..38933cafea8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -455,18 +455,18 @@ static void update_rq_clock(struct rq *rq) */ enum { SCHED_FEAT_NEW_FAIR_SLEEPERS = 1, - SCHED_FEAT_START_DEBIT = 2, - SCHED_FEAT_TREE_AVG = 4, - SCHED_FEAT_APPROX_AVG = 8, - SCHED_FEAT_WAKEUP_PREEMPT = 16, + SCHED_FEAT_WAKEUP_PREEMPT = 2, + SCHED_FEAT_START_DEBIT = 4, + SCHED_FEAT_TREE_AVG = 8, + SCHED_FEAT_APPROX_AVG = 16, }; const_debug unsigned int sysctl_sched_features = SCHED_FEAT_NEW_FAIR_SLEEPERS * 1 | + SCHED_FEAT_WAKEUP_PREEMPT * 1 | SCHED_FEAT_START_DEBIT * 1 | SCHED_FEAT_TREE_AVG * 0 | - SCHED_FEAT_APPROX_AVG * 0 | - SCHED_FEAT_WAKEUP_PREEMPT * 1; + SCHED_FEAT_APPROX_AVG * 0; #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) -- cgit v1.2.3 From 6452a5fde03717c55dbb8b6b0b5bfab510ad38d5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 15 Nov 2007 14:29:21 -0800 Subject: [NETFILTER]: fix compat_nf_sockopt typo It should pass opt to the ->get/->set functions, not ops. Tested-by: Luca Tettamanti Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_sockopt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 87bc1443c52..3dd4b3c76d8 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -143,12 +143,12 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, if (ops->compat_get) ret = ops->compat_get(sk, val, opt, len); else - ret = ops->get(sk, val, ops, len); + ret = ops->get(sk, val, opt, len); } else { if (ops->compat_set) - ret = ops->compat_set(sk, val, ops, *len); + ret = ops->compat_set(sk, val, opt, *len); else - ret = ops->set(sk, val, ops, *len); + ret = ops->set(sk, val, opt, *len); } module_put(ops->owner); -- cgit v1.2.3 From 7de6af0f23b25df8da9719ecae1916b669d0b03d Mon Sep 17 00:00:00 2001 From: Divy Le Ray Date: Thu, 15 Nov 2007 15:06:32 -0800 Subject: [CHELSIO]: Fix skb->dev setting. eth_type_trans() now sets skb->dev. Access skb->def after it gets set. Signed-off-by: Divy Le Ray Signed-off-by: David S. Miller --- drivers/net/chelsio/sge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index ffa7e649a6e..443666292a5 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1379,11 +1379,11 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) } __skb_pull(skb, sizeof(*p)); - skb->dev->last_rx = jiffies; st = per_cpu_ptr(sge->port_stats[p->iff], smp_processor_id()); st->rx_packets++; skb->protocol = eth_type_trans(skb, adapter->port[p->iff].dev); + skb->dev->last_rx = jiffies; if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && skb->protocol == htons(ETH_P_IP) && (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) { -- cgit v1.2.3 From a754f70886ebcc7fda3d18a828e0e54e3ffc86d9 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 3 Nov 2007 01:01:37 +0000 Subject: [MIPS] Sibyte: resurrect old cache hack. The recent switch of the Sibyte SOCs from the processor specific cache managment code in c-sb1.c to c-r4k.c lost this old hack [MIPS] Hack for SB1 cache issues Removing flush_icache_page a while ago broke SB1 which was using an empty flush_data_cache_page function. This glues things well enough so a more efficient but also more intrusive solution can be found later. Signed-Off-By: Thiemo Seufer Signed-off-by: Ralf Baechle in the hope it was no longer needed. As it turns it still is so resurrect it until there is a better solution. Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 6806d58211b..9355f1c9325 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -7,6 +7,7 @@ * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org) * Copyright (C) 1999, 2000 Silicon Graphics, Inc. */ +#include #include #include #include @@ -507,7 +508,11 @@ static inline void local_r4k_flush_data_cache_page(void * addr) static void r4k_flush_data_cache_page(unsigned long addr) { - r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr, 1, 1); + if (in_atomic()) + local_r4k_flush_data_cache_page((void *)addr); + else + r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr, + 1, 1); } struct flush_icache_range_args { -- cgit v1.2.3 From a8049c53cdad347b5b1234969dba65a179fdf8f1 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 4 Nov 2007 04:42:03 +0000 Subject: [MIPS] Convert reference to mem_map to pfn_to_page(). This was crashing the combination of highmem and sparsemem. Signed-off-by: Ralf Baechle --- arch/mips/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 110ee7656b4..ec3b9e9f30f 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -426,7 +426,7 @@ void __init mem_init(void) #ifdef CONFIG_HIGHMEM for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = mem_map + tmp; + struct page *page = pfn_to_page(tmp); if (!page_is_ram(tmp)) { SetPageReserved(page); -- cgit v1.2.3 From a57c228935fd55c4a1cf7c0b7823537c81914000 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 4 Nov 2007 04:49:44 +0000 Subject: [MIPS] Qemu: Add early printk, your friend in a cold night. Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 2 +- arch/mips/qemu/Makefile | 3 ++- arch/mips/qemu/q-console.c | 26 ++++++++++++++++++++++++++ arch/mips/qemu/q-firmware.c | 6 ++++++ arch/mips/qemu/q-setup.c | 3 --- 5 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 arch/mips/qemu/q-console.c diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2c7d6c240b7..4b07b18e519 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -361,10 +361,10 @@ config QEMU select PCSPEAKER select SWAP_IO_SPACE select SYS_HAS_CPU_MIPS32_R1 + select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN - select ARCH_SPARSEMEM_ENABLE select GENERIC_HARDIRQS_NO__DO_IRQ select NR_CPUS_DEFAULT_1 select SYS_SUPPORTS_SMP diff --git a/arch/mips/qemu/Makefile b/arch/mips/qemu/Makefile index cec24c117f6..2ba4ef34b4a 100644 --- a/arch/mips/qemu/Makefile +++ b/arch/mips/qemu/Makefile @@ -4,6 +4,7 @@ obj-y = q-firmware.o q-irq.o q-mem.o q-setup.o q-reset.o -obj-$(CONFIG_SMP) += q-smp.o +obj-$(CONFIG_EARLY_PRINTK) += q-console.o +obj-$(CONFIG_SMP) += q-smp.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/qemu/q-console.c b/arch/mips/qemu/q-console.c new file mode 100644 index 00000000000..81101ae5017 --- /dev/null +++ b/arch/mips/qemu/q-console.c @@ -0,0 +1,26 @@ +#include +#include +#include +#include + +#define PORT(offset) (0x3f8 + (offset)) + +static inline unsigned int serial_in(int offset) +{ + return inb(PORT(offset)); +} + +static inline void serial_out(int offset, int value) +{ + outb(value, PORT(offset)); +} + +int prom_putchar(char c) +{ + while ((serial_in(UART_LSR) & UART_LSR_THRE) == 0) + ; + + serial_out(UART_TX, c); + + return 1; +} diff --git a/arch/mips/qemu/q-firmware.c b/arch/mips/qemu/q-firmware.c index c2239b41758..3ed43f416cd 100644 --- a/arch/mips/qemu/q-firmware.c +++ b/arch/mips/qemu/q-firmware.c @@ -2,6 +2,9 @@ #include #include #include +#include + +#define QEMU_PORT_BASE 0xb4000000 void __init prom_init(void) { @@ -15,4 +18,7 @@ void __init prom_init(void) } else { add_memory_region(0x0<<20, 0x10<<20, BOOT_MEM_RAM); } + + + set_io_port_base(QEMU_PORT_BASE); } diff --git a/arch/mips/qemu/q-setup.c b/arch/mips/qemu/q-setup.c index 23d34c1917c..969cedc8d8b 100644 --- a/arch/mips/qemu/q-setup.c +++ b/arch/mips/qemu/q-setup.c @@ -6,8 +6,6 @@ extern void qemu_reboot_setup(void); -#define QEMU_PORT_BASE 0xb4000000 - const char *get_system_type(void) { return "Qemu"; @@ -20,6 +18,5 @@ void __init plat_time_init(void) void __init plat_mem_setup(void) { - set_io_port_base(QEMU_PORT_BASE); qemu_reboot_setup(); } -- cgit v1.2.3 From 07a80e49240ff57bccc3c65944d35947c3d33697 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 5 Nov 2007 00:18:05 +0000 Subject: [MIPS] Sibyte: pin timer interrupt to their cores. Or strange things will happen. Signed-off-by: Ralf Baechle --- arch/mips/kernel/cevt-bcm1480.c | 3 +++ arch/mips/kernel/cevt-sb1250.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c index 21e6d63eb4d..f6fcc62ade3 100644 --- a/arch/mips/kernel/cevt-bcm1480.c +++ b/arch/mips/kernel/cevt-bcm1480.c @@ -143,7 +143,10 @@ void __cpuinit sb1480_clockevent_init(void) action->handler = sibyte_counter_handler; action->flags = IRQF_DISABLED | IRQF_PERCPU; + action->mask = cpumask_of_cpu(cpu); action->name = name; action->dev_id = cd; + + irq_set_affinity(irq, cpumask_of_cpu(cpu)); setup_irq(irq, action); } diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c index e2029d0fc39..194e0f726fe 100644 --- a/arch/mips/kernel/cevt-sb1250.c +++ b/arch/mips/kernel/cevt-sb1250.c @@ -142,7 +142,10 @@ void __cpuinit sb1250_clockevent_init(void) action->handler = sibyte_counter_handler; action->flags = IRQF_DISABLED | IRQF_PERCPU; + action->mask = cpumask_of_cpu(cpu); action->name = name; action->dev_id = cd; + + irq_set_affinity(irq, cpumask_of_cpu(cpu)); setup_irq(irq, action); } -- cgit v1.2.3 From a8401fa57f1600ca0ad74b958c2c9eb494f40dc8 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 5 Nov 2007 00:29:45 +0000 Subject: [MIPS] BCM1480: Remove duplicate acknowledge of timer interrupt. Signed-off-by: Ralf Baechle --- arch/mips/sibyte/bcm1480/irq.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index e28d626255a..3ba5ef35816 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -412,18 +412,6 @@ static void bcm1480_kgdb_interrupt(void) extern void bcm1480_mailbox_interrupt(void); -static inline void dispatch_ip4(void) -{ - int cpu = smp_processor_id(); - int irq = K_BCM1480_INT_TIMER_0 + cpu; - - /* Reset the timer */ - __raw_writeq(M_SCD_TIMER_ENABLE|M_SCD_TIMER_MODE_CONTINUOUS, - IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG))); - - do_IRQ(irq); -} - static inline void dispatch_ip2(void) { unsigned long long mask_h, mask_l; @@ -451,6 +439,7 @@ static inline void dispatch_ip2(void) asmlinkage void plat_irq_dispatch(void) { + unsigned int cpu = smp_processor_id(); unsigned int pending; #ifdef CONFIG_SIBYTE_BCM1480_PROF @@ -467,7 +456,7 @@ asmlinkage void plat_irq_dispatch(void) #endif if (pending & CAUSEF_IP4) - dispatch_ip4(); + do_IRQ(K_BCM1480_INT_TIMER_0 + cpu); #ifdef CONFIG_SMP else if (pending & CAUSEF_IP3) bcm1480_mailbox_interrupt(); -- cgit v1.2.3 From bb856c5b494049584eca61c6069eb8e1b0f9ce35 Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Sun, 4 Nov 2007 13:00:06 +0100 Subject: [MIPS] iounmap if in vr41xx_pciu_init() pci clock is over 33MHz iounmap if pci clock is over 33MHz. Cosmetic because the iomap() in this case is just a bit of address magic. Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Acked-by: Yoichi Yuasa Signed-off-by: Ralf Baechle --- arch/mips/pci/pci-vr41xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/pci/pci-vr41xx.c b/arch/mips/pci/pci-vr41xx.c index 240df9e3381..33c4f683d06 100644 --- a/arch/mips/pci/pci-vr41xx.c +++ b/arch/mips/pci/pci-vr41xx.c @@ -154,6 +154,7 @@ static int __init vr41xx_pciu_init(void) pciu_write(PCICLKSELREG, QUARTER_VTCLOCK); else { printk(KERN_ERR "PCI Clock is over 33MHz.\n"); + iounmap(pciu_base); return -EINVAL; } -- cgit v1.2.3 From 33b75e5c51e119c903681f592b4ec6c772def0e0 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 6 Nov 2007 00:43:51 +0000 Subject: [MIPS] Sibyte: Replace use of removed IO_SPACE_BASE with IOADDR. Signed-off-by: Ralf Baechle --- arch/mips/mm/cerr-sb1.c | 6 +++--- arch/mips/sibyte/bcm1480/irq.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c index e7f539e3284..1bd1f18ac23 100644 --- a/arch/mips/mm/cerr-sb1.c +++ b/arch/mips/mm/cerr-sb1.c @@ -154,7 +154,7 @@ static void check_bus_watcher(void) if (status & ~(1UL << 31)) { l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); #ifdef DUMP_L2_ECC_TAG_ON_ERROR - l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG); + l2_tag = in64(IOADDR(A_L2_ECC_TAG)); #endif memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); printk("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); @@ -183,9 +183,9 @@ asmlinkage void sb1_cache_error(void) #ifdef CONFIG_SIBYTE_BW_TRACE /* Freeze the trace buffer now */ #if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) - csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG); + csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IOADDR(A_SCD_TRACE_CFG)); #else - csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG); + csr_out32(M_SCD_TRACE_CFG_FREEZE, IOADDR(A_SCD_TRACE_CFG)); #endif printk("Trace buffer frozen\n"); #endif diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index 3ba5ef35816..db372a0f106 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -370,11 +370,11 @@ void __init arch_init_irq(void) #endif /* Setup uart 1 settings, mapper */ /* QQQ FIXME */ - __raw_writeq(M_DUART_IMR_BRK, IO_SPACE_BASE + A_DUART_IMRREG(kgdb_port)); + __raw_writeq(M_DUART_IMR_BRK, IOADDR(A_DUART_IMRREG(kgdb_port))); __raw_writeq(IMR_IP6_VAL, - IO_SPACE_BASE + A_BCM1480_IMR_REGISTER(0, R_BCM1480_IMR_INTERRUPT_MAP_BASE_H) + - (kgdb_irq<<3)); + IOADDR(A_BCM1480_IMR_REGISTER(0, R_BCM1480_IMR_INTERRUPT_MAP_BASE_H) + + (kgdb_irq << 3))); bcm1480_unmask_irq(0, kgdb_irq); #ifdef CONFIG_GDB_CONSOLE -- cgit v1.2.3 From c6563e85f73e5806d58d8b0230edecbc65537200 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Wed, 7 Nov 2007 01:08:48 +0900 Subject: [MIPS] Fix typo in R3000 TRACE_IRQFLAGS code Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- arch/mips/kernel/genex.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index c0f19d638b9..e76a76bf0b3 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -146,7 +146,7 @@ NESTED(handle_int, PT_SIZE, sp) and k0, ST0_IEP bnez k0, 1f - mfc0 k0, EP0_EPC + mfc0 k0, CP0_EPC .set noreorder j k0 rfe -- cgit v1.2.3 From efb9ca08b5a2374b29938cdcab417ce4feb14b54 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 8 Nov 2007 11:37:47 +0000 Subject: [MIPS] Change get_cycles to always return 0. This avoids us executing an mfc0 c0_count instruction on processors which don't have but also on certain R4000 and R4400 versions where reading from the count register just in the very moment when its value equals c0_compare will result in the timer interrupt getting lost. There is still a number of users of get_cycles remaining outside the arch code: crypto/tcrypt.c: start = get_cycles(); crypto/tcrypt.c: end = get_cycles(); crypto/tcrypt.c: start = get_cycles(); crypto/tcrypt.c: end = get_cycles(); crypto/tcrypt.c: start = get_cycles(); crypto/tcrypt.c: end = get_cycles(); drivers/char/hangcheck-timer.c: return get_cycles(); drivers/char/hangcheck-timer.c: printk("Hangcheck: Using get_cycles().\n"); drivers/char/random.c: sample.cycles = get_cycles(); drivers/input/joystick/analog.c:#define GET_TIME(x) do { x = get_cycles(); } include/linux/arcdevice.h: _x = get_cycles(); \ include/linux/arcdevice.h: _y = get_cycles(); \ mm/slub.c: if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio) mm/slub.c: p += 64 + (get_cycles() & 0xff) * sizeof(void *); Signed-off-by: Ralf Baechle --- include/asm-mips/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-mips/timex.h b/include/asm-mips/timex.h index 5816ad1569d..6529704aa73 100644 --- a/include/asm-mips/timex.h +++ b/include/asm-mips/timex.h @@ -35,7 +35,7 @@ typedef unsigned int cycles_t; static inline cycles_t get_cycles(void) { - return read_c0_count(); + return 0; } #endif /* __KERNEL__ */ -- cgit v1.2.3 From f6771dbb27c704ce837ba3bb1dcaa53f48f76ea8 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 8 Nov 2007 18:02:29 +0000 Subject: [MIPS] Fix shadow register support. Shadow register support would not possibly have worked on multicore systems. The support code for it was also depending not on MIPS R2 but VSMP or SMTC kernels even though it makes perfect sense with UP kernels. SR sets are a scarce resource and the expected usage pattern is that users actually hardcode the register set numbers in their code. So fix the allocator by ditching it. Move the remaining CPU probe bits into the generic CPU probe. Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 9 ------ arch/mips/kernel/cpu-probe.c | 5 ++++ arch/mips/kernel/proc.c | 2 ++ arch/mips/kernel/traps.c | 68 ++------------------------------------------ include/asm-mips/cpu-info.h | 1 + 5 files changed, 11 insertions(+), 74 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 4b07b18e519..2f2ce0c28bc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1409,7 +1409,6 @@ config MIPS_MT_SMP depends on SYS_SUPPORTS_MULTITHREADING select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI - select CPU_MIPSR2_SRS select MIPS_MT select NR_CPUS_DEFAULT_2 select SMP @@ -1426,7 +1425,6 @@ config MIPS_MT_SMTC select GENERIC_CLOCKEVENTS_BROADCAST select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI - select CPU_MIPSR2_SRS select MIPS_MT select NR_CPUS_DEFAULT_8 select SMP @@ -1453,7 +1451,6 @@ config MIPS_VPE_LOADER depends on SYS_SUPPORTS_MULTITHREADING select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI - select CPU_MIPSR2_SRS select MIPS_MT help Includes a loader for loading an elf relocatable object @@ -1582,12 +1579,6 @@ config CPU_MIPSR2_IRQ_VI config CPU_MIPSR2_IRQ_EI bool -# -# Shadow registers are an R2 feature -# -config CPU_MIPSR2_SRS - bool - config CPU_HAS_SYNC bool depends on !CPU_R3000 diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index c8c47a2d197..5c2794391bf 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -943,6 +943,11 @@ __init void cpu_probe(void) } __cpu_name[cpu] = cpu_to_name(c); + + if (cpu_has_mips_r2) + c->srsets = ((read_c0_srsctl() >> 26) & 0x0f) + 1; + else + c->srsets = 1; } __init void cpu_report(void) diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index efd2d131412..6e6e947cce1 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -60,6 +60,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) cpu_has_dsp ? " dsp" : "", cpu_has_mipsmt ? " mt" : "" ); + seq_printf(m, "shadow register sets\t: %d\n", + cpu_data[n].srsets); sprintf(fmt, "VCE%%c exceptions\t\t: %s\n", cpu_has_vce ? "%u" : "not available"); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index fa500787152..23e73d0650a 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1100,59 +1100,6 @@ void *set_except_vector(int n, void *addr) return (void *)old_handler; } -#ifdef CONFIG_CPU_MIPSR2_SRS -/* - * MIPSR2 shadow register set allocation - * FIXME: SMP... - */ - -static struct shadow_registers { - /* - * Number of shadow register sets supported - */ - unsigned long sr_supported; - /* - * Bitmap of allocated shadow registers - */ - unsigned long sr_allocated; -} shadow_registers; - -static void mips_srs_init(void) -{ - shadow_registers.sr_supported = ((read_c0_srsctl() >> 26) & 0x0f) + 1; - printk(KERN_INFO "%ld MIPSR2 register sets available\n", - shadow_registers.sr_supported); - shadow_registers.sr_allocated = 1; /* Set 0 used by kernel */ -} - -int mips_srs_max(void) -{ - return shadow_registers.sr_supported; -} - -int mips_srs_alloc(void) -{ - struct shadow_registers *sr = &shadow_registers; - int set; - -again: - set = find_first_zero_bit(&sr->sr_allocated, sr->sr_supported); - if (set >= sr->sr_supported) - return -1; - - if (test_and_set_bit(set, &sr->sr_allocated)) - goto again; - - return set; -} - -void mips_srs_free(int set) -{ - struct shadow_registers *sr = &shadow_registers; - - clear_bit(set, &sr->sr_allocated); -} - static asmlinkage void do_default_vi(void) { show_regs(get_irq_regs()); @@ -1163,6 +1110,7 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) { unsigned long handler; unsigned long old_handler = vi_handlers[n]; + int srssets = current_cpu_data.srsets; u32 *w; unsigned char *b; @@ -1178,7 +1126,7 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) b = (unsigned char *)(ebase + 0x200 + n*VECTORSPACING); - if (srs >= mips_srs_max()) + if (srs >= srssets) panic("Shadow register set %d not supported", srs); if (cpu_has_veic) { @@ -1186,7 +1134,7 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) board_bind_eic_interrupt(n, srs); } else if (cpu_has_vint) { /* SRSMap is only defined if shadow sets are implemented */ - if (mips_srs_max() > 1) + if (srssets > 1) change_c0_srsmap(0xf << n*4, srs << n*4); } @@ -1253,14 +1201,6 @@ void *set_vi_handler(int n, vi_handler_t addr) return set_vi_srs_handler(n, addr, 0); } -#else - -static inline void mips_srs_init(void) -{ -} - -#endif /* CONFIG_CPU_MIPSR2_SRS */ - /* * This is used by native signal handling */ @@ -1503,8 +1443,6 @@ void __init trap_init(void) else ebase = CAC_BASE; - mips_srs_init(); - per_cpu_trap_init(); /* diff --git a/include/asm-mips/cpu-info.h b/include/asm-mips/cpu-info.h index 94f1c817236..ed5c02c6afb 100644 --- a/include/asm-mips/cpu-info.h +++ b/include/asm-mips/cpu-info.h @@ -54,6 +54,7 @@ struct cpuinfo_mips { struct cache_desc dcache; /* Primary D or combined I/D cache */ struct cache_desc scache; /* Secondary cache */ struct cache_desc tcache; /* Tertiary/split secondary cache */ + int srsets; /* Shadow register sets */ #if defined(CONFIG_MIPS_MT_SMTC) /* * In the MIPS MT "SMTC" model, each TC is considered -- cgit v1.2.3 From fcee3faf8339bb65660c9a22123f71aa0cc30514 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 8 Nov 2007 22:09:11 +0100 Subject: [MIPS] SNI PCIT CPLUS: workaround for b0rked irq wiring of onboard PCI bus 1 Signed-off-by: Thomas Bogendoerfer Signed-off-by: Ralf Baechle --- arch/mips/pci/fixup-sni.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c index a45bedd1723..5c8a79bb266 100644 --- a/arch/mips/pci/fixup-sni.c +++ b/arch/mips/pci/fixup-sni.c @@ -113,6 +113,16 @@ static char irq_tab_pcit[13][5] __initdata = { { 0, INTA, INTB, INTC, INTD }, /* Slot 5 */ }; +static char irq_tab_pcit_cplus[13][5] __initdata = { + /* INTA INTB INTC INTD */ + { 0, 0, 0, 0, 0 }, /* HOST bridge */ + { 0, INTB, INTC, INTD, INTA }, /* PCI Slot 9 */ + { 0, 0, 0, 0, 0 }, /* PCI-EISA */ + { 0, 0, 0, 0, 0 }, /* Unused */ + { 0, INTA, INTB, INTC, INTD }, /* PCI-PCI bridge */ + { 0, INTB, INTC, INTD, INTA }, /* fixup */ +}; + static inline int is_rm300_revd(void) { unsigned char csmsr = *(volatile unsigned char *)PCIMT_CSMSR; @@ -123,8 +133,19 @@ static inline int is_rm300_revd(void) int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (sni_brd_type) { - case SNI_BRD_PCI_TOWER: case SNI_BRD_PCI_TOWER_CPLUS: + if (slot == 4) { + /* + * SNI messed up interrupt wiring for onboard + * PCI bus 1; we need to fix this up here + */ + while (dev && dev->bus->number != 1) + dev = dev->bus->self; + if (dev && dev->devfn >= PCI_DEVFN(4, 0)) + slot = 5; + } + return irq_tab_pcit_cplus[slot][pin]; + case SNI_BRD_PCI_TOWER: return irq_tab_pcit[slot][pin]; case SNI_BRD_PCI_MTOWER: -- cgit v1.2.3 From 89becf5c0d9019f4f9300840f08a98ee33d57d37 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Fri, 9 Nov 2007 18:42:35 +0900 Subject: [MIPS] Lasat: Fix overlap of interrupt number ranges. The range of MIPS_CPU IRQ and the range of LASAT IRQ overlap. Signed-off-by: Yoichi Yuasa Signed-off-by: Ralf Baechle --- arch/mips/lasat/interrupt.c | 22 ++++++++++++++-------- arch/mips/pci/pci-lasat.c | 32 +++++++++++++++++--------------- include/asm-mips/lasat/lasatint.h | 5 ----- include/asm-mips/mach-lasat/irq.h | 13 +++++++++++++ 4 files changed, 44 insertions(+), 28 deletions(-) create mode 100644 include/asm-mips/mach-lasat/irq.h diff --git a/arch/mips/lasat/interrupt.c b/arch/mips/lasat/interrupt.c index ba9692be356..cfeab669782 100644 --- a/arch/mips/lasat/interrupt.c +++ b/arch/mips/lasat/interrupt.c @@ -19,17 +19,14 @@ * Lasat boards. */ #include -#include -#include -#include #include -#include +#include #include #include #include -#include -#include + +#include static volatile int *lasat_int_status; static volatile int *lasat_int_mask; @@ -97,12 +94,18 @@ asmlinkage void plat_irq_dispatch(void) /* if int_status == 0, then the interrupt has already been cleared */ if (int_status) { - irq = LASATINT_BASE + ls1bit32(int_status); + irq = LASAT_IRQ_BASE + ls1bit32(int_status); do_IRQ(irq); } } +static struct irqaction cascade = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + void __init arch_init_irq(void) { int i; @@ -127,6 +130,9 @@ void __init arch_init_irq(void) } mips_cpu_irq_init(); - for (i = LASATINT_BASE; i <= LASATINT_END; i++) + + for (i = LASAT_IRQ_BASE; i <= LASAT_IRQ_END; i++) set_irq_chip_and_handler(i, &lasat_irq_type, handle_level_irq); + + setup_irq(LASAT_CASCADE_IRQ, &cascade); } diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c index 174f314933b..e70ae3236e0 100644 --- a/arch/mips/pci/pci-lasat.c +++ b/arch/mips/pci/pci-lasat.c @@ -5,12 +5,14 @@ * * Copyright (C) 2000, 2001, 04 Keith M Wesolowski */ -#include #include +#include #include #include + #include -#include + +#include extern struct pci_ops nile4_pci_ops; extern struct pci_ops gt64xxx_pci0_ops; @@ -55,15 +57,15 @@ static int __init lasat_pci_setup(void) arch_initcall(lasat_pci_setup); -#define LASATINT_ETH1 (LASATINT_BASE + 0) -#define LASATINT_ETH0 (LASATINT_BASE + 1) -#define LASATINT_HDC (LASATINT_BASE + 2) -#define LASATINT_COMP (LASATINT_BASE + 3) -#define LASATINT_HDLC (LASATINT_BASE + 4) -#define LASATINT_PCIA (LASATINT_BASE + 5) -#define LASATINT_PCIB (LASATINT_BASE + 6) -#define LASATINT_PCIC (LASATINT_BASE + 7) -#define LASATINT_PCID (LASATINT_BASE + 8) +#define LASAT_IRQ_ETH1 (LASAT_IRQ_BASE + 0) +#define LASAT_IRQ_ETH0 (LASAT_IRQ_BASE + 1) +#define LASAT_IRQ_HDC (LASAT_IRQ_BASE + 2) +#define LASAT_IRQ_COMP (LASAT_IRQ_BASE + 3) +#define LASAT_IRQ_HDLC (LASAT_IRQ_BASE + 4) +#define LASAT_IRQ_PCIA (LASAT_IRQ_BASE + 5) +#define LASAT_IRQ_PCIB (LASAT_IRQ_BASE + 6) +#define LASAT_IRQ_PCIC (LASAT_IRQ_BASE + 7) +#define LASAT_IRQ_PCID (LASAT_IRQ_BASE + 8) int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { @@ -71,13 +73,13 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) case 1: case 2: case 3: - return LASATINT_PCIA + (((slot-1) + (pin-1)) % 4); + return LASAT_IRQ_PCIA + (((slot-1) + (pin-1)) % 4); case 4: - return LASATINT_ETH1; /* Ethernet 1 (LAN 2) */ + return LASAT_IRQ_ETH1; /* Ethernet 1 (LAN 2) */ case 5: - return LASATINT_ETH0; /* Ethernet 0 (LAN 1) */ + return LASAT_IRQ_ETH0; /* Ethernet 0 (LAN 1) */ case 6: - return LASATINT_HDC; /* IDE controller */ + return LASAT_IRQ_HDC; /* IDE controller */ default: return 0xff; /* Illegal */ } diff --git a/include/asm-mips/lasat/lasatint.h b/include/asm-mips/lasat/lasatint.h index 581dc45685a..e0d2458b43d 100644 --- a/include/asm-mips/lasat/lasatint.h +++ b/include/asm-mips/lasat/lasatint.h @@ -1,11 +1,6 @@ #ifndef __ASM_LASAT_LASATINT_H #define __ASM_LASAT_LASATINT_H -#include - -#define LASATINT_BASE MIPS_CPU_IRQ_BASE -#define LASATINT_END (LASATINT_BASE + 16) - /* lasat 100 */ #define LASAT_INT_STATUS_REG_100 (KSEG1ADDR(0x1c880000)) #define LASAT_INT_MASK_REG_100 (KSEG1ADDR(0x1c890000)) diff --git a/include/asm-mips/mach-lasat/irq.h b/include/asm-mips/mach-lasat/irq.h new file mode 100644 index 00000000000..da75f89f372 --- /dev/null +++ b/include/asm-mips/mach-lasat/irq.h @@ -0,0 +1,13 @@ +#ifndef _ASM_MACH_LASAT_IRQ_H +#define _ASM_MACH_LASAT_IRQ_H + +#define LASAT_CASCADE_IRQ (MIPS_CPU_IRQ_BASE + 0) + +#define LASAT_IRQ_BASE 8 +#define LASAT_IRQ_END 23 + +#define NR_IRQS 24 + +#include_next + +#endif /* _ASM_MACH_LASAT_IRQ_H */ -- cgit v1.2.3 From 622477533d3dc24845c847f386533f3c0e6a1be6 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 11 Nov 2007 17:24:46 +0000 Subject: [MIPS] Sibyte: Increase minimum oneshot timer interval to two ticks. For the old minimum of a single tick a value of zero would be programmed into the init value register which in the BCM1250/BCM1125/BCM1125H User Manual in the Timer Special Cases section is documented to have UNPREDICTABLE effect. Observable sympthoms of this bug were hangs of several seconds on the console during bootup and later if both dyntick and highres timer options were activated. In theory contiguous mode of the timers is also affected but in an act of hopeless lack of realism I'll assume nobody will ever configure a KERNEL for HZ > 500kHz but if so I leave that to evolution to sort out. Signed-off-by: Ralf Baechle --- arch/mips/kernel/cevt-bcm1480.c | 2 +- arch/mips/kernel/cevt-sb1250.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c index f6fcc62ade3..e54410e5a2c 100644 --- a/arch/mips/kernel/cevt-bcm1480.c +++ b/arch/mips/kernel/cevt-bcm1480.c @@ -122,7 +122,7 @@ void __cpuinit sb1480_clockevent_init(void) CLOCK_EVT_FEAT_ONESHOT; clockevent_set_clock(cd, V_SCD_TIMER_FREQ); cd->max_delta_ns = clockevent_delta2ns(0x7fffff, cd); - cd->min_delta_ns = clockevent_delta2ns(1, cd); + cd->min_delta_ns = clockevent_delta2ns(2, cd); cd->rating = 200; cd->irq = irq; cd->cpumask = cpumask_of_cpu(cpu); diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c index 194e0f726fe..8fbb0553768 100644 --- a/arch/mips/kernel/cevt-sb1250.c +++ b/arch/mips/kernel/cevt-sb1250.c @@ -121,7 +121,7 @@ void __cpuinit sb1250_clockevent_init(void) CLOCK_EVT_FEAT_ONESHOT; clockevent_set_clock(cd, V_SCD_TIMER_FREQ); cd->max_delta_ns = clockevent_delta2ns(0x7fffff, cd); - cd->min_delta_ns = clockevent_delta2ns(1, cd); + cd->min_delta_ns = clockevent_delta2ns(2, cd); cd->rating = 200; cd->irq = irq; cd->cpumask = cpumask_of_cpu(cpu); -- cgit v1.2.3 From 8dfa741f146b39eb59ef2094e03f47079ca99eb0 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 11 Nov 2007 17:33:55 +0000 Subject: [MIPS] Sibyte: Stop timers before programming next even. We have no guarantee by the generic time code that the timer is stopped when the ->next_event method is called. Modifying the Timer Initial Count register while the timer is enabled has UNPREDICTABLE effect according to the BCM1250/BCM1125/BCM1125H User Manual. So stop the timer before reprogramming. This is a paranoia fix; no ill effects have been observed previously. Signed-off-by: Ralf Baechle --- arch/mips/kernel/cevt-bcm1480.c | 1 + arch/mips/kernel/cevt-sb1250.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c index e54410e5a2c..0a57f86945f 100644 --- a/arch/mips/kernel/cevt-bcm1480.c +++ b/arch/mips/kernel/cevt-bcm1480.c @@ -75,6 +75,7 @@ static int sibyte_next_event(unsigned long delta, struct clock_event_device *cd) cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)); init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT)); + __raw_writeq(0, cfg); __raw_writeq(delta - 1, init); __raw_writeq(M_SCD_TIMER_ENABLE, cfg); diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c index 8fbb0553768..63ac3ad462b 100644 --- a/arch/mips/kernel/cevt-sb1250.c +++ b/arch/mips/kernel/cevt-sb1250.c @@ -73,6 +73,7 @@ static int sibyte_next_event(unsigned long delta, struct clock_event_device *cd) cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)); init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT)); + __raw_writeq(0, cfg); __raw_writeq(delta - 1, init); __raw_writeq(M_SCD_TIMER_ENABLE, cfg); -- cgit v1.2.3 From 7c3a622d9c8e88117a8d647756827852dd8c8432 Mon Sep 17 00:00:00 2001 From: Nigel Stephens Date: Thu, 8 Nov 2007 13:25:51 +0000 Subject: [MIPS] vpe: handle halting TCs in an errata safe way. Adds a JR.HB after halting a TC, to ensure that the TC has really halted. only modifies the TCSTATUS register when the TC is safely halted. Signed-off-by: Ralf Baechle --- arch/mips/kernel/vpe.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 436a64ff398..38bd33fa2a2 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -1003,6 +1003,7 @@ static void cleanup_tc(struct tc *tc) write_tc_c0_tcstatus(tmp); write_tc_c0_tchalt(TCHALT_H); + mips_ihb(); /* bind it to anything other than VPE1 */ // write_tc_c0_tcbind(read_tc_c0_tcbind() & ~TCBIND_CURVPE); // | TCBIND_CURVPE @@ -1235,9 +1236,12 @@ int vpe_free(vpe_handle vpe) settc(t->index); write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~VPECONF0_VPA); - /* mark the TC unallocated and halt'ed */ - write_tc_c0_tcstatus(read_tc_c0_tcstatus() & ~TCSTATUS_A); + /* halt the TC */ write_tc_c0_tchalt(TCHALT_H); + mips_ihb(); + + /* mark the TC unallocated */ + write_tc_c0_tcstatus(read_tc_c0_tcstatus() & ~TCSTATUS_A); v->state = VPE_STATE_UNUSED; @@ -1533,14 +1537,16 @@ static int __init vpe_module_init(void) t->pvpe = get_vpe(0); /* set the parent vpe */ } + /* halt the TC */ + write_tc_c0_tchalt(TCHALT_H); + mips_ihb(); + tmp = read_tc_c0_tcstatus(); /* mark not activated and not dynamically allocatable */ tmp &= ~(TCSTATUS_A | TCSTATUS_DA); tmp |= TCSTATUS_IXMT; /* interrupt exempt */ write_tc_c0_tcstatus(tmp); - - write_tc_c0_tchalt(TCHALT_H); } } -- cgit v1.2.3 From 3247989ee864db2cc5dccb14460573fee82b6832 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 12 Nov 2007 17:30:52 +0000 Subject: [MIPS] Makefile: Fix canonical system names The GNU `config.guess' uses "linux-gnu" as the canonical system name. Fix the list of compiler prefixes checked to spell it correctly. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ralf Baechle --- arch/mips/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 23c17755eca..a1f8d8b96b0 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -44,7 +44,7 @@ endif ifneq ($(SUBARCH),$(ARCH)) ifeq ($(CROSS_COMPILE),) - CROSS_COMPILE := $(call cc-cross-prefix, $(tool-archpref)-linux- $(tool-archpref)-gnu-linux- $(tool-archpref)-unknown-gnu-linux-) + CROSS_COMPILE := $(call cc-cross-prefix, $(tool-archpref)-linux- $(tool-archpref)-linux-gnu- $(tool-archpref)-unknown-linux-gnu-) endif endif -- cgit v1.2.3 From eae5fdc3e5032304a7d53bd06784b7ee71abccaf Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 12 Nov 2007 17:32:48 +0000 Subject: [MIPS] SNI: s/achknowledge/acknowledge/ Signed-off-by: Maciej W. Rozycki Signed-off-by: Ralf Baechle --- arch/mips/sni/pcimt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/sni/pcimt.c b/arch/mips/sni/pcimt.c index 4df070f2ff5..834650f371e 100644 --- a/arch/mips/sni/pcimt.c +++ b/arch/mips/sni/pcimt.c @@ -244,7 +244,7 @@ static void pcimt_hwint1(void) if (pend & IT_EISA) { int irq; /* - * Note: ASIC PCI's builtin interrupt achknowledge feature is + * Note: ASIC PCI's builtin interrupt acknowledge feature is * broken. Using it may result in loss of some or all i8259 * interrupts, so don't use PCIMT_INT_ACKNOWLEDGE ... */ -- cgit v1.2.3 From f99f2cc9363a08cdbd6cfbe3f29234e3235d05e8 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 13 Nov 2007 00:35:13 +0000 Subject: [MIPS] Sibyte: Fix name of clocksource. Signed-off-by: Ralf Baechle --- arch/mips/kernel/csrc-sb1250.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c index ebb16e66887..92212bbb8e4 100644 --- a/arch/mips/kernel/csrc-sb1250.c +++ b/arch/mips/kernel/csrc-sb1250.c @@ -43,7 +43,7 @@ static cycle_t sb1250_hpt_read(void) } struct clocksource bcm1250_clocksource = { - .name = "MIPS", + .name = "bcm1250-counter-3", .rating = 200, .read = sb1250_hpt_read, .mask = CLOCKSOURCE_MASK(23), -- cgit v1.2.3 From 30e748a507919a41f9eb4d10b4954f453325a37d Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 15 Nov 2007 19:37:15 +0000 Subject: [MIPS] irq_cpu: use handle_percpu_irq handler to avoid dropping interrupts. This matters to any sort of device that is wired to one of the CPU interrupt pins on an SMP system. Typically the scenario is most easily triggered with the count/compare timer interrupt where the same interrupt number and thus irq_desc is used on each processor. CPU A CPU B do_IRQ() generic_handle_irq() handle_level_irq() spin_lock(desc_lock) set IRQ_INPROGRESS spin_unlock(desc_lock) do_IRQ() generic_handle_irq() handle_level_irq() spin_lock(desc_lock) IRQ_INPROGRESS set => bail out spin_lock(desc_lock) clear IRQ_INPROGRESS spin_unlock(desc_lock) In case of the cp0 compare interrupt this means the interrupt will be acked and not handled or re-armed on CPU b, so there won't be any timer interrupt until the count register wraps around. With kernels 2.6.20 ... 2.6.23 we usually were lucky that things were just working right on VSMP because the count registers are synchronized on bootup so it takes something that disables interrupts for a long time on one processor to trigger this one. For scenarios where an interrupt is multicasted or broadcasted over several CPUs the existing code was safe and the fix will break it. There is no way to know in the interrupt controller code because it is abstracted from the platform code. I think we do not have such a setup currently, so this should be ok. Signed-off-by: Ralf Baechle --- arch/mips/kernel/irq-rm7000.c | 2 +- arch/mips/kernel/irq-rm9000.c | 2 +- arch/mips/kernel/irq_cpu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/irq-rm7000.c b/arch/mips/kernel/irq-rm7000.c index 25073288348..971adf6ef4f 100644 --- a/arch/mips/kernel/irq-rm7000.c +++ b/arch/mips/kernel/irq-rm7000.c @@ -44,5 +44,5 @@ void __init rm7k_cpu_irq_init(void) for (i = base; i < base + 4; i++) set_irq_chip_and_handler(i, &rm7k_irq_controller, - handle_level_irq); + handle_percpu_irq); } diff --git a/arch/mips/kernel/irq-rm9000.c b/arch/mips/kernel/irq-rm9000.c index ae83d2df6f3..7b04583bd80 100644 --- a/arch/mips/kernel/irq-rm9000.c +++ b/arch/mips/kernel/irq-rm9000.c @@ -104,5 +104,5 @@ void __init rm9k_cpu_irq_init(void) rm9000_perfcount_irq = base + 1; set_irq_chip_and_handler(rm9000_perfcount_irq, &rm9k_perfcounter_irq, - handle_level_irq); + handle_percpu_irq); } diff --git a/arch/mips/kernel/irq_cpu.c b/arch/mips/kernel/irq_cpu.c index 7b66e03b589..0ee2567b780 100644 --- a/arch/mips/kernel/irq_cpu.c +++ b/arch/mips/kernel/irq_cpu.c @@ -116,5 +116,5 @@ void __init mips_cpu_irq_init(void) for (i = irq_base + 2; i < irq_base + 8; i++) set_irq_chip_and_handler(i, &mips_cpu_irq_controller, - handle_level_irq); + handle_percpu_irq); } -- cgit v1.2.3 From 72e510654c814e2c5c9227e95ae02ea77e015ce4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 15 Nov 2007 22:20:33 +0000 Subject: [MIPS] N32 needs to use the compat version of sys_nfsservctl. Signed-off-by: Ralf Baechle --- arch/mips/kernel/scall64-n32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 118be24224f..01993ec3368 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -293,7 +293,7 @@ EXPORT(sysn32_call_table) PTR sys_ni_syscall /* 6170, was get_kernel_syms */ PTR sys_ni_syscall /* was query_module */ PTR sys_quotactl - PTR sys_nfsservctl + PTR compat_sys_nfsservctl PTR sys_ni_syscall /* res. for getpmsg */ PTR sys_ni_syscall /* 6175 for putpmsg */ PTR sys_ni_syscall /* res. for afs_syscall */ -- cgit v1.2.3 From a5a97263a9fd6a94f954d41ae3233ea65a90bd8a Mon Sep 17 00:00:00 2001 From: Chris Poon Date: Thu, 15 Nov 2007 15:38:45 -0800 Subject: [SUNHME]: VLAN support for sunhme This patch enables VLAN support on sunhme by increasing BMAC_TXMAX/BMAC_RXMAX and allocating extra space via skb_put for the VLAN header. Signed-off-by: Chris Poon Signed-off-by: David S. Miller --- drivers/net/sunhme.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index c20a3bd21bb..9cc13dd8a82 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -1281,7 +1281,7 @@ static void happy_meal_init_rings(struct happy_meal *hp) skb->dev = dev; /* Because we reserve afterwards. */ - skb_put(skb, (ETH_FRAME_LEN + RX_OFFSET)); + skb_put(skb, (ETH_FRAME_LEN + RX_OFFSET + 4)); hme_write_rxd(hp, &hb->happy_meal_rxd[i], (RXFLAG_OWN | ((RX_BUF_ALLOC_SIZE - RX_OFFSET) << 16)), hme_dma_map(hp, skb->data, RX_BUF_ALLOC_SIZE, DMA_FROMDEVICE)); @@ -1700,6 +1700,11 @@ static int happy_meal_init(struct happy_meal *hp) HMD(("tx old[%08x] and rx [%08x] ON!\n", hme_read32(hp, bregs + BMAC_TXCFG), hme_read32(hp, bregs + BMAC_RXCFG))); + + /* Set larger TX/RX size to allow for 802.1q */ + hme_write32(hp, bregs + BMAC_TXMAX, ETH_FRAME_LEN + 8); + hme_write32(hp, bregs + BMAC_RXMAX, ETH_FRAME_LEN + 8); + hme_write32(hp, bregs + BMAC_TXCFG, hme_read32(hp, bregs + BMAC_TXCFG) | BIGMAC_TXCFG_ENABLE); hme_write32(hp, bregs + BMAC_RXCFG, @@ -2039,7 +2044,7 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) hme_dma_unmap(hp, dma_addr, RX_BUF_ALLOC_SIZE, DMA_FROMDEVICE); hp->rx_skbs[elem] = new_skb; new_skb->dev = dev; - skb_put(new_skb, (ETH_FRAME_LEN + RX_OFFSET)); + skb_put(new_skb, (ETH_FRAME_LEN + RX_OFFSET + 4)); hme_write_rxd(hp, this, (RXFLAG_OWN|((RX_BUF_ALLOC_SIZE-RX_OFFSET)<<16)), hme_dma_map(hp, new_skb->data, RX_BUF_ALLOC_SIZE, DMA_FROMDEVICE)); @@ -2809,8 +2814,8 @@ static int __devinit happy_meal_sbus_probe_one(struct sbus_dev *sdev, int is_qfe dev->watchdog_timeo = 5*HZ; dev->ethtool_ops = &hme_ethtool_ops; - /* Happy Meal can do it all... except VLAN. */ - dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_VLAN_CHALLENGED; + /* Happy Meal can do it all... */ + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; dev->irq = sdev->irqs[0]; @@ -3143,8 +3148,8 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev, dev->irq = pdev->irq; dev->dma = 0; - /* Happy Meal can do it all... except VLAN. */ - dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_VLAN_CHALLENGED; + /* Happy Meal can do it all... */ + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; #if defined(CONFIG_SBUS) && defined(CONFIG_PCI) /* Hook up PCI register/dma accessors. */ -- cgit v1.2.3 From 7799652557d966e49512479f4d3b9079bbc01fff Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Thu, 15 Nov 2007 15:52:32 -0800 Subject: [NETFILTER]: Fix NULL pointer dereference in nf_nat_move_storage() Reported by Chuck Ebbert as: https://bugzilla.redhat.com/show_bug.cgi?id=259501#c14 This routine is called each time hash should be replaced, nf_conn has extension list which contains pointers to connection tracking users (like nat, which is right now the only such user), so when replace takes place it should copy own extensions. Loop above checks for own extension, but tries to move higer-layer one, which can lead to above oops. Signed-off-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_extend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index a1a65a1313b..cf6ba6659a8 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -109,7 +109,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[i]); if (t && t->move) - t->move(ct, ct->ext + ct->ext->offset[id]); + t->move(ct, ct->ext + ct->ext->offset[i]); rcu_read_unlock(); } kfree(ct->ext); -- cgit v1.2.3 From 8c0863403f109a43d7000b4646da4818220d501f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Nov 2007 16:41:52 -0800 Subject: dirty page balancing: Get rid of broken unmapped_ratio logic This code harks back to the days when we didn't count dirty mapped pages, which led us to try to balance the number of dirty unmapped pages by how much unmapped memory there was in the system. That makes no sense any more, since now the dirty counts include the mapped pages. Not to mention that the math doesn't work with HIGHMEM machines anyway, and causes the unmapped_ratio to potentially turn negative (which we do catch thanks to clamping it at a minimum value, but I mention that as an indication of how broken the code is). The code also was written at a time when the default dirty ratio was much larger, and the unmapped_ratio logic effectively capped that large dirty ratio a bit. Again, we've since lowered the dirty ratio rather aggressively, further lessening the point of that code. Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 81a91e6f1f9..d55cfcae2ef 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -297,20 +297,12 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, { int background_ratio; /* Percentages */ int dirty_ratio; - int unmapped_ratio; long background; long dirty; unsigned long available_memory = determine_dirtyable_memory(); struct task_struct *tsk; - unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) + - global_page_state(NR_ANON_PAGES)) * 100) / - available_memory; - dirty_ratio = vm_dirty_ratio; - if (dirty_ratio > unmapped_ratio / 2) - dirty_ratio = unmapped_ratio / 2; - if (dirty_ratio < 5) dirty_ratio = 5; -- cgit v1.2.3 From 2a0d7187340ca192ccc061a86c8a55be5e7627f0 Mon Sep 17 00:00:00 2001 From: eric miao Date: Tue, 30 Oct 2007 08:10:18 +0100 Subject: [ARM] 4638/1: pxa: use PXA3xx specific macros to define clks PXA3xx uses its own clk_pxa3xx_cken_ops, modify the code to use the PXA3xx specific macros to define its clocks Signed-off-by: eric miao Signed-off-by: Russell King --- arch/arm/mach-pxa/pxa3xx.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 5da798282a5..61d9c9d69e6 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -150,22 +150,45 @@ static void clk_pxa3xx_cken_disable(struct clk *clk) local_irq_enable(); } +static const struct clkops clk_pxa3xx_cken_ops = { + .enable = clk_pxa3xx_cken_enable, + .disable = clk_pxa3xx_cken_disable, +}; + static const struct clkops clk_pxa3xx_hsio_ops = { .enable = clk_pxa3xx_cken_enable, .disable = clk_pxa3xx_cken_disable, .getrate = clk_pxa3xx_hsio_getrate, }; +#define PXA3xx_CKEN(_name, _cken, _rate, _delay, _dev) \ + { \ + .name = _name, \ + .dev = _dev, \ + .ops = &clk_pxa3xx_cken_ops, \ + .rate = _rate, \ + .cken = CKEN_##_cken, \ + .delay = _delay, \ + } + +#define PXA3xx_CK(_name, _cken, _ops, _dev) \ + { \ + .name = _name, \ + .dev = _dev, \ + .ops = _ops, \ + .cken = CKEN_##_cken, \ + } + static struct clk pxa3xx_clks[] = { - INIT_CK("LCDCLK", LCD, &clk_pxa3xx_hsio_ops, &pxa_device_fb.dev), - INIT_CK("CAMCLK", CAMERA, &clk_pxa3xx_hsio_ops, NULL), + PXA3xx_CK("LCDCLK", LCD, &clk_pxa3xx_hsio_ops, &pxa_device_fb.dev), + PXA3xx_CK("CAMCLK", CAMERA, &clk_pxa3xx_hsio_ops, NULL), - INIT_CKEN("UARTCLK", FFUART, 14857000, 1, &pxa_device_ffuart.dev), - INIT_CKEN("UARTCLK", BTUART, 14857000, 1, &pxa_device_btuart.dev), - INIT_CKEN("UARTCLK", STUART, 14857000, 1, NULL), + PXA3xx_CKEN("UARTCLK", FFUART, 14857000, 1, &pxa_device_ffuart.dev), + PXA3xx_CKEN("UARTCLK", BTUART, 14857000, 1, &pxa_device_btuart.dev), + PXA3xx_CKEN("UARTCLK", STUART, 14857000, 1, NULL), - INIT_CKEN("I2CCLK", I2C, 32842000, 0, &pxa_device_i2c.dev), - INIT_CKEN("UDCCLK", UDC, 48000000, 5, &pxa_device_udc.dev), + PXA3xx_CKEN("I2CCLK", I2C, 32842000, 0, &pxa_device_i2c.dev), + PXA3xx_CKEN("UDCCLK", UDC, 48000000, 5, &pxa_device_udc.dev), }; void __init pxa3xx_init_irq(void) -- cgit v1.2.3 From ec418781708f89ef6eb8eb5ef1eeb79a6bec9d69 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 16 Nov 2007 16:35:56 -0500 Subject: SELinux: return EOPNOTSUPP not ENOTSUPP ENOTSUPP is not a valid error code in the kernel (it is defined in some NFS internal error codes and has been improperly used other places). In the !CONFIG_SECURITY_SELINUX case though it is possible that we could return this from selinux_audit_rule_init(). This patch just returns the userspace valid EOPNOTSUPP. Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/selinux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/selinux.h b/include/linux/selinux.h index d1b7ca6c1c5..6080f73fc85 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -136,7 +136,7 @@ static inline int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, struct selinux_audit_rule **rule) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule) -- cgit v1.2.3 From d9f8bcbf67a0ee67c8cb0734f003dfe916bb5248 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 16 Nov 2007 21:16:36 -0800 Subject: Linux 2.6.24-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9c9c4bfb0e5..7f969303ed4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 24 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Arr Matey! A Hairy Bilge Rat! # *DOCUMENTATION* -- cgit v1.2.3 From 142d0a674d50b53366bd5ea02d7093d04960744e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Nov 2007 13:20:48 +0100 Subject: x86: fix bogus memcpy in es7000_check_dsdt() es7000_check_dst() contains a memcpy from 0, which probably should have been a memset. Remove it and check the retunr value from acpi_get_table_header. Noticed by: Joe Perches Signed-off-by: Thomas Gleixner --- include/asm-x86/mach-es7000/mach_mpparse.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/mach-es7000/mach_mpparse.h index 8aa10547b4b..52ee75cd0fe 100644 --- a/include/asm-x86/mach-es7000/mach_mpparse.h +++ b/include/asm-x86/mach-es7000/mach_mpparse.h @@ -29,9 +29,9 @@ extern int mps_oem_check(struct mp_config_table *mpc, char *oem, static inline int es7000_check_dsdt(void) { struct acpi_table_header header; - memcpy(&header, 0, sizeof(struct acpi_table_header)); - acpi_get_table_header(ACPI_SIG_DSDT, 0, &header); - if (!strncmp(header.oem_id, "UNISYS", 6)) + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) return 1; return 0; } -- cgit v1.2.3 From e5ef67ef0b5d96315d3f7b74823cbfa85938a3a8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Nov 2007 21:06:23 -0800 Subject: x86: fix voyager_cat_init section Fix Voyager section mismatches: voyager_cat_init() should be __init. WARNING: vmlinux.o(.text+0xee83): Section mismatch: reference to .init.data:eprom_buf (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xeea6): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xeeac): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xeeb2): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xef4c): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xef56): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf10f): Section mismatch: reference to .init.data:eprom_buf (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf13b): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf14b): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf159): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf1b1): Section mismatch: reference to .init.data:eprom_buf (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf1bb): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf1c1): Section mismatch: reference to .init.data:eprom_buf (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf1c7): Section mismatch: reference to .init.data:eprom_buf (between 'voyager_cat_init' and 'aes_enc_blk') WARNING: vmlinux.o(.text+0xf1e6): Section mismatch: reference to .init.data: (between 'voyager_cat_init' and 'aes_enc_blk') Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner --- arch/x86/mach-voyager/voyager_cat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c index 26a2d4c54b6..2132ca652df 100644 --- a/arch/x86/mach-voyager/voyager_cat.c +++ b/arch/x86/mach-voyager/voyager_cat.c @@ -568,7 +568,7 @@ static voyager_module_t *voyager_initial_module; * boot cpu *after* all memory initialisation has been done (so we can * use kmalloc) but before smp initialisation, so we can probe the SMP * configuration and pick up necessary information. */ -void +void __init voyager_cat_init(void) { voyager_module_t **modpp = &voyager_initial_module; -- cgit v1.2.3 From 8f8182106890970bb13bed325f0a04aef931883d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Nov 2007 21:06:45 -0800 Subject: x86: fix smp init sections Fix Voyager section mismatch due to using __devinit instead of __cpuinit. WARNING: vmlinux.o(.text+0xd943): Section mismatch: reference to .init.text:init_gdt (between 'voyager_smp_prepare_boot_cpu' and 'smp_vic_cmn_interrupt') Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner --- arch/x86/mach-voyager/voyager_smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 69371434b0c..88124dd3540 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1900,7 +1900,7 @@ voyager_smp_prepare_cpus(unsigned int max_cpus) smp_boot_cpus(); } -static void __devinit voyager_smp_prepare_boot_cpu(void) +static void __cpuinit voyager_smp_prepare_boot_cpu(void) { init_gdt(smp_processor_id()); switch_to_new_gdt(); @@ -1911,7 +1911,7 @@ static void __devinit voyager_smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_present_map); } -static int __devinit +static int __cpuinit voyager_cpu_up(unsigned int cpu) { /* This only works at boot for x86. See "rewrite" above. */ -- cgit v1.2.3 From 434b3d3209a8d8dcda63c3b14815659f4671b0a8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Nov 2007 21:06:02 -0800 Subject: x86: voyager use correct header file name Fix header file name for Voyager build. In file included from arch/x86/kernel/setup_32.c:61: include/asm-x86/mach-voyager/setup_arch.h:2:26: error: asm/setup_32.h: No such file or directory make[1]: *** [arch/x86/kernel/setup_32.o] Error 1 Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner --- include/asm-x86/mach-voyager/setup_arch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-x86/mach-voyager/setup_arch.h b/include/asm-x86/mach-voyager/setup_arch.h index 1710ae10eb6..71729ca05cd 100644 --- a/include/asm-x86/mach-voyager/setup_arch.h +++ b/include/asm-x86/mach-voyager/setup_arch.h @@ -1,5 +1,5 @@ #include -#include +#include #define VOYAGER_BIOS_INFO ((struct voyager_bios_info *) \ (&boot_params.apm_bios_info)) -- cgit v1.2.3 From 4307d1e5ada595c87f9a4d16db16ba5edb70dcb1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 Nov 2007 18:37:48 +0100 Subject: x86: ignore the sys_getcpu() tcache parameter dont use the vgetcpu tcache - it's causing problems with tasks migrating, they'll see the old cache up to a jiffy after the migration, further increasing the costs of the migration. In the worst case they see a complete bogus information from the tcache, when a sys_getcpu() call "invalidated" the cache info by incrementing the jiffies _and_ the cpuid info in the cache and the following vdso_getcpu() call happens after vdso_jiffies have been incremented. Signed-off-by: Ingo Molnar Signed-off-by: Ulrich Drepper Signed-off-by: Thomas Gleixner --- arch/x86/vdso/vgetcpu.c | 19 ++----------------- kernel/sys.c | 20 +------------------- 2 files changed, 3 insertions(+), 36 deletions(-) diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c index 91f6e85d0fc..3b1ae1abfba 100644 --- a/arch/x86/vdso/vgetcpu.c +++ b/arch/x86/vdso/vgetcpu.c @@ -13,32 +13,17 @@ #include #include "vextern.h" -long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) +long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) { unsigned int dummy, p; - unsigned long j = 0; - /* Fast cache - only recompute value once per jiffies and avoid - relatively costly rdtscp/cpuid otherwise. - This works because the scheduler usually keeps the process - on the same CPU and this syscall doesn't guarantee its - results anyways. - We do this here because otherwise user space would do it on - its own in a likely inferior way (no access to jiffies). - If you don't like it pass NULL. */ - if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) { - p = tcache->blob[1]; - } else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) { + if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) { /* Load per CPU data from RDTSCP */ rdtscp(dummy, dummy, p); } else { /* Load per CPU data from GDT */ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); } - if (tcache) { - tcache->blob[0] = j; - tcache->blob[1] = p; - } if (cpu) *cpu = p & 0xfff; if (node) diff --git a/kernel/sys.c b/kernel/sys.c index 304b5410d74..d1fe71eb454 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1750,7 +1750,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, } asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, - struct getcpu_cache __user *cache) + struct getcpu_cache __user *unused) { int err = 0; int cpu = raw_smp_processor_id(); @@ -1758,24 +1758,6 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, err |= put_user(cpu, cpup); if (nodep) err |= put_user(cpu_to_node(cpu), nodep); - if (cache) { - /* - * The cache is not needed for this implementation, - * but make sure user programs pass something - * valid. vsyscall implementations can instead make - * good use of the cache. Only use t0 and t1 because - * these are available in both 32bit and 64bit ABI (no - * need for a compat_getcpu). 32bit has enough - * padding - */ - unsigned long t0, t1; - get_user(t0, &cache->blob[0]); - get_user(t1, &cache->blob[1]); - t0++; - t1++; - put_user(t0, &cache->blob[0]); - put_user(t1, &cache->blob[1]); - } return err ? -EFAULT : 0; } -- cgit v1.2.3 From 903675569e278af86aa08c2af238b0ab997065d1 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 7 Nov 2007 02:12:58 +0100 Subject: x86: fix cpu-hotplug regression Commit d435d862baca3e25e5eec236762a43251b1e7ffc ("cpu hotplug: mce: fix cpu hotplug error handling") changed the error handling in mce_cpu_callback. In cases where not all CPUs are brought up during boot (e.g. using maxcpus and additional_cpus parameters) mce_cpu_callback now returns NOTFIY_BAD because for such CPUs cpu_data is not completely filled when the notifier is called. Thus mce_create_device fails right at its beginning: if (!mce_available(&cpu_data[cpu])) return -EIO; As a quick fix I suggest to check boot_cpu_data for MCE. To reproduce this regression: (1) boot with maxcpus=2 addtional_cpus=2 on a 4 CPU x86-64 system (2) # echo 1 >/sys/devices/system/cpu/cpu2/online -bash: echo: write error: Invalid argument dmesg shows: _cpu_up: attempt to bring up CPU 2 failed Signed-off-by: Andreas Herrmann Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 447b351f1f2..4b21d29fb5a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -810,7 +810,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) int err; int i; - if (!mce_available(&cpu_data(cpu))) + if (!mce_available(&boot_cpu_data)) return -EIO; memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); -- cgit v1.2.3 From c0c52d28e05e8bdaa2126570c02ecb1a7358cecc Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 1 Nov 2007 19:32:17 +0100 Subject: x86: show cpuinfo only for online CPUs Fix regressions introduced with 92cb7612aee39642d109b8d935ad265e602c0563. It can happen that cpuinfo is displayed for CPUs that are not online or even worse for CPUs not present at all. As an example, following was shown for a "second" CPU of a single core K8 variant: processor : 0 vendor_id : unknown cpu family : 0 model : 0 model name : unknown stepping : 0 cache size : 0 KB fpu : yes fpu_exception : yes cpuid level : 0 wp : yes flags : bogomips : 0.00 clflush size : 0 cache_alignment : 0 address sizes : 0 bits physical, 0 bits virtual power management: Signed-off-by: Andreas Herrmann Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/proc.c | 8 +++----- arch/x86/kernel/setup_64.c | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 066f8c6af4d..3900e46d66d 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -89,8 +89,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) int fpu_exception; #ifdef CONFIG_SMP - if (!cpu_online(n)) - return 0; n = c->cpu_index; #endif seq_printf(m, "processor\t: %d\n" @@ -177,14 +175,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = first_cpu(cpu_possible_map); - if ((*pos) < NR_CPUS && cpu_possible(*pos)) + *pos = first_cpu(cpu_online_map); + if ((*pos) < NR_CPUS && cpu_online(*pos)) return &cpu_data(*pos); return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - *pos = next_cpu(*pos, cpu_possible_map); + *pos = next_cpu(*pos, cpu_online_map); return c_start(m, pos); } static void c_stop(struct seq_file *m, void *v) diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 238633d3d09..3cb3874489b 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -1078,8 +1078,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP - if (!cpu_online(c->cpu_index)) - return 0; cpu = c->cpu_index; #endif @@ -1171,15 +1169,15 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = first_cpu(cpu_possible_map); - if ((*pos) < NR_CPUS && cpu_possible(*pos)) + *pos = first_cpu(cpu_online_map); + if ((*pos) < NR_CPUS && cpu_online(*pos)) return &cpu_data(*pos); return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - *pos = next_cpu(*pos, cpu_possible_map); + *pos = next_cpu(*pos, cpu_online_map); return c_start(m, pos); } -- cgit v1.2.3 From 699d934d5f958d7944d195c03c334f28cc0b3669 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 17 Nov 2007 13:18:42 +0100 Subject: x86: fixup cpu_info array conversion 92cb7612aee39642d109b8d935ad265e602c0563 sets cpu_info->cpu_index to zero for no reason. Referencing cpu_info->cpu_index now points always to CPU#0, which is apparently not what we want. Remove it. Spotted-by: Zou Nan hai Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup_64.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 3cb3874489b..30d94d1d5f5 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -892,7 +892,6 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; - c->cpu_index = 0; #endif } -- cgit v1.2.3 From d0974b11e0741034fc5d22838b9cb85402a280d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Nov 2007 07:11:12 +0100 Subject: Remove x86 merge artifact from top Makefile The x86 merge modified the tags target to handle the two separate source directories. Remove it now that i386/x86_64 are gone completely. Signed-off-by: Thomas Gleixner --- Makefile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 7f969303ed4..11961a225cb 100644 --- a/Makefile +++ b/Makefile @@ -1334,12 +1334,7 @@ else ALLINCLUDE_ARCHS := $(ALLSOURCE_ARCHS) endif -# Take care of arch/x86 -ifeq ($(ARCH), $(SRCARCH)) -ALLSOURCE_ARCHS := $(ARCH) -else -ALLSOURCE_ARCHS := $(ARCH) $(SRCARCH) -endif +ALLSOURCE_ARCHS := $(SRCARCH) define find-sources ( for arch in $(ALLSOURCE_ARCHS) ; do \ -- cgit v1.2.3 From fa6a1a554b50cbb7763f6907e6fef927ead480d9 Mon Sep 17 00:00:00 2001 From: "David P. Reed" Date: Wed, 14 Nov 2007 17:49:21 -0500 Subject: ntp: fix typo that makes sync_cmos_clock erratic Fix a typo in ntp.c that has caused updating of the persistent (RTC) clock when synced to NTP to behave erratically. When debugging a freeze that arises on my AMD64 machines when I run the ntpd service, I added a number of printk's to monitor the sync_cmos_clock procedure. I discovered that it was not syncing to cmos RTC every 11 minutes as documented, but instead would keep trying every second for hours at a time. The reason turned out to be a typo in sync_cmos_clock, where it attempts to ensure that update_persistent_clock is called very close to 500 msec. after a 1 second boundary (required by the PC RTC's spec). That typo referred to "xtime" in one spot, rather than "now", which is derived from "xtime" but not equal to it. This makes the test erratic, creating a "coin-flip" that decides when update_persistent_clock is called - when it is called, which is rarely, it may be at any time during the one second period, rather than close to 500 msec, so the value written is needlessly incorrect, too. Signed-off-by: David P. Reed Signed-off-by: Thomas Gleixner --- kernel/time/ntp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index de6a2d6b3eb..14a2ecf2b31 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -205,7 +205,7 @@ static void sync_cmos_clock(unsigned long dummy) return; getnstimeofday(&now); - if (abs(xtime.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) + if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) fail = update_persistent_clock(now); next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec; -- cgit v1.2.3 From c399da0d97e06803e51085ec076b63a3168aad1b Mon Sep 17 00:00:00 2001 From: "David P. Reed" Date: Wed, 14 Nov 2007 17:47:35 -0500 Subject: x86: fix freeze in x86_64 RTC update code in time_64.c Fix hard freeze on x86_64 when the ntpd service calls update_persistent_clock() A repeatable but randomly timed freeze has been happening in Fedora 6 and 7 for the last year, whenever I run the ntpd service on my AMD64x2 HP Pavilion dv9000z laptop. This freeze is due to the use of spin_lock(&rtc_lock) under the assumption (per a bad comment) that set_rtc_mmss is called only with interrupts disabled. The call from ntp.c to update_persistent_clock is made with interrupts enabled. Signed-off-by: David P. Reed Signed-off-by: Thomas Gleixner --- arch/x86/kernel/time_64.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index c821edc3221..4ad1f7a6a83 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -82,18 +82,15 @@ static int set_rtc_mmss(unsigned long nowtime) int retval = 0; int real_seconds, real_minutes, cmos_minutes; unsigned char control, freq_select; + unsigned long flags; /* - * IRQs are disabled when we're called from the timer interrupt, - * no need for spin_lock_irqsave() + * set_rtc_mmss is called when irqs are enabled, so disable irqs here */ - - spin_lock(&rtc_lock); - + spin_lock_irqsave(&rtc_lock, flags); /* * Tell the clock it's being set and stop it. */ - control = CMOS_READ(RTC_CONTROL); CMOS_WRITE(control | RTC_SET, RTC_CONTROL); @@ -138,7 +135,7 @@ static int set_rtc_mmss(unsigned long nowtime) CMOS_WRITE(control, RTC_CONTROL); CMOS_WRITE(freq_select, RTC_FREQ_SELECT); - spin_unlock(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); return retval; } -- cgit v1.2.3 From bbbd99955bfe84c9ae63f51db946a7bcd21f48be Mon Sep 17 00:00:00 2001 From: "David P. Reed" Date: Wed, 14 Nov 2007 20:14:50 -0500 Subject: x86: on x86_64, correct reading of PC RTC when update in progress in time_64.c Correct potentially unstable PC RTC time register reading in time_64.c Stop the use of an incorrect technique for reading the standard PC RTC timer, which is documented to "disconnect" time registers from the bus while updates are in progress. The use of UIP flag while interrupts are disabled to protect a 244 microsecond window is one of the Motorola spec sheet's documented ways to read the RTC time registers reliably. tglx: removed locking changes from original patch, as they gain nothing (read_persistent_clock is only called during boot, suspend, resume - so no hot path affected) and conflict with the paravirt locking scheme (see 32bit code), which we do not want to complicate for no benefit. Signed-off-by: David P. Reed Signed-off-by: Thomas Gleixner --- arch/x86/kernel/time_64.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 4ad1f7a6a83..368b1942b39 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -161,21 +161,27 @@ unsigned long read_persistent_clock(void) unsigned century = 0; spin_lock_irqsave(&rtc_lock, flags); + /* + * if UIP is clear, then we have >= 244 microseconds before RTC + * registers will be updated. Spec sheet says that this is the + * reliable way to read RTC - registers invalid (off bus) during update + */ + while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) + cpu_relax(); - do { - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); + + /* now read all RTC registers while stable with interrupts disabled */ + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); #ifdef CONFIG_ACPI - if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && - acpi_gbl_FADT.century) - century = CMOS_READ(acpi_gbl_FADT.century); + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + century = CMOS_READ(acpi_gbl_FADT.century); #endif - } while (sec != CMOS_READ(RTC_SECONDS)); - spin_unlock_irqrestore(&rtc_lock, flags); /* -- cgit v1.2.3 From f4df73c2914a49bf6a55896aaecb0563c955e167 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Nov 2007 21:41:50 -0500 Subject: x86: add hpet sanity checks Some BIOSes advertise HPET at 0x0. We really do no want to allocate a resource there. Check for it and leave early. Other BIOSes tell us the HPET is at 0xfed0000000000000 instead of 0xfed00000. Add a check and fix it up with a warning on user request. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 289247d974c..0ca27c7b0e8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -637,6 +637,38 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) } hpet_address = hpet_tbl->address.address; + + /* + * Some broken BIOSes advertise HPET at 0x0. We really do not + * want to allocate a resource there. + */ + if (!hpet_address) { + printk(KERN_WARNING PREFIX + "HPET id: %#x base: %#lx is invalid\n", + hpet_tbl->id, hpet_address); + return 0; + } +#ifdef CONFIG_X86_64 + /* + * Some even more broken BIOSes advertise HPET at + * 0xfed0000000000000 instead of 0xfed00000. Fix it up and add + * some noise: + */ + if (hpet_address == 0xfed0000000000000UL) { + if (!hpet_force_user) { + printk(KERN_WARNING PREFIX "HPET id: %#x " + "base: 0xfed0000000000000 is bogus\n " + "try hpet=force on the kernel command line to " + "fix it up to 0xfed00000.\n", hpet_tbl->id); + hpet_address = 0; + return 0; + } + printk(KERN_WARNING PREFIX + "HPET id: %#x base: 0xfed0000000000000 fixed up " + "to 0xfed00000.\n", hpet_tbl->id); + hpet_address >>= 32; + } +#endif printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet_address); -- cgit v1.2.3 From 05dfa35e84331c6921ab394463069e9376f0bd76 Mon Sep 17 00:00:00 2001 From: Truxton Fulton Date: Sat, 17 Nov 2007 16:27:01 +0100 Subject: x86: fix reboot with no keyboard attached Attempt to fix http://bugzilla.kernel.org/show_bug.cgi?id=8378 Hiroto Shibuya wrote to tell me that he has a VIA EPIA-EK10000 which suffers from the reboot problem when no keyboard is attached. My first patch works for him: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=59f4e7d572980a521b7bdba74ab71b21f5995538 But the latest patch does not work for him : http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=8b93789808756bcc1e5c90c99f1b1ef52f839a51 We found that it was necessary to also set the "disable keyboard" flag in the command byte, as the first patch was doing. The second patch tries to minimally modify the command byte, but it is not enough. Please consider this simple one-line patch to help people with low end VIA motherboards reboot when no keyboard is attached. Hiroto Shibuya has verified that this works for him (as I no longer have an afflicted machine). Additional discussion: Note that original patch from Truxton DOES disable keyboard and this has been in main tree since 2.6.14, thus it must have quite a bit of air time already. http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.14.y.git;a=commit;h=59f4e7d572980a521b7bdba74ab71b21f5995538 Note that he only mention "System flag" in the description and comment, but in the code, "disable keyboard" flag is set. outb(0x14, 0x60); /* set "System flag" */ In 2.6.23, he made a change to read the current byte and then mask the flags, but along this change, he only set the "System flag" and dropped the setting of "disable keyboard" flag. http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.23.y.git;a=commit;h=8b93789808756bcc1e5c90c99f1b1ef52f839a51 outb(cmd | 0x04, 0x60); /* set "System flag" */ So my request is to restore the setting of disable keyboard flag which has been there since 2.6.14 but disappeared in 2.6.23. Cc: Lee Garrett Cc: "Hiroto Shibuya" Cc: Natalie Protasevich Cc: Dmitry Torokhov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Aristeu Rozanski Cc: Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/asm-x86/mach-default/mach_reboot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-x86/mach-default/mach_reboot.h b/include/asm-x86/mach-default/mach_reboot.h index e23fd9fbebb..6adee6a97de 100644 --- a/include/asm-x86/mach-default/mach_reboot.h +++ b/include/asm-x86/mach-default/mach_reboot.h @@ -49,7 +49,7 @@ static inline void mach_reboot(void) udelay(50); kb_wait(); udelay(50); - outb(cmd | 0x04, 0x60); /* set "System flag" */ + outb(cmd | 0x14, 0x60); /* set "System flag" and "Keyboard Disabled" */ udelay(50); kb_wait(); udelay(50); -- cgit v1.2.3 From 3d9befd2cdf65b1768b0d3078a65cc0ae9aa6412 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 17 Nov 2007 16:27:01 +0100 Subject: x86: check boundary in count setup resource need to check info->res_num less than PCI_BUS_NUM_RESOURCES, so info->bus->resource[info->res_num] = res will not beyond of bus resource array when acpi returns too many resource entries. Signed-off-by: Yinghai Lu Cc: Greg Kroah-Hartman Cc: Gary Hade Cc: Len Brown Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- arch/x86/pci/acpi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2d88f7c6d6a..7e35078673a 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -77,6 +77,9 @@ count_resource(struct acpi_resource *acpi_res, void *data) struct acpi_resource_address64 addr; acpi_status status; + if (info->res_num >= PCI_BUS_NUM_RESOURCES) + return AE_OK; + status = resource_to_addr(acpi_res, &addr); if (ACPI_SUCCESS(status)) info->res_num++; @@ -93,6 +96,9 @@ setup_resource(struct acpi_resource *acpi_res, void *data) unsigned long flags; struct resource *root; + if (info->res_num >= PCI_BUS_NUM_RESOURCES) + return AE_OK; + status = resource_to_addr(acpi_res, &addr); if (!ACPI_SUCCESS(status)) return AE_OK; -- cgit v1.2.3 From 6d1b30e30ca1c831b82c44aedd2536820bdb2bc4 Mon Sep 17 00:00:00 2001 From: Denys Date: Sat, 17 Nov 2007 16:27:02 +0100 Subject: x86: reboot fixup for wrap2c board Needed to make the wireless board, WRAP2C reboot. Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot_fixups_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 1a07bbea7be..f452726c0fe 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -39,6 +39,7 @@ struct device_fixup { static struct device_fixup fixups_table[] = { { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, +{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, }; /* -- cgit v1.2.3 From 80ef88d6d23bf1b94d65db0ac32334d01b9f7350 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 17 Nov 2007 15:37:31 +0100 Subject: x86: simplify "make ARCH=x86" and fix kconfig all.config Simplify "make ARCH=x86" and fix kconfig so we again can set 64BIT in all.config. For a fix the diffstat is nice: 6 files changed, 3 insertions(+), 36 deletions(-) The patch reverts these commits: 0f855aa64b3f63d35a891510cf7db932a435c116 -> kconfig: add helper to set config symbol from environment variable 2a113281f5cd2febbab21a93c8943f8d3eece4d3 -> kconfig: use $K64BIT to set 64BIT with all*config targets Roman Zippel pointed out that kconfig supported string compares so the additional complexity introduced by the above two patches were not needed. With this patch we have following behaviour: # make {allno,allyes,allmod,rand}config [ARCH=...] option \ host arch | 32bit | 64bit ===================================================== ./. | 32bit | 64bit ARCH=x86 | 32bit | 32bit ARCH=i386 | 32bit | 32bit ARCH=x86_64 | 64bit | 64bit The general rule are that ARCH= and native architecture takes precedence over the configuration. So make ARCH=i386 [whatever] will always build a 32-bit kernel no matter what the configuration says. The configuration will be updated to 32-bit if it was configured to 64-bit and the other way around. This behaviour is consistent with previous behaviour so no suprises here. make ARCH=x86 will per default result in a 32-bit kernel but as the only ARCH= value x86 allow the user to select between 32-bit and 64-bit using menuconfig. Signed-off-by: Sam Ravnborg Cc: Roman Zippel Cc: Andreas Herrmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- Makefile | 4 +--- README | 2 -- arch/x86/Kconfig | 4 ++-- scripts/kconfig/conf.c | 1 - scripts/kconfig/confdata.c | 27 --------------------------- scripts/kconfig/lkc_proto.h | 1 - 6 files changed, 3 insertions(+), 36 deletions(-) diff --git a/Makefile b/Makefile index 11961a225cb..a65ffd27de6 100644 --- a/Makefile +++ b/Makefile @@ -200,11 +200,9 @@ SRCARCH := $(ARCH) # Additional ARCH settings for x86 ifeq ($(ARCH),i386) SRCARCH := x86 - K64BIT := n endif ifeq ($(ARCH),x86_64) SRCARCH := x86 - K64BIT := y endif KCONFIG_CONFIG ?= .config @@ -341,7 +339,7 @@ KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION -export ARCH SRCARCH K64BIT CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC +export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS diff --git a/README b/README index 592f8a23828..159912cf515 100644 --- a/README +++ b/README @@ -194,8 +194,6 @@ CONFIGURING the kernel: "make *config" checks for a file named "all{yes/mod/no/random}.config" for symbol values that are to be forced. If this file is not found, it checks for a file named "all.config" to contain forced values. - Finally it checks the environment variable K64BIT and if found, sets - the config symbol "64BIT" to the value of the K64BIT variable. NOTES on "make config": - having unnecessary drivers will make the kernel bigger, and can diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1eb59971af5..368864dfe6e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -3,8 +3,8 @@ mainmenu "Linux Kernel Configuration for x86" # Select 32 or 64 bit config 64BIT - bool "64-bit kernel" - default n + bool "64-bit kernel" if ARCH = "x86" + default ARCH = "x86_64" help Say yes to build a 64-bit kernel - formerly known as x86_64 Say no to build a 32-bit kernel - formerly known as i386 diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index c6bee85c396..a38787a881e 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -591,7 +591,6 @@ int main(int ac, char **av) conf_read_simple(name, S_DEF_USER); else if (!stat("all.config", &tmpstat)) conf_read_simple("all.config", S_DEF_USER); - conf_set_env_sym("K64BIT", "64BIT", S_DEF_USER); break; default: break; diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index e4fa3f30254..e0f402f3b75 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -145,33 +145,6 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) return 0; } -/* Read an environment variable and assign the value to the symbol */ -int conf_set_env_sym(const char *env, const char *symname, int def) -{ - struct symbol *sym; - char *p; - int def_flags; - - p = getenv(env); - if (p) { - char warning[200]; - sprintf(warning, "Environment variable (%s = \"%s\")", env, p); - conf_filename = warning; - def_flags = SYMBOL_DEF << def; - if (def == S_DEF_USER) { - sym = sym_find(symname); - if (!sym) - return 1; - } else { - sym = sym_lookup(symname, 0); - if (sym->type == S_UNKNOWN) - sym->type = S_OTHER; - } - conf_set_sym_val(sym, def, def_flags, p); - } - return 0; -} - int conf_read_simple(const char *name, int def) { FILE *in = NULL; diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h index dca294e90cc..4d09f6ddefe 100644 --- a/scripts/kconfig/lkc_proto.h +++ b/scripts/kconfig/lkc_proto.h @@ -1,7 +1,6 @@ /* confdata.c */ P(conf_parse,void,(const char *name)); -P(conf_set_env_sym,int,(const char *envname, const char *symname, int def)); P(conf_read,int,(const char *name)); P(conf_read_simple,int,(const char *name, int)); P(conf_write,int,(const char *name)); -- cgit v1.2.3 From 6840999b192b1b57d713ddee3761c457a2779036 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 17 Nov 2007 15:37:31 +0100 Subject: x86: simplify "make ARCH=x86" and fix kconfig all.config Simplify "make ARCH=x86" and fix kconfig so we again can set 64BIT in all.config. For a fix the diffstat is nice: 6 files changed, 3 insertions(+), 36 deletions(-) The patch reverts these commits: - 0f855aa64b3f63d35a891510cf7db932a435c116 ("kconfig: add helper to set config symbol from environment variable") - 2a113281f5cd2febbab21a93c8943f8d3eece4d3 ("kconfig: use $K64BIT to set 64BIT with all*config targets") Roman Zippel pointed out that kconfig supported string compares so the additional complexity introduced by the above two patches were not needed. With this patch we have following behaviour: # make {allno,allyes,allmod,rand}config [ARCH=...] option \ host arch | 32bit | 64bit ===================================================== ./. | 32bit | 64bit ARCH=x86 | 32bit | 32bit ARCH=i386 | 32bit | 32bit ARCH=x86_64 | 64bit | 64bit The general rule are that ARCH= and native architecture takes precedence over the configuration. So make ARCH=i386 [whatever] will always build a 32-bit kernel no matter what the configuration says. The configuration will be updated to 32-bit if it was configured to 64-bit and the other way around. This behaviour is consistent with previous behaviour so no suprises here. make ARCH=x86 will per default result in a 32-bit kernel but as the only ARCH= value x86 allow the user to select between 32-bit and 64-bit using menuconfig. Signed-off-by: Sam Ravnborg Cc: Roman Zippel Cc: Andreas Herrmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Linus Torvalds --- Makefile | 4 +--- README | 2 -- arch/x86/Kconfig | 4 ++-- scripts/kconfig/conf.c | 1 - scripts/kconfig/confdata.c | 27 --------------------------- scripts/kconfig/lkc_proto.h | 1 - 6 files changed, 3 insertions(+), 36 deletions(-) diff --git a/Makefile b/Makefile index 7f969303ed4..6d7d7e991c9 100644 --- a/Makefile +++ b/Makefile @@ -200,11 +200,9 @@ SRCARCH := $(ARCH) # Additional ARCH settings for x86 ifeq ($(ARCH),i386) SRCARCH := x86 - K64BIT := n endif ifeq ($(ARCH),x86_64) SRCARCH := x86 - K64BIT := y endif KCONFIG_CONFIG ?= .config @@ -341,7 +339,7 @@ KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION -export ARCH SRCARCH K64BIT CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC +export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS diff --git a/README b/README index 592f8a23828..159912cf515 100644 --- a/README +++ b/README @@ -194,8 +194,6 @@ CONFIGURING the kernel: "make *config" checks for a file named "all{yes/mod/no/random}.config" for symbol values that are to be forced. If this file is not found, it checks for a file named "all.config" to contain forced values. - Finally it checks the environment variable K64BIT and if found, sets - the config symbol "64BIT" to the value of the K64BIT variable. NOTES on "make config": - having unnecessary drivers will make the kernel bigger, and can diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1eb59971af5..368864dfe6e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -3,8 +3,8 @@ mainmenu "Linux Kernel Configuration for x86" # Select 32 or 64 bit config 64BIT - bool "64-bit kernel" - default n + bool "64-bit kernel" if ARCH = "x86" + default ARCH = "x86_64" help Say yes to build a 64-bit kernel - formerly known as x86_64 Say no to build a 32-bit kernel - formerly known as i386 diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index c6bee85c396..a38787a881e 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -591,7 +591,6 @@ int main(int ac, char **av) conf_read_simple(name, S_DEF_USER); else if (!stat("all.config", &tmpstat)) conf_read_simple("all.config", S_DEF_USER); - conf_set_env_sym("K64BIT", "64BIT", S_DEF_USER); break; default: break; diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index e4fa3f30254..e0f402f3b75 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -145,33 +145,6 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) return 0; } -/* Read an environment variable and assign the value to the symbol */ -int conf_set_env_sym(const char *env, const char *symname, int def) -{ - struct symbol *sym; - char *p; - int def_flags; - - p = getenv(env); - if (p) { - char warning[200]; - sprintf(warning, "Environment variable (%s = \"%s\")", env, p); - conf_filename = warning; - def_flags = SYMBOL_DEF << def; - if (def == S_DEF_USER) { - sym = sym_find(symname); - if (!sym) - return 1; - } else { - sym = sym_lookup(symname, 0); - if (sym->type == S_UNKNOWN) - sym->type = S_OTHER; - } - conf_set_sym_val(sym, def, def_flags, p); - } - return 0; -} - int conf_read_simple(const char *name, int def) { FILE *in = NULL; diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h index dca294e90cc..4d09f6ddefe 100644 --- a/scripts/kconfig/lkc_proto.h +++ b/scripts/kconfig/lkc_proto.h @@ -1,7 +1,6 @@ /* confdata.c */ P(conf_parse,void,(const char *name)); -P(conf_set_env_sym,int,(const char *envname, const char *symname, int def)); P(conf_read,int,(const char *name)); P(conf_read_simple,int,(const char *name, int)); P(conf_write,int,(const char *name)); -- cgit v1.2.3