diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-11-16 18:30:26 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-11-16 18:30:26 -0800 |
commit | 4c5cdb1e1f2a502069f57a60b5c6b97b8106c73c (patch) | |
tree | 1067a3ce7dd39f3ac3aa3b961b1f0612507f694e | |
parent | 8c0863403f109a43d7000b4646da4818220d501f (diff) | |
parent | 18a7247d1bb2e2dcbab628d7e786d03df5bf1eed (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq
* master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq:
[CPUFREQ] Fix up whitespace in conservative governor.
[CPUFREQ] Make cpufreq_conservative handle out-of-sync events properly
[CPUFREQ] architectural pstate driver for powernow-k8
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 90 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 20 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 153 |
3 files changed, 119 insertions, 144 deletions
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 9c36a53676b..99e1ef9939b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -46,7 +46,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 2.00.00" +#define VERSION "version 2.20.00" #include "powernow-k8.h" /* serialize freq changes */ @@ -73,33 +73,11 @@ static u32 find_khz_freq_from_fid(u32 fid) return 1000 * find_freq_from_fid(fid); } -/* Return a frequency in MHz, given an input fid and did */ -static u32 find_freq_from_fiddid(u32 fid, u32 did) +static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate) { - if (current_cpu_data.x86 == 0x10) - return 100 * (fid + 0x10) >> did; - else - return 100 * (fid + 0x8) >> did; -} - -static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) -{ - return 1000 * find_freq_from_fiddid(fid, did); -} - -static u32 find_fid_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return lo & HW_PSTATE_FID_MASK; + return data[pstate].frequency; } -static u32 find_did_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; -} /* Return the vco fid for an input fid * @@ -142,9 +120,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) if (cpu_family == CPU_HW_PSTATE) { rdmsr(MSR_PSTATE_STATUS, lo, hi); i = lo & HW_PSTATE_MASK; - rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); - data->currfid = lo & HW_PSTATE_FID_MASK; - data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + data->currpstate = i; return 0; } do { @@ -295,7 +271,7 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, static int transition_pstate(struct powernow_k8_data *data, u32 pstate) { wrmsr(MSR_PSTATE_CTRL, pstate, 0); - data->currfid = find_fid_from_pstate(pstate); + data->currpstate = pstate; return 0; } @@ -845,17 +821,20 @@ err_out: static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) { int i; + u32 hi = 0, lo = 0; + rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); + data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; for (i = 0; i < data->acpi_data.state_count; i++) { u32 index; u32 hi = 0, lo = 0; - u32 fid; - u32 did; index = data->acpi_data.states[i].control & HW_PSTATE_MASK; - if (index > MAX_HW_PSTATE) { + if (index > data->max_hw_pstate) { printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; } rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); if (!(hi & HW_PSTATE_VALID_MASK)) { @@ -864,22 +843,9 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf continue; } - fid = lo & HW_PSTATE_FID_MASK; - did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + powernow_table[i].index = index; - dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); - - powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); - - powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); - - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } + powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; } return 0; } @@ -1020,22 +986,18 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i /* Take a frequency, and issue the hardware pstate transition command */ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) { - u32 fid = 0; - u32 did = 0; u32 pstate = 0; int res, i; struct cpufreq_freqs freqs; dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - /* get fid did for hardware pstate transition */ + /* get MSR index for hardware pstate transition */ pstate = index & HW_PSTATE_MASK; - if (pstate > MAX_HW_PSTATE) + if (pstate > data->max_hw_pstate) return 0; - fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; - did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; - freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); - freqs.new = find_khz_freq_from_fiddid(fid, did); + freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; @@ -1043,9 +1005,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i } res = transition_pstate(data, pstate); - data->currfid = find_fid_from_pstate(pstate); - data->currdid = find_did_from_pstate(pstate); - freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); + freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; @@ -1090,10 +1050,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi if (query_current_values_with_pending_wait(data)) goto err_out; - if (cpu_family == CPU_HW_PSTATE) - dprintk("targ: curr fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); - else { + if (cpu_family != CPU_HW_PSTATE) { dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1124,7 +1081,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi mutex_unlock(&fidvid_mutex); if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate); else pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -1223,7 +1180,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) + (3 * (1 << data->irt) * 10)) * 1000; if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else pol->cur = find_khz_freq_from_fid(data->currfid); dprintk("policy current frequency %d kHz\n", pol->cur); @@ -1240,8 +1197,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); + dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate); else dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1297,7 +1253,7 @@ static unsigned int powernowk8_get (unsigned int cpu) goto out; if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); + khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else khz = find_khz_freq_from_fid(data->currfid); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 7c4f6e0faed..afd2b520d35 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -10,6 +10,7 @@ struct powernow_k8_data { u32 numps; /* number of p-states */ u32 batps; /* number of p-states supported on battery */ + u32 max_hw_pstate; /* maximum legal hardware pstate */ /* these values are constant when the PSB is used to determine * vid/fid pairings, but are modified during the ->target() call @@ -21,8 +22,8 @@ struct powernow_k8_data { u32 plllock; /* pll lock time, units 1 us */ u32 exttype; /* extended interface = 1 */ - /* keep track of the current fid / vid or did */ - u32 currvid, currfid, currdid; + /* keep track of the current fid / vid or pstate */ + u32 currvid, currfid, currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. @@ -87,23 +88,14 @@ struct powernow_k8_data { /* Hardware Pstate _PSS and MSR definitions */ #define USE_HW_PSTATE 0x00000080 -#define HW_PSTATE_FID_MASK 0x0000003f -#define HW_PSTATE_DID_MASK 0x000001c0 -#define HW_PSTATE_DID_SHIFT 6 #define HW_PSTATE_MASK 0x00000007 #define HW_PSTATE_VALID_MASK 0x80000000 -#define HW_FID_INDEX_SHIFT 8 -#define HW_FID_INDEX_MASK 0x0000ff00 -#define HW_DID_INDEX_SHIFT 16 -#define HW_DID_INDEX_MASK 0x00ff0000 -#define HW_WATTS_MASK 0xff -#define HW_PWR_DVR_MASK 0x300 -#define HW_PWR_DVR_SHIFT 8 -#define HW_PWR_MAX_MULT 3 -#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ +#define HW_PSTATE_MAX_MASK 0x000000f0 +#define HW_PSTATE_MAX_SHIFT 4 #define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ #define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ #define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ +#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */ /* define the two driver architectures */ #define CPU_OPTERON 0 diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4bd33ce8a6f..1bba99747f5 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -37,17 +37,17 @@ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) -/* - * The polling frequency of this governor depends on the capability of +/* + * The polling frequency of this governor depends on the capability of * the processor. Default polling frequency is 1000 times the transition - * latency of the processor. The governor will work on any processor with - * transition latency <= 10mS, using appropriate sampling + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling * rate. * For CPUs with transition latency > 10mS (mostly drivers * with CPUFREQ_ETERNAL), this governor will not work. * All times here are in uS. */ -static unsigned int def_sampling_rate; +static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) /* for correct statistics, we need at least 10 ticks between each measure */ #define MIN_STAT_SAMPLING_RATE \ @@ -63,12 +63,12 @@ static unsigned int def_sampling_rate; static void do_dbs_timer(struct work_struct *work); struct cpu_dbs_info_s { - struct cpufreq_policy *cur_policy; - unsigned int prev_cpu_idle_up; - unsigned int prev_cpu_idle_down; - unsigned int enable; - unsigned int down_skip; - unsigned int requested_freq; + struct cpufreq_policy *cur_policy; + unsigned int prev_cpu_idle_up; + unsigned int prev_cpu_idle_down; + unsigned int enable; + unsigned int down_skip; + unsigned int requested_freq; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); @@ -82,24 +82,24 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */ * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock * is recursive for the same process. -Venki */ -static DEFINE_MUTEX (dbs_mutex); +static DEFINE_MUTEX (dbs_mutex); static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer); struct dbs_tuners { - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; - unsigned int down_threshold; - unsigned int ignore_nice; - unsigned int freq_step; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int down_threshold; + unsigned int ignore_nice; + unsigned int freq_step; }; static struct dbs_tuners dbs_tuners_ins = { - .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, - .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, - .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, - .ignore_nice = 0, - .freq_step = 5, + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .ignore_nice = 0, + .freq_step = 5, }; static inline unsigned int get_cpu_idle_time(unsigned int cpu) @@ -109,13 +109,34 @@ static inline unsigned int get_cpu_idle_time(unsigned int cpu) if (dbs_tuners_ins.ignore_nice) add_nice = kstat_cpu(cpu).cpustat.nice; - ret = kstat_cpu(cpu).cpustat.idle + + ret = kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait + add_nice; return ret; } +/* keep track of frequency transitions */ +static int +dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, + freq->cpu); + + if (!this_dbs_info->enable) + return 0; + + this_dbs_info->requested_freq = freq->new; + + return 0; +} + +static struct notifier_block dbs_cpufreq_notifier_block = { + .notifier_call = dbs_cpufreq_notifier +}; + /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { @@ -127,8 +148,8 @@ static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); } -#define define_one_ro(_name) \ -static struct freq_attr _name = \ +#define define_one_ro(_name) \ +static struct freq_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) define_one_ro(sampling_rate_max); @@ -148,7 +169,7 @@ show_one(down_threshold, down_threshold); show_one(ignore_nice_load, ignore_nice); show_one(freq_step, freq_step); -static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, +static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, const char *buf, size_t count) { unsigned int input; @@ -164,7 +185,7 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, return count; } -static ssize_t store_sampling_rate(struct cpufreq_policy *unused, +static ssize_t store_sampling_rate(struct cpufreq_policy *unused, const char *buf, size_t count) { unsigned int input; @@ -183,7 +204,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, return count; } -static ssize_t store_up_threshold(struct cpufreq_policy *unused, +static ssize_t store_up_threshold(struct cpufreq_policy *unused, const char *buf, size_t count) { unsigned int input; @@ -202,7 +223,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_down_threshold(struct cpufreq_policy *unused, +static ssize_t store_down_threshold(struct cpufreq_policy *unused, const char *buf, size_t count) { unsigned int input; @@ -228,16 +249,16 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, int ret; unsigned int j; - - ret = sscanf (buf, "%u", &input); - if ( ret != 1 ) + + ret = sscanf(buf, "%u", &input); + if (ret != 1) return -EINVAL; - if ( input > 1 ) + if (input > 1) input = 1; - + mutex_lock(&dbs_mutex); - if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */ + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ mutex_unlock(&dbs_mutex); return count; } @@ -261,14 +282,14 @@ static ssize_t store_freq_step(struct cpufreq_policy *policy, unsigned int input; int ret; - ret = sscanf (buf, "%u", &input); + ret = sscanf(buf, "%u", &input); - if ( ret != 1 ) + if (ret != 1) return -EINVAL; - if ( input > 100 ) + if (input > 100) input = 100; - + /* no need to test here if freq_step is zero as the user might actually * want this, they would be crazy though :) */ mutex_lock(&dbs_mutex); @@ -322,18 +343,18 @@ static void dbs_check_cpu(int cpu) policy = this_dbs_info->cur_policy; - /* - * The default safe range is 20% to 80% + /* + * The default safe range is 20% to 80% * Every sampling_rate, we check - * - If current idle time is less than 20%, then we try to - * increase frequency + * - If current idle time is less than 20%, then we try to + * increase frequency * Every sampling_rate*sampling_down_factor, we check - * - If current idle time is more than 80%, then we try to - * decrease frequency + * - If current idle time is more than 80%, then we try to + * decrease frequency * - * Any frequency increase takes it to the maximum frequency. - * Frequency reduction happens at minimum steps of - * 5% (default) of max_frequency + * Any frequency increase takes it to the maximum frequency. + * Frequency reduction happens at minimum steps of + * 5% (default) of max_frequency */ /* Check for frequency increase */ @@ -361,13 +382,13 @@ static void dbs_check_cpu(int cpu) /* if we are already at full speed then break out early */ if (this_dbs_info->requested_freq == policy->max) return; - + freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; /* max freq cannot be less than 100. But who knows.... */ if (unlikely(freq_step == 0)) freq_step = 5; - + this_dbs_info->requested_freq += freq_step; if (this_dbs_info->requested_freq > policy->max) this_dbs_info->requested_freq = policy->max; @@ -427,15 +448,15 @@ static void dbs_check_cpu(int cpu) } static void do_dbs_timer(struct work_struct *work) -{ +{ int i; mutex_lock(&dbs_mutex); for_each_online_cpu(i) dbs_check_cpu(i); - schedule_delayed_work(&dbs_work, + schedule_delayed_work(&dbs_work, usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); mutex_unlock(&dbs_mutex); -} +} static inline void dbs_timer_init(void) { @@ -462,13 +483,12 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_START: - if ((!cpu_online(cpu)) || - (!policy->cur)) + if ((!cpu_online(cpu)) || (!policy->cur)) return -EINVAL; if (this_dbs_info->enable) /* Already enabled */ break; - + mutex_lock(&dbs_mutex); rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); @@ -481,7 +501,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; - + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu); j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; @@ -511,8 +531,11 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_tuners_ins.sampling_rate = def_sampling_rate; dbs_timer_init(); + cpufreq_register_notifier( + &dbs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); } - + mutex_unlock(&dbs_mutex); break; @@ -525,9 +548,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, * Stop the timerschedule work, when this governor * is used for first time */ - if (dbs_enable == 0) + if (dbs_enable == 0) { dbs_timer_exit(); - + cpufreq_unregister_notifier( + &dbs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + mutex_unlock(&dbs_mutex); break; @@ -537,11 +564,11 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (policy->max < this_dbs_info->cur_policy->cur) __cpufreq_driver_target( this_dbs_info->cur_policy, - policy->max, CPUFREQ_RELATION_H); + policy->max, CPUFREQ_RELATION_H); else if (policy->min > this_dbs_info->cur_policy->cur) __cpufreq_driver_target( this_dbs_info->cur_policy, - policy->min, CPUFREQ_RELATION_L); + policy->min, CPUFREQ_RELATION_L); mutex_unlock(&dbs_mutex); break; } |