diff options
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 423 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 55 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 227 |
3 files changed, 399 insertions, 306 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6e2ec0b1894..3938c781709 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -61,6 +61,8 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock); * are concerned with are online after they get the lock. * - Governor routines that can be called in cpufreq hotplug path should not * take this sem as top level hotplug notifier handler takes this. + * - Lock should not be held across + * __cpufreq_governor(data, CPUFREQ_GOV_STOP); */ static DEFINE_PER_CPU(int, policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); @@ -686,6 +688,9 @@ static struct attribute *default_attrs[] = { NULL }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -756,92 +761,19 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; - -/** - * cpufreq_add_dev - add a CPU device - * - * Adds the cpufreq interface for a CPU device. +/* + * Returns: + * Negative: Failure + * 0: Success + * Positive: When we have a managed CPU and the sysfs got symlinked */ -static int cpufreq_add_dev(struct sys_device *sys_dev) +int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, + struct sys_device *sys_dev) { - unsigned int cpu = sys_dev->id; int ret = 0; - struct cpufreq_policy new_policy; - struct cpufreq_policy *policy; - struct freq_attr **drv_attr; - struct sys_device *cpu_sys_dev; +#ifdef CONFIG_SMP unsigned long flags; unsigned int j; -#ifdef CONFIG_SMP - struct cpufreq_policy *managed_policy; -#endif - - if (cpu_is_offline(cpu)) - return 0; - - cpufreq_debug_disable_ratelimit(); - dprintk("adding CPU %u\n", cpu); - -#ifdef CONFIG_SMP - /* check whether a different CPU already registered this - * CPU because it is in the same boat. */ - policy = cpufreq_cpu_get(cpu); - if (unlikely(policy)) { - cpufreq_cpu_put(policy); - cpufreq_debug_enable_ratelimit(); - return 0; - } -#endif - - if (!try_module_get(cpufreq_driver->owner)) { - ret = -EINVAL; - goto module_out; - } - - policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); - if (!policy) { - ret = -ENOMEM; - goto nomem_out; - } - if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { - kfree(policy); - ret = -ENOMEM; - goto nomem_out; - } - if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { - free_cpumask_var(policy->cpus); - kfree(policy); - ret = -ENOMEM; - goto nomem_out; - } - - policy->cpu = cpu; - cpumask_copy(policy->cpus, cpumask_of(cpu)); - - /* Initially set CPU itself as the policy_cpu */ - per_cpu(policy_cpu, cpu) = cpu; - lock_policy_rwsem_write(cpu); - - init_completion(&policy->kobj_unregister); - INIT_WORK(&policy->update, handle_update); - - /* Set governor before ->init, so that driver could check it */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - /* call driver. From then on the cpufreq must be able - * to accept all calls to ->verify and ->setpolicy for this CPU - */ - ret = cpufreq_driver->init(policy); - if (ret) { - dprintk("initialization failed\n"); - goto err_out; - } - policy->user_policy.min = policy->min; - policy->user_policy.max = policy->max; - - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_START, policy); - -#ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU if (per_cpu(cpufreq_cpu_governor, cpu)) { @@ -852,21 +784,30 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) #endif for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + if (cpu == j) continue; /* Check for existing affected CPUs. * They may not be aware of it due to CPU Hotplug. + * cpufreq_cpu_put is called when the device is removed + * in __cpufreq_remove_dev() */ - managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */ + managed_policy = cpufreq_cpu_get(j); if (unlikely(managed_policy)) { /* Set proper policy_cpu */ unlock_policy_rwsem_write(cpu); per_cpu(policy_cpu, cpu) = managed_policy->cpu; - if (lock_policy_rwsem_write(cpu) < 0) - goto err_out_driver_exit; + if (lock_policy_rwsem_write(cpu) < 0) { + /* Should not go through policy unlock path */ + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + cpufreq_cpu_put(managed_policy); + return -EBUSY; + } spin_lock_irqsave(&cpufreq_driver_lock, flags); cpumask_copy(managed_policy->cpus, policy->cpus); @@ -878,66 +819,104 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) &managed_policy->kobj, "cpufreq"); if (ret) - goto err_out_driver_exit; - - cpufreq_debug_enable_ratelimit(); - ret = 0; - goto err_out_driver_exit; /* call driver->exit() */ + cpufreq_cpu_put(managed_policy); + /* + * Success. We only needed to be added to the mask. + * Call driver->exit() because only the cpu parent of + * the kobj needed to call init(). + */ + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + + if (!ret) + return 1; + else + return ret; } } #endif - memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + return ret; +} + + +/* symlink affected CPUs */ +int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) +{ + unsigned int j; + int ret = 0; + + for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + struct sys_device *cpu_sys_dev; + + if (j == cpu) + continue; + if (!cpu_online(j)) + continue; + + dprintk("CPU %u already managed, adding link\n", j); + managed_policy = cpufreq_cpu_get(cpu); + cpu_sys_dev = get_cpu_sysdev(j); + ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, + "cpufreq"); + if (ret) { + cpufreq_cpu_put(managed_policy); + return ret; + } + } + return ret; +} + +int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, + struct sys_device *sys_dev) +{ + struct cpufreq_policy new_policy; + struct freq_attr **drv_attr; + unsigned long flags; + int ret = 0; + unsigned int j; /* prepare interface data */ - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, - "cpufreq"); + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + &sys_dev->kobj, "cpufreq"); if (ret) - goto err_out_driver_exit; + return ret; /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; while ((drv_attr) && (*drv_attr)) { ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); if (ret) - goto err_out_driver_exit; + goto err_out_kobj_put; drv_attr++; } if (cpufreq_driver->get) { ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); if (ret) - goto err_out_driver_exit; + goto err_out_kobj_put; } if (cpufreq_driver->target) { ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); if (ret) - goto err_out_driver_exit; + goto err_out_kobj_put; } spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) { + if (!cpu_online(j)) + continue; per_cpu(cpufreq_cpu_data, j) = policy; per_cpu(policy_cpu, j) = policy->cpu; } spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - /* symlink affected CPUs */ - for_each_cpu(j, policy->cpus) { - if (j == cpu) - continue; - if (!cpu_online(j)) - continue; - - dprintk("CPU %u already managed, adding link\n", j); - cpufreq_cpu_get(cpu); - cpu_sys_dev = get_cpu_sysdev(j); - ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, - "cpufreq"); - if (ret) - goto err_out_unregister; - } + ret = cpufreq_add_dev_symlink(cpu, policy); + if (ret) + goto err_out_kobj_put; - policy->governor = NULL; /* to assure that the starting sequence is - * run in cpufreq_set_policy */ + memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + /* assure that the starting sequence is run in __cpufreq_set_policy */ + policy->governor = NULL; /* set default policy */ ret = __cpufreq_set_policy(policy, &new_policy); @@ -946,9 +925,108 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) if (ret) { dprintk("setting policy failed\n"); - goto err_out_unregister; + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + } + return ret; + +err_out_kobj_put: + kobject_put(&policy->kobj); + wait_for_completion(&policy->kobj_unregister); + return ret; +} + + +/** + * cpufreq_add_dev - add a CPU device + * + * Adds the cpufreq interface for a CPU device. + * + * The Oracle says: try running cpufreq registration/unregistration concurrently + * with with cpu hotplugging and all hell will break loose. Tried to clean this + * mess up, but more thorough testing is needed. - Mathieu + */ +static int cpufreq_add_dev(struct sys_device *sys_dev) +{ + unsigned int cpu = sys_dev->id; + int ret = 0; + struct cpufreq_policy *policy; + unsigned long flags; + unsigned int j; + + if (cpu_is_offline(cpu)) + return 0; + + cpufreq_debug_disable_ratelimit(); + dprintk("adding CPU %u\n", cpu); + +#ifdef CONFIG_SMP + /* check whether a different CPU already registered this + * CPU because it is in the same boat. */ + policy = cpufreq_cpu_get(cpu); + if (unlikely(policy)) { + cpufreq_cpu_put(policy); + cpufreq_debug_enable_ratelimit(); + return 0; + } +#endif + + if (!try_module_get(cpufreq_driver->owner)) { + ret = -EINVAL; + goto module_out; } + ret = -ENOMEM; + policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); + if (!policy) + goto nomem_out; + + if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) + goto err_free_policy; + + if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) + goto err_free_cpumask; + + policy->cpu = cpu; + cpumask_copy(policy->cpus, cpumask_of(cpu)); + + /* Initially set CPU itself as the policy_cpu */ + per_cpu(policy_cpu, cpu) = cpu; + ret = (lock_policy_rwsem_write(cpu) < 0); + WARN_ON(ret); + + init_completion(&policy->kobj_unregister); + INIT_WORK(&policy->update, handle_update); + + /* Set governor before ->init, so that driver could check it */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + /* call driver. From then on the cpufreq must be able + * to accept all calls to ->verify and ->setpolicy for this CPU + */ + ret = cpufreq_driver->init(policy); + if (ret) { + dprintk("initialization failed\n"); + goto err_unlock_policy; + } + policy->user_policy.min = policy->min; + policy->user_policy.max = policy->max; + + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + + ret = cpufreq_add_dev_policy(cpu, policy, sys_dev); + if (ret) { + if (ret > 0) + /* This is a managed cpu, symlink created, + exit with 0 */ + ret = 0; + goto err_unlock_policy; + } + + ret = cpufreq_add_dev_interface(cpu, policy, sys_dev); + if (ret) + goto err_out_unregister; + unlock_policy_rwsem_write(cpu); kobject_uevent(&policy->kobj, KOBJ_ADD); @@ -968,14 +1046,12 @@ err_out_unregister: kobject_put(&policy->kobj); wait_for_completion(&policy->kobj_unregister); -err_out_driver_exit: - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - -err_out: +err_unlock_policy: unlock_policy_rwsem_write(cpu); +err_free_cpumask: + free_cpumask_var(policy->cpus); +err_free_policy: kfree(policy); - nomem_out: module_put(cpufreq_driver->owner); module_out: @@ -1070,8 +1146,6 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) spin_unlock_irqrestore(&cpufreq_driver_lock, flags); #endif - unlock_policy_rwsem_write(cpu); - if (cpufreq_driver->target) __cpufreq_governor(data, CPUFREQ_GOV_STOP); @@ -1088,6 +1162,8 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) if (cpufreq_driver->exit) cpufreq_driver->exit(data); + unlock_policy_rwsem_write(cpu); + free_cpumask_var(data->related_cpus); free_cpumask_var(data->cpus); kfree(data); @@ -1228,9 +1304,9 @@ EXPORT_SYMBOL(cpufreq_get); static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) { - int cpu = sysdev->id; int ret = 0; - unsigned int cur_freq = 0; + + int cpu = sysdev->id; struct cpufreq_policy *cpu_policy; dprintk("suspending cpu %u\n", cpu); @@ -1253,42 +1329,9 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) if (cpufreq_driver->suspend) { ret = cpufreq_driver->suspend(cpu_policy, pmsg); - if (ret) { + if (ret) printk(KERN_ERR "cpufreq: suspend failed in ->suspend " "step on CPU %u\n", cpu_policy->cpu); - goto out; - } - } - - if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) - goto out; - - if (cpufreq_driver->get) - cur_freq = cpufreq_driver->get(cpu_policy->cpu); - - if (!cur_freq || !cpu_policy->cur) { - printk(KERN_ERR "cpufreq: suspend failed to assert current " - "frequency is what timing core thinks it is.\n"); - goto out; - } - - if (unlikely(cur_freq != cpu_policy->cur)) { - struct cpufreq_freqs freqs; - - if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) - dprintk("Warning: CPU frequency is %u, " - "cpufreq assumed %u kHz.\n", - cur_freq, cpu_policy->cur); - - freqs.cpu = cpu; - freqs.old = cpu_policy->cur; - freqs.new = cur_freq; - - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_SUSPENDCHANGE, &freqs); - adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs); - - cpu_policy->cur = cur_freq; } out: @@ -1300,14 +1343,17 @@ out: * cpufreq_resume - restore proper CPU frequency handling after resume * * 1.) resume CPUfreq hardware support (cpufreq_driver->resume()) - * 2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync - * 3.) schedule call cpufreq_update_policy() ASAP as interrupts are - * restored. + * 2.) schedule call cpufreq_update_policy() ASAP as interrupts are + * restored. It will verify that the current freq is in sync with + * what we believe it to be. This is a bit later than when it + * should be, but nonethteless it's better than calling + * cpufreq_driver->get() here which might re-enable interrupts... */ static int cpufreq_resume(struct sys_device *sysdev) { - int cpu = sysdev->id; int ret = 0; + + int cpu = sysdev->id; struct cpufreq_policy *cpu_policy; dprintk("resuming cpu %u\n", cpu); @@ -1337,42 +1383,8 @@ static int cpufreq_resume(struct sys_device *sysdev) } } - if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { - unsigned int cur_freq = 0; - - if (cpufreq_driver->get) - cur_freq = cpufreq_driver->get(cpu_policy->cpu); - - if (!cur_freq || !cpu_policy->cur) { - printk(KERN_ERR "cpufreq: resume failed to assert " - "current frequency is what timing core " - "thinks it is.\n"); - goto out; - } - - if (unlikely(cur_freq != cpu_policy->cur)) { - struct cpufreq_freqs freqs; - - if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) - dprintk("Warning: CPU frequency " - "is %u, cpufreq assumed %u kHz.\n", - cur_freq, cpu_policy->cur); - - freqs.cpu = cpu; - freqs.old = cpu_policy->cur; - freqs.new = cur_freq; - - srcu_notifier_call_chain( - &cpufreq_transition_notifier_list, - CPUFREQ_RESUMECHANGE, &freqs); - adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs); - - cpu_policy->cur = cur_freq; - } - } - -out: schedule_work(&cpu_policy->update); + fail: cpufreq_cpu_put(cpu_policy); return ret; @@ -1697,8 +1709,17 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, dprintk("governor switch\n"); /* end old governor */ - if (data->governor) + if (data->governor) { + /* + * Need to release the rwsem around governor + * stop due to lock dependency between + * cancel_delayed_work_sync and the read lock + * taken in the delayed work handler. + */ + unlock_policy_rwsem_write(data->cpu); __cpufreq_governor(data, CPUFREQ_GOV_STOP); + lock_policy_rwsem_write(data->cpu); + } /* start new governor */ data->governor = policy->governor; @@ -1928,7 +1949,11 @@ static int __init cpufreq_core_init(void) per_cpu(policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } + + cpufreq_global_kobject = kobject_create_and_add("cpufreq", + &cpu_sysdev_class.kset.kobj); + BUG_ON(!cpufreq_global_kobject); + return 0; } - core_initcall(cpufreq_core_init); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 7fc58af748b..bc33ddc9c97 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -64,21 +64,20 @@ struct cpu_dbs_info_s { unsigned int requested_freq; int cpu; unsigned int enable:1; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; }; -static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); +static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info); static unsigned int dbs_enable; /* number of CPUs using this policy */ /* - * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug - * lock and dbs_mutex. cpu_hotplug lock should always be held before - * dbs_mutex. If any function that can potentially take cpu_hotplug lock - * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then - * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock - * is recursive for the same process. -Venki - * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it - * would deadlock with cancel_delayed_work_sync(), which is needed for proper - * raceless workqueue teardown. + * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on + * different CPUs. It protects dbs_enable in governor start/stop. */ static DEFINE_MUTEX(dbs_mutex); @@ -138,7 +137,7 @@ dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, + struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info, freq->cpu); struct cpufreq_policy *policy; @@ -298,7 +297,7 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { struct cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(cpu_dbs_info, j); + dbs_info = &per_cpu(cs_cpu_dbs_info, j); dbs_info->prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); if (dbs_tuners_ins.ignore_nice) @@ -388,7 +387,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) cputime64_t cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; - j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info = &per_cpu(cs_cpu_dbs_info, j); cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); @@ -488,18 +487,12 @@ static void do_dbs_timer(struct work_struct *work) delay -= jiffies % delay; - if (lock_policy_rwsem_write(cpu) < 0) - return; - - if (!dbs_info->enable) { - unlock_policy_rwsem_write(cpu); - return; - } + mutex_lock(&dbs_info->timer_mutex); dbs_check_cpu(dbs_info); queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay); - unlock_policy_rwsem_write(cpu); + mutex_unlock(&dbs_info->timer_mutex); } static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) @@ -528,16 +521,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int j; int rc; - this_dbs_info = &per_cpu(cpu_dbs_info, cpu); + this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); switch (event) { case CPUFREQ_GOV_START: if ((!cpu_online(cpu)) || (!policy->cur)) return -EINVAL; - if (this_dbs_info->enable) /* Already enabled */ - break; - mutex_lock(&dbs_mutex); rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); @@ -548,7 +538,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, for_each_cpu(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; - j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info = &per_cpu(cs_cpu_dbs_info, j); j_dbs_info->cur_policy = policy; j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, @@ -561,6 +551,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, this_dbs_info->down_skip = 0; this_dbs_info->requested_freq = policy->cur; + mutex_init(&this_dbs_info->timer_mutex); dbs_enable++; /* * Start the timerschedule work, when this governor @@ -590,17 +581,19 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, &dbs_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } - dbs_timer_init(this_dbs_info); - mutex_unlock(&dbs_mutex); + dbs_timer_init(this_dbs_info); + break; case CPUFREQ_GOV_STOP: - mutex_lock(&dbs_mutex); dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); sysfs_remove_group(&policy->kobj, &dbs_attr_group); dbs_enable--; + mutex_destroy(&this_dbs_info->timer_mutex); /* * Stop the timerschedule work, when this governor @@ -616,7 +609,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, break; case CPUFREQ_GOV_LIMITS: - mutex_lock(&dbs_mutex); + mutex_lock(&this_dbs_info->timer_mutex); if (policy->max < this_dbs_info->cur_policy->cur) __cpufreq_driver_target( this_dbs_info->cur_policy, @@ -625,7 +618,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, __cpufreq_driver_target( this_dbs_info->cur_policy, policy->min, CPUFREQ_RELATION_L); - mutex_unlock(&dbs_mutex); + mutex_unlock(&this_dbs_info->timer_mutex); break; } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 1911d172935..071699de50e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -55,6 +55,18 @@ static unsigned int min_sampling_rate; #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static +#endif +struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; /* Sampling types */ enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; @@ -70,23 +82,21 @@ struct cpu_dbs_info_s { unsigned int freq_lo_jiffies; unsigned int freq_hi_jiffies; int cpu; - unsigned int enable:1, - sample_type:1; + unsigned int sample_type:1; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; }; -static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); +static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); static unsigned int dbs_enable; /* number of CPUs using this policy */ /* - * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug - * lock and dbs_mutex. cpu_hotplug lock should always be held before - * dbs_mutex. If any function that can potentially take cpu_hotplug lock - * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then - * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock - * is recursive for the same process. -Venki - * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it - * would deadlock with cancel_delayed_work_sync(), which is needed for proper - * raceless workqueue teardown. + * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on + * different CPUs. It protects dbs_enable in governor start/stop. */ static DEFINE_MUTEX(dbs_mutex); @@ -151,7 +161,8 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_hi, freq_lo; unsigned int index = 0; unsigned int jiffies_total, jiffies_hi, jiffies_lo; - struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu); + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + policy->cpu); if (!dbs_info->freq_table) { dbs_info->freq_lo = 0; @@ -192,31 +203,39 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, return freq_hi; } +static void ondemand_powersave_bias_init_cpu(int cpu) +{ + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; +} + static void ondemand_powersave_bias_init(void) { int i; for_each_online_cpu(i) { - struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i); - dbs_info->freq_table = cpufreq_frequency_get_table(i); - dbs_info->freq_lo = 0; + ondemand_powersave_bias_init_cpu(i); } } /************************** sysfs interface ************************/ -static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) + +static ssize_t show_sampling_rate_max(struct kobject *kobj, + struct attribute *attr, char *buf) { printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " "sysfs file is deprecated - used by: %s\n", current->comm); return sprintf(buf, "%u\n", -1U); } -static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) { return sprintf(buf, "%u\n", min_sampling_rate); } #define define_one_ro(_name) \ -static struct freq_attr _name = \ +static struct global_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) define_one_ro(sampling_rate_max); @@ -225,7 +244,7 @@ define_one_ro(sampling_rate_min); /* cpufreq_ondemand Governor Tunables */ #define show_one(file_name, object) \ static ssize_t show_##file_name \ -(struct cpufreq_policy *unused, char *buf) \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ } @@ -234,46 +253,73 @@ show_one(up_threshold, up_threshold); show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); -static ssize_t store_sampling_rate(struct cpufreq_policy *unused, - const char *buf, size_t count) +/*** delete after deprecation time ***/ + +#define DEPRECATION_MSG(file_name) \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " #file_name "\n"); + +#define show_one_old(file_name) \ +static ssize_t show_##file_name##_old \ +(struct cpufreq_policy *unused, char *buf) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return show_##file_name(NULL, NULL, buf); \ +} +show_one_old(sampling_rate); +show_one_old(up_threshold); +show_one_old(ignore_nice_load); +show_one_old(powersave_bias); +show_one_old(sampling_rate_min); +show_one_old(sampling_rate_max); + +#define define_one_ro_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0444, show_##_name##_old, NULL) + +define_one_ro_old(sampling_rate_min_old, sampling_rate_min); +define_one_ro_old(sampling_rate_max_old, sampling_rate_max); + +/*** delete after deprecation time ***/ + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; mutex_lock(&dbs_mutex); - if (ret != 1) { - mutex_unlock(&dbs_mutex); - return -EINVAL; - } dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); mutex_unlock(&dbs_mutex); return count; } -static ssize_t store_up_threshold(struct cpufreq_policy *unused, - const char *buf, size_t count) +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; ret = sscanf(buf, "%u", &input); - mutex_lock(&dbs_mutex); if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || input < MIN_FREQUENCY_UP_THRESHOLD) { - mutex_unlock(&dbs_mutex); return -EINVAL; } + mutex_lock(&dbs_mutex); dbs_tuners_ins.up_threshold = input; mutex_unlock(&dbs_mutex); return count; } -static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, - const char *buf, size_t count) +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -297,7 +343,7 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { struct cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(cpu_dbs_info, j); + dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); if (dbs_tuners_ins.ignore_nice) @@ -309,8 +355,8 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, return count; } -static ssize_t store_powersave_bias(struct cpufreq_policy *unused, - const char *buf, size_t count) +static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -331,7 +377,7 @@ static ssize_t store_powersave_bias(struct cpufreq_policy *unused, } #define define_one_rw(_name) \ -static struct freq_attr _name = \ +static struct global_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); @@ -354,6 +400,47 @@ static struct attribute_group dbs_attr_group = { .name = "ondemand", }; +/*** delete after deprecation time ***/ + +#define write_one_old(file_name) \ +static ssize_t store_##file_name##_old \ +(struct cpufreq_policy *unused, const char *buf, size_t count) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return store_##file_name(NULL, NULL, buf, count); \ +} +write_one_old(sampling_rate); +write_one_old(up_threshold); +write_one_old(ignore_nice_load); +write_one_old(powersave_bias); + +#define define_one_rw_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old) + +define_one_rw_old(sampling_rate_old, sampling_rate); +define_one_rw_old(up_threshold_old, up_threshold); +define_one_rw_old(ignore_nice_load_old, ignore_nice_load); +define_one_rw_old(powersave_bias_old, powersave_bias); + +static struct attribute *dbs_attributes_old[] = { + &sampling_rate_max_old.attr, + &sampling_rate_min_old.attr, + &sampling_rate_old.attr, + &up_threshold_old.attr, + &ignore_nice_load_old.attr, + &powersave_bias_old.attr, + NULL +}; + +static struct attribute_group dbs_attr_group_old = { + .attrs = dbs_attributes_old, + .name = "ondemand", +}; + +/*** delete after deprecation time ***/ + /************************** sysfs end ************************/ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) @@ -363,9 +450,6 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) struct cpufreq_policy *policy; unsigned int j; - if (!this_dbs_info->enable) - return; - this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; @@ -391,7 +475,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) unsigned int load, load_freq; int freq_avg; - j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); @@ -493,14 +577,7 @@ static void do_dbs_timer(struct work_struct *work) int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); delay -= jiffies % delay; - - if (lock_policy_rwsem_write(cpu) < 0) - return; - - if (!dbs_info->enable) { - unlock_policy_rwsem_write(cpu); - return; - } + mutex_lock(&dbs_info->timer_mutex); /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = DBS_NORMAL_SAMPLE; @@ -517,7 +594,7 @@ static void do_dbs_timer(struct work_struct *work) dbs_info->freq_lo, CPUFREQ_RELATION_H); } queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); - unlock_policy_rwsem_write(cpu); + mutex_unlock(&dbs_info->timer_mutex); } static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) @@ -526,8 +603,6 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); delay -= jiffies % delay; - dbs_info->enable = 1; - ondemand_powersave_bias_init(); dbs_info->sample_type = DBS_NORMAL_SAMPLE; INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work, @@ -536,7 +611,6 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { - dbs_info->enable = 0; cancel_delayed_work_sync(&dbs_info->work); } @@ -548,29 +622,25 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int j; int rc; - this_dbs_info = &per_cpu(cpu_dbs_info, cpu); + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); switch (event) { case CPUFREQ_GOV_START: if ((!cpu_online(cpu)) || (!policy->cur)) return -EINVAL; - if (this_dbs_info->enable) /* Already enabled */ - break; - mutex_lock(&dbs_mutex); - dbs_enable++; - rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); + rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old); if (rc) { - dbs_enable--; mutex_unlock(&dbs_mutex); return rc; } + dbs_enable++; for_each_cpu(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; - j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); j_dbs_info->cur_policy = policy; j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, @@ -581,12 +651,21 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, } } this_dbs_info->cpu = cpu; + ondemand_powersave_bias_init_cpu(cpu); /* * Start the timerschedule work, when this governor * is used for first time */ if (dbs_enable == 1) { unsigned int latency; + + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + /* policy latency is in nS. Convert it to uS first */ latency = policy->cpuinfo.transition_latency / 1000; if (latency == 0) @@ -598,44 +677,40 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, max(min_sampling_rate, latency * LATENCY_MULTIPLIER); } - dbs_timer_init(this_dbs_info); - mutex_unlock(&dbs_mutex); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); break; case CPUFREQ_GOV_STOP: - mutex_lock(&dbs_mutex); dbs_timer_exit(this_dbs_info); - sysfs_remove_group(&policy->kobj, &dbs_attr_group); + + mutex_lock(&dbs_mutex); + sysfs_remove_group(&policy->kobj, &dbs_attr_group_old); + mutex_destroy(&this_dbs_info->timer_mutex); dbs_enable--; mutex_unlock(&dbs_mutex); + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); break; case CPUFREQ_GOV_LIMITS: - mutex_lock(&dbs_mutex); + mutex_lock(&this_dbs_info->timer_mutex); if (policy->max < this_dbs_info->cur_policy->cur) __cpufreq_driver_target(this_dbs_info->cur_policy, policy->max, CPUFREQ_RELATION_H); else if (policy->min > this_dbs_info->cur_policy->cur) __cpufreq_driver_target(this_dbs_info->cur_policy, policy->min, CPUFREQ_RELATION_L); - mutex_unlock(&dbs_mutex); + mutex_unlock(&this_dbs_info->timer_mutex); break; } return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif -struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { int err; |