aboutsummaryrefslogtreecommitdiff
path: root/kernel/perf_counter.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--kernel/perf_counter.c143
1 files changed, 126 insertions, 17 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 534e20d14d6..53abcbefa0b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -469,7 +469,8 @@ static void update_counter_times(struct perf_counter *counter)
struct perf_counter_context *ctx = counter->ctx;
u64 run_end;
- if (counter->state < PERF_COUNTER_STATE_INACTIVE)
+ if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
+ counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
return;
counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
@@ -518,7 +519,7 @@ static void __perf_counter_disable(void *info)
*/
if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
update_context_time(ctx);
- update_counter_times(counter);
+ update_group_times(counter);
if (counter == counter->group_leader)
group_sched_out(counter, cpuctx, ctx);
else
@@ -573,7 +574,7 @@ static void perf_counter_disable(struct perf_counter *counter)
* in, so we can change the state safely.
*/
if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
- update_counter_times(counter);
+ update_group_times(counter);
counter->state = PERF_COUNTER_STATE_OFF;
}
@@ -851,6 +852,27 @@ retry:
}
/*
+ * Put a counter into inactive state and update time fields.
+ * Enabling the leader of a group effectively enables all
+ * the group members that aren't explicitly disabled, so we
+ * have to update their ->tstamp_enabled also.
+ * Note: this works for group members as well as group leaders
+ * since the non-leader members' sibling_lists will be empty.
+ */
+static void __perf_counter_mark_enabled(struct perf_counter *counter,
+ struct perf_counter_context *ctx)
+{
+ struct perf_counter *sub;
+
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+ list_for_each_entry(sub, &counter->sibling_list, list_entry)
+ if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
+ sub->tstamp_enabled =
+ ctx->time - sub->total_time_enabled;
+}
+
+/*
* Cross CPU call to enable a performance counter
*/
static void __perf_counter_enable(void *info)
@@ -877,8 +899,7 @@ static void __perf_counter_enable(void *info)
if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
goto unlock;
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+ __perf_counter_mark_enabled(counter, ctx);
/*
* If the counter is in a group and isn't the group leader,
@@ -971,11 +992,9 @@ static void perf_counter_enable(struct perf_counter *counter)
* Since we have the lock this context can't be scheduled
* in, so we can change the state safely.
*/
- if (counter->state == PERF_COUNTER_STATE_OFF) {
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled =
- ctx->time - counter->total_time_enabled;
- }
+ if (counter->state == PERF_COUNTER_STATE_OFF)
+ __perf_counter_mark_enabled(counter, ctx);
+
out:
spin_unlock_irq(&ctx->lock);
}
@@ -1479,9 +1498,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
counter->attr.enable_on_exec = 0;
if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
continue;
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled =
- ctx->time - counter->total_time_enabled;
+ __perf_counter_mark_enabled(counter, ctx);
enabled = 1;
}
@@ -1503,10 +1520,21 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
*/
static void __perf_counter_read(void *info)
{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_counter *counter = info;
struct perf_counter_context *ctx = counter->ctx;
unsigned long flags;
+ /*
+ * If this is a task context, we need to check whether it is
+ * the current task context of this cpu. If not it has been
+ * scheduled out before the smp call arrived. In that case
+ * counter->count would have been updated to a recent sample
+ * when the counter was scheduled out.
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
local_irq_save(flags);
if (ctx->is_active)
update_context_time(ctx);
@@ -1664,6 +1692,11 @@ static void free_counter(struct perf_counter *counter)
atomic_dec(&nr_task_counters);
}
+ if (counter->output) {
+ fput(counter->output->filp);
+ counter->output = NULL;
+ }
+
if (counter->destroy)
counter->destroy(counter);
@@ -1780,7 +1813,7 @@ static int perf_counter_read_group(struct perf_counter *counter,
size += err;
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- err = perf_counter_read_entry(counter, read_format,
+ err = perf_counter_read_entry(sub, read_format,
buf + size);
if (err < 0)
return err;
@@ -1949,6 +1982,8 @@ unlock:
return ret;
}
+int perf_counter_set_output(struct perf_counter *counter, int output_fd);
+
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct perf_counter *counter = file->private_data;
@@ -1972,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case PERF_COUNTER_IOC_PERIOD:
return perf_counter_period(counter, (u64 __user *)arg);
+ case PERF_COUNTER_IOC_SET_OUTPUT:
+ return perf_counter_set_output(counter, arg);
+
default:
return -ENOTTY;
}
@@ -2008,6 +2046,10 @@ int perf_counter_task_disable(void)
return 0;
}
+#ifndef PERF_COUNTER_INDEX_OFFSET
+# define PERF_COUNTER_INDEX_OFFSET 0
+#endif
+
static int perf_counter_index(struct perf_counter *counter)
{
if (counter->state != PERF_COUNTER_STATE_ACTIVE)
@@ -2238,6 +2280,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
WARN_ON_ONCE(counter->ctx->parent_ctx);
mutex_lock(&counter->mmap_mutex);
+ if (counter->output) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
if (atomic_inc_not_zero(&counter->mmap_count)) {
if (nr_pages != counter->data->nr_pages)
ret = -EINVAL;
@@ -2623,6 +2670,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
struct perf_counter *counter, unsigned int size,
int nmi, int sample)
{
+ struct perf_counter *output_counter;
struct perf_mmap_data *data;
unsigned int offset, head;
int have_lost;
@@ -2632,13 +2680,17 @@ static int perf_output_begin(struct perf_output_handle *handle,
u64 lost;
} lost_event;
+ rcu_read_lock();
/*
* For inherited counters we send all the output towards the parent.
*/
if (counter->parent)
counter = counter->parent;
- rcu_read_lock();
+ output_counter = rcu_dereference(counter->output);
+ if (output_counter)
+ counter = output_counter;
+
data = rcu_dereference(counter->data);
if (!data)
goto out;
@@ -4186,6 +4238,57 @@ err_size:
goto out;
}
+int perf_counter_set_output(struct perf_counter *counter, int output_fd)
+{
+ struct perf_counter *output_counter = NULL;
+ struct file *output_file = NULL;
+ struct perf_counter *old_output;
+ int fput_needed = 0;
+ int ret = -EINVAL;
+
+ if (!output_fd)
+ goto set;
+
+ output_file = fget_light(output_fd, &fput_needed);
+ if (!output_file)
+ return -EBADF;
+
+ if (output_file->f_op != &perf_fops)
+ goto out;
+
+ output_counter = output_file->private_data;
+
+ /* Don't chain output fds */
+ if (output_counter->output)
+ goto out;
+
+ /* Don't set an output fd when we already have an output channel */
+ if (counter->data)
+ goto out;
+
+ atomic_long_inc(&output_file->f_count);
+
+set:
+ mutex_lock(&counter->mmap_mutex);
+ old_output = counter->output;
+ rcu_assign_pointer(counter->output, output_counter);
+ mutex_unlock(&counter->mmap_mutex);
+
+ if (old_output) {
+ /*
+ * we need to make sure no existing perf_output_*()
+ * is still referencing this counter.
+ */
+ synchronize_rcu();
+ fput(old_output->filp);
+ }
+
+ ret = 0;
+out:
+ fput_light(output_file, fput_needed);
+ return ret;
+}
+
/**
* sys_perf_counter_open - open a performance counter, associate it to a task/cpu
*
@@ -4208,7 +4311,7 @@ SYSCALL_DEFINE5(perf_counter_open,
int ret;
/* for future expandability... */
- if (flags)
+ if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
return -EINVAL;
ret = perf_copy_attr(attr_uptr, &attr);
@@ -4236,7 +4339,7 @@ SYSCALL_DEFINE5(perf_counter_open,
* Look up the group leader (we will attach this counter to it):
*/
group_leader = NULL;
- if (group_fd != -1) {
+ if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
ret = -EINVAL;
group_file = fget_light(group_fd, &fput_needed);
if (!group_file)
@@ -4278,6 +4381,12 @@ SYSCALL_DEFINE5(perf_counter_open,
if (!counter_file)
goto err_free_put_context;
+ if (flags & PERF_FLAG_FD_OUTPUT) {
+ ret = perf_counter_set_output(counter, group_fd);
+ if (ret)
+ goto err_free_put_context;
+ }
+
counter->filp = counter_file;
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);