aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2010-01-21 17:39:01 +0200
committerIngo Molnar <mingo@elte.hu>2010-01-29 09:01:34 +0100
commit8113070d6639d2245c6c79afb8df42cedab30540 (patch)
treee3735ea7f38bbfffcbda5e7610ee7f13ff5e7c83 /arch/x86
parent1da53e023029c067ba1277a33038c65d6e4c99b3 (diff)
perf_events: Add fast-path to the rescheduling code
Implement correct fastpath scheduling, i.e., reuse previous assignment. Signed-off-by: Stephane Eranian <eranian@google.com> [ split from larger patch] Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <4b588464.1818d00a.4456.383b@mx.google.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/cpu/perf_event.c91
1 file changed, 61 insertions, 30 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 995ac4ae379..0bd23d01af3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1245,6 +1245,46 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
}
/*
+ * fastpath, try to reuse previous register
+ */
+ for (i = 0, num = n; i < n; i++, num--) {
+ hwc = &cpuc->event_list[i]->hw;
+ c = (unsigned long *)constraints[i];
+
+ /* never assigned */
+ if (hwc->idx == -1)
+ break;
+
+ /* constraint still honored */
+ if (!test_bit(hwc->idx, c))
+ break;
+
+ /* not already used */
+ if (test_bit(hwc->idx, used_mask))
+ break;
+
+#if 0
+ pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
+ smp_processor_id(),
+ hwc->config,
+ hwc->idx,
+ assign ? 'y' : 'n');
+#endif
+
+ set_bit(hwc->idx, used_mask);
+ if (assign)
+ assign[i] = hwc->idx;
+ }
+ if (!num)
+ goto done;
+
+ /*
+ * begin slow path
+ */
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+ /*
* weight = number of possible counters
*
* 1 = most constrained, only works on one counter
@@ -1263,10 +1303,9 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (x86_pmu.num_events_fixed)
wmax++;
- num = n;
- for (w = 1; num && w <= wmax; w++) {
+ for (w = 1, num = n; num && w <= wmax; w++) {
/* for each event */
- for (i = 0; i < n; i++) {
+ for (i = 0; num && i < n; i++) {
c = (unsigned long *)constraints[i];
hwc = &cpuc->event_list[i]->hw;
@@ -1274,28 +1313,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (weight != w)
continue;
- /*
- * try to reuse previous assignment
- *
- * This is possible despite the fact that
- * events or events order may have changed.
- *
- * What matters is the level of constraints
- * of an event and this is constant for now.
- *
- * This is possible also because we always
- * scan from most to least constrained. Thus,
- * if a counter can be reused, it means no,
- * more constrained events, needed it. And
- * next events will either compete for it
- * (which cannot be solved anyway) or they
- * have fewer constraints, and they can use
- * another counter.
- */
- j = hwc->idx;
- if (j != -1 && !test_bit(j, used_mask))
- goto skip;
-
for_each_bit(j, c, X86_PMC_IDX_MAX) {
if (!test_bit(j, used_mask))
break;
@@ -1303,22 +1320,23 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (j == X86_PMC_IDX_MAX)
break;
-skip:
- set_bit(j, used_mask);
#if 0
- pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n",
+ pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
smp_processor_id(),
hwc->config,
j,
assign ? 'y' : 'n');
#endif
+ set_bit(j, used_mask);
+
if (assign)
assign[i] = j;
num--;
}
}
+done:
/*
* scheduling failed or is just a simulation,
* free resources if necessary
@@ -1357,7 +1375,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
list_for_each_entry(event, &leader->sibling_list, group_entry) {
if (!is_x86_event(event) ||
- event->state == PERF_EVENT_STATE_OFF)
+ event->state <= PERF_EVENT_STATE_OFF)
continue;
if (n >= max_count)
@@ -2184,6 +2202,8 @@ static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event,
u64 *idxmsk)
{
+ /* no constraints, means supports all generic counters */
+ bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
}
static int x86_event_sched_in(struct perf_event *event,
@@ -2258,7 +2278,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
n1 = 1;
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
- if (sub->state != PERF_EVENT_STATE_OFF) {
+ if (sub->state > PERF_EVENT_STATE_OFF) {
ret = x86_event_sched_in(sub, cpuctx, cpu);
if (ret)
goto undo;
@@ -2613,12 +2633,23 @@ static int validate_group(struct perf_event *event)
const struct pmu *hw_perf_event_init(struct perf_event *event)
{
+ const struct pmu *tmp;
int err;
err = __hw_perf_event_init(event);
if (!err) {
+ /*
+ * we temporarily connect event to its pmu
+ * such that validate_group() can classify
+ * it as an x86 event using is_x86_event()
+ */
+ tmp = event->pmu;
+ event->pmu = &pmu;
+
if (event->group_leader != event)
err = validate_group(event);
+
+ event->pmu = tmp;
}
if (err) {
if (event->destroy)