aboutsummaryrefslogtreecommitdiff
path: root/Documentation/perf_counter/perfstat.c
blob: fd594468e655b970ae74083a840f54fdb219460d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
/*
 * perfstat:  /usr/bin/time -alike performance counter statistics utility
 *
 *        It summarizes the counter events of all tasks (and child tasks),
 *        covering all CPUs that the command (or workload) executes on.
 *        It only counts the per-task events of the workload started,
 *        independent of how many other tasks run on those CPUs.
 *
 * Build with:       cc -O2 -g -lrt -Wall -W -o perfstat perfstat.c
 *
 * Sample output:
 *

   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null

   Performance counter stats for 'ls':

           163516953 instructions
                2295 cache-misses
             2855182 branch-misses

 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Released under the GPLv2 (not later).
 *
 * Percpu counter support by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
 * Symbolic event options by: Wu Fengguang <fengguang.wu@intel.com>
 */
#define _GNU_SOURCE

#include <assert.h>
#include <getopt.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <time.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/uio.h>

#include <linux/unistd.h>

#include "perfcounters.h"

static int			nr_cpus			= 0;

static int			system_wide		= 0;

static void display_help(void)
{
	unsigned int i;
	int e;

	printf(
	"Usage: perfstat [<events...>] <cmd...>\n\n"
	"PerfStat Options (up to %d event types can be specified):\n\n",
		 MAX_COUNTERS);
	printf(
	" -e EVENT     --event=EVENT        #  symbolic-name        abbreviations");

	for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) {
		if (e != event_symbols[i].event) {
			e = event_symbols[i].event;
			printf(
	"\n                                  %2d: %-20s", e, event_symbols[i].symbol);
		} else
			printf(" %s", event_symbols[i].symbol);
	}

	printf("\n"
	"                                rNNN: raw event type\n\n"
	" -s                                # system-wide collection\n\n"
	" -c <cmd..>   --command=<cmd..>    # command+arguments to be timed.\n"
	"\n");
	exit(0);
}

static void process_options(int argc, char *argv[])
{
	for (;;) {
		int option_index = 0;
		/** Options for getopt */
		static struct option long_options[] = {
			{"event",	required_argument,	NULL, 'e'},
			{"help",	no_argument,		NULL, 'h'},
			{"command",	no_argument,		NULL, 'c'},
			{NULL,		0,			NULL,  0 }
		};
		int c = getopt_long(argc, argv, "+:e:c:s",
				    long_options, &option_index);
		if (c == -1)
			break;

		switch (c) {
		case 'c':
			break;
		case 's':
			system_wide = 1;
			break;
		case 'e':
			parse_events(optarg);
			break;
		default:
			break;
		}
	}
	if (optind == argc)
		goto err;

	if (!nr_counters)
		nr_counters = 8;
	return;

err:
	display_help();
}

char fault_here[1000000];

static int fd[MAX_NR_CPUS][MAX_COUNTERS];

static void create_counter(int counter)
{
	struct perf_counter_hw_event hw_event;

	memset(&hw_event, 0, sizeof(hw_event));
	hw_event.type		= event_id[counter];
	hw_event.raw		= event_raw[counter];
	hw_event.record_type	= PERF_RECORD_SIMPLE;
	hw_event.nmi		= 0;

	if (system_wide) {
		int cpu;
		for (cpu = 0; cpu < nr_cpus; cpu ++) {
			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
			if (fd[cpu][counter] < 0) {
				printf("perfstat error: syscall returned with %d (%s)\n",
						fd[cpu][counter], strerror(errno));
				exit(-1);
			}
			
		}
	} else {
		hw_event.inherit	= 1;
		hw_event.disabled	= 1;

		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
		if (fd[0][counter] < 0) {
			printf("perfstat error: syscall returned with %d (%s)\n",
					fd[0][counter], strerror(errno));
			exit(-1);
		}
	}
}


int main(int argc, char *argv[])
{
	unsigned long long t0, t1;
	int counter;
	ssize_t res;
	int status;
	int pid;

	process_options(argc, argv);

	if (system_wide) {
		nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
		assert(nr_cpus <= MAX_NR_CPUS);
		assert(nr_cpus >= 0);
	} else
		nr_cpus = 1;

	for (counter = 0; counter < nr_counters; counter++)
		create_counter(counter);

	argc -= optind;
	argv += optind;

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	prctl(PR_TASK_PERF_COUNTERS_ENABLE);

	if ((pid = fork()) < 0)
		perror("failed to fork");
	if (!pid) {
		if (execvp(argv[0], argv)) {
			perror(argv[0]);
			exit(-1);
		}
	}
	while (wait(&status) >= 0)
		;
	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
	t1 = rdclock();

	fflush(stdout);

	fprintf(stderr, "\n");
	fprintf(stderr, " Performance counter stats for \'%s\':\n",
		argv[0]);
	fprintf(stderr, "\n");

	for (counter = 0; counter < nr_counters; counter++) {
		int cpu;
		__u64 count, single_count;

		count = 0;
		for (cpu = 0; cpu < nr_cpus; cpu ++) {
			res = read(fd[cpu][counter],
					(char *) &single_count, sizeof(single_count));
			assert(res == sizeof(single_count));
			count += single_count;
		}

		if (!event_raw[counter] &&
		    (event_id[counter] == PERF_COUNT_CPU_CLOCK ||
		     event_id[counter] == PERF_COUNT_TASK_CLOCK)) {

			double msecs = (double)count / 1000000;

			fprintf(stderr, " %14.6f  %-20s (msecs)\n",
				msecs, event_name(counter));
		} else {
			fprintf(stderr, " %14Ld  %-20s (events)\n",
				count, event_name(counter));
		}
		if (!counter)
			fprintf(stderr, "\n");
	}
	fprintf(stderr, "\n");
	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
			(double)(t1-t0)/1e6);
	fprintf(stderr, "\n");

	return 0;
}