Web lists-archives.com

[PATCH] perf stat: Support metrics with perf stat --per-thread




perf stat --per-thread does not currently print derived metrics such as IPC.

We should support this because it would make it easy to collect metrics
for the individual threads of an application.

1. Current output, for example:

root@skl:/tmp# perf stat --per-thread -p 21623
^C
 Performance counter stats for process id '21623':

          vmstat-21623              0.517479      task-clock (msec)         #    0.000 CPUs utilized
          vmstat-21623                     1      context-switches
          vmstat-21623                     0      cpu-migrations
          vmstat-21623                     0      page-faults
          vmstat-21623               461,306      cycles
          vmstat-21623               630,724      instructions
          vmstat-21623               136,265      branches
          vmstat-21623                 2,520      branch-misses

       1.444020756 seconds time elapsed

root@skl:/tmp# perf stat --per-thread --metrics ipc -p 21623
^C
 Performance counter stats for process id '21623':

          vmstat-21623               631,185      inst_retired.any
          vmstat-21623               605,893      cpu_clk_unhalted.thread

       1.415679293 seconds time elapsed

2. With this patch, the result would be:

root@skl:/tmp# perf stat --per-thread -p 21623
^C
 Performance counter stats for process id '21623':

          vmstat-21623              0.533759      task-clock (msec)         #    0.000 CPUs utilized
          vmstat-21623                     1      context-switches          #    0.002 M/sec
          vmstat-21623                     0      cpu-migrations            #    0.000 K/sec
          vmstat-21623                     0      page-faults               #    0.000 K/sec
          vmstat-21623               473,896      cycles                    #    0.888 GHz
          vmstat-21623               631,072      instructions              #    1.33  insn per cycle
          vmstat-21623               136,307      branches                  #  255.372 M/sec
          vmstat-21623                 2,524      branch-misses             #    1.85% of all branches

       1.544862861 seconds time elapsed

root@skl:/tmp# perf stat --per-thread --metrics ipc -p 21623
^C
 Performance counter stats for process id '21623':

          vmstat-21623             1,259,104      inst_retired.any          #      1.2 IPC
          vmstat-21623             1,056,756      cpu_clk_unhalted.thread

       2.040954502 seconds time elapsed

Signed-off-by: Jin Yao <yao.jin@xxxxxxxxxxxxxxx>
---
 tools/perf/util/stat.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 35e9848..1164e68 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -314,6 +314,26 @@ static int process_counter_maps(struct perf_stat_config *config,
 	return 0;
 }
 
+static int process_aggr_thread_counter(struct perf_evsel *counter)
+{
+	int nthreads = thread_map__nr(counter->threads);
+	int ncpus = cpu_map__nr(counter->cpus);
+	int cpu, thread;
+	u64 tmp;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		u64 val = 0;
+
+		for (cpu = 0; cpu < ncpus; cpu++)
+			val += perf_counts(counter->counts, cpu, thread)->val;
+
+		tmp = val * counter->scale;
+		perf_stat__update_shadow_stats(counter, &tmp, 0);
+	}
+
+	return 0;
+}
+
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct perf_evsel *counter)
 {
@@ -342,6 +362,9 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	if (ret)
 		return ret;
 
+	if (config->aggr_mode == AGGR_THREAD)
+		return process_aggr_thread_counter(counter);
+
 	if (config->aggr_mode != AGGR_GLOBAL)
 		return 0;
 
-- 
2.7.4