diff options
author | Ian Rogers <irogers@google.com> | 2024-05-04 01:28:49 +0200 |
---|---|---|
committer | Namhyung Kim <namhyung@kernel.org> | 2024-06-11 01:45:10 +0200 |
commit | 6828d6929b763499b7a8768c623891f5d4fca258 (patch) | |
tree | 4d230dd5574bc3babc22117f0476f9d33f7003ad /tools/perf/builtin-stat.c | |
parent | perf test: Speed up test case 70 annotate basic tests (diff) | |
download | linux-6828d6929b763499b7a8768c623891f5d4fca258.tar.xz linux-6828d6929b763499b7a8768c623891f5d4fca258.zip |
perf evsel: Refactor tool events
Tool events unnecessarily open a dummy perf event. This is useless
even with `perf record`, which will still open a dummy event of its
own. Change the behavior of tool events so that:
- duration_time - call `rdclock` on open and then report the count as
  a delta since the start in evsel__read_counter. This moves code out
  of builtin-stat, making it more general purpose.
- user_time/system_time - open the fd as either `/proc/pid/stat` or
  `/proc/stat` for cases like system wide. evsel__read_counter will
  read the appropriate field out of the procfs file. These values
  were previously supplied by wait4; if the procfs read fails then
  the wait4 values are used, assuming the process/thread terminated.
  By reading user_time and system_time this way, interval mode, per
  PID and per CPU can be supported, although there are restrictions
  given what the files provide (e.g. per PID can't be combined with
  per CPU).
Opening any of the tool events for `perf record` is changed to return
invalid.
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Weilin Wang <weilin.wang@intel.com>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: James Clark <james.clark@arm.com>
Cc: Dmitrii Dolgov <9erthalion6@gmail.com>
Cc: Ze Gao <zegao2021@gmail.com>
Cc: Song Liu <song@kernel.org>
Cc: Leo Yan <leo.yan@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240503232849.17752-1-irogers@google.com
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 75 |
1 files changed, 33 insertions, 42 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 603a9684153d..661832756a24 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -284,45 +284,38 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t process_synthesized_event, NULL); } -static int read_single_counter(struct evsel *counter, int cpu_map_idx, - int thread, struct timespec *rs) -{ - switch(counter->tool_event) { - case PERF_TOOL_DURATION_TIME: { - u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; - struct perf_counts_values *count = - perf_counts(counter->counts, cpu_map_idx, thread); - count->ena = count->run = val; - count->val = val; - return 0; - } - case PERF_TOOL_USER_TIME: - case PERF_TOOL_SYSTEM_TIME: { - u64 val; - struct perf_counts_values *count = - perf_counts(counter->counts, cpu_map_idx, thread); - if (counter->tool_event == PERF_TOOL_USER_TIME) - val = ru_stats.ru_utime_usec_stat.mean; - else - val = ru_stats.ru_stime_usec_stat.mean; - count->ena = count->run = val; - count->val = val; - return 0; - } - default: - case PERF_TOOL_NONE: - return evsel__read_counter(counter, cpu_map_idx, thread); - case PERF_TOOL_MAX: - /* This should never be reached */ - return 0; +static int read_single_counter(struct evsel *counter, int cpu_map_idx, int thread) +{ + int err = evsel__read_counter(counter, cpu_map_idx, thread); + + /* + * Reading user and system time will fail when the process + * terminates. Use the wait4 values in that case. 
+ */ + if (err && cpu_map_idx == 0 && + (counter->tool_event == PERF_TOOL_USER_TIME || + counter->tool_event == PERF_TOOL_SYSTEM_TIME)) { + u64 val, *start_time; + struct perf_counts_values *count = + perf_counts(counter->counts, cpu_map_idx, thread); + + start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread); + if (counter->tool_event == PERF_TOOL_USER_TIME) + val = ru_stats.ru_utime_usec_stat.mean; + else + val = ru_stats.ru_stime_usec_stat.mean; + count->ena = count->run = *start_time + val; + count->val = val; + return 0; } + return err; } /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx) +static int read_counter_cpu(struct evsel *counter, int cpu_map_idx) { int nthreads = perf_thread_map__nr(evsel_list->core.threads); int thread; @@ -340,7 +333,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ * (via evsel__read_counter()) and sets their count->loaded. 
*/ if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) && - read_single_counter(counter, cpu_map_idx, thread, rs)) { + read_single_counter(counter, cpu_map_idx, thread)) { counter->counts->scaled = -1; perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0; perf_counts(counter->counts, cpu_map_idx, thread)->run = 0; @@ -369,7 +362,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ return 0; } -static int read_affinity_counters(struct timespec *rs) +static int read_affinity_counters(void) { struct evlist_cpu_iterator evlist_cpu_itr; struct affinity saved_affinity, *affinity; @@ -390,10 +383,8 @@ static int read_affinity_counters(struct timespec *rs) if (evsel__is_bpf(counter)) continue; - if (!counter->err) { - counter->err = read_counter_cpu(counter, rs, - evlist_cpu_itr.cpu_map_idx); - } + if (!counter->err) + counter->err = read_counter_cpu(counter, evlist_cpu_itr.cpu_map_idx); } if (affinity) affinity__cleanup(&saved_affinity); @@ -417,11 +408,11 @@ static int read_bpf_map_counters(void) return 0; } -static int read_counters(struct timespec *rs) +static int read_counters(void) { if (!stat_config.stop_read_counter) { if (read_bpf_map_counters() || - read_affinity_counters(rs)) + read_affinity_counters()) return -1; } return 0; @@ -452,7 +443,7 @@ static void process_interval(void) evlist__reset_aggr_stats(evsel_list); - if (read_counters(&rs) == 0) + if (read_counters() == 0) process_counters(); if (STAT_RECORD) { @@ -940,7 +931,7 @@ try_again_reset: * avoid arbitrary skew, we must read all counters before closing any * group leaders. */ - if (read_counters(&(struct timespec) { .tv_nsec = t1-t0 }) == 0) + if (read_counters() == 0) process_counters(); /* |