diff options
Diffstat (limited to 'tools/perf')
167 files changed, 5472 insertions, 650 deletions
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 1b5042f134a8..80c1be5d566c 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -124,6 +124,13 @@ OPTIONS --group:: Show event group information together +--demangle:: + Demangle symbol names to human readable form. It's enabled by default, + disable with --no-demangle. + +--demangle-kernel:: + Demangle kernel symbol names to human readable form (for C++ kernels). + --percent-type:: Set annotation percent type from following choices: global-period, local-period, global-hits, local-hits diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index bb167e32a1d7..cd8ce6e8ec12 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -57,7 +57,7 @@ OPTIONS -u:: --update=:: Update specified file of the cache. Note that this doesn't remove - older entires since those may be still needed for annotating old + older entries since those may be still needed for annotating old (or remote) perf.data. Only if there is already a cache which has exactly same build-id, that is replaced by new one. It can be used to update kallsyms and kernel dso to vmlinux in order to support diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 153bde14bbe0..154a1ced72b2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -393,6 +393,12 @@ annotate.*:: This option works with tui, stdio2 browsers. + annotate.demangle:: + Demangle symbol names to human readable form. Default is 'true'. + + annotate.demangle_kernel:: + Demangle kernel symbol names to human readable form. Default is 'true'. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f546b5e9db05..f51f0000676e 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -112,6 +112,8 @@ OPTIONS - ins_lat: Instruction latency in core cycles. This is the global instruction latency - local_ins_lat: Local instruction latency version + - p_stage_cyc: On powerpc, this presents the number of cycles spent in a + pipeline stage. And currently supported only on powerpc. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) @@ -224,6 +226,9 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. +--disable-order:: + Disable raw trace ordering. + -g:: --call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>:: Display call chains using type, min percent threshold, print limit, @@ -472,7 +477,7 @@ OPTIONS but probably we'll make the default not to show the switch-on/off events on the --group mode and if there is only one event besides the off/on ones, go straight to the histogram browser, just like 'perf report' with no events - explicitely specified does. + explicitly specified does. --itrace:: Options for decoding instruction tracing data. The options are: diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 08a1714494f8..6ec5960b08c3 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -93,6 +93,17 @@ report:: 1.102235068 seconds time elapsed +--bpf-counters:: + Use BPF programs to aggregate readings from perf_events. This + allows multiple perf-stat sessions that are counting the same metric (cycles, + instructions, etc.) to share hardware counters. + +--bpf-attr-map:: + With option "--bpf-counters", different perf-stat sessions share + information about shared BPF programs and maps via a pinned hashmap. + Use "--bpf-attr-map" to specify the path of this pinned hashmap. + The default path is /sys/fs/bpf/perf_attr_map. + ifdef::HAVE_LIBPFM[] --pfm-events events:: Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) @@ -142,7 +153,10 @@ Do not aggregate counts across all monitored CPUs. -n:: --null:: - null run - don't start any counters +null run - Don't start any counters. + +This can be useful to measure just elapsed wall-clock time - or to assess the +raw overhead of perf stat itself, without running any counters. -v:: --verbose:: @@ -468,6 +482,15 @@ convenient for post processing. --summary:: Print summary for interval mode (-I). +--no-csv-summary:: +Don't print 'summary' at the first column for CVS summary output. +This option must be used with -x and --summary. + +This option can be enabled in perf config by setting the variable +'stat.no-csv-summary'. + +$ perf config stat.no-csv-summary=true + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index ee2024691d46..bba5ffb05463 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -317,7 +317,7 @@ Default is to monitor all CPUS. but probably we'll make the default not to show the switch-on/off events on the --group mode and if there is only one event besides the off/on ones, go straight to the histogram browser, just like 'perf top' with no events - explicitely specified does. + explicitly specified does. --stitch-lbr:: Show callgraph with stitched LBRs, which may have more complete diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index c130a3c46a90..9c330cdfa973 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -76,3 +76,15 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-list[1] + +linkperf:perf-annotate[1],linkperf:perf-archive[1], +linkperf:perf-bench[1], linkperf:perf-buildid-cache[1], +linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], +linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], +linkperf:perf-evlist[1], linkperf:perf-ftrace[1], +linkperf:perf-help[1], linkperf:perf-inject[1], +linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1], +linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1], +linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1], +linkperf:perf-script[1], linkperf:perf-test[1], +linkperf:perf-trace[1], linkperf:perf-version[1] diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d8e59d31399a..3514fe956ed1 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -32,7 +32,7 @@ ifneq ($(NO_SYSCALL_TABLE),1) NO_SYSCALL_TABLE := 0 endif else - ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips)) NO_SYSCALL_TABLE := 0 endif endif @@ -87,6 +87,13 @@ ifeq ($(ARCH),s390) CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated endif +ifeq ($(ARCH),mips) + NO_PERF_REGS := 0 + CFLAGS += -I$(OUTPUT)arch/mips/include/generated + CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi + LIBUNWIND_LIBS = -lunwind -lunwind-mips +endif + ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f6e609673de2..090fb9d62665 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1007,6 +1007,7 @@ python-clean: SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h +SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool @@ -1021,7 +1022,7 @@ $(BPFTOOL): | $(SKEL_TMP_OUT) OUTPUT=$(SKEL_TMP_OUT)/ bootstrap $(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) - $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \ + $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index c25c878fd06c..d942f118d32c 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -67,6 +67,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, char path[PATH_MAX]; int err = -EINVAL; u32 val; + u64 contextid; ptr = container_of(itr, struct cs_etm_recording, itr); cs_etm_pmu = ptr->cs_etm_pmu; @@ -86,25 +87,59 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, goto out; } + /* User has configured for PID tracing, respects it. */ + contextid = evsel->core.attr.config & + (BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2)); + /* - * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing - * is supported: - * 0b00000 Context ID tracing is not supported. - * 0b00100 Maximum of 32-bit Context ID size. - * All other values are reserved. + * If user doesn't configure the contextid format, parse PMU format and + * enable PID tracing according to the "contextid" format bits: + * + * If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1; + * If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2. */ - val = BMVAL(val, 5, 9); - if (!val || val != 0x4) { - err = -EINVAL; - goto out; + if (!contextid) + contextid = perf_pmu__format_bits(&cs_etm_pmu->format, + "contextid"); + + if (contextid & BIT(ETM_OPT_CTXTID)) { + /* + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID + * tracing is supported: + * 0b00000 Context ID tracing is not supported. + * 0b00100 Maximum of 32-bit Context ID size. + * All other values are reserved. + */ + val = BMVAL(val, 5, 9); + if (!val || val != 0x4) { + pr_err("%s: CONTEXTIDR_EL1 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } + } + + if (contextid & BIT(ETM_OPT_CTXTID2)) { + /* + * TRCIDR2.VMIDOPT[30:29] != 0 and + * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid) + * We can't support CONTEXTIDR in VMID if the size of the + * virtual context id is < 32bit. + * Any value of VMIDSIZE >= 4 (i.e, > 32bit) is fine for us. + */ + if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) { + pr_err("%s: CONTEXTIDR_EL2 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } } /* All good, let the kernel know */ - evsel->core.attr.config |= (1 << ETM_OPT_CTXTID); + evsel->core.attr.config |= contextid; err = 0; out: - return err; } @@ -173,17 +208,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr, !cpu_map__has(online_cpus, i)) continue; - if (option & ETM_SET_OPT_CTXTID) { + if (option & BIT(ETM_OPT_CTXTID)) { err = cs_etm_set_context_id(itr, evsel, i); if (err) goto out; } - if (option & ETM_SET_OPT_TS) { + if (option & BIT(ETM_OPT_TS)) { err = cs_etm_set_timestamp(itr, evsel, i); if (err) goto out; } - if (option & ~(ETM_SET_OPT_MASK)) + if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS))) /* Nothing else is currently supported */ goto out; } @@ -343,7 +378,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); } - /* Snapshost size can't be bigger than the auxtrace area */ + /* Snapshot size can't be bigger than the auxtrace area */ if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) { pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", @@ -410,7 +445,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, - ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS); + BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)); if (err) goto out; } @@ -489,7 +524,9 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) config |= BIT(ETM4_CFG_BIT_TS); if (config_opts & BIT(ETM_OPT_RETSTK)) config |= BIT(ETM4_CFG_BIT_RETSTK); - + if (config_opts & BIT(ETM_OPT_CTXTID2)) + config |= BIT(ETM4_CFG_BIT_VMID) | + BIT(ETM4_CFG_BIT_VMID_OPT); return config; } @@ -576,7 +613,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) { - u32 increment; + u32 increment, nr_trc_params; u64 magic; struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); @@ -611,6 +648,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETMV4_PRIV_MAX; + nr_trc_params = CS_ETMV4_PRIV_MAX - CS_ETMV4_TRCCONFIGR; } else { magic = __perf_cs_etmv3_magic; /* Get configuration register */ @@ -628,11 +666,13 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETM_PRIV_MAX; + nr_trc_params = CS_ETM_PRIV_MAX - CS_ETM_ETMCR; } /* Build generic header portion */ info->priv[*offset + CS_ETM_MAGIC] = magic; info->priv[*offset + CS_ETM_CPU] = cpu; + info->priv[*offset + CS_ETM_NR_TRC_PARAMS] = nr_trc_params; /* Where the next CPU entry should start from */ *offset += increment; } @@ -678,7 +718,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, /* First fill out the session header */ info->type = PERF_AUXTRACE_CS_ETM; - info->priv[CS_HEADER_VERSION_0] = 0; + info->priv[CS_HEADER_VERSION] = CS_HEADER_CURRENT_VERSION; info->priv[CS_PMU_TYPE_CPUS] = type << 32; info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu; info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode; diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index ead2f2275eee..9fcb4e68add9 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -2,6 +2,7 @@ perf-y += header.o perf-y += machine.o perf-y += perf_regs.o perf-y += tsc.o +perf-y += pmu.o perf-y += kvm-stat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 40c5e0b5bda8..7e7714290a87 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -6,11 +6,11 @@ #include "debug.h" #include "symbol.h" -/* On arm64, kernel text segment start at high memory address, +/* On arm64, kernel text segment starts at high memory address, * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory - * address, like 0xffff 0000 00ax xxxx. When only samll amount of + * address, like 0xffff 0000 00ax xxxx. When only small amount of * memory is used by modules, gap between end of module's text segment - * and start of kernel text segment may be reach 2G. + * and start of kernel text segment may reach 2G. * Therefore do not fill this gap and do not assign it to the kernel dso map. */ diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2518cde18b34..476b037eea1c 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -108,7 +108,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) /* [sp], [sp, NUM] or [sp,NUM] */ new_len = 7; /* + ( % s p ) NULL */ - /* If the arugment is [sp], need to fill offset '0' */ + /* If the argument is [sp], need to fill offset '0' */ if (rm[2].rm_so == -1) new_len += 1; else diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c new file mode 100644 index 000000000000..d3259d61ca75 --- /dev/null +++ b/tools/perf/arch/arm64/util/pmu.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../../util/cpumap.h" +#include "../../util/pmu.h" + +struct pmu_events_map *pmu_events_map__find(void) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!is_pmu_core(pmu->name)) + continue; + + /* + * The cpumap should cover all CPUs. Otherwise, some CPUs may + * not support some events or have different event IDs. + */ + if (pmu->cpus->nr != cpu__max_cpu()) + return NULL; + + return perf_pmu__find_map(pmu); + } + + return NULL; +} diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile new file mode 100644 index 000000000000..8bc09072e3d6 --- /dev/null +++ b/tools/perf/arch/mips/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif + +# Syscall table generation for perf +out := $(OUTPUT)arch/mips/include/generated/asm +header := $(out)/syscalls_n64.c +sysprf := $(srctree)/tools/perf/arch/mips/entry/syscalls +sysdef := $(sysprf)/syscall_n64.tbl +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, mips) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/mips/entry/syscalls/mksyscalltbl b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl new file mode 100644 index 000000000000..fb1f49451af6 --- /dev/null +++ b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# s390 script. +# +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# Changed by: Tiezhu Yang <yangtiezhu@loongson.cn> + +SYSCALL_TBL=$1 + +if ! test -r $SYSCALL_TBL; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local max_nr nr abi sc discard + + echo 'static const char *syscalltbl_mips_n64[] = {' + while read nr abi sc discard; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_MIPS_N64_MAX_ID $max_nr" +} + +grep -E "^[[:digit:]]+[[:space:]]+(n64)" $SYSCALL_TBL \ + |sort -k1 -n \ + |create_table diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl new file mode 100644 index 000000000000..91649690b52f --- /dev/null +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -0,0 +1,358 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# system call numbers and entry vectors for mips +# +# The format is: +# <number> <abi> <name> <entry point> +# +# The <abi> is always "n64" for this file. +# +0 n64 read sys_read +1 n64 write sys_write +2 n64 open sys_open +3 n64 close sys_close +4 n64 stat sys_newstat +5 n64 fstat sys_newfstat +6 n64 lstat sys_newlstat +7 n64 poll sys_poll +8 n64 lseek sys_lseek +9 n64 mmap sys_mips_mmap +10 n64 mprotect sys_mprotect +11 n64 munmap sys_munmap +12 n64 brk sys_brk +13 n64 rt_sigaction sys_rt_sigaction +14 n64 rt_sigprocmask sys_rt_sigprocmask +15 n64 ioctl sys_ioctl +16 n64 pread64 sys_pread64 +17 n64 pwrite64 sys_pwrite64 +18 n64 readv sys_readv +19 n64 writev sys_writev +20 n64 access sys_access +21 n64 pipe sysm_pipe +22 n64 _newselect sys_select +23 n64 sched_yield sys_sched_yield +24 n64 mremap sys_mremap +25 n64 msync sys_msync +26 n64 mincore sys_mincore +27 n64 madvise sys_madvise +28 n64 shmget sys_shmget +29 n64 shmat sys_shmat +30 n64 shmctl sys_old_shmctl +31 n64 dup sys_dup +32 n64 dup2 sys_dup2 +33 n64 pause sys_pause +34 n64 nanosleep sys_nanosleep +35 n64 getitimer sys_getitimer +36 n64 setitimer sys_setitimer +37 n64 alarm sys_alarm +38 n64 getpid sys_getpid +39 n64 sendfile sys_sendfile64 +40 n64 socket sys_socket +41 n64 connect sys_connect +42 n64 accept sys_accept +43 n64 sendto sys_sendto +44 n64 recvfrom sys_recvfrom +45 n64 sendmsg sys_sendmsg +46 n64 recvmsg sys_recvmsg +47 n64 shutdown sys_shutdown +48 n64 bind sys_bind +49 n64 listen sys_listen +50 n64 getsockname sys_getsockname +51 n64 getpeername sys_getpeername +52 n64 socketpair sys_socketpair +53 n64 setsockopt sys_setsockopt +54 n64 getsockopt sys_getsockopt +55 n64 clone __sys_clone +56 n64 fork __sys_fork +57 n64 execve sys_execve +58 n64 exit sys_exit +59 n64 wait4 sys_wait4 +60 n64 kill sys_kill +61 n64 uname sys_newuname +62 n64 semget sys_semget +63 n64 semop sys_semop +64 n64 semctl sys_old_semctl +65 n64 shmdt sys_shmdt +66 n64 msgget sys_msgget +67 n64 msgsnd sys_msgsnd +68 n64 msgrcv sys_msgrcv +69 n64 msgctl sys_old_msgctl +70 n64 fcntl sys_fcntl +71 n64 flock sys_flock +72 n64 fsync sys_fsync +73 n64 fdatasync sys_fdatasync +74 n64 truncate sys_truncate +75 n64 ftruncate sys_ftruncate +76 n64 getdents sys_getdents +77 n64 getcwd sys_getcwd +78 n64 chdir sys_chdir +79 n64 fchdir sys_fchdir +80 n64 rename sys_rename +81 n64 mkdir sys_mkdir +82 n64 rmdir sys_rmdir +83 n64 creat sys_creat +84 n64 link sys_link +85 n64 unlink sys_unlink +86 n64 symlink sys_symlink +87 n64 readlink sys_readlink +88 n64 chmod sys_chmod +89 n64 fchmod sys_fchmod +90 n64 chown sys_chown +91 n64 fchown sys_fchown +92 n64 lchown sys_lchown +93 n64 umask sys_umask +94 n64 gettimeofday sys_gettimeofday +95 n64 getrlimit sys_getrlimit +96 n64 getrusage sys_getrusage +97 n64 sysinfo sys_sysinfo +98 n64 times sys_times +99 n64 ptrace sys_ptrace +100 n64 getuid sys_getuid +101 n64 syslog sys_syslog +102 n64 getgid sys_getgid +103 n64 setuid sys_setuid +104 n64 setgid sys_setgid +105 n64 geteuid sys_geteuid +106 n64 getegid sys_getegid +107 n64 setpgid sys_setpgid +108 n64 getppid sys_getppid +109 n64 getpgrp sys_getpgrp +110 n64 setsid sys_setsid +111 n64 setreuid sys_setreuid +112 n64 setregid sys_setregid +113 n64 getgroups sys_getgroups +114 n64 setgroups sys_setgroups +115 n64 setresuid sys_setresuid +116 n64 getresuid sys_getresuid +117 n64 setresgid sys_setresgid +118 n64 getresgid sys_getresgid +119 n64 getpgid sys_getpgid +120 n64 setfsuid sys_setfsuid +121 n64 setfsgid sys_setfsgid +122 n64 getsid sys_getsid +123 n64 capget sys_capget +124 n64 capset sys_capset +125 n64 rt_sigpending sys_rt_sigpending +126 n64 rt_sigtimedwait sys_rt_sigtimedwait +127 n64 rt_sigqueueinfo sys_rt_sigqueueinfo +128 n64 rt_sigsuspend sys_rt_sigsuspend +129 n64 sigaltstack sys_sigaltstack +130 n64 utime sys_utime +131 n64 mknod sys_mknod +132 n64 personality sys_personality +133 n64 ustat sys_ustat +134 n64 statfs sys_statfs +135 n64 fstatfs sys_fstatfs +136 n64 sysfs sys_sysfs +137 n64 getpriority sys_getpriority +138 n64 setpriority sys_setpriority +139 n64 sched_setparam sys_sched_setparam +140 n64 sched_getparam sys_sched_getparam +141 n64 sched_setscheduler sys_sched_setscheduler +142 n64 sched_getscheduler sys_sched_getscheduler +143 n64 sched_get_priority_max sys_sched_get_priority_max +144 n64 sched_get_priority_min sys_sched_get_priority_min +145 n64 sched_rr_get_interval sys_sched_rr_get_interval +146 n64 mlock sys_mlock +147 n64 munlock sys_munlock +148 n64 mlockall sys_mlockall +149 n64 munlockall sys_munlockall +150 n64 vhangup sys_vhangup +151 n64 pivot_root sys_pivot_root +152 n64 _sysctl sys_ni_syscall +153 n64 prctl sys_prctl +154 n64 adjtimex sys_adjtimex +155 n64 setrlimit sys_setrlimit +156 n64 chroot sys_chroot +157 n64 sync sys_sync +158 n64 acct sys_acct +159 n64 settimeofday sys_settimeofday +160 n64 mount sys_mount +161 n64 umount2 sys_umount +162 n64 swapon sys_swapon +163 n64 swapoff sys_swapoff +164 n64 reboot sys_reboot +165 n64 sethostname sys_sethostname +166 n64 setdomainname sys_setdomainname +167 n64 create_module sys_ni_syscall +168 n64 init_module sys_init_module +169 n64 delete_module sys_delete_module +170 n64 get_kernel_syms sys_ni_syscall +171 n64 query_module sys_ni_syscall +172 n64 quotactl sys_quotactl +173 n64 nfsservctl sys_ni_syscall +174 n64 getpmsg sys_ni_syscall +175 n64 putpmsg sys_ni_syscall +176 n64 afs_syscall sys_ni_syscall +# 177 reserved for security +177 n64 reserved177 sys_ni_syscall +178 n64 gettid sys_gettid +179 n64 readahead sys_readahead +180 n64 setxattr sys_setxattr +181 n64 lsetxattr sys_lsetxattr +182 n64 fsetxattr sys_fsetxattr +183 n64 getxattr sys_getxattr +184 n64 lgetxattr sys_lgetxattr +185 n64 fgetxattr sys_fgetxattr +186 n64 listxattr sys_listxattr +187 n64 llistxattr sys_llistxattr +188 n64 flistxattr sys_flistxattr +189 n64 removexattr sys_removexattr +190 n64 lremovexattr sys_lremovexattr +191 n64 fremovexattr sys_fremovexattr +192 n64 tkill sys_tkill +193 n64 reserved193 sys_ni_syscall +194 n64 futex sys_futex +195 n64 sched_setaffinity sys_sched_setaffinity +196 n64 sched_getaffinity sys_sched_getaffinity +197 n64 cacheflush sys_cacheflush +198 n64 cachectl sys_cachectl +199 n64 sysmips __sys_sysmips +200 n64 io_setup sys_io_setup +201 n64 io_destroy sys_io_destroy +202 n64 io_getevents sys_io_getevents +203 n64 io_submit sys_io_submit +204 n64 io_cancel sys_io_cancel +205 n64 exit_group sys_exit_group +206 n64 lookup_dcookie sys_lookup_dcookie +207 n64 epoll_create sys_epoll_create +208 n64 epoll_ctl sys_epoll_ctl +209 n64 epoll_wait sys_epoll_wait +210 n64 remap_file_pages sys_remap_file_pages +211 n64 rt_sigreturn sys_rt_sigreturn +212 n64 set_tid_address sys_set_tid_address +213 n64 restart_syscall sys_restart_syscall +214 n64 semtimedop sys_semtimedop +215 n64 fadvise64 sys_fadvise64_64 +216 n64 timer_create sys_timer_create +217 n64 timer_settime sys_timer_settime +218 n64 timer_gettime sys_timer_gettime +219 n64 timer_getoverrun sys_timer_getoverrun +220 n64 timer_delete sys_timer_delete +221 n64 clock_settime sys_clock_settime +222 n64 clock_gettime sys_clock_gettime +223 n64 clock_getres sys_clock_getres +224 n64 clock_nanosleep sys_clock_nanosleep +225 n64 tgkill sys_tgkill +226 n64 utimes sys_utimes +227 n64 mbind sys_mbind +228 n64 get_mempolicy sys_get_mempolicy +229 n64 set_mempolicy sys_set_mempolicy +230 n64 mq_open sys_mq_open +231 n64 mq_unlink sys_mq_unlink +232 n64 mq_timedsend sys_mq_timedsend +233 n64 mq_timedreceive sys_mq_timedreceive +234 n64 mq_notify sys_mq_notify +235 n64 mq_getsetattr sys_mq_getsetattr +236 n64 vserver sys_ni_syscall +237 n64 waitid sys_waitid +# 238 was sys_setaltroot +239 n64 add_key sys_add_key +240 n64 request_key sys_request_key +241 n64 keyctl sys_keyctl +242 n64 set_thread_area sys_set_thread_area +243 n64 inotify_init sys_inotify_init +244 n64 inotify_add_watch sys_inotify_add_watch +245 n64 inotify_rm_watch sys_inotify_rm_watch +246 n64 migrate_pages sys_migrate_pages +247 n64 openat sys_openat +248 n64 mkdirat sys_mkdirat +249 n64 mknodat sys_mknodat +250 n64 fchownat sys_fchownat +251 n64 futimesat sys_futimesat +252 n64 newfstatat sys_newfstatat +253 n64 unlinkat sys_unlinkat +254 n64 renameat sys_renameat +255 n64 linkat sys_linkat +256 n64 symlinkat sys_symlinkat +257 n64 readlinkat sys_readlinkat +258 n64 fchmodat sys_fchmodat +259 n64 faccessat sys_faccessat +260 n64 pselect6 sys_pselect6 +261 n64 ppoll sys_ppoll +262 n64 unshare sys_unshare +263 n64 splice sys_splice +264 n64 sync_file_range sys_sync_file_range +265 n64 tee sys_tee +266 n64 vmsplice sys_vmsplice +267 n64 move_pages sys_move_pages +268 n64 set_robust_list sys_set_robust_list +269 n64 get_robust_list sys_get_robust_list +270 n64 kexec_load sys_kexec_load +271 n64 getcpu sys_getcpu +272 n64 epoll_pwait sys_epoll_pwait +273 n64 ioprio_set sys_ioprio_set +274 n64 ioprio_get sys_ioprio_get +275 n64 utimensat sys_utimensat +276 n64 signalfd sys_signalfd +277 n64 timerfd sys_ni_syscall +278 n64 eventfd sys_eventfd +279 n64 fallocate sys_fallocate +280 n64 timerfd_create sys_timerfd_create +281 n64 timerfd_gettime sys_timerfd_gettime +282 n64 timerfd_settime sys_timerfd_settime +283 n64 signalfd4 sys_signalfd4 +284 n64 eventfd2 sys_eventfd2 +285 n64 epoll_create1 sys_epoll_create1 +286 n64 dup3 sys_dup3 +287 n64 pipe2 sys_pipe2 +288 n64 inotify_init1 sys_inotify_init1 +289 n64 preadv sys_preadv +290 n64 pwritev sys_pwritev +291 n64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +292 n64 perf_event_open sys_perf_event_open +293 n64 accept4 sys_accept4 +294 n64 recvmmsg sys_recvmmsg +295 n64 fanotify_init sys_fanotify_init +296 n64 fanotify_mark sys_fanotify_mark +297 n64 prlimit64 sys_prlimit64 +298 n64 name_to_handle_at sys_name_to_handle_at +299 n64 open_by_handle_at sys_open_by_handle_at +300 n64 clock_adjtime sys_clock_adjtime +301 n64 syncfs sys_syncfs +302 n64 sendmmsg sys_sendmmsg +303 n64 setns sys_setns +304 n64 process_vm_readv sys_process_vm_readv +305 n64 process_vm_writev sys_process_vm_writev +306 n64 kcmp sys_kcmp +307 n64 finit_module sys_finit_module +308 n64 getdents64 sys_getdents64 +309 n64 sched_setattr sys_sched_setattr +310 n64 sched_getattr sys_sched_getattr +311 n64 renameat2 sys_renameat2 +312 n64 seccomp sys_seccomp +313 n64 getrandom sys_getrandom +314 n64 memfd_create sys_memfd_create +315 n64 bpf sys_bpf +316 n64 execveat sys_execveat +317 n64 userfaultfd sys_userfaultfd +318 n64 membarrier sys_membarrier +319 n64 mlock2 sys_mlock2 +320 n64 copy_file_range sys_copy_file_range +321 n64 preadv2 sys_preadv2 +322 n64 pwritev2 sys_pwritev2 +323 n64 pkey_mprotect sys_pkey_mprotect +324 n64 pkey_alloc sys_pkey_alloc +325 n64 pkey_free sys_pkey_free +326 n64 statx sys_statx +327 n64 rseq sys_rseq +328 n64 io_pgetevents sys_io_pgetevents +# 329 through 423 are reserved to sync up with other architectures +424 n64 pidfd_send_signal sys_pidfd_send_signal +425 n64 io_uring_setup sys_io_uring_setup +426 n64 io_uring_enter sys_io_uring_enter +427 n64 io_uring_register sys_io_uring_register +428 n64 open_tree sys_open_tree +429 n64 move_mount sys_move_mount +430 n64 fsopen sys_fsopen +431 n64 fsconfig sys_fsconfig +432 n64 fsmount sys_fsmount +433 n64 fspick sys_fspick +434 n64 pidfd_open sys_pidfd_open +435 n64 clone3 __sys_clone3 +436 n64 close_range sys_close_range +437 n64 openat2 sys_openat2 +438 n64 pidfd_getfd sys_pidfd_getfd +439 n64 faccessat2 sys_faccessat2 +440 n64 process_madvise sys_process_madvise +441 n64 epoll_pwait2 sys_epoll_pwait2 diff --git a/tools/perf/arch/mips/include/dwarf-regs-table.h b/tools/perf/arch/mips/include/dwarf-regs-table.h new file mode 100644 index 000000000000..5badbcd3c5ec --- /dev/null +++ b/tools/perf/arch/mips/include/dwarf-regs-table.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * dwarf-regs-table.h : Mapping of DWARF debug register numbers into + * register names. + * + * Copyright (C) 2013 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifdef DEFINE_DWARF_REGSTR_TABLE +#undef REG_DWARFNUM_NAME +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "$" #reg +static const char * const mips_regstr_tbl[] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "%29", + "$30", "$31", + REG_DWARFNUM_NAME(hi, 64), + REG_DWARFNUM_NAME(lo, 65), +}; +#endif diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h new file mode 100644 index 000000000000..ee73b36a14d1 --- /dev/null +++ b/tools/perf/arch/mips/include/perf_regs.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include <stdlib.h> +#include <linux/types.h> +#include <asm/perf_regs.h> + +#define PERF_REGS_MAX PERF_REG_MIPS_MAX +#define PERF_REG_IP PERF_REG_MIPS_PC +#define PERF_REG_SP PERF_REG_MIPS_R29 + +#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1) + +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_MIPS_PC: + return "PC"; + case PERF_REG_MIPS_R1: + return "$1"; + case PERF_REG_MIPS_R2: + return "$2"; + case PERF_REG_MIPS_R3: + return "$3"; + case PERF_REG_MIPS_R4: + return "$4"; + case PERF_REG_MIPS_R5: + return "$5"; + case PERF_REG_MIPS_R6: + return "$6"; + case PERF_REG_MIPS_R7: + return "$7"; + case PERF_REG_MIPS_R8: + return "$8"; + case PERF_REG_MIPS_R9: + return "$9"; + case PERF_REG_MIPS_R10: + return "$10"; + case PERF_REG_MIPS_R11: + return "$11"; + case PERF_REG_MIPS_R12: + return "$12"; + case PERF_REG_MIPS_R13: + return "$13"; + case PERF_REG_MIPS_R14: + return "$14"; + case PERF_REG_MIPS_R15: + return "$15"; + case PERF_REG_MIPS_R16: + return "$16"; + case PERF_REG_MIPS_R17: + return "$17"; + case PERF_REG_MIPS_R18: + return "$18"; + case PERF_REG_MIPS_R19: + return "$19"; + case PERF_REG_MIPS_R20: + return "$20"; + case PERF_REG_MIPS_R21: + return "$21"; + case PERF_REG_MIPS_R22: + return "$22"; + case PERF_REG_MIPS_R23: + return "$23"; + case PERF_REG_MIPS_R24: + return "$24"; + case PERF_REG_MIPS_R25: + return "$25"; + case PERF_REG_MIPS_R28: + return "$28"; + case PERF_REG_MIPS_R29: + return "$29"; + case PERF_REG_MIPS_R30: + return "$30"; + case PERF_REG_MIPS_R31: + return "$31"; + default: + break; + } + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/mips/util/Build b/tools/perf/arch/mips/util/Build new file mode 100644 index 000000000000..51c8900a9a10 --- /dev/null +++ b/tools/perf/arch/mips/util/Build @@ -0,0 +1,3 @@ +perf-y += perf_regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/mips/util/dwarf-regs.c b/tools/perf/arch/mips/util/dwarf-regs.c new file mode 100644 index 000000000000..25c13a91c2a7 --- /dev/null +++ b/tools/perf/arch/mips/util/dwarf-regs.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2013 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <stdio.h> +#include <dwarf-regs.h> + +static const char *mips_gpr_names[32] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", + "$30", "$31" +}; + +const char *get_arch_regstr(unsigned int n) +{ + if (n < 32) + return mips_gpr_names[n]; + if (n == 64) + return "hi"; + if (n == 65) + return "lo"; + return NULL; +} diff --git a/tools/perf/arch/mips/util/perf_regs.c b/tools/perf/arch/mips/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/mips/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/mips/util/unwind-libunwind.c b/tools/perf/arch/mips/util/unwind-libunwind.c new file mode 100644 index 000000000000..0d8c99c29da6 --- /dev/null +++ b/tools/perf/arch/mips/util/unwind-libunwind.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <libunwind.h> +#include "perf_regs.h" +#include "../../util/unwind.h" +#include "util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_MIPS_R1 ... UNW_MIPS_R25: + return regnum - UNW_MIPS_R1 + PERF_REG_MIPS_R1; + case UNW_MIPS_R28 ... UNW_MIPS_R31: + return regnum - UNW_MIPS_R28 + PERF_REG_MIPS_R28; + case UNW_MIPS_PC: + return PERF_REG_MIPS_PC; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } +} diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index b7945e5a543b..8a79c4126e5b 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -4,6 +4,8 @@ perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o perf-y += sym-handling.o +perf-y += evsel.o +perf-y += event.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c new file mode 100644 index 000000000000..3bf441257466 --- /dev/null +++ b/tools/perf/arch/powerpc/util/event.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/string.h> +#include <linux/zalloc.h> + +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" + +void arch_perf_parse_sample_weight(struct perf_sample *data, + const __u64 *array, u64 type) +{ + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT) + data->weight = weight.full; + else { + data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + data->p_stage_cyc = weight.var3_w; + } +} + +void arch_perf_synthesize_sample_weight(const struct perf_sample *data, + __u64 *array, u64 type) +{ + *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + } +} + +const char *arch_perf_header_entry(const char *se_header) +{ + if (!strcmp(se_header, "Local INSTR Latency")) + return "Finish Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Dispatch Cyc"; + return se_header; +} + +int arch_support_sort_key(const char *sort_key) +{ + if (!strcmp(sort_key, "p_stage_cyc")) + return 1; + return 0; +} diff --git a/tools/perf/arch/powerpc/util/evsel.c b/tools/perf/arch/powerpc/util/evsel.c new file mode 100644 index 000000000000..2f733cdc8dbb --- /dev/null +++ b/tools/perf/arch/powerpc/util/evsel.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include "util/evsel.h" + +void arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT_STRUCT); +} diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index eed9e5a42935..16510686c138 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -176,7 +176,7 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) } /* - * Incase of powerpc architecture, pmu registers are programmable + * In case of powerpc architecture, pmu registers are programmable * by guest kernel. So monitoring guest via host may not provide * valid samples with default 'cycles' event. It is better to use * 'trace_imc/trace_cycles' event for guest profiling, since it diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h index 5788eb1f1fe3..2baeb1c1ae85 100644 --- a/tools/perf/arch/powerpc/util/utils_header.h +++ b/tools/perf/arch/powerpc/util/utils_header.h @@ -10,6 +10,6 @@ #define SPRN_PVR 0x11F /* Processor Version Register */ #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ -#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */ #endif /* __PERF_UTIL_HEADER_H */ diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c index adcacf1b6609..dffcf9b52153 100644 --- a/tools/perf/arch/x86/tests/bp-modify.c +++ b/tools/perf/arch/x86/tests/bp-modify.c @@ -73,7 +73,7 @@ static int bp_modify1(void) /* * The parent does following steps: * - creates a new breakpoint (id 0) for bp_2 function - * - changes that breakponit to bp_1 function + * - changes that breakpoint to bp_1 function * - waits for the breakpoint to hit and checks * it has proper rip of bp_1 function * - detaches the child diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index fca81b39b09f..207c56805c55 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -165,7 +165,7 @@ static int sdt_init_op_regex(void) /* * Max x86 register name length is 5(ex: %r15d). So, 6th char * should always contain NULL. This helps to find register name - * length using strlen, insted of maintaing one more variable. + * length using strlen, instead of maintaining one more variable. */ #define SDT_REG_NAME_SIZE 6 @@ -207,7 +207,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) * and displacement 0 (Both sign and displacement 0 are * optional so it may be empty). Use one more character * to hold last NULL so that strlen can be used to find - * prefix length, instead of maintaing one more variable. + * prefix length, instead of maintaining one more variable. */ char prefix[3] = {0}; diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 0a0ff1247c83..79d13dbc0a47 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -17,7 +17,7 @@ * While the second model, enabled via --multiq option, uses multiple * queueing (which refers to one epoll instance per worker). For example, * short lived tcp connections in a high throughput httpd server will - * ditribute the accept()'ing connections across CPUs. In this case each + * distribute the accept()'ing connections across CPUs. In this case each * worker does a limited amount of processing. * * [queue A] ---> [worker] @@ -198,7 +198,7 @@ static void *workerfn(void *arg) do { /* - * Block undefinitely waiting for the IN event. + * Block indefinitely waiting for the IN event. * In order to stress the epoll_wait(2) syscall, * call it event per event, instead of a larger * batch (max)limit. diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 280227e3ffd7..55d373b75791 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -372,7 +372,7 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss) len += synthesize_flush(data); } - /* tihs makes the child to finish */ + /* this makes the child to finish */ close(data->input_pipe[1]); wait4(data->pid, &status, 0, &rusage); diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 20b87e29c96f..f2640179ada9 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -42,7 +42,7 @@ #endif /* - * Regular printout to the terminal, supressed if -q is specified: + * Regular printout to the terminal, suppressed if -q is specified: */ #define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index a23ba6bb99b6..021d974c978e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -239,7 +239,7 @@ static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample, } /* - * XXX filtered samples can still have branch entires pointing into our + * XXX filtered samples can still have branch entries pointing into our * symbol and are missed. */ process_branch_stack(sample->branch_stack, al, sample); @@ -538,6 +538,10 @@ int cmd_annotate(int argc, const char **argv) "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), + OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, + "Enable symbol demangling"), + OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, + "Enable kernel symbol demangling"), OPT_BOOLEAN(0, "group", &symbol_conf.event_group, "Show event group information together"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 7c4a9d424a64..61929f63a047 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -6,7 +6,6 @@ #include <linux/zalloc.h> #include <linux/string.h> #include <linux/limits.h> -#include <linux/string.h> #include <string.h> #include <sys/file.h> #include <signal.h> @@ -24,8 +23,6 @@ #include <sys/signalfd.h> #include <sys/wait.h> #include <poll.h> -#include <sys/stat.h> -#include <time.h> #include "builtin.h" #include "perf.h" #include "debug.h" diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 878e04b1fab7..f52b3a799e76 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1796,7 +1796,7 @@ static int ui_init(void) data__for_each_file(i, d) { /* - * Baseline or compute realted columns: + * Baseline or compute related columns: * * PERF_HPP_DIFF__BASELINE * PERF_HPP_DIFF__DELTA diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index a2f1e53f37a7..01326e370009 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -49,7 +49,7 @@ struct lock_stat { /* * FIXME: evsel__intval() returns u64, - * so address of lockdep_map should be dealed as 64bit. + * so address of lockdep_map should be treated as 64bit. * Is there more better solution? */ void *addr; /* address of lockdep_map, used as ID */ diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2a845d6cac09..0d65c98794a8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -84,6 +84,7 @@ struct report { bool nonany_branch_mode; bool group_set; bool stitch_lbr; + bool disable_order; int max_stack; struct perf_read_values show_threads_values; struct annotation_options annotation_opts; @@ -1296,6 +1297,8 @@ int cmd_report(int argc, const char **argv) OPTS_EVSWITCH(&report.evswitch), OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode, "Sort all blocks by 'Sampled Cycles%'"), + OPT_BOOLEAN(0, "disable-order", &report.disable_order, + "Disable raw trace ordering"), OPT_END() }; struct perf_data data = { @@ -1329,7 +1332,7 @@ int cmd_report(int argc, const char **argv) if (report.mmaps_mode) report.tasks_mode = true; - if (dump_trace) + if (dump_trace && report.disable_order) report.tool.ordered_events = false; if (quiet) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 69c769b04a61..954ce2f594e9 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1712,7 +1712,7 @@ static int perf_sched__process_fork_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - /* run the fork event through the perf machineruy */ + /* run the fork event through the perf machinery */ perf_event__process_fork(tool, event, sample, machine); /* and then run additional processing needed for this command */ diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5915f19cee55..1280cbfad4db 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -314,8 +314,7 @@ static inline struct evsel_script *evsel_script(struct evsel *evsel) return (struct evsel_script *)evsel->priv; } -static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, - struct perf_data *data) +static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data) { struct evsel_script *es = zalloc(sizeof(*es)); @@ -335,7 +334,7 @@ out_free: return NULL; } -static void perf_evsel_script__delete(struct evsel_script *es) +static void evsel_script__delete(struct evsel_script *es) { zfree(&es->filename); fclose(es->fp); @@ -343,7 +342,7 @@ static void perf_evsel_script__delete(struct evsel_script *es) free(es); } -static int perf_evsel_script__fprintf(struct evsel_script *es, FILE *fp) +static int evsel_script__fprintf(struct evsel_script *es, FILE *fp) { struct stat st; @@ -2219,8 +2218,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, if (!evsel->priv) { if (scr->per_event_dump) { - evsel->priv = perf_evsel_script__new(evsel, - scr->session->data); + evsel->priv = evsel_script__new(evsel, scr->session->data); } else { es = zalloc(sizeof(*es)); if (!es) @@ -2475,7 +2473,7 @@ static void perf_script__fclose_per_event_dump(struct perf_script *script) evlist__for_each_entry(evlist, evsel) { if (!evsel->priv) break; - perf_evsel_script__delete(evsel->priv); + evsel_script__delete(evsel->priv); evsel->priv = NULL; } } @@ -2488,14 +2486,14 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) /* * Already setup? I.e. we may be called twice in cases like * Intel PT, one for the intel_pt// and dummy events, then - * for the evsels syntheized from the auxtrace info. + * for the evsels synthesized from the auxtrace info. * * Ses perf_script__process_auxtrace_info. */ if (evsel->priv != NULL) continue; - evsel->priv = perf_evsel_script__new(evsel, script->session->data); + evsel->priv = evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; } @@ -2530,8 +2528,8 @@ static void perf_script__exit_per_event_dump_stats(struct perf_script *script) evlist__for_each_entry(script->session->evlist, evsel) { struct evsel_script *es = evsel->priv; - perf_evsel_script__fprintf(es, stdout); - perf_evsel_script__delete(es); + evsel_script__fprintf(es, stdout); + evsel_script__delete(es); evsel->priv = NULL; } } @@ -3085,7 +3083,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused, * * Fixme: All existing "xxx-record" are all in good formats "-e event ", * which is covered well now. And new parsing code should be added to - * cover the future complexing formats like event groups etc. + * cover the future complex formats like event groups etc. */ static int check_ev_match(char *dir_name, char *scriptname, struct perf_session *session) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2e2e4a8345ea..2a2c15cac80a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -792,6 +792,12 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } evlist__for_each_cpu (evsel_list, i, cpu) { + /* + * bperf calls evsel__open_per_cpu() in bperf__load(), so + * no need to call it again here. + */ + if (target.use_bpf) + break; affinity__set(&affinity, cpu); evlist__for_each_entry(evsel_list, counter) { @@ -925,15 +931,15 @@ try_again_reset: /* * Enable counters and exec the command: */ - t0 = rdclock(); - clock_gettime(CLOCK_MONOTONIC, &ref_time); - if (forks) { evlist__start_workload(evsel_list); err = enable_counters(); if (err) return -1; + t0 = rdclock(); + clock_gettime(CLOCK_MONOTONIC, &ref_time); + if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) status = dispatch_events(forks, timeout, interval, ×); if (child_pid != -1) { @@ -954,6 +960,10 @@ try_again_reset: err = enable_counters(); if (err) return -1; + + t0 = rdclock(); + clock_gettime(CLOCK_MONOTONIC, &ref_time); + status = dispatch_events(forks, timeout, interval, ×); } @@ -1083,6 +1093,11 @@ void perf_stat__set_big_num(int set) stat_config.big_num = (set != 0); } +void perf_stat__set_no_csv_summary(int set) +{ + stat_config.no_csv_summary = (set != 0); +} + static int stat__set_big_num(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset) { @@ -1146,6 +1161,10 @@ static struct option stat_options[] = { #ifdef HAVE_BPF_SKEL OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id", "stat events on existing bpf program id"), + OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf, + "use bpf program to count events"), + OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path", + "path to perf_event_attr map"), #endif OPT_BOOLEAN('a', "all-cpus", &target.system_wide, "system-wide collection from all CPUs"), @@ -1235,6 +1254,8 @@ static struct option stat_options[] = { "threads of same physical core"), OPT_BOOLEAN(0, "summary", &stat_config.summary, "print summary for interval mode"), + OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary, + "don't print 'summary' for CSV summary output"), OPT_BOOLEAN(0, "quiet", &stat_config.quiet, "don't print output (useful with record)"), #ifdef HAVE_LIBPFM @@ -1705,7 +1726,7 @@ static int add_default_attributes(void) bzero(&errinfo, sizeof(errinfo)); if (transaction_run) { /* Handle -T as -M transaction. Once platform specific metrics - * support has been added to the json files, all archictures + * support has been added to the json files, all architectures * will use this approach. To determine transaction support * on an architecture test for such a metric name. */ @@ -2459,7 +2480,7 @@ int cmd_stat(int argc, const char **argv) /* * We synthesize the kernel mmap record just so that older tools * don't emit warnings about not being able to resolve symbols - * due to /proc/sys/kernel/kptr_restrict settings and instear provide + * due to /proc/sys/kernel/kptr_restrict settings and instead provide * a saner message about no samples being in the perf.data file. * * This also serves to suppress a warning about f_header.data.size == 0 diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 3673c04d16b6..173ace43f845 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1607,7 +1607,7 @@ int cmd_top(int argc, const char **argv) if (status) { /* * Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise - * warn the user explicitely. + * warn the user explicitly. */ eprintf(status == ENOSYS ? 1 : 0, verbose, "Couldn't read the cpuid for this machine: %s\n", diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index dded93a2bc89..39eada935d4e 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -146,6 +146,7 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl +check_2 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl for i in $BEAUTY_FILES; do beauty_check $i -B diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index b80437971d80..a262dcd020f4 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -262,7 +262,7 @@ int sys_enter(struct syscall_enter_args *args) /* * Jump to syscall specific augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the - * unagmented tracepoint payload. + * unaugmented tracepoint payload. */ bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); @@ -282,7 +282,7 @@ int sys_exit(struct syscall_exit_args *args) /* * Jump to syscall specific return augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the - * unagmented tracepoint payload. + * unaugmented tracepoint payload. */ bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); /* diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 88108598d6e9..526dcaf9f079 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -390,7 +390,7 @@ jvmti_write_code(void *agent, char const *sym, rec.p.total_size += size; /* - * If JVM is multi-threaded, nultiple concurrent calls to agent + * If JVM is multi-threaded, multiple concurrent calls to agent * may be possible, so protect file writes */ flockfile(fp); @@ -457,7 +457,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, rec.p.total_size = size; /* - * If JVM is multi-threaded, nultiple concurrent calls to agent + * If JVM is multi-threaded, multiple concurrent calls to agent * may be possible, so protect file writes */ flockfile(fp); diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json index 75376c7cc072..913fb200ea52 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json @@ -210,12 +210,24 @@ "BriefDescription": "Attributable Level 2 data TLB refill" }, { + "PublicDescription": "Attributable Level 2 instruction TLB refill.", + "EventCode": "0x2E", + "EventName": "L2I_TLB_REFILL", + "BriefDescription": "Attributable Level 2 instruction TLB refill." + }, + { "PublicDescription": "Attributable Level 2 data or unified TLB access", "EventCode": "0x2F", "EventName": "L2D_TLB", "BriefDescription": "Attributable Level 2 data or unified TLB access" }, { + "PublicDescription": "Attributable Level 2 instruction TLB access.", + "EventCode": "0x30", + "EventName": "L2I_TLB", + "BriefDescription": "Attributable Level 2 instruction TLB access." + }, + { "PublicDescription": "Access to another socket in a multi-socket system", "EventCode": "0x31", "EventName": "REMOTE_ACCESS", @@ -244,5 +256,221 @@ "EventCode": "0x37", "EventName": "LL_CACHE_MISS_RD", "BriefDescription": "Last level cache miss, read" + }, + { + "PublicDescription": "SIMD Instruction architecturally executed.", + "EventCode": "0x8000", + "EventName": "SIMD_INST_RETIRED", + "BriefDescription": "SIMD Instruction architecturally executed." + }, + { + "PublicDescription": "Instruction architecturally executed, SVE.", + "EventCode": "0x8002", + "EventName": "SVE_INST_RETIRED", + "BriefDescription": "Instruction architecturally executed, SVE." + }, + { + "PublicDescription": "Microarchitectural operation, Operations speculatively executed.", + "EventCode": "0x8008", + "EventName": "UOP_SPEC", + "BriefDescription": "Microarchitectural operation, Operations speculatively executed." + }, + { + "PublicDescription": "SVE Math accelerator Operations speculatively executed.", + "EventCode": "0x800E", + "EventName": "SVE_MATH_SPEC", + "BriefDescription": "SVE Math accelerator Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point Operations speculatively executed.", + "EventCode": "0x8010", + "EventName": "FP_SPEC", + "BriefDescription": "Floating-point Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point FMA Operations speculatively executed.", + "EventCode": "0x8028", + "EventName": "FP_FMA_SPEC", + "BriefDescription": "Floating-point FMA Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point reciprocal estimate Operations speculatively executed.", + "EventCode": "0x8034", + "EventName": "FP_RECPE_SPEC", + "BriefDescription": "Floating-point reciprocal estimate Operations speculatively executed." + }, + { + "PublicDescription": "floating-point convert Operations speculatively executed.", + "EventCode": "0x8038", + "EventName": "FP_CVT_SPEC", + "BriefDescription": "floating-point convert Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE integer Operations speculatively executed.", + "EventCode": "0x8043", + "EventName": "ASE_SVE_INT_SPEC", + "BriefDescription": "Advanced SIMD and SVE integer Operations speculatively executed." + }, + { + "PublicDescription": "SVE predicated Operations speculatively executed.", + "EventCode": "0x8074", + "EventName": "SVE_PRED_SPEC", + "BriefDescription": "SVE predicated Operations speculatively executed." + }, + { + "PublicDescription": "SVE MOVPRFX Operations speculatively executed.", + "EventCode": "0x807C", + "EventName": "SVE_MOVPRFX_SPEC", + "BriefDescription": "SVE MOVPRFX Operations speculatively executed." + }, + { + "PublicDescription": "SVE MOVPRFX unfused Operations speculatively executed.", + "EventCode": "0x807F", + "EventName": "SVE_MOVPRFX_U_SPEC", + "BriefDescription": "SVE MOVPRFX unfused Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE load Operations speculatively executed.", + "EventCode": "0x8085", + "EventName": "ASE_SVE_LD_SPEC", + "BriefDescription": "Advanced SIMD and SVE load Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE store Operations speculatively executed.", + "EventCode": "0x8086", + "EventName": "ASE_SVE_ST_SPEC", + "BriefDescription": "Advanced SIMD and SVE store Operations speculatively executed." + }, + { + "PublicDescription": "Prefetch Operations speculatively executed.", + "EventCode": "0x8087", + "EventName": "PRF_SPEC", + "BriefDescription": "Prefetch Operations speculatively executed." + }, + { + "PublicDescription": "General-purpose register load Operations speculatively executed.", + "EventCode": "0x8089", + "EventName": "BASE_LD_REG_SPEC", + "BriefDescription": "General-purpose register load Operations speculatively executed." + }, + { + "PublicDescription": "General-purpose register store Operations speculatively executed.", + "EventCode": "0x808A", + "EventName": "BASE_ST_REG_SPEC", + "BriefDescription": "General-purpose register store Operations speculatively executed." + }, + { + "PublicDescription": "SVE unpredicated load register Operations speculatively executed.", + "EventCode": "0x8091", + "EventName": "SVE_LDR_REG_SPEC", + "BriefDescription": "SVE unpredicated load register Operations speculatively executed." + }, + { + "PublicDescription": "SVE unpredicated store register Operations speculatively executed.", + "EventCode": "0x8092", + "EventName": "SVE_STR_REG_SPEC", + "BriefDescription": "SVE unpredicated store register Operations speculatively executed." + }, + { + "PublicDescription": "SVE load predicate register Operations speculatively executed.", + "EventCode": "0x8095", + "EventName": "SVE_LDR_PREG_SPEC", + "BriefDescription": "SVE load predicate register Operations speculatively executed." + }, + { + "PublicDescription": "SVE store predicate register Operations speculatively executed.", + "EventCode": "0x8096", + "EventName": "SVE_STR_PREG_SPEC", + "BriefDescription": "SVE store predicate register Operations speculatively executed." + }, + { + "PublicDescription": "SVE contiguous prefetch element Operations speculatively executed.", + "EventCode": "0x809F", + "EventName": "SVE_PRF_CONTIG_SPEC", + "BriefDescription": "SVE contiguous prefetch element Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed.", + "EventCode": "0x80A5", + "EventName": "ASE_SVE_LD_MULTI_SPEC", + "BriefDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed.", + "EventCode": "0x80A6", + "EventName": "ASE_SVE_ST_MULTI_SPEC", + "BriefDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed." + }, + { + "PublicDescription": "SVE gather-load Operations speculatively executed.", + "EventCode": "0x80AD", + "EventName": "SVE_LD_GATHER_SPEC", + "BriefDescription": "SVE gather-load Operations speculatively executed." + }, + { + "PublicDescription": "SVE scatter-store Operations speculatively executed.", + "EventCode": "0x80AE", + "EventName": "SVE_ST_SCATTER_SPEC", + "BriefDescription": "SVE scatter-store Operations speculatively executed." + }, + { + "PublicDescription": "SVE gather-prefetch Operations speculatively executed.", + "EventCode": "0x80AF", + "EventName": "SVE_PRF_GATHER_SPEC", + "BriefDescription": "SVE gather-prefetch Operations speculatively executed." + }, + { + "PublicDescription": "SVE First-fault load Operations speculatively executed.", + "EventCode": "0x80BC", + "EventName": "SVE_LDFF_SPEC", + "BriefDescription": "SVE First-fault load Operations speculatively executed." + }, + { + "PublicDescription": "Scalable floating-point element Operations speculatively executed.", + "EventCode": "0x80C0", + "EventName": "FP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable floating-point element Operations speculatively executed.", + "EventCode": "0x80C1", + "EventName": "FP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable half-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C2", + "EventName": "FP_HP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable half-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable half-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C3", + "EventName": "FP_HP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable half-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable single-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C4", + "EventName": "FP_SP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable single-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable single-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C5", + "EventName": "FP_SP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable single-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable double-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C6", + "EventName": "FP_DP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable double-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable double-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C7", + "EventName": "FP_DP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed." } ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json new file mode 100644 index 000000000000..b011af11bf94 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json new file mode 100644 index 000000000000..084e88d7df73 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json @@ -0,0 +1,62 @@ +[ + { + "PublicDescription": "This event counts read transactions from tofu controller to measured CMG.", + "EventCode": "0x314", + "EventName": "BUS_READ_TOTAL_TOFU", + "BriefDescription": "This event counts read transactions from tofu controller to measured CMG." + }, + { + "PublicDescription": "This event counts read transactions from PCI controller to measured CMG.", + "EventCode": "0x315", + "EventName": "BUS_READ_TOTAL_PCI", + "BriefDescription": "This event counts read transactions from PCI controller to measured CMG." + }, + { + "PublicDescription": "This event counts read transactions from measured CMG local memory to measured CMG.", + "EventCode": "0x316", + "EventName": "BUS_READ_TOTAL_MEM", + "BriefDescription": "This event counts read transactions from measured CMG local memory to measured CMG." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0.", + "EventCode": "0x318", + "EventName": "BUS_WRITE_TOTAL_CMG0", + "BriefDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1.", + "EventCode": "0x319", + "EventName": "BUS_WRITE_TOTAL_CMG1", + "BriefDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2.", + "EventCode": "0x31A", + "EventName": "BUS_WRITE_TOTAL_CMG2", + "BriefDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3.", + "EventCode": "0x31B", + "EventName": "BUS_WRITE_TOTAL_CMG3", + "BriefDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to tofu controller.", + "EventCode": "0x31C", + "EventName": "BUS_WRITE_TOTAL_TOFU", + "BriefDescription": "This event counts write transactions from measured CMG to tofu controller." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to PCI controller.", + "EventCode": "0x31D", + "EventName": "BUS_WRITE_TOTAL_PCI", + "BriefDescription": "This event counts write transactions from measured CMG to PCI controller." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to measured CMG local memory.", + "EventCode": "0x31E", + "EventName": "BUS_WRITE_TOTAL_MEM", + "BriefDescription": "This event counts write transactions from measured CMG to measured CMG local memory." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json new file mode 100644 index 000000000000..2e341a951a10 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json @@ -0,0 +1,128 @@ +[ + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2I_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "L2I_TLB" + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch.", + "EventCode": "0x49", + "EventName": "L1D_CACHE_REFILL_PRF", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch.", + "EventCode": "0x59", + "EventName": "L2D_CACHE_REFILL_PRF", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by demand access.", + "EventCode": "0x200", + "EventName": "L1D_CACHE_REFILL_DM", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand access." + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch.", + "EventCode": "0x202", + "EventName": "L1D_CACHE_REFILL_HWPRF", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch." + }, + { + "PublicDescription": "This event counts outstanding L1D cache miss requests per cycle.", + "EventCode": "0x208", + "EventName": "L1_MISS_WAIT", + "BriefDescription": "This event counts outstanding L1D cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts outstanding L1I cache miss requests per cycle.", + "EventCode": "0x209", + "EventName": "L1I_MISS_WAIT", + "BriefDescription": "This event counts outstanding L1I cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by demand access.", + "EventCode": "0x300", + "EventName": "L2D_CACHE_REFILL_DM", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand access." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch.", + "EventCode": "0x302", + "EventName": "L2D_CACHE_REFILL_HWPRF", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch." + }, + { + "PublicDescription": "This event counts outstanding L2 cache miss requests per cycle.", + "EventCode": "0x308", + "EventName": "L2_MISS_WAIT", + "BriefDescription": "This event counts outstanding L2 cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts the number of times of L2 cache miss.", + "EventCode": "0x309", + "EventName": "L2_MISS_COUNT", + "BriefDescription": "This event counts the number of times of L2 cache miss." + }, + { + "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", + "EventCode": "0x325", + "EventName": "L2D_SWAP_DM", + "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", + "EventCode": "0x326", + "EventName": "L2D_CACHE_MIBMCH_PRF", + "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access." + }, + { + "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", + "EventCode": "0x396", + "EventName": "L2D_CACHE_SWAP_LOCAL", + "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts energy consumption per cycle of L2 cache.", + "EventCode": "0x3E0", + "EventName": "EA_L2", + "BriefDescription": "This event counts energy consumption per cycle of L2 cache." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json new file mode 100644 index 000000000000..b16484628290 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json new file mode 100644 index 000000000000..348749c154c0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json @@ -0,0 +1,29 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json new file mode 100644 index 000000000000..6d258b1080cf --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json @@ -0,0 +1,131 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "PublicDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction.", + "EventCode": "0x9F", + "EventName": "DCZVA_SPEC", + "BriefDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction." + }, + { + "PublicDescription": "This event counts architecturally executed floating-point move operations.", + "EventCode": "0x105", + "EventName": "FP_MV_SPEC", + "BriefDescription": "This event counts architecturally executed floating-point move operations." + }, + { + "PublicDescription": "This event counts architecturally executed operations that using predicate register.", + "EventCode": "0x108", + "EventName": "PRD_SPEC", + "BriefDescription": "This event counts architecturally executed operations that using predicate register." + }, + { + "PublicDescription": "This event counts architecturally executed inter-element manipulation operations.", + "EventCode": "0x109", + "EventName": "IEL_SPEC", + "BriefDescription": "This event counts architecturally executed inter-element manipulation operations." + }, + { + "PublicDescription": "This event counts architecturally executed inter-register manipulation operations.", + "EventCode": "0x10A", + "EventName": "IREG_SPEC", + "BriefDescription": "This event counts architecturally executed inter-register manipulation operations." + }, + { + "PublicDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers.", + "EventCode": "0x112", + "EventName": "FP_LD_SPEC", + "BriefDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers." + }, + { + "PublicDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers.", + "EventCode": "0x113", + "EventName": "FP_ST_SPEC", + "BriefDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers." + }, + { + "PublicDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations.", + "EventCode": "0x11A", + "EventName": "BC_LD_SPEC", + "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations." + }, + { + "PublicDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction.", + "EventCode": "0x121", + "EventName": "EFFECTIVE_INST_SPEC", + "BriefDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction." + }, + { + "PublicDescription": "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode.", + "EventCode": "0x123", + "EventName": "PRE_INDEX_SPEC", + "BriefDescription": "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode." + }, + { + "PublicDescription": "This event counts architecturally executed operations that uses 'post-index' as its addressing mode.", + "EventCode": "0x124", + "EventName": "POST_INDEX_SPEC", + "BriefDescription": "This event counts architecturally executed operations that uses 'post-index' as its addressing mode." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json new file mode 100644 index 000000000000..c1f6479e92b4 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json @@ -0,0 +1,8 @@ +[ + { + "PublicDescription": "This event counts energy consumption per cycle of CMG local memory.", + "EventCode": "0x3E8", + "EventName": "EA_MEMORY", + "BriefDescription": "This event counts energy consumption per cycle of CMG local memory." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json new file mode 100644 index 000000000000..10c823ac26cc --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json @@ -0,0 +1,188 @@ +[ + { + "PublicDescription": "This event counts the occurrence count of the micro-operation split.", + "EventCode": "0x139", + "EventName": "UOP_SPLIT", + "BriefDescription": "This event counts the occurrence count of the micro-operation split." + }, + { + "PublicDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access.", + "EventCode": "0x180", + "EventName": "LD_COMP_WAIT_L2_MISS", + "BriefDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access.", + "EventCode": "0x181", + "EventName": "LD_COMP_WAIT_L2_MISS_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access.", + "EventCode": "0x182", + "EventName": "LD_COMP_WAIT_L1_MISS", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access.", + "EventCode": "0x183", + "EventName": "LD_COMP_WAIT_L1_MISS_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access.", + "EventCode": "0x184", + "EventName": "LD_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access.", + "EventCode": "0x185", + "EventName": "LD_COMP_WAIT_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port.", + "EventCode": "0x186", + "EventName": "LD_COMP_WAIT_PFP_BUSY", + "BriefDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port." + }, + { + "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation.", + "EventCode": "0x187", + "EventName": "LD_COMP_WAIT_PFP_BUSY_EX", + "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation." + }, + { + "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction.", + "EventCode": "0x188", + "EventName": "LD_COMP_WAIT_PFP_BUSY_SWPF", + "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction.", + "EventCode": "0x189", + "EventName": "EU_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction.", + "EventCode": "0x18A", + "EventName": "FL_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction.", + "EventCode": "0x18B", + "EventName": "BR_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty.", + "EventCode": "0x18C", + "EventName": "ROB_EMPTY", + "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full.", + "EventCode": "0x18D", + "EventName": "ROB_EMPTY_STQ_BUSY", + "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full." + }, + { + "PublicDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction.", + "EventCode": "0x18E", + "EventName": "WFE_WFI_CYCLE", + "BriefDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only.", + "EventCode": "0x190", + "EventName": "_0INST_COMMIT", + "BriefDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only." + }, + { + "PublicDescription": "This event counts every cycle that one instruction is committed.", + "EventCode": "0x191", + "EventName": "_1INST_COMMIT", + "BriefDescription": "This event counts every cycle that one instruction is committed." + }, + { + "PublicDescription": "This event counts every cycle that two instructions are committed.", + "EventCode": "0x192", + "EventName": "_2INST_COMMIT", + "BriefDescription": "This event counts every cycle that two instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that three instructions are committed.", + "EventCode": "0x193", + "EventName": "_3INST_COMMIT", + "BriefDescription": "This event counts every cycle that three instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that four instructions are committed.", + "EventCode": "0x194", + "EventName": "_4INST_COMMIT", + "BriefDescription": "This event counts every cycle that four instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that only any micro-operations are committed.", + "EventCode": "0x198", + "EventName": "UOP_ONLY_COMMIT", + "BriefDescription": "This event counts every cycle that only any micro-operations are committed." + }, + { + "PublicDescription": "This event counts every cycle that only the MOVPRFX instruction is committed.", + "EventCode": "0x199", + "EventName": "SINGLE_MOVPRFX_COMMIT", + "BriefDescription": "This event counts every cycle that only the MOVPRFX instruction is committed." + }, + { + "PublicDescription": "This event counts energy consumption per cycle of core.", + "EventCode": "0x1E0", + "EventName": "EA_CORE", + "BriefDescription": "This event counts energy consumption per cycle of core." + }, + { + "PublicDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x230", + "EventName": "L1HWPF_STREAM_PF", + "BriefDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x231", + "EventName": "L1HWPF_INJ_ALLOC_PF", + "BriefDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x232", + "EventName": "L1HWPF_INJ_NOALLOC_PF", + "BriefDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher.", + "EventCode": "0x233", + "EventName": "L2HWPF_STREAM_PF", + "BriefDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher." + }, + { + "PublicDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", + "EventCode": "0x234", + "EventName": "L2HWPF_INJ_ALLOC_PF", + "BriefDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", + "EventCode": "0x235", + "EventName": "L2HWPF_INJ_NOALLOC_PF", + "BriefDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts prefetch requests to L2 cache generated by the other causes.", + "EventCode": "0x236", + "EventName": "L2HWPF_OTHER", + "BriefDescription": "This event counts prefetch requests to L2 cache generated by the other causes." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json new file mode 100644 index 000000000000..dd7c97a9972b --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json @@ -0,0 +1,194 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "PublicDescription": "This event counts valid cycles of EAGA pipeline.", + "EventCode": "0x1A0", + "EventName": "EAGA_VAL", + "BriefDescription": "This event counts valid cycles of EAGA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EAGB pipeline.", + "EventCode": "0x1A1", + "EventName": "EAGB_VAL", + "BriefDescription": "This event counts valid cycles of EAGB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EXA pipeline.", + "EventCode": "0x1A2", + "EventName": "EXA_VAL", + "BriefDescription": "This event counts valid cycles of EXA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EXB pipeline.", + "EventCode": "0x1A3", + "EventName": "EXB_VAL", + "BriefDescription": "This event counts valid cycles of EXB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of FLA pipeline.", + "EventCode": "0x1A4", + "EventName": "FLA_VAL", + "BriefDescription": "This event counts valid cycles of FLA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of FLB pipeline.", + "EventCode": "0x1A5", + "EventName": "FLB_VAL", + "BriefDescription": "This event counts valid cycles of FLB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of PRX pipeline.", + "EventCode": "0x1A6", + "EventName": "PRX_VAL", + "BriefDescription": "This event counts valid cycles of PRX pipeline." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x1B4", + "EventName": "FLA_VAL_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x1B5", + "EventName": "FLB_VAL_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts valid cycles of L1D cache pipeline#0.", + "EventCode": "0x240", + "EventName": "L1_PIPE0_VAL", + "BriefDescription": "This event counts valid cycles of L1D cache pipeline#0." + }, + { + "PublicDescription": "This event counts valid cycles of L1D cache pipeline#1.", + "EventCode": "0x241", + "EventName": "L1_PIPE1_VAL", + "BriefDescription": "This event counts valid cycles of L1D cache pipeline#1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1.", + "EventCode": "0x250", + "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_SCE", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1.", + "EventCode": "0x251", + "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_PFE", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1.", + "EventCode": "0x252", + "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_SCE", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1.", + "EventCode": "0x253", + "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_PFE", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts completed requests in L1D cache pipeline#0.", + "EventCode": "0x260", + "EventName": "L1_PIPE0_COMP", + "BriefDescription": "This event counts completed requests in L1D cache pipeline#0." + }, + { + "PublicDescription": "This event counts completed requests in L1D cache pipeline#1.", + "EventCode": "0x261", + "EventName": "L1_PIPE1_COMP", + "BriefDescription": "This event counts completed requests in L1D cache pipeline#1." + }, + { + "PublicDescription": "This event counts completed requests in L1I cache pipeline.", + "EventCode": "0x268", + "EventName": "L1I_PIPE_COMP", + "BriefDescription": "This event counts completed requests in L1I cache pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of L1I cache pipeline.", + "EventCode": "0x269", + "EventName": "L1I_PIPE_VAL", + "BriefDescription": "This event counts valid cycles of L1I cache pipeline." + }, + { + "PublicDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock.", + "EventCode": "0x274", + "EventName": "L1_PIPE_ABORT_STLD_INTLK", + "BriefDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0.", + "EventCode": "0x2A0", + "EventName": "L1_PIPE0_VAL_IU_NOT_SEC0", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0.", + "EventCode": "0x2A1", + "EventName": "L1_PIPE1_VAL_IU_NOT_SEC0", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined.", + "EventCode": "0x2B0", + "EventName": "L1_PIPE_COMP_GATHER_2FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined.", + "EventCode": "0x2B1", + "EventName": "L1_PIPE_COMP_GATHER_1FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0.", + "EventCode": "0x2B2", + "EventName": "L1_PIPE_COMP_GATHER_0FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0." + }, + { + "PublicDescription": "This event counts the number of flows of the scatter instructions.", + "EventCode": "0x2B3", + "EventName": "L1_PIPE_COMP_SCATTER_1FLOW", + "BriefDescription": "This event counts the number of flows of the scatter instructions." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x2B8", + "EventName": "L1_PIPE0_COMP_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x2B9", + "EventName": "L1_PIPE1_COMP_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts valid cycles of L2 cache pipeline.", + "EventCode": "0x330", + "EventName": "L2_PIPE_VAL", + "BriefDescription": "This event counts valid cycles of L2 cache pipeline." + }, + { + "PublicDescription": "This event counts completed requests in L2 cache pipeline.", + "EventCode": "0x350", + "EventName": "L2_PIPE_COMP_ALL", + "BriefDescription": "This event counts completed requests in L2 cache pipeline." + }, + { + "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", + "EventCode": "0x370", + "EventName": "L2_PIPE_COMP_PF_L2MIB_MCH", + "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json new file mode 100644 index 000000000000..dc1b95e42c32 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json @@ -0,0 +1,110 @@ +[ + { + "ArchStdEvent": "SIMD_INST_RETIRED" + }, + { + "ArchStdEvent": "SVE_INST_RETIRED" + }, + { + "ArchStdEvent": "UOP_SPEC" + }, + { + "ArchStdEvent": "SVE_MATH_SPEC" + }, + { + "ArchStdEvent": "FP_SPEC" + }, + { + "ArchStdEvent": "FP_FMA_SPEC" + }, + { + "ArchStdEvent": "FP_RECPE_SPEC" + }, + { + "ArchStdEvent": "FP_CVT_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_SPEC" + }, + { + "ArchStdEvent": "SVE_MOVPRFX_SPEC" + }, + { + "ArchStdEvent": "SVE_MOVPRFX_U_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_LD_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_ST_SPEC" + }, + { + "ArchStdEvent": "PRF_SPEC" + }, + { + "ArchStdEvent": "BASE_LD_REG_SPEC" + }, + { + "ArchStdEvent": "BASE_ST_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_LDR_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_STR_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_LDR_PREG_SPEC" + }, + { + "ArchStdEvent": "SVE_STR_PREG_SPEC" + }, + { + "ArchStdEvent": "SVE_PRF_CONTIG_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_LD_MULTI_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_ST_MULTI_SPEC" + }, + { + "ArchStdEvent": "SVE_LD_GATHER_SPEC" + }, + { + "ArchStdEvent": "SVE_ST_SCATTER_SPEC" + }, + { + "ArchStdEvent": "SVE_PRF_GATHER_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_SPEC" + }, + { + "ArchStdEvent": "FP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_HP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_HP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_SP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_SP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_DP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_DP_FIXED_OPS_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json new file mode 100644 index 000000000000..dda8e59149d2 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -0,0 +1,233 @@ +[ + { + "MetricExpr": "FETCH_BUBBLE / (4 * CPU_CYCLES)", + "PublicDescription": "Frontend bound L1 topdown metric", + "BriefDescription": "Frontend bound L1 topdown metric", + "MetricGroup": "TopDownL1", + "MetricName": "frontend_bound" + }, + { + "MetricExpr": "(INST_SPEC - INST_RETIRED) / (4 * CPU_CYCLES)", + "PublicDescription": "Bad Speculation L1 topdown metric", + "BriefDescription": "Bad Speculation L1 topdown metric", + "MetricGroup": "TopDownL1", + "MetricName": "bad_speculation" + }, + { + "MetricExpr": "INST_RETIRED / (CPU_CYCLES * 4)", + "PublicDescription": "Retiring L1 topdown metric", + "BriefDescription": "Retiring L1 topdown metric", + "MetricGroup": "TopDownL1", + "MetricName": "retiring" + }, + { + "MetricExpr": "1 - (frontend_bound + bad_speculation + retiring)", + "PublicDescription": "Backend Bound L1 topdown metric", + "BriefDescription": "Backend Bound L1 topdown metric", + "MetricGroup": "TopDownL1", + "MetricName": "backend_bound" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x201d@ / CPU_CYCLES", + "PublicDescription": "Fetch latency bound L2 topdown metric", + "BriefDescription": "Fetch latency bound L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "fetch_latency_bound" + }, + { + "MetricExpr": "frontend_bound - fetch_latency_bound", + "PublicDescription": "Fetch bandwidth bound L2 topdown metric", + "BriefDescription": "Fetch bandwidth bound L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "fetch_bandwidth_bound" + }, + { + "MetricExpr": "(bad_speculation * BR_MIS_PRED) / (BR_MIS_PRED + armv8_pmuv3_0@event\\=0x2013@)", + "PublicDescription": "Branch mispredicts L2 topdown metric", + "BriefDescription": "Branch mispredicts L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "branch_mispredicts" + }, + { + "MetricExpr": "bad_speculation - branch_mispredicts", + "PublicDescription": "Machine clears L2 topdown metric", + "BriefDescription": "Machine clears L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "machine_clears" + }, + { + "MetricExpr": "(EXE_STALL_CYCLE - (MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@)) / CPU_CYCLES", + "PublicDescription": "Core bound L2 topdown metric", + "BriefDescription": "Core bound L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "core_bound" + }, + { + "MetricExpr": "(MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@) / CPU_CYCLES", + "PublicDescription": "Memory bound L2 topdown metric", + "BriefDescription": "Memory bound L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "memory_bound" + }, + { + "MetricExpr": "(((L2I_TLB - L2I_TLB_REFILL) * 15) + (L2I_TLB_REFILL * 100)) / CPU_CYCLES", + "PublicDescription": "Idle by itlb miss L3 topdown metric", + "BriefDescription": "Idle by itlb miss L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "idle_by_itlb_miss" + }, + { + "MetricExpr": "(((L2I_CACHE - L2I_CACHE_REFILL) * 15) + (L2I_CACHE_REFILL * 100)) / CPU_CYCLES", + "PublicDescription": "Idle by icache miss L3 topdown metric", + "BriefDescription": "Idle by icache miss L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "idle_by_icache_miss" + }, + { + "MetricExpr": "(BR_MIS_PRED * 5) / CPU_CYCLES", + "PublicDescription": "BP misp flush L3 topdown metric", + "BriefDescription": "BP misp flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "bp_misp_flush" + }, + { + "MetricExpr": "(armv8_pmuv3_0@event\\=0x2013@ * 5) / CPU_CYCLES", + "PublicDescription": "OOO flush L3 topdown metric", + "BriefDescription": "OOO flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "ooo_flush" + }, + { + "MetricExpr": "(armv8_pmuv3_0@event\\=0x1001@ * 5) / CPU_CYCLES", + "PublicDescription": "Static predictor flush L3 topdown metric", + "BriefDescription": "Static predictor flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "sp_flush" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x1010@ / BR_MIS_PRED", + "PublicDescription": "Indirect branch L3 topdown metric", + "BriefDescription": "Indirect branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "indirect_branch" + }, + { + "MetricExpr": "(armv8_pmuv3_0@event\\=0x1014@ + armv8_pmuv3_0@event\\=0x1018@) / BR_MIS_PRED", + "PublicDescription": "Push branch L3 topdown metric", + "BriefDescription": "Push branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "push_branch" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x100c@ / BR_MIS_PRED", + "PublicDescription": "Pop branch L3 topdown metric", + "BriefDescription": "Pop branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "pop_branch" + }, + { + "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1014@ - armv8_pmuv3_0@event\\=0x1018@ - armv8_pmuv3_0@event\\=0x100c@) / BR_MIS_PRED", + "PublicDescription": "Other branch L3 topdown metric", + "BriefDescription": "Other branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "other_branch" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x2012@ / armv8_pmuv3_0@event\\=0x2013@", + "PublicDescription": "Nuke flush L3 topdown metric", + "BriefDescription": "Nuke flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "nuke_flush" + }, + { + "MetricExpr": "1 - nuke_flush", + "PublicDescription": "Other flush L3 topdown metric", + "BriefDescription": "Other flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "other_flush" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x2010@ / CPU_CYCLES", + "PublicDescription": "Sync stall L3 topdown metric", + "BriefDescription": "Sync stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "sync_stall" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x2004@ / CPU_CYCLES", + "PublicDescription": "Rob stall L3 topdown metric", + "BriefDescription": "Rob stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "rob_stall" + }, + { + "MetricExpr": "(armv8_pmuv3_0@event\\=0x2006@ + armv8_pmuv3_0@event\\=0x2007@ + armv8_pmuv3_0@event\\=0x2008@) / CPU_CYCLES", + "PublicDescription": "Ptag stall L3 topdown metric", + "BriefDescription": "Ptag stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "ptag_stall" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x201e@ / CPU_CYCLES", + "PublicDescription": "SaveOpQ stall L3 topdown metric", + "BriefDescription": "SaveOpQ stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "saveopq_stall" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x2005@ / CPU_CYCLES", + "PublicDescription": "PC buffer stall L3 topdown metric", + "BriefDescription": "PC buffer stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "pc_buffer_stall" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x7002@ / CPU_CYCLES", + "PublicDescription": "Divider L3 topdown metric", + "BriefDescription": "Divider L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "divider" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x7003@ / CPU_CYCLES", + "PublicDescription": "FSU stall L3 topdown metric", + "BriefDescription": "FSU stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "fsu_stall" + }, + { + "MetricExpr": "core_bound - divider - fsu_stall", + "PublicDescription": "EXE ports util L3 topdown metric", + "BriefDescription": "EXE ports util L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "exe_ports_util" + }, + { + "MetricExpr": "(MEM_STALL_ANYLOAD - MEM_STALL_L1MISS) / CPU_CYCLES", + "PublicDescription": "L1 bound L3 topdown metric", + "BriefDescription": "L1 bound L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "l1_bound" + }, + { + "MetricExpr": "(MEM_STALL_L1MISS - MEM_STALL_L2MISS) / CPU_CYCLES", + "PublicDescription": "L2 bound L3 topdown metric", + "BriefDescription": "L2 bound L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "l2_bound" + }, + { + "MetricExpr": "MEM_STALL_L2MISS / CPU_CYCLES", + "PublicDescription": "Mem bound L3 topdown metric", + "BriefDescription": "Mem bound L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "mem_bound" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x7005@ / CPU_CYCLES", + "PublicDescription": "Store bound L3 topdown metric", + "BriefDescription": "Store bound L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "store_bound" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 0d609149b82a..c43591d831b8 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -20,5 +20,6 @@ 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core +0x00000000460f0010,v1,fujitsu/a64fx,core 0x00000000480fd010,v1,hisilicon/hip08,core 0x00000000500f0000,v1,ampere/emag,core diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json index fc4aa6c2ddc9..4e25525b7da6 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -885,37 +885,37 @@ "MetricName": "flush_rate_percent" }, { - "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_11to14_slots_percent" }, { - "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_15to17_slots_percent" }, { - "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_18plus_slots_percent" }, { - "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_1to2_slots_percent" }, { - "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_3to6_slots_percent" }, { - "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_7to10_slots_percent" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index f8784c608479..db86ba36224d 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -1210,156 +1210,24 @@ "MetricName": "inst_from_rmem_percent" }, { - "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_co_m_rd_util" - }, - { - "BriefDescription": "L2 dcache invalidates per run inst (per core)", - "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_dc_inv_rate_percent" - }, - { "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)", "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100", "MetricGroup": "l2_stats", "MetricName": "l2_dem_ld_disp_percent" }, { - "BriefDescription": "L2 Icache invalidates per run inst (per core)", - "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ic_inv_rate_percent" - }, - { - "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", - "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_inst_miss_ratio_percent" - }, - { - "BriefDescription": "Average number of cycles between L2 Load hits", - "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_hit_frequency" - }, - { - "BriefDescription": "Average number of cycles between L2 Load misses", - "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_miss_frequency" - }, - { - "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", - "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_miss_ratio_percent" - }, - { - "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_rd_util" - }, - { - "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks", - "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ldmiss_wr_util" - }, - { - "BriefDescription": "L2 local pump prediction success", - "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_local_pred_correct_percent" - }, - { - "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs", - "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_mod_co_percent" - }, - { - "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts", - "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_ld_disp_addr_fail_percent" - }, - { - "BriefDescription": "% of L2 Load RC dispatch attempts that failed", - "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_ld_disp_fail_percent" - }, - { - "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts", - "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_st_disp_addr_fail_percent" - }, - { - "BriefDescription": "% of L2 Store RC dispatch attempts that failed", - "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_st_disp_fail_percent" - }, - { - "BriefDescription": "L2 Cache Read Utilization (per core)", - "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)", - "MetricGroup": "l2_stats", - "MetricName": "l2_rd_util_percent" - }, - { - "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", - "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_shr_co_percent" - }, - { "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", "MetricGroup": "l2_stats", "MetricName": "l2_st_miss_ratio_percent" }, { - "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_st_rd_util" - }, - { "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks", "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100", "MetricGroup": "l2_stats", "MetricName": "l2_st_wr_util" }, { - "BriefDescription": "L2 Cache Write Utilization (per core)", - "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)", - "MetricGroup": "l2_stats", - "MetricName": "l2_wr_util_percent" - }, - { - "BriefDescription": "Average number of cycles between L3 Load hits", - "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2", - "MetricGroup": "l3_stats", - "MetricName": "l3_ld_hit_frequency" - }, - { - "BriefDescription": "Average number of cycles between L3 Load misses", - "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2", - "MetricGroup": "l3_stats", - "MetricName": "l3_ld_miss_frequency" - }, - { - "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle", - "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8", - "MetricGroup": "l3_stats", - "MetricName": "l3_wi_usage" - }, - { "BriefDescription": "Average icache miss latency", "MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ", "MetricGroup": "latency", @@ -1823,7 +1691,7 @@ "MetricName": "custom_secs" }, { - "BriefDescription": "Percentage Cycles atleast one instruction dispatched", + "BriefDescription": "Percentage Cycles at least one instruction dispatched", "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", "MetricName": "cycles_atleast_one_inst_dispatched_percent" }, diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json index 4ea7ec4f496e..0d46cb82bd52 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -38,31 +38,31 @@ "EventName": "ic_fetch_stall.ic_stall_any", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ic_fetch_stall.ic_stall_dq_empty", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_fetch_stall.ic_stall_back_pressure", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ic_cache_inval.l2_invalidating_probe", "EventCode": "0x8c", "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_cache_inval.fill_invalidated", "EventCode": "0x8c", "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "bp_tlb_rel", @@ -97,25 +97,25 @@ "EventName": "l2_request_g1.change_to_x", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_request_g1.prefetch_l2_cmd", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_request_g1.l2_hw_pf", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_request_g1.group2", "EventCode": "0x60", "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_request_g1.all_no_prefetch", @@ -150,31 +150,31 @@ "EventName": "l2_request_g2.ic_rd_sized_nc", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_request_g2.smc_inval", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_request_g2.bus_locks_originator", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_request_g2.bus_locks_responses", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_latency.l2_cycles_waiting_on_fills", "EventCode": "0x62", "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_wcb_req.wcb_write", @@ -192,13 +192,13 @@ "EventName": "l2_wcb_req.zero_byte_store", "EventCode": "0x63", "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_wcb_req.cl_zero", "EventCode": "0x63", "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_cs", @@ -228,37 +228,37 @@ "EventName": "l2_cache_req_stat.ls_rd_blk_c", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_x", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_s", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_cache_req_stat.ic_fill_miss", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_cache_req_stat.ic_access_in_l2", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", - "UMask": "0x7" + "UMask": "0x07" }, { "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", - "UMask": "0x9" + "UMask": "0x09" }, { "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", @@ -270,12 +270,12 @@ "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_pf_hit_l2", "EventCode": "0x70", - "BriefDescription": "L2 prefetch hit in L2.", + "BriefDescription": "L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead.", "UMask": "0xff" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json index 653b11b23399..4dceeabc4a9f 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json @@ -68,21 +68,21 @@ "EventCode": "0xcb", "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ex_ret_mmx_fp_instr.mmx_instr", "EventCode": "0xcb", "BriefDescription": "MMX instructions.", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ex_ret_mmx_fp_instr.x87_instr", "EventCode": "0xcb", "BriefDescription": "x87 instructions.", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ex_ret_cond", @@ -103,19 +103,19 @@ "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ex_ret_fus_brnch_inst", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json index a35542bd3b36..3995b528ebd6 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json @@ -39,35 +39,35 @@ "EventCode": "0x00", "BriefDescription": "Total number uOps assigned to all fpu pipes.", "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to all pipes.", - "UMask": "0xf" + "UMask": "0x0f" }, { "EventName": "fpu_pipe_assignment.total3", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 3.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fpu_pipe_assignment.total2", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 2.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fpu_pipe_assignment.total1", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 1.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fpu_pipe_assignment.total0", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 0.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_sched_empty", @@ -79,28 +79,28 @@ "EventCode": "0x02", "BriefDescription": "All Ops.", "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.", - "UMask": "0x7" + "UMask": "0x07" }, { "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", "EventCode": "0x02", "BriefDescription": "Divide and square root Ops.", "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_retx87_fp_ops.mul_ops", "EventCode": "0x02", "BriefDescription": "Multiply Ops.", "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_retx87_fp_ops.add_sub_ops", "EventCode": "0x02", "BriefDescription": "Add/subtract Ops.", "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_ret_sse_avx_ops.all", @@ -142,83 +142,83 @@ "EventCode": "0x03", "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_ret_sse_avx_ops.sp_div_flops", "EventCode": "0x03", "BriefDescription": "Single-precision divide/square root FLOPS.", "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", "EventCode": "0x03", "BriefDescription": "Single-precision multiply FLOPS.", "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", "EventCode": "0x03", "BriefDescription": "Single-precision add/subtract FLOPS.", "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_num_mov_elim_scal_op.optimized", "EventCode": "0x04", "BriefDescription": "Number of Scalar Ops optimized.", "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_num_mov_elim_scal_op.opt_potential", "EventCode": "0x04", "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", "EventCode": "0x04", "BriefDescription": "Number of SSE Move Ops eliminated.", "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", "EventCode": "0x04", "BriefDescription": "Number of SSE Move Ops.", "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_retired_ser_ops.x87_ctrl_ret", "EventCode": "0x05", "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_retired_ser_ops.x87_bot_ret", "EventCode": "0x05", "BriefDescription": "x87 bottom-executing uOps retired.", "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_retired_ser_ops.sse_ctrl_ret", "EventCode": "0x05", "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_retired_ser_ops.sse_bot_ret", "EventCode": "0x05", "BriefDescription": "SSE bottom-executing uOps retired.", "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.", - "UMask": "0x1" + "UMask": "0x01" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json index b33a3c308019..385022fb026e 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json @@ -3,25 +3,25 @@ "EventName": "ls_locks.bus_lock", "EventCode": "0x25", "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_dispatch.ld_st_dispatch", "EventCode": "0x29", "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_dispatch.store_dispatch", "EventCode": "0x29", "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_dispatch.ld_dispatch", "EventCode": "0x29", "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_stlf", @@ -37,13 +37,13 @@ "EventName": "ls_mab_alloc.dc_prefetcher", "EventCode": "0x41", "BriefDescription": "LS MAB allocates by type - DC prefetcher.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_mab_alloc.stores", "EventCode": "0x41", "BriefDescription": "LS MAB allocates by type - stores.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_mab_alloc.loads", @@ -85,61 +85,61 @@ "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 1G size.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 2M size.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 32K size.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 4K size.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_tablewalker.iside", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks on I-side.", - "UMask": "0xc" + "UMask": "0x0c" }, { "EventName": "ls_tablewalker.ic_type1", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks IC Type 1.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_tablewalker.ic_type0", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks IC Type 0.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_tablewalker.dside", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks on D-side.", - "UMask": "0x3" + "UMask": "0x03" }, { "EventName": "ls_tablewalker.dc_type1", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks DC Type 1.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_tablewalker.dc_type0", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks DC Type 0.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_misal_accesses", @@ -150,31 +150,31 @@ "EventName": "ls_pref_instr_disp.prefetch_nta", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_pref_instr_disp.store_prefetch_w", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_pref_instr_disp.load_prefetch_w", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_inef_sw_pref.mab_mch_cnt", "EventCode": "0x52", "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", "EventCode": "0x52", "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_not_halted_cyc", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json index ff780098d36e..7626986ce1fb 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/other.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/other.json @@ -3,13 +3,13 @@ "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", "EventCode": "0x28a", "BriefDescription": "OC Mode Switch. OC to IC mode switch.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", "EventCode": "0x28a", "BriefDescription": "OC Mode Switch. IC to OC mode switch.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", @@ -33,24 +33,24 @@ "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", - "UMask": "0x1" + "UMask": "0x01" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json index 2cfe2d2f3bfd..bf5083c1c260 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json @@ -10,7 +10,7 @@ "EventName": "all_dc_accesses", "EventCode": "0x29", "BriefDescription": "All L1 Data Cache Accesses", - "UMask": "0x7" + "UMask": "0x07" }, { "MetricName": "all_l2_cache_accesses", @@ -79,10 +79,10 @@ "UMask": "0x70" }, { - "MetricName": "l2_cache_hits_from_l2_hwpf", + "EventName": "l2_cache_hits_from_l2_hwpf", + "EventCode": "0x70", "BriefDescription": "L2 Cache Hits from L2 HWPF", - "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", - "MetricGroup": "l2_cache" + "UMask": "0xff" }, { "EventName": "l3_accesses", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json index ef4166a66288..84fb43fa59ad 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json @@ -24,25 +24,25 @@ "EventName": "bp_l1_tlb_fetch_hit", "EventCode": "0x94", "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", - "UMask": "0xFF" + "UMask": "0xff" }, { "EventName": "bp_l1_tlb_fetch_hit.if1g", "EventCode": "0x94", "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "bp_l1_tlb_fetch_hit.if2m", "EventCode": "0x94", "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "bp_l1_tlb_fetch_hit.if4k", "EventCode": "0x94", "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "bp_tlb_rel", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json index f61b982f83ca..c858fb9477e3 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json @@ -27,25 +27,25 @@ "EventName": "l2_request_g1.change_to_x", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_request_g1.prefetch_l2_cmd", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_request_g1.l2_hw_pf", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_request_g1.group2", "EventCode": "0x60", "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_request_g1.all_no_prefetch", @@ -80,31 +80,31 @@ "EventName": "l2_request_g2.ic_rd_sized_nc", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_request_g2.smc_inval", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_request_g2.bus_locks_originator", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_request_g2.bus_locks_responses", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_latency.l2_cycles_waiting_on_fills", "EventCode": "0x62", "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_wcb_req.wcb_write", @@ -122,13 +122,13 @@ "EventName": "l2_wcb_req.zero_byte_store", "EventCode": "0x63", "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_wcb_req.cl_zero", "EventCode": "0x63", "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_cs", @@ -158,37 +158,37 @@ "EventName": "l2_cache_req_stat.ls_rd_blk_c", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_x", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_s", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_cache_req_stat.ic_fill_miss", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_cache_req_stat.ic_access_in_l2", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", - "UMask": "0x7" + "UMask": "0x07" }, { "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", - "UMask": "0x9" + "UMask": "0x09" }, { "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", @@ -200,12 +200,12 @@ "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_pf_hit_l2", "EventCode": "0x70", - "BriefDescription": "L2 prefetch hit in L2.", + "BriefDescription": "L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead.", "UMask": "0xff" }, { @@ -255,19 +255,19 @@ "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", "EventCode": "0x85", "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", "EventCode": "0x85", "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", "EventCode": "0x85", "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "bp_snp_re_sync", @@ -278,43 +278,43 @@ "EventName": "ic_fetch_stall.ic_stall_any", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ic_fetch_stall.ic_stall_dq_empty", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_fetch_stall.ic_stall_back_pressure", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ic_cache_inval.l2_invalidating_probe", "EventCode": "0x8c", "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_cache_inval.fill_invalidated", "EventCode": "0x8c", "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", "EventCode": "0x28a", "BriefDescription": "OC Mode Switch. OC to IC mode switch.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", "EventCode": "0x28a", "BriefDescription": "OC Mode Switch. IC to OC mode switch.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l3_request_g1.caching_l3_cache_accesses", @@ -353,7 +353,7 @@ }, { "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", - "EventCode": "0x9A", + "EventCode": "0x9a", "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", "UMask": "0x3f", "Unit": "L3PMC" diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json index 4b75183da94a..bed14829f0bc 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json @@ -68,21 +68,21 @@ "EventCode": "0xcb", "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ex_ret_mmx_fp_instr.mmx_instr", "EventCode": "0xcb", "BriefDescription": "MMX instructions.", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ex_ret_mmx_fp_instr.x87_instr", "EventCode": "0xcb", "BriefDescription": "x87 instructions.", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ex_ret_cond", @@ -108,19 +108,19 @@ "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ex_ret_fus_brnch_inst", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json index 622a0c420e46..91ed96f2580b 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json @@ -4,35 +4,35 @@ "EventCode": "0x00", "BriefDescription": "Total number of fp uOps.", "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", - "UMask": "0xf" + "UMask": "0x0f" }, { "EventName": "fpu_pipe_assignment.total3", "EventCode": "0x00", "BriefDescription": "Total number uOps assigned to pipe 3.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fpu_pipe_assignment.total2", "EventCode": "0x00", "BriefDescription": "Total number uOps assigned to pipe 2.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fpu_pipe_assignment.total1", "EventCode": "0x00", "BriefDescription": "Total number uOps assigned to pipe 1.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fpu_pipe_assignment.total0", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 0.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_ret_sse_avx_ops.all", @@ -45,96 +45,96 @@ "EventCode": "0x03", "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", "PublicDescription": "", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_ret_sse_avx_ops.div_flops", "EventCode": "0x03", "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_ret_sse_avx_ops.mult_flops", "EventCode": "0x03", "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_ret_sse_avx_ops.add_sub_flops", "EventCode": "0x03", "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_num_mov_elim_scal_op.optimized", "EventCode": "0x04", "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_num_mov_elim_scal_op.opt_potential", "EventCode": "0x04", "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", "EventCode": "0x04", "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", "EventCode": "0x04", "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_retired_ser_ops.sse_bot_ret", "EventCode": "0x05", "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_retired_ser_ops.sse_ctrl_ret", "EventCode": "0x05", "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_retired_ser_ops.x87_bot_ret", "EventCode": "0x05", "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_retired_ser_ops.x87_ctrl_ret", "EventCode": "0x05", "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_disp_faults.ymm_spill_fault", "EventCode": "0x0e", "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_disp_faults.ymm_fill_fault", "EventCode": "0x0e", "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_disp_faults.xmm_fill_fault", "EventCode": "0x0e", "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_disp_faults.x87_fill_fault", "EventCode": "0x0e", "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", - "UMask": "0x1" + "UMask": "0x01" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json index 715046b339cb..89822b9ddb79 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json @@ -4,31 +4,31 @@ "EventCode": "0x24", "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_locks.spec_lock_hi_spec", "EventCode": "0x25", "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_locks.spec_lock_lo_spec", "EventCode": "0x25", "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_locks.non_spec_lock", "EventCode": "0x25", "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_locks.bus_lock", "EventCode": "0x25", "BriefDescription": "Retired lock instructions. Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type. Comparable to legacy bus lock.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_ret_cl_flush", @@ -44,33 +44,33 @@ "EventName": "ls_dispatch.ld_st_dispatch", "EventCode": "0x29", "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_dispatch.store_dispatch", "EventCode": "0x29", "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_dispatch.ld_dispatch", "EventCode": "0x29", "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_smi_rx", - "EventCode": "0x2B", + "EventCode": "0x2b", "BriefDescription": "Number of SMIs received." }, { "EventName": "ls_int_taken", - "EventCode": "0x2C", + "EventCode": "0x2c", "BriefDescription": "Number of interrupts taken." }, { "EventName": "ls_rdtsc", - "EventCode": "0x2D", + "EventCode": "0x2d", "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." }, { @@ -93,19 +93,19 @@ "EventName": "ls_mab_alloc.dc_prefetcher", "EventCode": "0x41", "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_mab_alloc.stores", "EventCode": "0x41", "BriefDescription": "LS MAB Allocates by Type. Stores.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_mab_alloc.loads", "EventCode": "0x41", "BriefDescription": "LS MAB Allocates by Type. Loads.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram", @@ -123,19 +123,19 @@ "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram", "EventCode": "0x43", "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache", "EventCode": "0x43", "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2", "EventCode": "0x43", "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_l1_d_tlb_miss.all", @@ -171,61 +171,61 @@ "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss. DTLB reload hit a coalesced page.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_tablewalker.iside", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks on I-side.", - "UMask": "0xc" + "UMask": "0x0c" }, { "EventName": "ls_tablewalker.ic_type1", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks IC Type 1.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_tablewalker.ic_type0", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks IC Type 0.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_tablewalker.dside", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks on D-side.", - "UMask": "0x3" + "UMask": "0x03" }, { "EventName": "ls_tablewalker.dc_type1", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks DC Type 1.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_tablewalker.dc_type0", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks DC Type 0.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_misal_accesses", @@ -242,31 +242,31 @@ "EventName": "ls_pref_instr_disp.prefetch_nta", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_pref_instr_disp.prefetch_w", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_pref_instr_disp.prefetch", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_inef_sw_pref.mab_mch_cnt", "EventCode": "0x52", "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", "EventCode": "0x52", "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram", @@ -284,49 +284,49 @@ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram", "EventCode": "0x59", "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die. From DRAM (home node local).", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache", "EventCode": "0x59", "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node local).", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2", "EventCode": "0x59", "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", "UMask": "0x40" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", "UMask": "0x10" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_not_halted_cyc", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/other.json b/tools/perf/pmu-events/arch/x86/amdzen2/other.json index e94994d4a60e..1bdf106ca785 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/other.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/other.json @@ -14,13 +14,13 @@ "EventName": "de_dis_uops_from_decoder.opcache_dispatched", "EventCode": "0xaa", "BriefDescription": "Count of dispatched Ops from OpCache.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "de_dis_uops_from_decoder.decoder_dispatched", "EventCode": "0xaa", "BriefDescription": "Count of dispatched Ops from Decoder.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall", @@ -50,25 +50,25 @@ "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", "EventCode": "0xae", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall", "EventCode": "0xae", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall", "EventCode": "0xae", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall", "EventCode": "0xae", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. Applies to all ops that have an integer destination register.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall", @@ -92,24 +92,24 @@ "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", - "UMask": "0x1" + "UMask": "0x01" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json index 2ef91e25e661..a71694a043ba 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json @@ -10,7 +10,7 @@ "EventName": "all_dc_accesses", "EventCode": "0x29", "BriefDescription": "All L1 Data Cache Accesses", - "UMask": "0x7" + "UMask": "0x07" }, { "MetricName": "all_l2_cache_accesses", @@ -79,10 +79,10 @@ "UMask": "0x70" }, { - "MetricName": "l2_cache_hits_from_l2_hwpf", + "EventName": "l2_cache_hits_from_l2_hwpf", + "EventCode": "0x70", "BriefDescription": "L2 Cache Hits from L2 HWPF", - "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", - "MetricGroup": "l2_cache" + "UMask": "0xff" }, { "EventName": "l3_accesses", diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/branch.json b/tools/perf/pmu-events/arch/x86/amdzen3/branch.json new file mode 100644 index 000000000000..018a7fe94fb9 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/branch.json @@ -0,0 +1,53 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)." + }, + { + "EventName": "bp_dyn_ind_pred", + "EventCode": "0x8e", + "BriefDescription": "Dynamic Indirect Predictions.", + "PublicDescription": "The number of times a branch used the indirect predictor to make a prediction." + }, + { + "EventName": "bp_de_redirect", + "EventCode": "0x91", + "BriefDescription": "Decode Redirects", + "PublicDescription": "The number of times the instruction decoder overrides the predicted target." + }, + { + "EventName": "bp_l1_tlb_fetch_hit", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", + "UMask": "0xff" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if1g", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instruction TLB hit (1G page size).", + "UMask": "0x04" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if2m", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instruction TLB hit (2M page size).", + "UMask": "0x02" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if4k", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instrcution TLB hit (4K or 16K page size).", + "UMask": "0x01" + }, + { + "EventName": "bp_tlb_rel", + "EventCode": "0x99", + "BriefDescription": "The number of ITLB reload requests." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/cache.json b/tools/perf/pmu-events/arch/x86/amdzen3/cache.json new file mode 100644 index 000000000000..fa1d7499a2e3 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/cache.json @@ -0,0 +1,402 @@ +[ + { + "EventName": "l2_request_g1.rd_blk_l", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g1.rd_blk_x", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g1.ls_rd_blk_c_s", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g1.cacheable_ic_read", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g1.change_to_x", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", + "UMask": "0x08" + }, + { + "EventName": "l2_request_g1.prefetch_l2_cmd", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", + "UMask": "0x04" + }, + { + "EventName": "l2_request_g1.l2_hw_pf", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", + "UMask": "0x02" + }, + { + "EventName": "l2_request_g1.group2", + "EventCode": "0x60", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", + "UMask": "0x01" + }, + { + "EventName": "l2_request_g1.all_no_prefetch", + "EventCode": "0x60", + "UMask": "0xf9" + }, + { + "EventName": "l2_request_g2.group1", + "EventCode": "0x61", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g2.ls_rd_sized", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g2.ls_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g2.ic_rd_sized", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g2.ic_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", + "UMask": "0x08" + }, + { + "EventName": "l2_request_g2.smc_inval", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", + "UMask": "0x04" + }, + { + "EventName": "l2_request_g2.bus_locks_originator", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", + "UMask": "0x02" + }, + { + "EventName": "l2_request_g2.bus_locks_responses", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", + "UMask": "0x01" + }, + { + "EventName": "l2_latency.l2_cycles_waiting_on_fills", + "EventCode": "0x62", + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "UMask": "0x01" + }, + { + "EventName": "l2_wcb_req.wcb_write", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", + "UMask": "0x40" + }, + { + "EventName": "l2_wcb_req.wcb_close", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "UMask": "0x20" + }, + { + "EventName": "l2_wcb_req.zero_byte_store", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "UMask": "0x04" + }, + { + "EventName": "l2_wcb_req.cl_zero", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2", + "UMask": "0x80" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2. Modifiable.", + "UMask": "0x40" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit non-modifiable line in L2.", + "UMask": "0x20" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_c", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types). Use l2_cache_misses_from_dc_misses instead.", + "UMask": "0x08" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", + "UMask": "0x04" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_s", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit non-modifiable line in L2.", + "UMask": "0x02" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_miss", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2. Use l2_cache_misses_from_ic_miss instead.", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_req_stat.ic_access_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", + "UMask": "0x07" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", + "UMask": "0x09" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).", + "UMask": "0xf6" + }, + { + "EventName": "l2_fill_pending.l2_fill_busy", + "EventCode": "0x6d", + "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", + "UMask": "0x01" + }, + { + "EventName": "l2_pf_hit_l2", + "EventCode": "0x70", + "BriefDescription": "L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_hit_l3", + "EventCode": "0x71", + "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_l3", + "EventCode": "0x72", + "BriefDescription": "L2 prefetcher misses in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.", + "UMask": "0xff" + }, + { + "EventName": "ic_fw32", + "EventCode": "0x80", + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." + }, + { + "EventName": "ic_fw32_miss", + "EventCode": "0x81", + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." + }, + { + "EventName": "ic_cache_fill_l2", + "EventCode": "0x82", + "BriefDescription": "Instruction Cache Refills from L2. The number of 64 byte instruction cache line was fulfilled from the L2 cache." + }, + { + "EventName": "ic_cache_fill_sys", + "EventCode": "0x83", + "BriefDescription": "Instruction Cache Refills from System. The number of 64 byte instruction cache line fulfilled from system memory or another cache." + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_hit", + "EventCode": "0x84", + "BriefDescription": "L1 ITLB Miss, L2 ITLB Hit. The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.", + "UMask": "0xff" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k", + "EventCode": "0x85", + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for >4K Coalesced page.", + "UMask": "0x08" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", + "EventCode": "0x85", + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for 1G page.", + "UMask": "0x04" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", + "EventCode": "0x85", + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for 2M page.", + "UMask": "0x02" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", + "EventCode": "0x85", + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk to 4K page.", + "UMask": "0x01" + }, + { + "EventName": "bp_snp_re_sync", + "EventCode": "0x86", + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." + }, + { + "EventName": "ic_fetch_stall.ic_stall_any", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "UMask": "0x04" + }, + { + "EventName": "ic_fetch_stall.ic_stall_dq_empty", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "UMask": "0x02" + }, + { + "EventName": "ic_fetch_stall.ic_stall_back_pressure", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "UMask": "0x01" + }, + { + "EventName": "ic_cache_inval.l2_invalidating_probe", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", + "UMask": "0x02" + }, + { + "EventName": "ic_cache_inval.fill_invalidated", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", + "UMask": "0x01" + }, + { + "EventName": "ic_tag_hit_miss.all_instruction_cache_accesses", + "EventCode": "0x18e", + "BriefDescription": "All Instruction Cache Accesses. Counts various IC tag related hit and miss events.", + "UMask": "0x1f" + }, + { + "EventName": "ic_tag_hit_miss.instruction_cache_miss", + "EventCode": "0x18e", + "BriefDescription": "Instruction Cache Miss. Counts various IC tag related hit and miss events.", + "UMask": "0x18" + }, + { + "EventName": "ic_tag_hit_miss.instruction_cache_hit", + "EventCode": "0x18e", + "BriefDescription": "Instruction Cache Hit. Counts various IC tag related hit and miss events.", + "UMask": "0x07" + }, + { + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", + "UMask": "0x02" + }, + { + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC Mode Switch. IC to OC mode switch.", + "UMask": "0x01" + }, + { + "EventName": "op_cache_hit_miss.all_op_cache_accesses", + "EventCode": "0x28f", + "BriefDescription": "All Op Cache accesses. Counts Op Cache micro-tag hit/miss events", + "UMask": "0x07" + }, + { + "EventName": "op_cache_hit_miss.op_cache_miss", + "EventCode": "0x28f", + "BriefDescription": "Op Cache Miss. Counts Op Cache micro-tag hit/miss events", + "UMask": "0x04" + }, + { + "EventName": "op_cache_hit_miss.op_cache_hit", + "EventCode": "0x28f", + "BriefDescription": "Op Cache Hit. Counts Op Cache micro-tag hit/miss events", + "UMask": "0x03" + }, + { + "EventName": "l3_request_g1.caching_l3_cache_accesses", + "EventCode": "0x01", + "BriefDescription": "Caching: L3 cache accesses", + "UMask": "0x80", + "Unit": "L3PMC" + }, + { + "EventName": "l3_lookup_state.all_l3_req_typs", + "EventCode": "0x04", + "BriefDescription": "All L3 Request Types. All L3 cache Requests", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", + "EventCode": "0x06", + "BriefDescription": "Other L3 Miss Request Types", + "UMask": "0xfe", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.request_miss", + "EventCode": "0x06", + "BriefDescription": "L3 cache misses", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "EventName": "xi_sys_fill_latency", + "EventCode": "0x90", + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", + "Unit": "L3PMC" + }, + { + "EventName": "xi_ccx_sdp_req1", + "EventCode": "0x9a", + "BriefDescription": "L3 Misses by Request Type. Ignores SliceID, EnAllSlices, CoreID, EnAllCores and ThreadMask. Requires unit mask 0xFF to engage event for counting.", + "UMask": "0xff", + "Unit": "L3PMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/core.json b/tools/perf/pmu-events/arch/x86/amdzen3/core.json new file mode 100644 index 000000000000..4e27a2be359e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/core.json @@ -0,0 +1,137 @@ +[ + { + "EventName": "ex_ret_instr", + "EventCode": "0xc0", + "BriefDescription": "Retired Instructions." + }, + { + "EventName": "ex_ret_ops", + "EventCode": "0xc1", + "BriefDescription": "Retired Ops. Use macro_ops_retired instead.", + "PublicDescription": "The number of macro-ops retired." + }, + { + "EventName": "ex_ret_brn", + "EventCode": "0xc2", + "BriefDescription": "Retired Branch Instructions.", + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_misp", + "EventCode": "0xc3", + "BriefDescription": "Retired Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired branch instructions, that were mispredicted." + }, + { + "EventName": "ex_ret_brn_tkn", + "EventCode": "0xc4", + "BriefDescription": "Retired Taken Branch Instructions.", + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_tkn_misp", + "EventCode": "0xc5", + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." + }, + { + "EventName": "ex_ret_brn_far", + "EventCode": "0xc6", + "BriefDescription": "Retired Far Control Transfers.", + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." + }, + { + "EventName": "ex_ret_brn_resync", + "EventCode": "0xc7", + "BriefDescription": "Retired Branch Resyncs.", + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." + }, + { + "EventName": "ex_ret_near_ret", + "EventCode": "0xc8", + "BriefDescription": "Retired Near Returns.", + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." + }, + { + "EventName": "ex_ret_near_ret_mispred", + "EventCode": "0xc9", + "BriefDescription": "Retired Near Returns Mispredicted.", + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." + }, + { + "EventName": "ex_ret_brn_ind_misp", + "EventCode": "0xca", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", + "PublicDescription": "The number of indirect branches retired that were not correctly predicted. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction. Note that only EX mispredicts are counted." + }, + { + "EventName": "ex_ret_mmx_fp_instr.sse_instr", + "EventCode": "0xcb", + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS.", + "UMask": "0x04" + }, + { + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", + "EventCode": "0xcb", + "BriefDescription": "MMX instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", + "UMask": "0x02" + }, + { + "EventName": "ex_ret_mmx_fp_instr.x87_instr", + "EventCode": "0xcb", + "BriefDescription": "x87 instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", + "UMask": "0x01" + }, + { + "EventName": "ex_ret_ind_brch_instr", + "EventCode": "0xcc", + "BriefDescription": "Retired Indirect Branch Instructions. The number of indirect branches retired." + }, + { + "EventName": "ex_ret_cond", + "EventCode": "0xd1", + "BriefDescription": "Retired Conditional Branch Instructions." + }, + { + "EventName": "ex_div_busy", + "EventCode": "0xd3", + "BriefDescription": "Div Cycles Busy count." + }, + { + "EventName": "ex_div_count", + "EventCode": "0xd4", + "BriefDescription": "Div Op Count." + }, + { + "EventName": "ex_ret_msprd_brnch_instr_dir_msmtch", + "EventCode": "0x1c7", + "BriefDescription": "Retired Mispredicted Branch Instructions due to Direction Mismatch", + "PublicDescription": "The number of retired conditional branch instructions that were not correctly predicted because of a branch direction mismatch." + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "UMask": "0x04" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "UMask": "0x02" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "UMask": "0x01" + }, + { + "EventName": "ex_ret_fused_instr", + "EventCode": "0x1d0", + "BriefDescription": "Counts retired Fused Instructions." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen3/data-fabric.json new file mode 100644 index 000000000000..40271df40015 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/data-fabric.json @@ -0,0 +1,98 @@ +[ + { + "EventName": "remote_outbound_data_controller_0", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0", + "EventCode": "0x7c7", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_1", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1", + "EventCode": "0x807", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_2", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2", + "EventCode": "0x847", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_3", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3", + "EventCode": "0x887", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_0", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x07", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_1", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x47", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_2", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x87", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_3", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0xc7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_4", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x107", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_5", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x147", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_6", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x187", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_7", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x1c7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen3/floating-point.json new file mode 100644 index 000000000000..98cfcb9c78ec --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/floating-point.json @@ -0,0 +1,139 @@ +[ + { + "EventName": "fpu_pipe_assignment.total", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps.", + "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", + "UMask": "0x0f" + }, + { + "EventName": "fpu_pipe_assignment.total3", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 3.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", + "UMask": "0x08" + }, + { + "EventName": "fpu_pipe_assignment.total2", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 2.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", + "UMask": "0x04" + }, + { + "EventName": "fpu_pipe_assignment.total1", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 1.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", + "UMask": "0x02" + }, + { + "EventName": "fpu_pipe_assignment.total0", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 0.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", + "UMask": "0x01" + }, + { + "EventName": "fp_ret_sse_avx_ops.all", + "EventCode": "0x03", + "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0xff" + }, + { + "EventName": "fp_ret_sse_avx_ops.mac_flops", + "EventCode": "0x03", + "BriefDescription": "Multiply-Accumulate FLOPs. Each MAC operation is counted as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", + "UMask": "0x08" + }, + { + "EventName": "fp_ret_sse_avx_ops.div_flops", + "EventCode": "0x03", + "BriefDescription": "Divide/square root FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", + "UMask": "0x04" + }, + { + "EventName": "fp_ret_sse_avx_ops.mult_flops", + "EventCode": "0x03", + "BriefDescription": "Multiply FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", + "UMask": "0x02" + }, + { + "EventName": "fp_ret_sse_avx_ops.add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Add/subtract FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", + "UMask": "0x01" + }, + { + "EventName": "fp_num_mov_elim_scal_op.optimized", + "EventCode": "0x04", + "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x08" + }, + { + "EventName": "fp_num_mov_elim_scal_op.opt_potential", + "EventCode": "0x04", + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x04" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x02" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x01" + }, + { + "EventName": "fp_retired_ser_ops.sse_bot_ret", + "EventCode": "0x05", + "BriefDescription": "SSE/AVX bottom-executing ops retired. The number of serializing Ops retired.", + "UMask": "0x08" + }, + { + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "SSE/AVX control word mispredict traps. The number of serializing Ops retired.", + "UMask": "0x04" + }, + { + "EventName": "fp_retired_ser_ops.x87_bot_ret", + "EventCode": "0x05", + "BriefDescription": "x87 bottom-executing ops retired. The number of serializing Ops retired.", + "UMask": "0x02" + }, + { + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.", + "UMask": "0x01" + }, + { + "EventName": "fp_disp_faults.ymm_spill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", + "UMask": "0x08" + }, + { + "EventName": "fp_disp_faults.ymm_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.", + "UMask": "0x04" + }, + { + "EventName": "fp_disp_faults.xmm_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", + "UMask": "0x02" + }, + { + "EventName": "fp_disp_faults.x87_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", + "UMask": "0x01" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/memory.json b/tools/perf/pmu-events/arch/x86/amdzen3/memory.json new file mode 100644 index 000000000000..a2833955dcd2 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/memory.json @@ -0,0 +1,428 @@ +[ + { + "EventName": "ls_bad_status2.stli_other", + "EventCode": "0x24", + "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", + "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", + "UMask": "0x02" + }, + { + "EventName": "ls_locks.spec_lock_hi_spec", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", + "UMask": "0x08" + }, + { + "EventName": "ls_locks.spec_lock_lo_spec", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", + "UMask": "0x04" + }, + { + "EventName": "ls_locks.non_spec_lock", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", + "UMask": "0x02" + }, + { + "EventName": "ls_locks.bus_lock", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Comparable to legacy bus lock.", + "UMask": "0x01" + }, + { + "EventName": "ls_ret_cl_flush", + "EventCode": "0x26", + "BriefDescription": "The number of retired CLFLUSH instructions. This is a non-speculative event." + }, + { + "EventName": "ls_ret_cpuid", + "EventCode": "0x27", + "BriefDescription": "The number of CPUID instructions retired." + }, + { + "EventName": "ls_dispatch.ld_st_dispatch", + "EventCode": "0x29", + "BriefDescription": "Load-op-Store Dispatch. Dispatch of a single op that performs a load from and store to the same memory address. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x04" + }, + { + "EventName": "ls_dispatch.store_dispatch", + "EventCode": "0x29", + "BriefDescription": "Dispatch of a single op that performs a memory store. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x02" + }, + { + "EventName": "ls_dispatch.ld_dispatch", + "EventCode": "0x29", + "BriefDescription": "Dispatch of a single op that performs a memory load. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x01" + }, + { + "EventName": "ls_smi_rx", + "EventCode": "0x2b", + "BriefDescription": "Counts the number of SMIs received." + }, + { + "EventName": "ls_int_taken", + "EventCode": "0x2c", + "BriefDescription": "Counts the number of interrupts taken." + }, + { + "EventName": "ls_rdtsc", + "EventCode": "0x2d", + "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." + }, + { + "EventName": "ls_stlf", + "EventCode": "0x35", + "BriefDescription": "Number of STLF hits." + }, + { + "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full", + "EventCode": "0x37", + "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full.", + "UMask": "0x01" + }, + { + "EventName": "ls_dc_accesses", + "EventCode": "0x40", + "BriefDescription": "Number of accesses to the dcache for load/store references.", + "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." + }, + { + "EventName": "ls_mab_alloc.all_allocations", + "EventCode": "0x41", + "BriefDescription": "All Allocations. Counts when a LS pipe allocates a MAB entry.", + "UMask": "0x7f" + }, + { + "EventName": "ls_mab_alloc.hardware_prefetcher_allocations", + "EventCode": "0x41", + "BriefDescription": "Hardware Prefetcher Allocations. Counts when a LS pipe allocates a MAB entry.", + "UMask": "0x40" + }, + { + "EventName": "ls_mab_alloc.load_store_allocations", + "EventCode": "0x41", + "BriefDescription": "Load Store Allocations. Counts when a LS pipe allocates a MAB entry.", + "UMask": "0x3f" + }, + { + "EventName": "ls_mab_alloc.dc_prefetcher", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", + "UMask": "0x08" + }, + { + "EventName": "ls_mab_alloc.stores", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. Stores.", + "UMask": "0x02" + }, + { + "EventName": "ls_mab_alloc.loads", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. Loads.", + "UMask": "0x01" + }, + { + "EventName": "ls_dmnd_fills_from_sys.mem_io_remote", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", + "UMask": "0x40" + }, + { + "EventName": "ls_dmnd_fills_from_sys.ext_cache_remote", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From CCX Cache in different Node.", + "UMask": "0x10" + }, + { + "EventName": "ls_dmnd_fills_from_sys.mem_io_local", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From DRAM or IO connected in same node.", + "UMask": "0x08" + }, + { + "EventName": "ls_dmnd_fills_from_sys.ext_cache_local", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From cache of different CCX in same node.", + "UMask": "0x04" + }, + { + "EventName": "ls_dmnd_fills_from_sys.int_cache", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", + "UMask": "0x02" + }, + { + "EventName": "ls_dmnd_fills_from_sys.lcl_l2", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From Local L2 to the core.", + "UMask": "0x01" + }, + { + "EventName": "ls_any_fills_from_sys.mem_io_remote", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", + "UMask": "0x40" + }, + { + "EventName": "ls_any_fills_from_sys.ext_cache_remote", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From CCX Cache in different Node.", + "UMask": "0x10" + }, + { + "EventName": "ls_any_fills_from_sys.mem_io_local", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From DRAM or IO connected in same node.", + "UMask": "0x08" + }, + { + "EventName": "ls_any_fills_from_sys.ext_cache_local", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From cache of different CCX in same node.", + "UMask": "0x04" + }, + { + "EventName": "ls_any_fills_from_sys.int_cache", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", + "UMask": "0x02" + }, + { + "EventName": "ls_any_fills_from_sys.lcl_l2", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From Local L2 to the core.", + "UMask": "0x01" + }, + { + "EventName": "ls_l1_d_tlb_miss.all", + "EventCode": "0x45", + "BriefDescription": "All L1 DTLB Misses or Reloads. Use l1_dtlb_misses instead.", + "UMask": "0xff" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that also missed in the L2 TLB.", + "UMask": "0x80" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that also missed in the L2 TLB.", + "UMask": "0x40" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page that also missed in the L2 TLB.", + "UMask": "0x20" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that missed the L2 TLB.", + "UMask": "0x10" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", + "UMask": "0x08" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", + "UMask": "0x04" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a coalesced page that hit in the L2 TLB.", + "UMask": "0x02" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", + "UMask": "0x01" + }, + { + "EventName": "ls_tablewalker.iside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on I-side.", + "UMask": "0x0c" + }, + { + "EventName": "ls_tablewalker.ic_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 1.", + "UMask": "0x08" + }, + { + "EventName": "ls_tablewalker.ic_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 0.", + "UMask": "0x04" + }, + { + "EventName": "ls_tablewalker.dside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on D-side.", + "UMask": "0x03" + }, + { + "EventName": "ls_tablewalker.dc_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 1.", + "UMask": "0x02" + }, + { + "EventName": "ls_tablewalker.dc_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 0.", + "UMask": "0x01" + }, + { + "EventName": "ls_misal_loads.ma4k", + "EventCode": "0x47", + "BriefDescription": "The number of 4KB misaligned (i.e., page crossing) loads.", + "UMask": "0x02" + }, + { + "EventName": "ls_misal_loads.ma64", + "EventCode": "0x47", + "BriefDescription": "The number of 64B misaligned (i.e., cacheline crossing) loads.", + "UMask": "0x01" + }, + { + "EventName": "ls_pref_instr_disp", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).", + "UMask": "0xff" + }, + { + "EventName": "ls_pref_instr_disp.prefetch_nta", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.", + "UMask": "0x04" + }, + { + "EventName": "ls_pref_instr_disp.prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchW instruction. See docAPM3 PREFETCHW.", + "UMask": "0x02" + }, + { + "EventName": "ls_pref_instr_disp.prefetch", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", + "UMask": "0x01" + }, + { + "EventName": "ls_inef_sw_pref.mab_mch_cnt", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", + "UMask": "0x02" + }, + { + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", + "UMask": "0x01" + }, + { + "EventName": "ls_sw_pf_dc_fills.mem_io_remote", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", + "UMask": "0x40" + }, + { + "EventName": "ls_sw_pf_dc_fills.ext_cache_remote", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From CCX Cache in different Node.", + "UMask": "0x10" + }, + { + "EventName": "ls_sw_pf_dc_fills.mem_io_local", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in same node.", + "UMask": "0x08" + }, + { + "EventName": "ls_sw_pf_dc_fills.ext_cache_local", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From cache of different CCX in same node.", + "UMask": "0x04" + }, + { + "EventName": "ls_sw_pf_dc_fills.int_cache", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", + "UMask": "0x02" + }, + { + "EventName": "ls_sw_pf_dc_fills.lcl_l2", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From Local L2 to the core.", + "UMask": "0x01" + }, + { + "EventName": "ls_hw_pf_dc_fills.mem_io_remote", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", + "UMask": "0x40" + }, + { + "EventName": "ls_hw_pf_dc_fills.ext_cache_remote", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From CCX Cache in different Node.", + "UMask": "0x10" + }, + { + "EventName": "ls_hw_pf_dc_fills.mem_io_local", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in same node.", + "UMask": "0x08" + }, + { + "EventName": "ls_hw_pf_dc_fills.ext_cache_local", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From cache of different CCX in same node.", + "UMask": "0x04" + }, + { + "EventName": "ls_hw_pf_dc_fills.int_cache", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", + "UMask": "0x02" + }, + { + "EventName": "ls_hw_pf_dc_fills.lcl_l2", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From Local L2 to the core.", + "UMask": "0x01" + }, + { + "EventName": "ls_alloc_mab_count", + "EventCode": "0x5f", + "BriefDescription": "Count of Allocated Mabs", + "PublicDescription": "This event counts the in-flight L1 data cache misses (allocated Miss Address Buffers) divided by 4 and rounded down each cycle unless used with the MergeEvent functionality. If the MergeEvent is used, it counts the exact number of outstanding L1 data cache misses. See 2.1.17.3 [Large Increment per Cycle Events]." + }, + { + "EventName": "ls_not_halted_cyc", + "EventCode": "0x76", + "BriefDescription": "Cycles not in Halt." + }, + { + "EventName": "ls_tlb_flush.all_tlb_flushes", + "EventCode": "0x78", + "BriefDescription": "All TLB Flushes. Requires unit mask 0xFF to engage event for counting. Use all_tlbs_flushed instead", + "UMask": "0xff" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/other.json b/tools/perf/pmu-events/arch/x86/amdzen3/other.json new file mode 100644 index 000000000000..7da5d0791ea3 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/other.json @@ -0,0 +1,103 @@ +[ + { + "EventName": "de_dis_uop_queue_empty_di0", + "EventCode": "0xa9", + "BriefDescription": "Cycles where the Micro-Op Queue is empty." + }, + { + "EventName": "de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch", + "EventCode": "0xab", + "BriefDescription": "Any Integer dispatch. Types of Oops Dispatched from Decoder.", + "UMask": "0x08" + }, + { + "EventName": "de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", + "EventCode": "0xab", + "BriefDescription": "Any FP dispatch. Types of Oops Dispatched from Decoder.", + "UMask": "0x04" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_flush_recovery_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. FP Flush recovery stall.", + "UMask": "0x80" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Floating point register file resource stall. Applies to all FP ops that have a destination register.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.taken_brnch_buffer_rsrc", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Taken branch buffer resource stall.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", + "UMask": "0x08" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.store_queue_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Store Queue resource stall. Applies to all ops with store semantics.", + "UMask": "0x04" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.load_queue_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Load Queue resource stall. Applies to all ops with load semantics.", + "UMask": "0x02" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Integer Physical Register File resource stall. Integer Physical Register File, applies to all ops that have an integer destination register.", + "UMask": "0x01" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.retire_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Insufficient Retire Queue tokens available.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.agsq_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.int_sch3_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 3 available.", + "UMask": "0x08" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.int_sch2_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 2 available.", + "UMask": "0x04" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.int_sch1_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 1 available.", + "UMask": "0x02" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.int_sch0_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 0 available.", + "UMask": "0x01" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json new file mode 100644 index 000000000000..988cf68ae825 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json @@ -0,0 +1,214 @@ +[ + { + "MetricName": "branch_misprediction_ratio", + "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)", + "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)", + "MetricGroup": "branch_prediction", + "ScaleUnit": "100%" + }, + { + "EventName": "all_data_cache_accesses", + "EventCode": "0x29", + "BriefDescription": "All L1 Data Cache Accesses", + "UMask": "0x07" + }, + { + "MetricName": "all_l2_cache_accesses", + "BriefDescription": "All L2 Cache Accesses", + "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_accesses_from_ic_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_accesses_from_dc_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)", + "UMask": "0xe8" + }, + { + "MetricName": "l2_cache_accesses_from_l2_hwpf", + "BriefDescription": "L2 Cache Accesses from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_misses", + "BriefDescription": "All L2 Cache Misses", + "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_misses_from_ic_miss", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_misses_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses", + "UMask": "0x08" + }, + { + "MetricName": "l2_cache_misses_from_l2_hwpf", + "BriefDescription": "L2 Cache Misses from L2 Cache HWPF", + "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_hits", + "BriefDescription": "All L2 Cache Hits", + "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_hits_from_ic_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses", + "UMask": "0x06" + }, + { + "EventName": "l2_cache_hits_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses", + "UMask": "0xf0" + }, + { + "EventName": "l2_cache_hits_from_l2_hwpf", + "EventCode": "0x70", + "BriefDescription": "L2 Cache Hits from L2 Cache HWPF", + "UMask": "0xff" + }, + { + "EventName": "l3_cache_accesses", + "EventCode": "0x04", + "BriefDescription": "L3 Cache Accesses", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_misses", + "EventCode": "0x04", + "BriefDescription": "L3 Misses (includes cacheline state change requests)", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "MetricName": "l3_read_miss_latency", + "BriefDescription": "Average L3 Read Miss Latency (in core clocks)", + "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1", + "MetricGroup": "l3_cache", + "ScaleUnit": "1core clocks" + }, + { + "MetricName": "op_cache_fetch_miss_ratio", + "BriefDescription": "Op Cache (64B) Fetch Miss Ratio", + "MetricExpr": "d_ratio(op_cache_hit_miss.op_cache_miss, op_cache_hit_miss.all_op_cache_accesses)", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "ic_fetch_miss_ratio", + "BriefDescription": "Instruction Cache (32B) Fetch Miss Ratio", + "MetricExpr": "d_ratio(ic_tag_hit_miss.instruction_cache_miss, ic_tag_hit_miss.all_instruction_cache_accesses)", + "MetricGroup": "l2_cache", + "ScaleUnit": "100%" + }, + { + "EventName": "l1_data_cache_fills_from_memory", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: From Memory", + "UMask": "0x48" + }, + { + "EventName": "l1_data_cache_fills_from_remote_node", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: From Remote Node", + "UMask": "0x50" + }, + { + "EventName": "l1_data_cache_fills_from_within_same_ccx", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: From within same CCX", + "UMask": "0x03" + }, + { + "EventName": "l1_data_cache_fills_from_external_ccx_cache", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: From External CCX Cache", + "UMask": "0x14" + }, + { + "EventName": "l1_data_cache_fills_all", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: All", + "UMask": "0xff" + }, + { + "MetricName": "l1_itlb_misses", + "BriefDescription": "L1 ITLB Misses", + "MetricExpr": "bp_l1_tlb_miss_l2_tlb_hit + bp_l1_tlb_miss_l2_tlb_miss", + "MetricGroup": "tlb" + }, + { + "EventName": "l2_itlb_misses", + "EventCode": "0x85", + "BriefDescription": "L2 ITLB Misses & Instruction page walks", + "UMask": "0x07" + }, + { + "EventName": "l1_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Misses", + "UMask": "0xff" + }, + { + "EventName": "l2_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L2 DTLB Misses & Data page walks", + "UMask": "0xf0" + }, + { + "EventName": "all_tlbs_flushed", + "EventCode": "0x78", + "BriefDescription": "All TLBs Flushed", + "UMask": "0xff" + }, + { + "MetricName": "macro_ops_dispatched", + "BriefDescription": "Macro-ops Dispatched", + "MetricExpr": "de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch + de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", + "MetricGroup": "decoder" + }, + { + "EventName": "sse_avx_stalls", + "EventCode": "0x0e", + "BriefDescription": "Mixed SSE/AVX Stalls", + "UMask": "0x0e" + }, + { + "EventName": "macro_ops_retired", + "EventCode": "0xc1", + "BriefDescription": "Macro-ops Retired" + }, + { + "MetricName": "all_remote_links_outbound", + "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)", + "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3e-5MiB" + }, + { + "MetricName": "nps1_die_to_dram", + "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)", + "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.1e-5MiB" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 2f2a209e87e1..9f97b32533a0 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -38,4 +38,4 @@ GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core -AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen2,core +AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen3,core diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index e1f3f5c8c550..33aa3c885eaf 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1149,7 +1149,7 @@ static int process_one_file(const char *fpath, const struct stat *sb, * and directory tree could result in build failure due to table * names not being found. * - * Atleast for now, be strict with processing JSON file names. + * At least for now, be strict with processing JSON file names. * i.e. if JSON file name cannot be mapped to C-style table name, * fail. */ diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py index ea0c8b90a783..a0cfc7fe5908 100644 --- a/tools/perf/scripts/python/netdev-times.py +++ b/tools/perf/scripts/python/netdev-times.py @@ -356,7 +356,7 @@ def handle_irq_softirq_exit(event_info): return rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time, 'irq_list':irq_list, 'event_list':event_list} - # merge information realted to a NET_RX softirq + # merge information related to a NET_RX softirq receive_hunk_list.append(rec_data) def handle_napi_poll(event_info): diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index cc9fbcedb364..ef37353636d8 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -225,11 +225,11 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused * * The test case check following error conditions: * - we get stuck in signal handler because of debug - * exception being triggered receursively due to + * exception being triggered recursively due to * the wrong RF EFLAG management * * - we never trigger the sig_handler breakpoint due - * to the rong RF EFLAG management + * to the wrong RF EFLAG management * */ @@ -242,7 +242,7 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0); /* - * Kick off the test by trigering 'fd1' + * Kick off the test by triggering 'fd1' * breakpoint. */ test_function(); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 2fdc7b2f996e..9866cddebf23 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -658,7 +658,7 @@ static int do_test_code_reading(bool try_kcore) /* * Both cpus and threads are now owned by evlist * and will be freed by following perf_evlist__set_maps - * call. Getting refference to keep them alive. + * call. Getting reference to keep them alive. */ perf_cpu_map__get(cpus); perf_thread_map__get(threads); diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c index a273ed5163d7..0043be812355 100644 --- a/tools/perf/tests/demangle-ocaml-test.c +++ b/tools/perf/tests/demangle-ocaml-test.c @@ -19,14 +19,14 @@ int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_u { "main", NULL }, { "camlStdlib__array__map_154", - "Stdlib.array.map" }, + "Stdlib.array.map_154" }, { "camlStdlib__anon_fn$5bstdlib$2eml$3a334$2c0$2d$2d54$5d_1453", - "Stdlib.anon_fn[stdlib.ml:334,0--54]" }, + "Stdlib.anon_fn[stdlib.ml:334,0--54]_1453" }, { "camlStdlib__bytes__$2b$2b_2205", - "Stdlib.bytes.++" }, + "Stdlib.bytes.++_2205" }, }; - for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { buf = ocaml_demangle_sym(test_cases[i].mangled); if ((buf == NULL && test_cases[i].demangled != NULL) || (buf != NULL && test_cases[i].demangled == NULL) diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 3f2e1a581247..890cb1f5bf53 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -47,7 +47,7 @@ static struct sample fake_samples[] = { }; /* - * Will be casted to struct ip_callchain which has all 64 bit entries + * Will be cast to struct ip_callchain which has all 64 bit entries * of nr and ips[]. */ static u64 fake_callchains[][10] = { @@ -297,7 +297,7 @@ out: return err; } -/* callcain + NO children */ +/* callchain + NO children */ static int test2(struct evsel *evsel, struct machine *machine) { int err; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index a7f6661e6112..026c54743311 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -20,7 +20,7 @@ #if defined(__s390x__) /* Return true if kvm module is available and loaded. Test this - * and retun success when trace point kvm_s390_create_vm + * and return success when trace point kvm_s390_create_vm * exists. Otherwise this test always fails. */ static bool kvm_s390_create_vm_valid(void) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 6dc1db1626ad..4968c4106254 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -98,7 +98,7 @@ static u64 find_value(const char *name, struct value *values) if (!strcmp(name, v->event)) return v->val; v++; - }; + } return 0; } @@ -186,7 +186,7 @@ static int __compute_metric(const char *name, struct value *vals, *ratio2 = compute_single(&metric_events, evlist, &st, name2); out: - /* ... clenup. */ + /* ... cleanup. */ metricgroup__rblist_exit(&metric_events); runtime_stat__exit(&st); evlist__free_stats(evlist); diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 0ca6a5a53523..b8aff8fb50d8 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -12,6 +12,7 @@ #include "util/evlist.h" #include "util/expr.h" #include "util/parse-events.h" +#include "metricgroup.h" struct perf_pmu_test_event { /* used for matching against events from generated pmu-events.c */ @@ -471,9 +472,74 @@ static void expr_failure(const char *msg, pr_debug("On expression %s\n", pe->metric_expr); } +struct metric { + struct list_head list; + struct metric_ref metric_ref; +}; + +static int resolve_metric_simple(struct expr_parse_ctx *pctx, + struct list_head *compound_list, + struct pmu_events_map *map, + const char *metric_name) +{ + struct hashmap_entry *cur, *cur_tmp; + struct metric *metric, *tmp; + size_t bkt; + bool all; + int rc; + + do { + all = true; + hashmap__for_each_entry_safe((&pctx->ids), cur, cur_tmp, bkt) { + struct metric_ref *ref; + struct pmu_event *pe; + + pe = metricgroup__find_metric(cur->key, map); + if (!pe) + continue; + + if (!strcmp(metric_name, (char *)cur->key)) { + pr_warning("Recursion detected for metric %s\n", metric_name); + rc = -1; + goto out_err; + } + + all = false; + + /* The metric key itself needs to go out.. */ + expr__del_id(pctx, cur->key); + + metric = malloc(sizeof(*metric)); + if (!metric) { + rc = -ENOMEM; + goto out_err; + } + + ref = &metric->metric_ref; + ref->metric_name = pe->metric_name; + ref->metric_expr = pe->metric_expr; + list_add_tail(&metric->list, compound_list); + + rc = expr__find_other(pe->metric_expr, NULL, pctx, 0); + if (rc) + goto out_err; + break; /* The hashmap has been modified, so restart */ + } + } while (!all); + + return 0; + +out_err: + list_for_each_entry_safe(metric, tmp, compound_list, list) + free(metric); + + return rc; + +} + static int test_parsing(void) { - struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL); + struct pmu_events_map *cpus_map = pmu_events_map__find(); struct pmu_events_map *map; struct pmu_event *pe; int i, j, k; @@ -488,7 +554,9 @@ static int test_parsing(void) break; j = 0; for (;;) { + struct metric *metric, *tmp; struct hashmap_entry *cur; + LIST_HEAD(compound_list); size_t bkt; pe = &map->table[j++]; @@ -504,6 +572,13 @@ static int test_parsing(void) continue; } + if (resolve_metric_simple(&ctx, &compound_list, map, + pe->metric_name)) { + expr_failure("Could not resolve metrics", map, pe); + ret++; + goto exit; /* Don't tolerate errors due to severity */ + } + /* * Add all ids with a made up value. The value may * trigger divide by zero when subtracted and so try to @@ -519,6 +594,11 @@ static int test_parsing(void) ret++; } + list_for_each_entry_safe(metric, tmp, &compound_list, list) { + expr__add_ref(&ctx, &metric->metric_ref); + free(metric); + } + if (expr__parse(&result, &ctx, pe->metric_expr, 0)) { expr_failure("Parse failed", map, pe); ret++; @@ -527,6 +607,7 @@ static int test_parsing(void) } } /* TODO: fail when not ok */ +exit: return ret == 0 ? TEST_OK : TEST_SKIP; } diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh index 416af614bbe0..f05670d1e39e 100755 --- a/tools/perf/tests/shell/buildid.sh +++ b/tools/perf/tests/shell/buildid.sh @@ -14,18 +14,56 @@ if ! [ -x "$(command -v cc)" ]; then exit 2 fi +# check what we need to test windows binaries +add_pe=1 +run_pe=1 +if ! perf version --build-options | grep -q 'libbfd: .* on '; then + echo "WARNING: perf not built with libbfd. PE binaries will not be tested." + add_pe=0 + run_pe=0 +fi +if ! which wine > /dev/null; then + echo "WARNING: wine not found. PE binaries will not be run." + run_pe=0 +fi + +# set up wine +if [ ${run_pe} -eq 1 ]; then + wineprefix=$(mktemp -d /tmp/perf.wineprefix.XXX) + export WINEPREFIX=${wineprefix} + # clear display variables to prevent wine from popping up dialogs + unset DISPLAY + unset WAYLAND_DISPLAY +fi + ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX) ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX) +ex_pe=$(dirname $0)/../pe-file.exe echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c - echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c - -echo "test binaries: ${ex_sha1} ${ex_md5}" +echo "test binaries: ${ex_sha1} ${ex_md5} ${ex_pe}" check() { - id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` - + case $1 in + *.exe) + # We don't have a tool that can pull a nicely formatted build-id out of + # a PE file, but we can extract the whole section with objcopy and + # format it ourselves. The .buildid section is a Debug Directory + # containing a CodeView entry: + # https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#debug-directory-image-only + # https://github.com/dotnet/runtime/blob/da94c022576a5c3bbc0e896f006565905eb137f9/docs/design/specs/PE-COFF.md + # The build-id starts at byte 33 and must be rearranged into a GUID. + id=`objcopy -O binary --only-section=.buildid $1 /dev/stdout | \ + cut -c 33-48 | hexdump -ve '/1 "%02x"' | \ + sed 's@^\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(.*\)0a$@\4\3\2\1\6\5\8\7\9@'` + ;; + *) + id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` + ;; + esac echo "build id: ${id}" link=${build_id_dir}/.build-id/${id:0:2}/${id:2} @@ -50,7 +88,7 @@ check() exit 1 fi - ${perf} buildid-cache -l | grep $id + ${perf} buildid-cache -l | grep ${id} if [ $? -ne 0 ]; then echo "failed: ${id} is not reported by \"perf buildid-cache -l\"" exit 1 @@ -79,16 +117,20 @@ test_record() { data=$(mktemp /tmp/perf.data.XXX) build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) + log=$(mktemp /tmp/perf.log.XXX) perf="perf --buildid-dir ${build_id_dir}" - ${perf} record --buildid-all -o ${data} ${1} + echo "running: perf record $@" + ${perf} record --buildid-all -o ${data} $@ &> ${log} if [ $? -ne 0 ]; then - echo "failed: record ${1}" + echo "failed: record $@" + echo "see log: ${log}" exit 1 fi - check ${1} + check ${@: -1} + rm -f ${log} rm -rf ${build_id_dir} rm -rf ${data} } @@ -96,12 +138,21 @@ test_record() # add binaries manual via perf buildid-cache -a test_add ${ex_sha1} test_add ${ex_md5} +if [ ${add_pe} -eq 1 ]; then + test_add ${ex_pe} +fi # add binaries via perf record post processing test_record ${ex_sha1} test_record ${ex_md5} +if [ ${run_pe} -eq 1 ]; then + test_record wine ${ex_pe} +fi # cleanup rm ${ex_sha1} ${ex_md5} +if [ ${run_pe} -eq 1 ]; then + rm -r ${wineprefix} +fi exit ${err} diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index 58984380b211..45fc24af5b07 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -98,6 +98,23 @@ check_line_other() fi } +daemon_exit() +{ + local config=$1 + + local line=`perf daemon --config ${config} -x: | head -1` + local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` + + # Reset trap handler. + trap - SIGINT SIGTERM + + # stop daemon + perf daemon stop --config ${config} + + # ... and wait for the pid to go away + tail --pid=${pid} -f /dev/null +} + daemon_start() { local config=$1 @@ -105,29 +122,24 @@ daemon_start() perf daemon start --config ${config} + # Clean up daemon if interrupted. + trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM + # wait for the session to ping local state="FAIL" + local retries=0 while [ "${state}" != "OK" ]; do state=`perf daemon ping --config ${config} --session ${session} | awk '{ print $1 }'` sleep 0.05 + retries=$((${retries} +1)) + if [ ${retries} -ge 600 ]; then + echo "FAILED: Timeout waiting for daemon to ping" + daemon_exit ${config} + exit 1 + fi done } -daemon_exit() -{ - local base=$1 - local config=$2 - - local line=`perf daemon --config ${config} -x: | head -1` - local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` - - # stop daemon - perf daemon stop --config ${config} - - # ... and wait for the pid to go away - tail --pid=${pid} -f /dev/null -} - test_list() { echo "test daemon list" @@ -171,7 +183,7 @@ EOF ${base}/session-time/ack "0" # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -288,7 +300,7 @@ EOF done # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -333,7 +345,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} # check that sessions are gone if [ -d "/proc/${pid_size}" ]; then @@ -374,7 +386,7 @@ EOF perf daemon signal --config ${config} # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} # count is 2 perf.data for signals and 1 for perf record finished count=`ls ${base}/session-test/ | grep perf.data | wc -l` @@ -420,7 +432,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -457,7 +469,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} diff --git a/tools/perf/tests/shell/stat+csv_summary.sh b/tools/perf/tests/shell/stat+csv_summary.sh new file mode 100755 index 000000000000..5571ff75eb42 --- /dev/null +++ b/tools/perf/tests/shell/stat+csv_summary.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# perf stat csv summary test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# +# 1.001364330 9224197 cycles 8012885033 100.00 +# summary 9224197 cycles 8012885033 100.00 +# +perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary 2>&1 | \ +grep -e summary | \ +while read summary num event run pct +do + if [ $summary != "summary" ]; then + exit 1 + fi +done + +# +# 1.001360298 9148534 cycles 8012853854 100.00 +#9148534 cycles 8012853854 100.00 +# +perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \ +grep -e summary | \ +while read num event run pct +do + exit 1 +done + +exit 0 diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh new file mode 100755 index 000000000000..22eb31e48ca7 --- /dev/null +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# perf stat --bpf-counters test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# check whether $2 is within +/- 10% of $1 +compare_number() +{ + first_num=$1 + second_num=$2 + + # upper bound is first_num * 110% + upper=$(( $first_num + $first_num / 10 )) + # lower bound is first_num * 90% + lower=$(( $first_num - $first_num / 10 )) + + if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then + echo "The difference between $first_num and $second_num are greater than 10%." + exit 1 + fi +} + +# skip if --bpf-counters is not supported +perf stat --bpf-counters true > /dev/null 2>&1 || exit 2 + +base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}') +bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}') + +compare_number $base_cycles $bpf_cycles +exit 0 diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 74748ed75b2c..050489807a47 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -80,7 +80,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * CPU 1 is on core_id 1 and physical_package_id 3 * * Core_id and physical_package_id are platform and architecture - * dependend and might have higher numbers than the CPU id. + * dependent and might have higher numbers than the CPU id. * This actually depends on the configuration. * * In this case process_cpu_topology() prints error message: diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 385894b4a8bb..b8fc5c53ba6f 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -85,7 +85,7 @@ struct mmsghdr { /* * POSIX 1003.1g - ancillary data object information - * Ancillary data consits of a sequence of pairs of + * Ancillary data consists of a sequence of pairs of * (cmsghdr, cmsg_data[]) */ diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 35b82caf8090..ad0a70f0edaf 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -381,6 +381,25 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) return true; } +#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) + +static void annotate_browser__show_full_location(struct ui_browser *browser) +{ + struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); + struct disasm_line *cursor = disasm_line(ab->selection); + struct annotation_line *al = &cursor->al; + + if (al->offset != -1) + ui_helpline__puts("Only available for source code lines."); + else if (al->fileloc == NULL) + ui_helpline__puts("No source file location."); + else { + char help_line[SYM_TITLE_MAX_SIZE]; + sprintf (help_line, "Source file location: %s", al->fileloc); + ui_helpline__puts(help_line); + } +} + static void ui_browser__init_asm_mode(struct ui_browser *browser) { struct annotation *notes = browser__annotation(browser); @@ -388,8 +407,6 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser) browser->nr_entries = notes->nr_asm_entries; } -#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) - static int sym_title(struct symbol *sym, struct map *map, char *title, size_t sz, int percent_type) { @@ -398,7 +415,7 @@ static int sym_title(struct symbol *sym, struct map *map, char *title, } /* - * This can be called from external jumps, i.e. jumps from one functon + * This can be called from external jumps, i.e. jumps from one function * to another, like from the kernel's entry_SYSCALL_64 function to the * swapgs_restore_regs_and_return_to_usermode() function. * @@ -747,6 +764,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "c Show min/max cycle\n" "/ Search string\n" "k Toggle line numbers\n" + "l Show full source file location\n" "P Print to [symbol_name].annotation file.\n" "r Run available scripts\n" "p Toggle percent type [local/global]\n" @@ -760,6 +778,9 @@ static int annotate_browser__run(struct annotate_browser *browser, case 'k': notes->options->show_linenr = !notes->options->show_linenr; continue; + case 'l': + annotate_browser__show_full_location (&browser->b); + continue; case 'H': nd = browser->curr_hot; break; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 3b9818ee9546..bcfd0a45953b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -117,7 +117,7 @@ static void hist_browser__update_rows(struct hist_browser *hb) browser->rows -= browser->extra_title_lines; /* * Verify if we were at the last line and that line isn't - * visibe because we now show the header line(s). + * visible because we now show the header line(s). */ index_row = browser->index - browser->top_idx; if (index_row >= browser->rows) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e60841b86d27..18eee25b4976 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1161,6 +1161,7 @@ struct annotate_args { s64 offset; char *line; int line_nr; + char *fileloc; }; static void annotation_line__init(struct annotation_line *al, @@ -1170,6 +1171,7 @@ static void annotation_line__init(struct annotation_line *al, al->offset = args->offset; al->line = strdup(args->line); al->line_nr = args->line_nr; + al->fileloc = args->fileloc; al->data_nr = nr; } @@ -1482,7 +1484,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start */ static int symbol__parse_objdump_line(struct symbol *sym, struct annotate_args *args, - char *parsed_line, int *line_nr) + char *parsed_line, int *line_nr, char **fileloc) { struct map *map = args->ms.map; struct annotation *notes = symbol__annotation(sym); @@ -1494,6 +1496,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); + *fileloc = strdup(parsed_line); return 0; } @@ -1513,6 +1516,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, args->offset = offset; args->line = parsed_line; args->line_nr = *line_nr; + args->fileloc = *fileloc; args->ms.sym = sym; dl = disasm_line__new(args); @@ -1807,6 +1811,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, args->offset = -1; args->line = strdup(srcline); args->line_nr = 0; + args->fileloc = NULL; args->ms.sym = sym; dl = disasm_line__new(args); if (dl) { @@ -1818,6 +1823,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, args->offset = pc; args->line = buf + prev_buf_size; args->line_nr = 0; + args->fileloc = NULL; args->ms.sym = sym; dl = disasm_line__new(args); if (dl) @@ -1852,6 +1858,7 @@ symbol__disassemble_bpf_image(struct symbol *sym, args->offset = -1; args->line = strdup("to be implemented"); args->line_nr = 0; + args->fileloc = NULL; dl = disasm_line__new(args); if (dl) annotation_line__add(&dl->al, ¬es->src->source); @@ -1933,6 +1940,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) bool delete_extract = false; bool decomp = false; int lineno = 0; + char *fileloc = NULL; int nline; char *line; size_t line_len; @@ -2060,7 +2068,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) * See disasm_line__new() and struct disasm_line::line_nr. */ if (symbol__parse_objdump_line(sym, args, expanded_line, - &lineno) < 0) + &lineno, &fileloc) < 0) break; nline++; } @@ -3144,6 +3152,10 @@ static int annotation__config(const char *var, const char *value, void *data) opt->use_offset = perf_config_bool("use_offset", value); } else if (!strcmp(var, "annotate.disassembler_style")) { opt->disassembler_style = value; + } else if (!strcmp(var, "annotate.demangle")) { + symbol_conf.demangle = perf_config_bool("demangle", value); + } else if (!strcmp(var, "annotate.demangle_kernel")) { + symbol_conf.demangle_kernel = perf_config_bool("demangle_kernel", value); } else { pr_debug("%s variable unknown, ignoring...", var); } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 096cdaf21b01..3757416bcf46 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -84,6 +84,7 @@ struct annotation_options { print_lines, full_path, show_linenr, + show_fileloc, show_nr_jumps, show_minmax_cycle, show_asm_raw, @@ -136,6 +137,7 @@ struct annotation_line { s64 offset; char *line; int line_nr; + char *fileloc; int jump_sources; float ipc; u64 cycles; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 9087f1bffd3d..fbb3c4057c30 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -671,7 +671,7 @@ int bpf__probe(struct bpf_object *obj) * After probing, let's consider prologue, which * adds program fetcher to BPF programs. * - * hook_load_preprocessorr() hooks pre-processor + * hook_load_preprocessor() hooks pre-processor * to bpf_program, let it generate prologue * dynamically during loading. */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 04f89120b323..81d1df3c4ec0 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -5,6 +5,7 @@ #include <assert.h> #include <limits.h> #include <unistd.h> +#include <sys/file.h> #include <sys/time.h> #include <sys/resource.h> #include <linux/err.h> @@ -12,14 +13,45 @@ #include <bpf/bpf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> +#include <api/fs/fs.h> #include "bpf_counter.h" #include "counts.h" #include "debug.h" #include "evsel.h" +#include "evlist.h" #include "target.h" +#include "cpumap.h" +#include "thread_map.h" #include "bpf_skel/bpf_prog_profiler.skel.h" +#include "bpf_skel/bperf_u.h" +#include "bpf_skel/bperf_leader.skel.h" +#include "bpf_skel/bperf_follower.skel.h" + +/* + * bperf uses a hashmap, the attr_map, to track all the leader programs. + * The hashmap is pinned in bpffs. flock() on this file is used to ensure + * no concurrent access to the attr_map. The key of attr_map is struct + * perf_event_attr, and the value is struct perf_event_attr_map_entry. + * + * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the + * leader prog, and the diff_map. Each perf-stat session holds a reference + * to the bpf_link to make sure the leader prog is attached to sched_switch + * tracepoint. + * + * Since the hashmap only contains IDs of the bpf_link and diff_map, it + * does not hold any references to the leader program. Once all perf-stat + * sessions of these events exit, the leader prog, its maps, and the + * perf_events will be freed. + */ +struct perf_event_attr_map_entry { + __u32 link_id; + __u32 diff_map_id; +}; + +#define DEFAULT_ATTR_MAP_PATH "fs/bpf/perf_attr_map" +#define ATTR_MAP_SIZE 16 static inline void *u64_to_ptr(__u64 ptr) { @@ -274,17 +306,494 @@ struct bpf_counter_ops bpf_program_profiler_ops = { .install_pe = bpf_program_profiler__install_pe, }; +static __u32 bpf_link_get_id(int fd) +{ + struct bpf_link_info link_info = {0}; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.id; +} + +static __u32 bpf_link_get_prog_id(int fd) +{ + struct bpf_link_info link_info = {0}; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.prog_id; +} + +static __u32 bpf_map_get_id(int fd) +{ + struct bpf_map_info map_info = {0}; + __u32 map_info_len = sizeof(map_info); + + bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len); + return map_info.id; +} + +static int bperf_lock_attr_map(struct target *target) +{ + char path[PATH_MAX]; + int map_fd, err; + + if (target->attr_map) { + scnprintf(path, PATH_MAX, "%s", target->attr_map); + } else { + scnprintf(path, PATH_MAX, "%s/%s", sysfs__mountpoint(), + DEFAULT_ATTR_MAP_PATH); + } + + if (access(path, F_OK)) { + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(struct perf_event_attr), + sizeof(struct perf_event_attr_map_entry), + ATTR_MAP_SIZE, 0); + if (map_fd < 0) + return -1; + + err = bpf_obj_pin(map_fd, path); + if (err) { + /* someone pinned the map in parallel? */ + close(map_fd); + map_fd = bpf_obj_get(path); + if (map_fd < 0) + return -1; + } + } else { + map_fd = bpf_obj_get(path); + if (map_fd < 0) + return -1; + } + + err = flock(map_fd, LOCK_EX); + if (err) { + close(map_fd); + return -1; + } + return map_fd; +} + +/* trigger the leader program on a cpu */ +static int bperf_trigger_reading(int prog_fd, int cpu) +{ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = NULL, + .ctx_size_in = 0, + .flags = BPF_F_TEST_RUN_ON_CPU, + .cpu = cpu, + .retval = 0, + ); + + return bpf_prog_test_run_opts(prog_fd, &opts); +} + +static int bperf_check_target(struct evsel *evsel, + struct target *target, + enum bperf_filter_type *filter_type, + __u32 *filter_entry_cnt) +{ + if (evsel->leader->core.nr_members > 1) { + pr_err("bpf managed perf events do not yet support groups.\n"); + return -1; + } + + /* determine filter type based on target */ + if (target->system_wide) { + *filter_type = BPERF_FILTER_GLOBAL; + *filter_entry_cnt = 1; + } else if (target->cpu_list) { + *filter_type = BPERF_FILTER_CPU; + *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel)); + } else if (target->tid) { + *filter_type = BPERF_FILTER_PID; + *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads); + } else if (target->pid || evsel->evlist->workload.pid != -1) { + *filter_type = BPERF_FILTER_TGID; + *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads); + } else { + pr_err("bpf managed perf events do not yet support these targets.\n"); + return -1; + } + + return 0; +} + +static struct perf_cpu_map *all_cpu_map; + +static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, + struct perf_event_attr_map_entry *entry) +{ + struct bperf_leader_bpf *skel = bperf_leader_bpf__open(); + int link_fd, diff_map_fd, err; + struct bpf_link *link = NULL; + + if (!skel) { + pr_err("Failed to open leader skeleton\n"); + return -1; + } + + bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus()); + err = bperf_leader_bpf__load(skel); + if (err) { + pr_err("Failed to load leader skeleton\n"); + goto out; + } + + err = -1; + link = bpf_program__attach(skel->progs.on_switch); + if (!link) { + pr_err("Failed to attach leader program\n"); + goto out; + } + + link_fd = bpf_link__fd(link); + diff_map_fd = bpf_map__fd(skel->maps.diff_readings); + entry->link_id = bpf_link_get_id(link_fd); + entry->diff_map_id = bpf_map_get_id(diff_map_fd); + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY); + assert(err == 0); + + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id); + assert(evsel->bperf_leader_link_fd >= 0); + + /* + * save leader_skel for install_pe, which is called within + * following evsel__open_per_cpu call + */ + evsel->leader_skel = skel; + evsel__open_per_cpu(evsel, all_cpu_map, -1); + +out: + bperf_leader_bpf__destroy(skel); + bpf_link__destroy(link); + return err; +} + +static int bperf__load(struct evsel *evsel, struct target *target) +{ + struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; + int attr_map_fd, diff_map_fd = -1, err; + enum bperf_filter_type filter_type; + __u32 filter_entry_cnt, i; + + if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) + return -1; + + if (!all_cpu_map) { + all_cpu_map = perf_cpu_map__new(NULL); + if (!all_cpu_map) + return -1; + } + + evsel->bperf_leader_prog_fd = -1; + evsel->bperf_leader_link_fd = -1; + + /* + * Step 1: hold a fd on the leader program and the bpf_link, if + * the program is not already gone, reload the program. + * Use flock() to ensure exclusive access to the perf_event_attr + * map. + */ + attr_map_fd = bperf_lock_attr_map(target); + if (attr_map_fd < 0) { + pr_err("Failed to lock perf_event_attr map\n"); + return -1; + } + + err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry); + if (err) { + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY); + if (err) + goto out; + } + + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id); + if (evsel->bperf_leader_link_fd < 0 && + bperf_reload_leader_program(evsel, attr_map_fd, &entry)) + goto out; + + /* + * The bpf_link holds reference to the leader program, and the + * leader program holds reference to the maps. Therefore, if + * link_id is valid, diff_map_id should also be valid. + */ + evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id( + bpf_link_get_prog_id(evsel->bperf_leader_link_fd)); + assert(evsel->bperf_leader_prog_fd >= 0); + + diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id); + assert(diff_map_fd >= 0); + + /* + * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check + * whether the kernel support it + */ + err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0); + if (err) { + pr_err("The kernel does not support test_run for raw_tp BPF programs.\n" + "Therefore, --use-bpf might show inaccurate readings\n"); + goto out; + } + + /* Step 2: load the follower skeleton */ + evsel->follower_skel = bperf_follower_bpf__open(); + if (!evsel->follower_skel) { + pr_err("Failed to open follower skeleton\n"); + goto out; + } + + /* attach fexit program to the leader program */ + bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX, + evsel->bperf_leader_prog_fd, "on_switch"); + + /* connect to leader diff_reading map */ + bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd); + + /* set up reading map */ + bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, + filter_entry_cnt); + /* set up follower filter based on target */ + bpf_map__set_max_entries(evsel->follower_skel->maps.filter, + filter_entry_cnt); + err = bperf_follower_bpf__load(evsel->follower_skel); + if (err) { + pr_err("Failed to load follower skeleton\n"); + bperf_follower_bpf__destroy(evsel->follower_skel); + evsel->follower_skel = NULL; + goto out; + } + + for (i = 0; i < filter_entry_cnt; i++) { + int filter_map_fd; + __u32 key; + + if (filter_type == BPERF_FILTER_PID || + filter_type == BPERF_FILTER_TGID) + key = evsel->core.threads->map[i].pid; + else if (filter_type == BPERF_FILTER_CPU) + key = evsel->core.cpus->map[i]; + else + break; + + filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); + bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); + } + + evsel->follower_skel->bss->type = filter_type; + + err = bperf_follower_bpf__attach(evsel->follower_skel); + +out: + if (err && evsel->bperf_leader_link_fd >= 0) + close(evsel->bperf_leader_link_fd); + if (err && evsel->bperf_leader_prog_fd >= 0) + close(evsel->bperf_leader_prog_fd); + if (diff_map_fd >= 0) + close(diff_map_fd); + + flock(attr_map_fd, LOCK_UN); + close(attr_map_fd); + + return err; +} + +static int bperf__install_pe(struct evsel *evsel, int cpu, int fd) +{ + struct bperf_leader_bpf *skel = evsel->leader_skel; + + return bpf_map_update_elem(bpf_map__fd(skel->maps.events), + &cpu, &fd, BPF_ANY); +} + +/* + * trigger the leader prog on each cpu, so the accum_reading map could get + * the latest readings. + */ +static int bperf_sync_counters(struct evsel *evsel) +{ + int num_cpu, i, cpu; + + num_cpu = all_cpu_map->nr; + for (i = 0; i < num_cpu; i++) { + cpu = all_cpu_map->map[i]; + bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); + } + return 0; +} + +static int bperf__enable(struct evsel *evsel) +{ + evsel->follower_skel->bss->enabled = 1; + return 0; +} + +static int bperf__read(struct evsel *evsel) +{ + struct bperf_follower_bpf *skel = evsel->follower_skel; + __u32 num_cpu_bpf = cpu__max_cpu(); + struct bpf_perf_event_value values[num_cpu_bpf]; + int reading_map_fd, err = 0; + __u32 i, j, num_cpu; + + bperf_sync_counters(evsel); + reading_map_fd = bpf_map__fd(skel->maps.accum_readings); + + for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + __u32 cpu; + + err = bpf_map_lookup_elem(reading_map_fd, &i, values); + if (err) + goto out; + switch (evsel->follower_skel->bss->type) { + case BPERF_FILTER_GLOBAL: + assert(i == 0); + + num_cpu = all_cpu_map->nr; + for (j = 0; j < num_cpu; j++) { + cpu = all_cpu_map->map[j]; + perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; + perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; + perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; + } + break; + case BPERF_FILTER_CPU: + cpu = evsel->core.cpus->map[i]; + perf_counts(evsel->counts, i, 0)->val = values[cpu].counter; + perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled; + perf_counts(evsel->counts, i, 0)->run = values[cpu].running; + break; + case BPERF_FILTER_PID: + case BPERF_FILTER_TGID: + perf_counts(evsel->counts, 0, i)->val = 0; + perf_counts(evsel->counts, 0, i)->ena = 0; + perf_counts(evsel->counts, 0, i)->run = 0; + + for (cpu = 0; cpu < num_cpu_bpf; cpu++) { + perf_counts(evsel->counts, 0, i)->val += values[cpu].counter; + perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled; + perf_counts(evsel->counts, 0, i)->run += values[cpu].running; + } + break; + default: + break; + } + } +out: + return err; +} + +static int bperf__destroy(struct evsel *evsel) +{ + bperf_follower_bpf__destroy(evsel->follower_skel); + close(evsel->bperf_leader_prog_fd); + close(evsel->bperf_leader_link_fd); + return 0; +} + +/* + * bperf: share hardware PMCs with BPF + * + * perf uses performance monitoring counters (PMC) to monitor system + * performance. The PMCs are limited hardware resources. For example, + * Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu. + * + * Modern data center systems use these PMCs in many different ways: + * system level monitoring, (maybe nested) container level monitoring, per + * process monitoring, profiling (in sample mode), etc. In some cases, + * there are more active perf_events than available hardware PMCs. To allow + * all perf_events to have a chance to run, it is necessary to do expensive + * time multiplexing of events. + * + * On the other hand, many monitoring tools count the common metrics + * (cycles, instructions). It is a waste to have multiple tools create + * multiple perf_events of "cycles" and occupy multiple PMCs. + * + * bperf tries to reduce such wastes by allowing multiple perf_events of + * "cycles" or "instructions" (at different scopes) to share PMUs. Instead + * of having each perf-stat session to read its own perf_events, bperf uses + * BPF programs to read the perf_events and aggregate readings to BPF maps. + * Then, the perf-stat session(s) reads the values from these BPF maps. + * + * || + * shared progs and maps <- || -> per session progs and maps + * || + * --------------- || + * | perf_events | || + * --------------- fexit || ----------------- + * | --------||----> | follower prog | + * --------------- / || --- ----------------- + * cs -> | leader prog |/ ||/ | | + * --> --------------- /|| -------------- ------------------ + * / | | / || | filter map | | accum_readings | + * / ------------ ------------ || -------------- ------------------ + * | | prev map | | diff map | || | + * | ------------ ------------ || | + * \ || | + * = \ ==================================================== | ============ + * \ / user space + * \ / + * \ / + * BPF_PROG_TEST_RUN BPF_MAP_LOOKUP_ELEM + * \ / + * \ / + * \------ perf-stat ----------------------/ + * + * The figure above shows the architecture of bperf. Note that the figure + * is divided into 3 regions: shared progs and maps (top left), per session + * progs and maps (top right), and user space (bottom). + * + * The leader prog is triggered on each context switch (cs). The leader + * prog reads perf_events and stores the difference (current_reading - + * previous_reading) to the diff map. For the same metric, e.g. "cycles", + * multiple perf-stat sessions share the same leader prog. + * + * Each perf-stat session creates a follower prog as fexit program to the + * leader prog. It is possible to attach up to BPF_MAX_TRAMP_PROGS (38) + * follower progs to the same leader prog. The follower prog checks current + * task and processor ID to decide whether to add the value from the diff + * map to its accumulated reading map (accum_readings). + * + * Finally, perf-stat user space reads the value from accum_reading map. + * + * Besides context switch, it is also necessary to trigger the leader prog + * before perf-stat reads the value. Otherwise, the accum_reading map may + * not have the latest reading from the perf_events. This is achieved by + * triggering the event via sys_bpf(BPF_PROG_TEST_RUN) to each CPU. + * + * Comment before the definition of struct perf_event_attr_map_entry + * describes how different sessions of perf-stat share information about + * the leader prog. + */ + +struct bpf_counter_ops bperf_ops = { + .load = bperf__load, + .enable = bperf__enable, + .read = bperf__read, + .install_pe = bperf__install_pe, + .destroy = bperf__destroy, +}; + +static inline bool bpf_counter_skip(struct evsel *evsel) +{ + return list_empty(&evsel->bpf_counter_list) && + evsel->follower_skel == NULL; +} + int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return 0; return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); } int bpf_counter__load(struct evsel *evsel, struct target *target) { - if (target__has_bpf(target)) + if (target->bpf_str) evsel->bpf_counter_ops = &bpf_program_profiler_ops; + else if (target->use_bpf) + evsel->bpf_counter_ops = &bperf_ops; if (evsel->bpf_counter_ops) return evsel->bpf_counter_ops->load(evsel, target); @@ -293,21 +802,21 @@ int bpf_counter__load(struct evsel *evsel, struct target *target) int bpf_counter__enable(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return 0; return evsel->bpf_counter_ops->enable(evsel); } int bpf_counter__read(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return -EAGAIN; return evsel->bpf_counter_ops->read(evsel); } void bpf_counter__destroy(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return; evsel->bpf_counter_ops->destroy(evsel); evsel->bpf_counter_ops = NULL; diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 2eca210e5dc1..cb9c532e0a07 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -38,7 +38,7 @@ int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); #else /* HAVE_BPF_SKEL */ -#include<linux/err.h> +#include <linux/err.h> static inline int bpf_counter__load(struct evsel *evsel __maybe_unused, struct target *target __maybe_unused) diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h new file mode 100644 index 000000000000..186a5551ddb9 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook + +#ifndef __BPERF_STAT_H +#define __BPERF_STAT_H + +typedef struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} reading_map; + +#endif /* __BPERF_STAT_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c new file mode 100644 index 000000000000..b8fa3cb2da23 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bperf.h" +#include "bperf_u.h" + +reading_map diff_readings SEC(".maps"); +reading_map accum_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} filter SEC(".maps"); + +enum bperf_filter_type type = 0; +int enabled = 0; + +SEC("fexit/XXX") +int BPF_PROG(fexit_XXX) +{ + struct bpf_perf_event_value *diff_val, *accum_val; + __u32 filter_key, zero = 0; + __u32 *accum_key; + + if (!enabled) + return 0; + + switch (type) { + case BPERF_FILTER_GLOBAL: + accum_key = &zero; + goto do_add; + case BPERF_FILTER_CPU: + filter_key = bpf_get_smp_processor_id(); + break; + case BPERF_FILTER_PID: + filter_key = bpf_get_current_pid_tgid() & 0xffffffff; + break; + case BPERF_FILTER_TGID: + filter_key = bpf_get_current_pid_tgid() >> 32; + break; + default: + return 0; + } + + accum_key = bpf_map_lookup_elem(&filter, &filter_key); + if (!accum_key) + return 0; + +do_add: + diff_val = bpf_map_lookup_elem(&diff_readings, &zero); + if (!diff_val) + return 0; + + accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + if (!accum_val) + return 0; + + accum_val->counter += diff_val->counter; + accum_val->enabled += diff_val->enabled; + accum_val->running += diff_val->running; + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c new file mode 100644 index 000000000000..4f70d1459e86 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bperf.h" + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(int)); + __uint(map_flags, BPF_F_PRESERVE_ELEMS); +} events SEC(".maps"); + +reading_map prev_readings SEC(".maps"); +reading_map diff_readings SEC(".maps"); + +SEC("raw_tp/sched_switch") +int BPF_PROG(on_switch) +{ + struct bpf_perf_event_value val, *prev_val, *diff_val; + __u32 key = bpf_get_smp_processor_id(); + __u32 zero = 0; + long err; + + prev_val = bpf_map_lookup_elem(&prev_readings, &zero); + if (!prev_val) + return 0; + + diff_val = bpf_map_lookup_elem(&diff_readings, &zero); + if (!diff_val) + return 0; + + err = bpf_perf_event_read_value(&events, key, &val, sizeof(val)); + if (err) + return 0; + + diff_val->counter = val.counter - prev_val->counter; + diff_val->enabled = val.enabled - prev_val->enabled; + diff_val->running = val.running - prev_val->running; + *prev_val = val; + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h new file mode 100644 index 000000000000..1ce0c2c905c1 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_u.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook + +#ifndef __BPERF_STAT_U_H +#define __BPERF_STAT_U_H + +enum bperf_filter_type { + BPERF_FILTER_GLOBAL = 1, + BPERF_FILTER_CPU, + BPERF_FILTER_PID, + BPERF_FILTER_TGID, +}; + +#endif /* __BPERF_STAT_U_H */ diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c index c7cec92d0236..ab12b4c4ece2 100644 --- a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -52,7 +52,7 @@ int BPF_PROG(fentry_XXX) static inline void fexit_update_maps(struct bpf_perf_event_value *after) { - struct bpf_perf_event_value *before, diff, *accum; + struct bpf_perf_event_value *before, diff; __u32 zero = 0; before = bpf_map_lookup_elem(&fentry_readings, &zero); @@ -78,7 +78,6 @@ int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value reading; __u32 cpu = bpf_get_smp_processor_id(); - __u32 one = 1, zero = 0; int err; /* read all events before updating the maps, to reduce error */ diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h index 6b3229106f16..5875cfc8106e 100644 --- a/tools/perf/util/call-path.h +++ b/tools/perf/util/call-path.h @@ -23,7 +23,7 @@ * @children: tree of call paths of functions called * * In combination with the call_return structure, the call_path structure - * defines a context-sensitve call-graph. + * defines a context-sensitive call-graph. */ struct call_path { struct call_path *parent; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1b60985690bb..8e2777133bd9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -877,7 +877,7 @@ append_chain_children(struct callchain_node *root, if (!node) return -1; - /* lookup in childrens */ + /* lookup in children */ while (*p) { enum match_result ret; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6984c77068a3..6bcb5ef221f8 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -457,6 +457,9 @@ static int perf_stat_config(const char *var, const char *value) if (!strcmp(var, "stat.big-num")) perf_stat__set_big_num(perf_config_bool(var, value)); + if (!strcmp(var, "stat.no-csv-summary")) + perf_stat__set_no_csv_summary(perf_config_bool(var, value)); + /* Add other config variables here. */ return 0; } @@ -699,7 +702,7 @@ static int collect_config(const char *var, const char *value, /* perf_config_set can contain both user and system config items. * So we should know where each value is from. * The classification would be needed when a particular config file - * is overwrited by setting feature i.e. set_config(). + * is overwritten by setting feature i.e. set_config(). */ if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) { section->from_system_config = true; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3f4bc4050477..059bcec3f651 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -6,6 +6,7 @@ * Author: Mathieu Poirier <mathieu.poirier@linaro.org> */ +#include <linux/coresight-pmu.h> #include <linux/err.h> #include <linux/list.h> #include <linux/zalloc.h> @@ -316,7 +317,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * This is the first timestamp we've seen since the beginning of traces * or a discontinuity. Since timestamps packets are generated *after* * range packets have been generated, we need to estimate the time at - * which instructions started by substracting the number of instructions + * which instructions started by subtracting the number of instructions * executed to the timestamp. */ packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; @@ -491,13 +492,42 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { - pid_t tid; + pid_t tid = -1; + static u64 pid_fmt; + int ret; - /* Ignore PE_CONTEXT packets that don't have a valid contextID */ - if (!elem->context.ctxt_id_valid) + /* + * As all the ETMs run at the same exception level, the system should + * have the same PID format crossing CPUs. So cache the PID format + * and reuse it for sequential decoding. + */ + if (!pid_fmt) { + ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt); + if (ret) + return OCSD_RESP_FATAL_SYS_ERR; + } + + /* + * Process the PE_CONTEXT packets if we have a valid contextID or VMID. + * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 + * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. + */ + switch (pid_fmt) { + case BIT(ETM_OPT_CTXTID): + if (elem->context.ctxt_id_valid) + tid = elem->context.context_id; + break; + case BIT(ETM_OPT_CTXTID2): + if (elem->context.vmid_valid) + tid = elem->context.vmid; + break; + default: + break; + } + + if (tid == -1) return OCSD_RESP_CONT; - tid = elem->context.context_id; if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) return OCSD_RESP_FATAL_SYS_ERR; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a2a369e2fbb6..7e63e7dedc33 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -7,6 +7,7 @@ */ #include <linux/bitops.h> +#include <linux/coresight-pmu.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/log2.h> @@ -156,11 +157,52 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +/* + * The returned PID format is presented by two bits: + * + * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced; + * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced. + * + * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 + * are enabled at the same time when the session runs on an EL2 kernel. + * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be + * recorded in the trace data, the tool will selectively use + * CONTEXTIDR_EL2 as PID. + */ +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) +{ + struct int_node *inode; + u64 *metadata, val; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { + val = metadata[CS_ETM_ETMCR]; + /* CONTEXTIDR is traced */ + if (val & BIT(ETM_OPT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } else { + val = metadata[CS_ETMV4_TRCCONFIGR]; + /* CONTEXTIDR_EL2 is traced */ + if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) + *pid_fmt = BIT(ETM_OPT_CTXTID2); + /* CONTEXTIDR_EL1 is traced */ + else if (val & BIT(ETM4_CFG_BIT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } + + return 0; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { /* - * Wnen a timestamp packet is encountered the backend code + * When a timestamp packet is encountered the backend code * is stopped so that the front end has time to process packets * that were accumulated in the traceID queue. Since there can * be more than one channel per cs_etm_queue, we need to specify @@ -1655,7 +1697,7 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * | 1 1 0 1 1 1 1 1 | imm8 | * +-----------------+--------+ * - * According to the specifiction, it only defines SVC for T32 + * According to the specification, it only defines SVC for T32 * with 16 bits instruction and has no definition for 32bits; * so below only read 2 bytes as instruction size for T32. */ @@ -1887,7 +1929,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, /* * If the previous packet is an exception return packet - * and the return address just follows SVC instuction, + * and the return address just follows SVC instruction, * it needs to calibrate the previous packet sample flags * as PERF_IP_FLAG_SYSCALLRET. */ @@ -1961,7 +2003,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, * contain exception type related info so we cannot decide * the exception type purely based on exception return packet. * If we record the exception number from exception packet and - * reuse it for excpetion return packet, this is not reliable + * reuse it for exception return packet, this is not reliable * due the trace can be discontinuity or the interrupt can * be nested, thus the recorded exception number cannot be * used for exception return packet for these two cases. @@ -2435,7 +2477,7 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) } static const char * const cs_etm_global_header_fmts[] = { - [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_HEADER_VERSION] = " Header version %llx\n", [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", }; @@ -2443,6 +2485,7 @@ static const char * const cs_etm_global_header_fmts[] = { static const char * const cs_etm_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETM_ETMCR] = " ETMCR %llx\n", [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", [CS_ETM_ETMCCER] = " ETMCCER %llx\n", @@ -2452,6 +2495,7 @@ static const char * const cs_etm_priv_fmts[] = { static const char * const cs_etmv4_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", @@ -2461,26 +2505,167 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", }; -static void cs_etm__print_auxtrace_info(__u64 *val, int num) +static const char * const param_unk_fmt = + " Unknown parameter [%d] %llx\n"; +static const char * const magic_unk_fmt = + " Magic number Unknown %llx\n"; + +static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset) { - int i, j, cpu = 0; + int i = *offset, j, nr_params = 0, fmt_offset; + __u64 magic; - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) - fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + /* check magic value */ + magic = val[i + CS_ETM_MAGIC]; + if ((magic != __perf_cs_etmv3_magic) && + (magic != __perf_cs_etmv4_magic)) { + /* failure - note bad magic value */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + + /* print common header block */ + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); + + if (magic == __perf_cs_etmv3_magic) { + nr_params = CS_ETM_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETM_ETMCR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + } else if (magic == __perf_cs_etmv4_magic) { + nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETMV4_TRCCONFIGR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + *offset = i; + return 0; +} + +static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) +{ + int i = *offset, j, total_params = 0; + __u64 magic; + + magic = val[i + CS_ETM_MAGIC]; + /* total params to print is NR_PARAMS + common block size for v1 */ + total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; - for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { - if (val[i] == __perf_cs_etmv3_magic) - for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) + if (magic == __perf_cs_etmv3_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETM_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); - else if (val[i] == __perf_cs_etmv4_magic) - for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + } + } else if (magic == __perf_cs_etmv4_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETMV4_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); - else - /* failure.. return */ + } + } else { + /* failure - note bad magic value and error out */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + *offset = i; + return 0; +} + +static void cs_etm__print_auxtrace_info(__u64 *val, int num) +{ + int i, cpu = 0, version, err; + + /* bail out early on bad header version */ + version = val[0]; + if (version > CS_HEADER_CURRENT_VERSION) { + /* failure.. return */ + fprintf(stdout, " Unknown Header Version = %x, ", version); + fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION); + return; + } + + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { + if (version == 0) + err = cs_etm__print_cpu_metadata_v0(val, &i); + else if (version == 1) + err = cs_etm__print_cpu_metadata_v1(val, &i); + if (err) return; } } +/* + * Read a single cpu parameter block from the auxtrace_info priv block. + * + * For version 1 there is a per cpu nr_params entry. If we are handling + * version 1 file, then there may be less, the same, or more params + * indicated by this value than the compile time number we understand. + * + * For a version 0 info block, there are a fixed number, and we need to + * fill out the nr_param value in the metadata we create. + */ +static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, + int out_blk_size, int nr_params_v0) +{ + u64 *metadata = NULL; + int hdr_version; + int nr_in_params, nr_out_params, nr_cmn_params; + int i, k; + + metadata = zalloc(sizeof(*metadata) * out_blk_size); + if (!metadata) + return NULL; + + /* read block current index & version */ + i = *buff_in_offset; + hdr_version = buff_in[CS_HEADER_VERSION]; + + if (!hdr_version) { + /* read version 0 info block into a version 1 metadata block */ + nr_in_params = nr_params_v0; + metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; + metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; + metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; + /* remaining block params at offset +1 from source */ + for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) + metadata[k + 1] = buff_in[i + k]; + /* version 0 has 2 common params */ + nr_cmn_params = 2; + } else { + /* read version 1 info block - input and output nr_params may differ */ + /* version 1 has 3 common params */ + nr_cmn_params = 3; + nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; + + /* if input has more params than output - skip excess */ + nr_out_params = nr_in_params + nr_cmn_params; + if (nr_out_params > out_blk_size) + nr_out_params = out_blk_size; + + for (k = CS_ETM_MAGIC; k < nr_out_params; k++) + metadata[k] = buff_in[i + k]; + + /* record the actual nr params we copied */ + metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; + } + + /* adjust in offset by number of in params used */ + i += nr_in_params + nr_cmn_params; + *buff_in_offset = i; + return metadata; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2492,11 +2677,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event, int info_header_size; int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu; - int err = 0, idx = -1; - int i, j, k; + int num_cpu, trcidr_idx; + int err = 0; + int i, j; u64 *ptr, *hdr = NULL; u64 **metadata = NULL; + u64 hdr_version; /* * sizeof(auxtrace_info_event::type) + @@ -2512,16 +2698,21 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* First the global part */ ptr = (u64 *) auxtrace_info->priv; - /* Look for version '0' of the header */ - if (ptr[0] != 0) + /* Look for version of the header */ + hdr_version = ptr[0]; + if (hdr_version > CS_HEADER_CURRENT_VERSION) { + /* print routine will print an error on bad version */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, 0); return -EINVAL; + } - hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX); if (!hdr) return -ENOMEM; /* Extract header information - see cs-etm.h for format */ - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) hdr[i] = ptr[i]; num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & @@ -2552,35 +2743,31 @@ int cs_etm__process_auxtrace_info(union perf_event *event, */ for (j = 0; j < num_cpu; j++) { if (ptr[i] == __perf_cs_etmv3_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETM_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETM_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETM_PRIV_MAX, + CS_ETM_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETM_ETMTRACEIDR]; - i += CS_ETM_PRIV_MAX; + trcidr_idx = CS_ETM_ETMTRACEIDR; + } else if (ptr[i] == __perf_cs_etmv4_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETMV4_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETMV4_PRIV_MAX, + CS_ETMV4_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; - i += CS_ETMV4_PRIV_MAX; + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } + + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; } /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, idx); + inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); /* Something went wrong, no need to continue */ if (!inode) { @@ -2601,7 +2788,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, } /* - * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and * CS_ETMV4_PRIV_MAX mark how many double words are in the * global metadata, and each cpu's metadata respectively. * The following tests if the correct number of double words was @@ -2703,6 +2890,12 @@ err_free_traceid_list: intlist__delete(traceid_list); err_free_hdr: zfree(&hdr); - + /* + * At this point, as a minimum we have valid header. Dump the rest of + * the info section - the print routines will error out on structural + * issues. + */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 4ad925d6d799..36428918411e 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -12,28 +12,43 @@ struct perf_session; -/* Versionning header in case things need tro change in the future. That way +/* + * Versioning header in case things need to change in the future. That way * decoding of old snapshot is still possible. */ enum { /* Starting with 0x0 */ - CS_HEADER_VERSION_0, + CS_HEADER_VERSION, /* PMU->type (32 bit), total # of CPUs (32 bit) */ CS_PMU_TYPE_CPUS, CS_ETM_SNAPSHOT, - CS_HEADER_VERSION_0_MAX, + CS_HEADER_VERSION_MAX, }; +/* + * Update the version for new format. + * + * New version 1 format adds a param count to the per cpu metadata. + * This allows easy adding of new metadata parameters. + * Requires that new params always added after current ones. + * Also allows client reader to handle file versions that are different by + * checking the number of params in the file vs the number expected. + */ +#define CS_HEADER_CURRENT_VERSION 1 + /* Beginning of header common to both ETMv3 and V4 */ enum { CS_ETM_MAGIC, CS_ETM_CPU, + /* Number of trace config params in following ETM specific block */ + CS_ETM_NR_TRC_PARAMS, + CS_ETM_COMMON_BLK_MAX_V1, }; /* ETMv3/PTM metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETM_ETMCR = CS_ETM_CPU + 1, + CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETM_ETMTRACEIDR, /* RO, taken from sysFS */ CS_ETM_ETMCCER, @@ -41,10 +56,13 @@ enum { CS_ETM_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1) + /* ETMv4 metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1, + CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETMV4_TRCTRACEIDR, /* RO, taken from sysFS */ CS_ETMV4_TRCIDR0, @@ -55,9 +73,12 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1) + /* * ETMv3 exception encoding number: - * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) + * See Embedded Trace Macrocell specification (ARM IHI 0014Q) * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors. */ enum { @@ -162,7 +183,7 @@ struct cs_etm_packet_queue { #define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) -#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) +#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64)) #define __perf_cs_etmv3_magic 0x3030303030303030ULL #define __perf_cs_etmv4_magic 0x4040404040404040ULL @@ -173,6 +194,7 @@ struct cs_etm_packet_queue { int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt); int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, pid_t tid, u8 trace_chan_id); bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 8b67bd97d122..a9c375e63e73 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -949,7 +949,7 @@ static char *change_name(char *name, char *orig_name, int dup) /* * Add '_' prefix to potential keywork. According to * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com), - * futher CTF spec updating may require us to use '$'. + * further CTF spec updating may require us to use '$'. */ if (dup < 0) len = strlen(name) + sizeof("_"); diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index 39c05200ed65..ddf33d58bcd3 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -147,7 +147,7 @@ error: * Demangle Java function signature (openJDK, not GCJ) * input: * str: string to parse. String is not modified - * flags: comobination of JAVA_DEMANGLE_* flags to modify demangling + * flags: combination of JAVA_DEMANGLE_* flags to modify demangling * return: * if input can be demangled, then a newly allocated string is returned. * if input cannot be demangled, then NULL is returned @@ -164,7 +164,7 @@ java_demangle_sym(const char *str, int flags) if (!str) return NULL; - /* find start of retunr type */ + /* find start of return type */ p = strrchr(str, ')'); if (!p) return NULL; diff --git a/tools/perf/util/demangle-ocaml.c b/tools/perf/util/demangle-ocaml.c index 3df14e67c622..9d707bb60b4b 100644 --- a/tools/perf/util/demangle-ocaml.c +++ b/tools/perf/util/demangle-ocaml.c @@ -64,17 +64,5 @@ ocaml_demangle_sym(const char *sym) } result[j] = '\0'; - /* scan backwards to remove an "_" followed by decimal digits */ - if (j != 0 && isdigit(result[j - 1])) { - while (--j) { - if (!isdigit(result[j])) { - break; - } - } - if (result[j] == '_') { - result[j] = '\0'; - } - } - return result; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index cd2fe64a3c5d..52e7101c5609 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -216,7 +216,7 @@ struct dso { /* dso__for_each_symbol - iterate over the symbols of given type * - * @dso: the 'struct dso *' in which symbols itereated + * @dso: the 'struct dso *' in which symbols are iterated * @pos: the 'struct symbol *' to use as a loop cursor * @n: the 'struct rb_node *' to use as a temporary storage */ diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 7b2d471a6419..b2f4920e19a6 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -91,7 +91,7 @@ static Dwarf_Line *cu_getsrc_die(Dwarf_Die *cu_die, Dwarf_Addr addr) return NULL; } while (laddr == addr); l++; - /* Going foward to find the statement line */ + /* Going forward to find the statement line */ do { line = dwarf_onesrcline(lines, l++); if (!line || dwarf_lineaddr(line, &laddr) != 0 || @@ -177,7 +177,7 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, * die_get_linkage_name - Get the linkage name of the object * @dw_die: A DIE of the object * - * Get the linkage name attiribute of given @dw_die. + * Get the linkage name attribute of given @dw_die. * For C++ binary, the linkage name will be the mangled symbol. */ const char *die_get_linkage_name(Dwarf_Die *dw_die) @@ -739,7 +739,7 @@ static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) * @data: user data * * Walk on the instances of give @in_die. @in_die must be an inlined function - * declartion. This returns the return value of @callback if it returns + * declaration. This returns the return value of @callback if it returns * non-zero value, or -ENOENT if there is no instance. */ int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *), diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 506006e0cf66..cb99646843a9 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -22,7 +22,7 @@ const char *cu_get_comp_dir(Dwarf_Die *cu_die); int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, const char **fname, int *lineno); -/* Walk on funcitons at given address */ +/* Walk on functions at given address */ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, int (*callback)(Dwarf_Die *, void *), void *data); diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 1b49ecee5aff..3fa4486742cd 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -24,6 +24,7 @@ #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" +#include "../arch/mips/include/dwarf-regs-table.h" #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) @@ -53,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(sparc_regstr_tbl, n); case EM_XTENSA: return __get_dwarf_regstr(xtensa_regstr_tbl, n); + case EM_MIPS: + return __get_dwarf_regstr(mips_regstr_tbl, n); default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f603edbbbc6f..8a62fb39e365 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -147,6 +147,7 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; + u16 p_stage_cyc; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; @@ -427,5 +428,7 @@ char *get_page_size_name(u64 size, char *str); void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); +const char *arch_perf_header_entry(const char *se_header); +int arch_support_sort_key(const char *sort_key); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index 859cb34fcff2..631a4af2ed86 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -21,7 +21,7 @@ * all struct perf_record_lost_samples.lost fields reported. * * The total_period is needed because by default auto-freq is used, so - * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get + * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get * the total number of low level events, it is necessary to to sum all struct * perf_record_sample.period and stash the result in total_period. */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 882cd1f721d9..f1c79ecf8107 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -36,6 +36,7 @@ #include <fcntl.h> #include <sys/ioctl.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <linux/bitops.h> #include <linux/hash.h> @@ -1209,7 +1210,7 @@ bool evlist__valid_read_format(struct evlist *evlist) } } - /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ + /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */ if ((sample_type & PERF_SAMPLE_READ) && !(read_format & PERF_FORMAT_ID)) { return false; @@ -1406,6 +1407,13 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); /* + * Change the name of this process not to confuse --exclude-perf users + * that sees 'perf' in the window up to the execvp() and thinks that + * perf samples are not being excluded. + */ + prctl(PR_SET_NAME, "perf-exec"); + + /* * Tell the parent we're ready to go */ close(child_ready_pipe[1]); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7ecbc8e2fbfa..2d2614eeaa20 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -621,7 +621,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_AL #define COP(x) (1 << x) /* - * cache operartion stat + * cache operation stat * L1I : Read and prefetch only * ITLB and BPU : Read-only */ @@ -2275,7 +2275,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, /* * Undo swap of u64, then swap on individual u32s, * get the size of the raw area and undo all of the - * swap. The pevent interface handles endianity by + * swap. The pevent interface handles endianness by * itself. */ if (swapped) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6026487353dd..eccc4fd5b3eb 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -20,6 +20,8 @@ union perf_event; struct bpf_counter_ops; struct target; struct hashmap; +struct bperf_leader_bpf; +struct bperf_follower_bpf; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -130,8 +132,24 @@ struct evsel { * See also evsel__has_callchain(). */ __u64 synth_sample_type; - struct list_head bpf_counter_list; + + /* + * bpf_counter_ops serves two use cases: + * 1. perf-stat -b counting events used byBPF programs + * 2. perf-stat --use-bpf use BPF programs to aggregate counts + */ struct bpf_counter_ops *bpf_counter_ops; + + /* for perf-stat -b */ + struct list_head bpf_counter_list; + + /* for perf-stat --use-bpf */ + int bperf_leader_prog_fd; + int bperf_leader_link_fd; + union { + struct bperf_leader_bpf *leader_skel; + struct bperf_follower_bpf *follower_skel; + }; }; struct perf_missing_features { @@ -157,7 +175,6 @@ struct perf_missing_features { extern struct perf_missing_features perf_missing_features; struct perf_cpu_map; -struct target; struct thread_map; struct record_opts; diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index dcf8d19b83c8..85df3e4771e4 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -3,7 +3,7 @@ #define PARSE_CTX_H 1 // There are fixes that need to land upstream before we can use libbpf's headers, -// for now use our copy uncoditionally, since the data structures at this point +// for now use our copy unconditionally, since the data structures at this point // are exactly the same, no problem. //#ifdef HAVE_LIBBPF_SUPPORT //#include <bpf/hashmap.h> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 20effdff76ce..aa1e42518d37 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -127,7 +127,7 @@ static int __do_write_buf(struct feat_fd *ff, const void *buf, size_t size) return 0; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ int do_write(struct feat_fd *ff, const void *buf, size_t size) { if (!ff->buf) @@ -135,7 +135,7 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size) return __do_write_buf(ff, buf, size); } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) { u64 *p = (u64 *) set; @@ -154,7 +154,7 @@ static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) return 0; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ int write_padded(struct feat_fd *ff, const void *bf, size_t count, size_t count_aligned) { @@ -170,7 +170,7 @@ int write_padded(struct feat_fd *ff, const void *bf, #define string_size(str) \ (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32)) -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_write_string(struct feat_fd *ff, const char *str) { u32 len, olen; @@ -266,7 +266,7 @@ static char *do_read_string(struct feat_fd *ff) return NULL; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) { unsigned long *set; @@ -2874,7 +2874,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) int err = -1; if (ff->ph->needs_swap) { - pr_warning("interpreting bpf_prog_info from systems with endianity is not yet supported\n"); + pr_warning("interpreting bpf_prog_info from systems with endianness is not yet supported\n"); return 0; } @@ -2942,7 +2942,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) int err = -1; if (ff->ph->needs_swap) { - pr_warning("interpreting btf from systems with endianity is not yet supported\n"); + pr_warning("interpreting btf from systems with endianness is not yet supported\n"); return 0; } @@ -3481,11 +3481,11 @@ static const size_t attr_pipe_abi_sizes[] = { }; /* - * In the legacy pipe format, there is an implicit assumption that endiannesss + * In the legacy pipe format, there is an implicit assumption that endianness * between host recording the samples, and host parsing the samples is the * same. This is not always the case given that the pipe output may always be * redirected into a file and analyzed on a different machine with possibly a - * different endianness and perf_event ABI revsions in the perf tool itself. + * different endianness and perf_event ABI revisions in the perf tool itself. */ static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) { diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index c82f5fc26af8..9299ee535518 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); + hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else @@ -289,13 +290,14 @@ static long hist_time(unsigned long htime) } static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat) + u64 weight, u64 ins_lat, u64 p_stage_cyc) { he_stat->period += period; he_stat->weight += weight; he_stat->nr_events += 1; he_stat->ins_lat += ins_lat; + he_stat->p_stage_cyc += p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->nr_events += src->nr_events; dest->weight += src->weight; dest->ins_lat += src->ins_lat; + dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -597,6 +600,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, u64 period = entry->stat.period; u64 weight = entry->stat.weight; u64 ins_lat = entry->stat.ins_lat; + u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -615,11 +619,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight, ins_lat); + he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight, ins_lat); + he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); /* * This mem info was allocated from sample__resolve_mem @@ -731,6 +735,7 @@ __hists__add_entry(struct hists *hists, .period = sample->period, .weight = sample->weight, .ins_lat = sample->ins_lat, + .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3c537232294b..e2faa745c8d6 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,6 +75,7 @@ enum hist_column { HISTC_MEM_BLOCKED, HISTC_LOCAL_INS_LAT, HISTC_GLOBAL_INS_LAT, + HISTC_P_STAGE_CYC, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index f6e28ac231b7..8658d42ce57a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3569,7 +3569,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, /* * Since this thread will not be kept in any rbtree not in a * list, initialize its list node so that at thread__put() the - * current thread lifetime assuption is kept and we don't segfault + * current thread lifetime assumption is kept and we don't segfault * at list_del_init(). */ INIT_LIST_HEAD(&pt->unknown_thread->node); diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c index a217ecf0359d..6a6712635aa4 100644 --- a/tools/perf/util/levenshtein.c +++ b/tools/perf/util/levenshtein.c @@ -30,7 +30,7 @@ * * It does so by calculating the costs of the path ending in characters * i (in string1) and j (in string2), respectively, given that the last - * operation is a substition, a swap, a deletion, or an insertion. + * operation is a substitution, a swap, a deletion, or an insertion. * * This implementation allows the costs to be weighted: * diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 6b4e5a0892f8..c397be0c2e32 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -4,7 +4,7 @@ * generic one. * * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch - * name and the defination of this function is included directly from + * name and the definition of this function is included directly from * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function * is defined no matter what arch the host is. * diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c index 21c216c40a3b..b2b92d030aef 100644 --- a/tools/perf/util/libunwind/x86_32.c +++ b/tools/perf/util/libunwind/x86_32.c @@ -4,7 +4,7 @@ * generic one. * * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch - * name and the defination of this function is included directly from + * name and the definition of this function is included directly from * 'arch/x86/util/unwind-libunwind.c', to make sure that this function * is defined no matter what arch the host is. * diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index dbdffb6673fe..3ceaf7ef3301 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -471,7 +471,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, /* * This is an optional work. Even it fail we can continue our - * work. Needn't to check error return. + * work. Needn't check error return. */ llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b5c2d8be4144..3ff4936a15a4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -905,7 +905,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start maps__insert(&machine->kmaps, map); - /* Put the map here because maps__insert alread got it */ + /* Put the map here because maps__insert already got it */ map__put(map); out: /* put the dso here, corresponding to machine__findnew_module_dso */ @@ -1952,7 +1952,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event * maps because that is what the kernel just did. * * But when synthesizing, this should not be done. If we do, we end up - * with overlapping maps as we process the sythesized MMAP2 events that + * with overlapping maps as we process the synthesized MMAP2 events that * get delivered shortly thereafter. * * Use the FORK event misc flags in an internal way to signal this @@ -2038,8 +2038,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, static bool symbol__match_regex(struct symbol *sym, regex_t *regex) { if (!regexec(regex, sym->name, 0, NULL, 0)) - return 1; - return 0; + return true; + return false; } static void ip__resolve_ams(struct thread *thread, @@ -2518,7 +2518,7 @@ static bool has_stitched_lbr(struct thread *thread, /* * Check if there are identical LBRs between two samples. - * Identicall LBRs must have same from, to and flags values. Also, + * Identical LBRs must have same from, to and flags values. Also, * they have to be saved in the same LBR registers (same physical * index). * @@ -2588,7 +2588,7 @@ err: } /* - * Recolve LBR callstack chain sample + * Resolve LBR callstack chain sample * Return: * 1 on success get LBR callchain information * 0 no available LBR callchain information, should try fp diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 9f32825c98d8..d32f5b28c1fb 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -75,7 +75,7 @@ struct thread; /* map__for_each_symbol - iterate over the symbols in the given map * - * @map: the 'struct map *' in which symbols itereated + * @map: the 'struct map *' in which symbols are iterated * @pos: the 'struct symbol *' to use as a loop cursor * @n: the 'struct rb_node *' to use as a temporary storage * Note: caller must ensure map->dso is not NULL (map is loaded). @@ -86,7 +86,7 @@ struct thread; /* map__for_each_symbol_with_name - iterate over the symbols in the given map * that have the given name * - * @map: the 'struct map *' in which symbols itereated + * @map: the 'struct map *' in which symbols are iterated * @sym_name: the symbol name * @pos: the 'struct symbol *' to use as a loop cursor */ diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 755cef7e0625..cacdebd65b8a 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -44,7 +44,6 @@ bool is_mem_loads_aux_event(struct evsel *leader); void perf_mem_events__list(void); -struct mem_info; int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); @@ -81,7 +80,7 @@ struct c2c_stats { u32 rmt_dram; /* count of loads miss to remote DRAM */ u32 blk_data; /* count of loads blocked by data */ u32 blk_addr; /* count of loads blocked by address conflict */ - u32 nomap; /* count of load/stores with no phys adrs */ + u32 nomap; /* count of load/stores with no phys addrs */ u32 noparse; /* count of unparsable data sources */ }; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 26c990e32378..8336dd8e8098 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -181,7 +181,7 @@ static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2) * @pctx: the parse context for the metric expression. * @metric_no_merge: don't attempt to share events for the metric with other * metrics. - * @has_constraint: is there a contraint on the group of events? In which case + * @has_constraint: is there a constraint on the group of events? In which case * the events won't be grouped. * @metric_events: out argument, null terminated array of evsel's associated * with the metric. @@ -618,7 +618,7 @@ static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw, bool details) { - struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct pmu_events_map *map = pmu_events_map__find(); struct pmu_event *pe; int i; struct rblist groups; @@ -900,7 +900,8 @@ static int __add_metric(struct list_head *metric_list, (match_metric(__pe->metric_group, __metric) || \ match_metric(__pe->metric_name, __metric))) -static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map) +struct pmu_event *metricgroup__find_metric(const char *metric, + struct pmu_events_map *map) { struct pmu_event *pe; int i; @@ -985,7 +986,7 @@ static int __resolve_metric(struct metric *m, struct expr_id *parent; struct pmu_event *pe; - pe = find_metric(cur->key, map); + pe = metricgroup__find_metric(cur->key, map); if (!pe) continue; @@ -1253,8 +1254,7 @@ int metricgroup__parse_groups(const struct option *opt, struct rblist *metric_events) { struct evlist *perf_evlist = *(struct evlist **)opt->value; - struct pmu_events_map *map = perf_pmu__find_map(NULL); - + struct pmu_events_map *map = pmu_events_map__find(); return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, NULL, metric_events, map); @@ -1273,7 +1273,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metricgroup__has_metric(const char *metric) { - struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct pmu_events_map *map = pmu_events_map__find(); struct pmu_event *pe; int i; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index ed1b9392e624..cc4a92492a61 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -9,7 +9,6 @@ struct evlist; struct evsel; -struct evlist; struct option; struct rblist; struct pmu_events_map; @@ -44,7 +43,8 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); - +struct pmu_event *metricgroup__find_metric(const char *metric, + struct pmu_events_map *map); int metricgroup__parse_groups_test(struct evlist *evlist, struct pmu_events_map *map, const char *str, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c0c0fab22cb8..8123d218ad17 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -846,9 +846,9 @@ split_bpf_config_terms(struct list_head *evt_head_config, struct parse_events_term *term, *temp; /* - * Currectly, all possible user config term + * Currently, all possible user config term * belong to bpf object. parse_events__is_hardcoded_term() - * happends to be a good flag. + * happens to be a good flag. * * See parse_events_config_bpf() and * config_term_tracepoint(). @@ -898,7 +898,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, /* * Caller doesn't know anything about obj_head_config, - * so combine them together again before returnning. + * so combine them together again before returning. */ if (head_config) list_splice_tail(&obj_head_config, head_config); @@ -1185,10 +1185,10 @@ do { \ } /* - * Check term availbility after basic checking so + * Check term availability after basic checking so * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered. * - * If check availbility at the entry of this function, + * If check availability at the entry of this function, * user will see "'<sysfs term>' is not usable in 'perf stat'" * if an invalid config term is provided for legacy events * (for example, instructions/badterm/...), which is confusing. diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 46fd0f998484..286d5e415bdc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -717,6 +717,11 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } +struct pmu_events_map *__weak pmu_events_map__find(void) +{ + return perf_pmu__find_map(NULL); +} + bool pmu_uncore_alias_match(const char *pmu_name, const char *name) { char *tmp = NULL, *tok, *str; @@ -1069,7 +1074,7 @@ int perf_pmu__format_type(struct list_head *formats, const char *name) /* * Sets value based on the format definition (format parameter) - * and unformated value (value parameter). + * and unformatted value (value parameter). */ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, bool zero) @@ -1408,7 +1413,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, } /* - * if no unit or scale foundin aliases, then + * if no unit or scale found in aliases, then * set defaults as for evsel * unit cannot left to NULL */ diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 160b0f561771..1f1749ba830f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -114,6 +114,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, struct pmu_events_map *map); struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +struct pmu_events_map *pmu_events_map__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a9cff3a50ddf..a78c8d59a555 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3228,7 +3228,7 @@ errout: return err; } -/* Concatinate two arrays */ +/* Concatenate two arrays */ static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b) { void *ret; @@ -3258,7 +3258,7 @@ concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs, if (*ntevs + ntevs2 > probe_conf.max_probes) ret = -E2BIG; else { - /* Concatinate the array of probe_trace_event */ + /* Concatenate the array of probe_trace_event */ new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs), *tevs2, ntevs2 * sizeof(**tevs2)); if (!new_tevs) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1b118c9c86a6..866f2d514d72 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -164,7 +164,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) /* * Convert a location into trace_arg. * If tvar == NULL, this just checks variable can be converted. - * If fentry == true and vr_die is a parameter, do huristic search + * If fentry == true and vr_die is a parameter, do heuristic search * for the location fuzzed by function entry mcount. */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, @@ -498,7 +498,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, " nor array.\n", varname); return -EINVAL; } - /* While prcessing unnamed field, we don't care about this */ + /* While processing unnamed field, we don't care about this */ if (field->ref && dwarf_diename(vr_die)) { pr_err("Semantic error: %s must be referred by '.'\n", field->name); @@ -1832,7 +1832,7 @@ static int line_range_walk_cb(const char *fname, int lineno, (lf->lno_s > lineno || lf->lno_e < lineno)) return 0; - /* Make sure this line can be reversable */ + /* Make sure this line can be reversible */ if (cu_find_lineinfo(&lf->cu_die, addr, &__fname, &__lineno) > 0 && (lineno != __lineno || strcmp(fname, __fname))) return 0; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 078a71773565..8130b56aa04b 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -45,7 +45,7 @@ * the data portion is mmap()'ed. * * To sort the queues in chronological order, all queue access is controlled - * by the auxtrace_heap. This is basicly a stack, each stack element has two + * by the auxtrace_heap. This is basically a stack, each stack element has two * entries, the queue number and a time stamp. However the stack is sorted by * the time stamps. The highest time stamp is at the bottom the lowest * (nearest) time stamp is at the top. That sort order is maintained at all @@ -65,11 +65,11 @@ * stamp of the last processed entry of the auxtrace_buffer replaces the * current auxtrace_heap top. * - * 3. Auxtrace_queues might run of out data and are feeded by the + * 3. Auxtrace_queues might run of out data and are fed by the * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). * * Event Generation - * Each sampling-data entry in the auxilary trace data generates a perf sample. + * Each sampling-data entry in the auxiliary trace data generates a perf sample. * This sample is filled * with data from the auxtrace such as PID/TID, instruction address, CPU state, * etc. This sample is processed with perf_session__deliver_synth_event() to @@ -575,7 +575,7 @@ static unsigned long long get_trailer_time(const unsigned char *buf) * pointer to the queue, the second parameter is the time stamp. This * is the time stamp: * - of the event that triggered this processing. - * - or the time stamp when the last proccesing of this queue stopped. + * - or the time stamp when the last processing of this queue stopped. * In this case it stopped at a 4KB page boundary and record the * position on where to continue processing on the next invocation * (see buffer->use_data and buffer->use_size). @@ -640,7 +640,7 @@ static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) goto out; } - pos += dsdes; /* Skip diagnositic entry */ + pos += dsdes; /* Skip diagnostic entry */ /* Check for trailer entry */ if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index cfcf8d534d76..08ec3c3ae0ee 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -160,11 +160,9 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) const char *color = PERF_COLOR_BLUE; struct cf_ctrset_entry *cep, ce; struct pmu_events_map *map; - struct perf_pmu pmu; u64 *p; - memset(&pmu, 0, sizeof(pmu)); - map = perf_pmu__find_map(&pmu); + map = pmu_events_map__find(); while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c83c2c6564e0..4e4aa4c97ac5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1531,7 +1531,7 @@ static void set_table_handlers(struct tables *tables) * Attempt to use the call path root from the call return * processor, if the call return processor is in use. Otherwise, * we allocate a new call path root. This prevents exporting - * duplicate call path ids when both are in use simultaniously. + * duplicate call path ids when both are in use simultaneously. */ if (tables->dbe.crp) tables->dbe.cpr = tables->dbe.crp->cpr; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 859832a82496..eba3769be3f1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1069,7 +1069,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) * in "to" register. * For example, there is a call stack * "A"->"B"->"C"->"D". - * The LBR registers will recorde like + * The LBR registers will be recorded like * "C"->"D", "B"->"C", "A"->"B". * So only the first "to" register and all "from" * registers are needed to construct the whole stack. @@ -1302,8 +1302,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { printf("... weight: %" PRIu64 "", sample->weight); - if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { printf(",0x%"PRIx16"", sample->ins_lat); + printf(",0x%"PRIx16"", sample->p_stage_cyc); + } printf("\n"); } @@ -1584,7 +1586,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->event_update(tool, event, &session->evlist); case PERF_RECORD_HEADER_EVENT_TYPE: /* - * Depreceated, but we need to handle it for sake + * Deprecated, but we need to handle it for sake * of old data files create in pipe mode. */ return 0; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 552b590485bf..88ce47f2547e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -25,6 +25,7 @@ #include <traceevent/event-parse.h> #include "mem-events.h" #include "annotate.h" +#include "event.h" #include "time-utils.h" #include "cgroup.h" #include "machine.h" @@ -36,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -45,6 +46,8 @@ const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; +const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; +const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; /* * Replaces all occurrences of a char used with the: @@ -1408,6 +1411,25 @@ struct sort_entry sort_global_ins_lat = { .se_width_idx = HISTC_GLOBAL_INS_LAT, }; +static int64_t +sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->stat.p_stage_cyc - right->stat.p_stage_cyc; +} + +static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); +} + +struct sort_entry sort_p_stage_cyc = { + .se_header = "Pipeline Stage Cycle", + .se_cmp = sort__global_p_stage_cyc_cmp, + .se_snprintf = hist_entry__p_stage_cyc_snprintf, + .se_width_idx = HISTC_P_STAGE_CYC, +}; + struct sort_entry sort_mem_daddr_sym = { .se_header = "Data Symbol", .se_cmp = sort__daddr_cmp, @@ -1816,6 +1838,21 @@ struct sort_dimension { int taken; }; +int __weak arch_support_sort_key(const char *sort_key __maybe_unused) +{ + return 0; +} + +const char * __weak arch_perf_header_entry(const char *se_header) +{ + return se_header; +} + +static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) +{ + sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header); +} + #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } static struct sort_dimension common_sort_dimensions[] = { @@ -1841,6 +1878,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), + DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc), }; #undef DIM @@ -2739,7 +2777,20 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, struct evlist *evlist, int level) { - unsigned int i; + unsigned int i, j; + + /* + * Check to see if there are any arch specific + * sort dimensions not applicable for the current + * architecture. If so, Skip that sort key since + * we don't want to display it in the output fields. + */ + for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) { + if (!strcmp(arch_specific_sort_keys[j], tok) && + !arch_support_sort_key(tok)) { + return 0; + } + } for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; @@ -2747,6 +2798,11 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (strncasecmp(tok, sd->name, strlen(tok))) continue; + for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { + if (!strcmp(dynamic_headers[j], sd->name)) + sort_dimension_add_dynamic_header(sd); + } + if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 63f67a3f3630..87a092645aa7 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -51,6 +51,7 @@ struct he_stat { u64 period_guest_us; u64 weight; u64 ins_lat; + u64 p_stage_cyc; u32 nr_events; }; @@ -234,6 +235,7 @@ enum sort_type { SORT_CODE_PAGE_SIZE, SORT_LOCAL_INS_LAT, SORT_GLOBAL_INS_LAT, + SORT_PIPELINE_STAGE_CYC, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7f09cdaf5b60..d3137bc17065 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -439,6 +439,12 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int if (counter->cgrp) os.nfields++; } + + if (!config->no_csv_summary && config->csv_output && + config->summary && !config->interval) { + fprintf(config->output, "%16s%s", "summary", config->csv_sep); + } + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { pm(config, &os, NULL, "", "", 0); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 6ccf21a72f06..3f800e71126f 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,6 +9,7 @@ #include "expr.h" #include "metricgroup.h" #include "cgroup.h" +#include "units.h" #include <linux/zalloc.h> /* @@ -1270,18 +1271,15 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { - char unit = 'M'; - char unit_buf[10]; + char unit = ' '; + char unit_buf[10] = "/sec"; total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); - if (total) - ratio = 1000.0 * avg / total; - if (ratio < 0.001) { - ratio *= 1000; - unit = 'K'; - } - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); + ratio = convert_unit_double(1000000000.0 * avg / total, &unit); + + if (unit != ' ') + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { print_smi_cost(config, cpu, out, st, &rsd); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c400f8dde017..2db46b9bebd0 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -76,8 +76,7 @@ double rel_stddev_stats(double stddev, double avg) return pct; } -bool __perf_evsel_stat__is(struct evsel *evsel, - enum perf_stat_evsel_id id) +bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id) { struct perf_stat_evsel *ps = evsel->stats; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index d85c292148bb..48e6a06233fa 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -128,6 +128,7 @@ struct perf_stat_config { bool all_user; bool percore_show_thread; bool summary; + bool no_csv_summary; bool metric_no_group; bool metric_no_merge; bool stop_read_counter; @@ -160,6 +161,7 @@ struct perf_stat_config { }; void perf_stat__set_big_num(int set); +void perf_stat__set_no_csv_summary(int set); void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); @@ -187,11 +189,10 @@ struct perf_aggr_thread_value { u64 ena; }; -bool __perf_evsel_stat__is(struct evsel *evsel, - enum perf_stat_evsel_id id); +bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); #define perf_stat_evsel__is(evsel, id) \ - __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) + __perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id) extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index ea94d8628980..be94d7046fa0 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -12,7 +12,7 @@ * build complex strings/buffers whose final size isn't easily known. * * It is NOT legal to copy the ->buf pointer away. - * `strbuf_detach' is the operation that detachs a buffer from its shell + * `strbuf_detach' is the operation that detaches a buffer from its shell * while keeping the shell valid wrt its invariants. * * 2. the ->buf member is a byte array that has at least ->len + 1 bytes diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h index e0c25a40f796..c05aca9ca582 100644 --- a/tools/perf/util/strfilter.h +++ b/tools/perf/util/strfilter.h @@ -8,8 +8,8 @@ /* A node of string filter */ struct strfilter_node { - struct strfilter_node *l; /* Tree left branche (for &,|) */ - struct strfilter_node *r; /* Tree right branche (for !,&,|) */ + struct strfilter_node *l; /* Tree left branch (for &,|) */ + struct strfilter_node *r; /* Tree right branch (for !,&,|) */ const char *p; /* Operator or rule */ }; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6dff843fd883..4c56aa837434 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1058,7 +1058,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, curr_dso->symtab_type = dso->symtab_type; maps__insert(kmaps, curr_map); /* - * Add it before we drop the referece to curr_map, i.e. while + * Add it before we drop the reference to curr_map, i.e. while * we still are sure to have a reference to this DSO via * *curr_map->dso. */ diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index 35c936ce33ef..2664fb65e47a 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -68,7 +68,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); - fprintf(fp, "%s\n", pos->sym.name); + ret += fprintf(fp, "%s\n", pos->sym.name); } return ret; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index dff178103ce5..35aa0c0f7cd9 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1211,7 +1211,7 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) *max = 0; for (i = 0; i < map->nr; i++) { - /* bit possition of the cpu is + 1 */ + /* bit position of the cpu is + 1 */ int bit = map->map[i] + 1; if (bit > *max) @@ -1237,7 +1237,7 @@ void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int * mask = size of 'struct perf_record_record_cpu_map' + * maximum cpu bit converted to size of longs * - * and finaly + the size of 'struct perf_record_cpu_map_data'. + * and finally + the size of 'struct perf_record_cpu_map_data'. */ size_cpus = cpus_size(map); size_mask = mask_size(map, max); diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 03bd99d3be16..a2e906858891 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -34,6 +34,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_32; #include <asm/syscalls.c> const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; static const char **syscalltbl_native = syscalltbl_arm64; +#elif defined(__mips__) +#include <asm/syscalls_n64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_mips_n64; #endif struct syscall { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index f132c6c2eef8..1bce3eb28ef2 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -16,6 +16,8 @@ struct target { bool uses_mmap; bool default_per_cpu; bool per_thread; + bool use_bpf; + const char *attr_map; }; enum target_errno { @@ -66,7 +68,7 @@ static inline bool target__has_cpu(struct target *target) static inline bool target__has_bpf(struct target *target) { - return target->bpf_str; + return target->bpf_str || target->use_bpf; } static inline bool target__none(struct target *target) diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 3bc47a42af8e..b3cd09beb62f 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -16,7 +16,6 @@ struct comm; struct ip_callchain; struct symbol; struct dso; -struct comm; struct perf_sample; struct addr_location; struct call_path; diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c index a46762aec4c9..32c39cfe209b 100644 --- a/tools/perf/util/units.c +++ b/tools/perf/util/units.c @@ -33,28 +33,35 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } -unsigned long convert_unit(unsigned long value, char *unit) +double convert_unit_double(double value, char *unit) { *unit = ' '; - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'K'; } - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'M'; } - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'G'; } return value; } +unsigned long convert_unit(unsigned long value, char *unit) +{ + double v = convert_unit_double((double)value, unit); + + return (unsigned long)v; +} + int unit_number__scnprintf(char *buf, size_t size, u64 n) { char unit[4] = "BKMG"; diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h index 99263b6a23f7..ea43e74e3240 100644 --- a/tools/perf/util/units.h +++ b/tools/perf/util/units.h @@ -12,6 +12,7 @@ struct parse_tag { unsigned long parse_tag_value(const char *str, struct parse_tag *tags); +double convert_unit_double(double value, char *unit); unsigned long convert_unit(unsigned long value, char *unit); int unit_number__scnprintf(char *buf, size_t size, u64 n); diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 9aededc0bc06..71a353349181 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -82,7 +82,7 @@ UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, #define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ #define DW_EH_PE_aligned 0x50 /* aligned pointer */ -/* Flags intentionaly not handled, since they're not needed: +/* Flags intentionally not handled, since they're not needed: * #define DW_EH_PE_indirect 0x80 * #define DW_EH_PE_uleb128 0x01 * #define DW_EH_PE_udata2 0x02 |