author     Linus Torvalds <torvalds@linux-foundation.org>  2019-01-07 01:30:14 +0100
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-01-07 01:30:14 +0100
commit     ac5eed2b41776b05cf03aac761d3bb5e64eea24c
tree       c9bf703ffaf0265fa1135f0dd6f65485184a3570 /tools/perf/util
parent     Change mincore() to count "mapped" pages rather than "cached" pages
parent     Merge tag 'perf-core-for-mingo-4.21-20190103' of git://git.kernel.org/pub/scm...
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf tooling updates from Ingo Molnar:
"A final batch of perf tooling changes: mostly fixes and small
improvements"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits)
perf session: Add comment for perf_session__register_idle_thread()
perf thread-stack: Fix thread stack processing for the idle task
perf thread-stack: Allocate an array of thread stacks
perf thread-stack: Factor out thread_stack__init()
perf thread-stack: Allow for a thread stack array
perf thread-stack: Avoid direct reference to the thread's stack
perf thread-stack: Tidy thread_stack__bottom() usage
perf thread-stack: Simplify some code in thread_stack__process()
tools gpio: Allow overriding CFLAGS
tools power turbostat: Override CFLAGS assignments and add LDFLAGS to build command
tools thermal tmon: Allow overriding CFLAGS assignments
tools power x86_energy_perf_policy: Override CFLAGS assignments and add LDFLAGS to build command
perf c2c: Increase the HITM ratio limit for displayed cachelines
perf c2c: Change the default coalesce setup
perf trace beauty ioctl: Beautify USBDEVFS_ commands
perf trace beauty: Export function to get the files for a thread
perf trace: Wire up ioctl's USBDEVFS_ cmd table generator
perf beauty ioctl: Add generator for USBDEVFS_ ioctl commands
tools headers uapi: Grab a copy of usbdevice_fs.h
perf trace: Store the major number for a file when storing its pathname
...
Diffstat (limited to 'tools/perf/util')
-rw-r--r--   tools/perf/util/dump-insn.c                                |   8
-rw-r--r--   tools/perf/util/dump-insn.h                                |   2
-rw-r--r--   tools/perf/util/intel-bts.c                                |   4
-rw-r--r--   tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c   |   8
-rw-r--r--   tools/perf/util/intel-pt.c                                 |   6
-rw-r--r--   tools/perf/util/python.c                                   |   3
-rw-r--r--   tools/perf/util/session.c                                  |   7
-rw-r--r--   tools/perf/util/thread-stack.c                             | 227
-rw-r--r--   tools/perf/util/thread-stack.h                             |   8
9 files changed, 200 insertions, 73 deletions
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
index 10988d3de7ce..2bd8585db93c 100644
--- a/tools/perf/util/dump-insn.c
+++ b/tools/perf/util/dump-insn.c
@@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
         *lenp = 0;
         return "?";
 }
+
+__weak
+int arch_is_branch(const unsigned char *buf __maybe_unused,
+                   size_t len __maybe_unused,
+                   int x86_64 __maybe_unused)
+{
+        return 0;
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 0e06280a8860..650125061530 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -20,4 +20,6 @@ struct perf_insn {
 const char *dump_insn(struct perf_insn *x, u64 ip,
                       u8 *inbuf, int inlen, int *lenp);
 
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+
 #endif
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7b27d77306c2..ee6ca65f81f4 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
                         continue;
                 intel_bts_get_branch_type(btsq, branch);
                 if (btsq->bts->synth_opts.thread_stack)
-                        thread_stack__event(thread, btsq->sample_flags,
+                        thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
                                             le64_to_cpu(branch->from),
                                             le64_to_cpu(branch->to),
                                             btsq->intel_pt_insn.length,
@@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
             !btsq->bts->synth_opts.thread_stack && thread &&
             (!old_buffer || btsq->bts->sampling_mode ||
              (btsq->bts->snapshot_mode && !buffer->consecutive)))
-                thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+                thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
 
         err = intel_bts_process_buffer(btsq, buffer, thread);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 54818828023b..1c0e289f01e6 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
         return 0;
 }
 
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+{
+        struct intel_pt_insn in;
+        if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
+                return -1;
+        return in.branch != INTEL_PT_BR_NO_BRANCH;
+}
+
 const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
                       u8 *inbuf, int inlen, int *lenp)
 {
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 149ff361ca78..2e72373ec6df 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
         intel_pt_prep_b_sample(pt, ptq, event, sample);
 
         if (pt->synth_opts.callchain) {
-                thread_stack__sample(ptq->thread, ptq->chain,
+                thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
                                      pt->synth_opts.callchain_sz + 1,
                                      sample->ip, pt->kernel_start);
                 sample->callchain = ptq->chain;
@@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
                 return 0;
 
         if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
-                thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+                thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
                                     state->to_ip, ptq->insn_len,
                                     state->trace_nr);
         else
-                thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+                thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
 
         if (pt->sample_branches) {
                 err = intel_pt_synth_branch_sample(ptq);
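The new arch_is_branch() in dump-insn.c is defined __weak, so the generic stub is linked only when no architecture supplies a strong definition; the Intel PT decoder above provides the x86 one. A minimal sketch of the weak-symbol linking pattern, using hypothetical file and function names rather than anything from this patch:

/* lib.c: weak default, used only if nothing overrides it */
#define __weak __attribute__((weak))

__weak int have_fast_path(void)
{
        return 0;       /* conservative fallback */
}

/* arch.c: strong definition; the linker prefers it over the weak one */
int have_fast_path(void)
{
        return 1;
}

/* main.c */
#include <stdio.h>

int have_fast_path(void);

int main(void)
{
        /* prints 1 when arch.c is linked in, 0 when it is left out */
        printf("%d\n", have_fast_path());
        return 0;
}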
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 47628e85c5eb..dda0ac978b1e 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -939,7 +939,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
 
                 file = PyFile_FromFile(fp, "perf", "r", NULL);
 #else
-                file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1);
+                file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
+                                     NULL, NULL, NULL, 0);
 #endif
                 if (file == NULL)
                         goto free_list;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 78a067777144..5456c84c7dd1 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1527,6 +1527,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
         return machine__findnew_thread(&session->machines.host, -1, pid);
 }
 
+/*
+ * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
+ * So here a single thread is created for that, but actually there is a separate
+ * idle task per cpu, so there should be one 'struct thread' per cpu, but there
+ * is only 1. That causes problems for some tools, requiring workarounds. For
+ * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
+ */
 int perf_session__register_idle_thread(struct perf_session *session)
 {
         struct thread *thread;
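The python.c hunk flips the final PyFile_FromFd() argument, closefd, from 1 to 0: with closefd=1 the resulting Python file object close()s the descriptor when it is destroyed, even though the evlist still owns and polls that fd. A hedged sketch of the two ownership modes in standalone CPython embedding code (not from perf):

#include <Python.h>
#include <unistd.h>

/*
 * Wrap an fd the caller still owns: closefd=0 means destroying the
 * Python object must not close(fd) behind the caller's back.
 */
static PyObject *wrap_borrowed_fd(int fd)
{
        return PyFile_FromFd(fd, "borrowed", "r", -1,
                             NULL, NULL, NULL, /* closefd */ 0);
}

/*
 * Transfer ownership instead: dup() the fd and let Python close its
 * private copy (closefd=1) when the object is garbage-collected.
 */
static PyObject *wrap_owned_fd(int fd)
{
        int dup_fd = dup(fd);

        if (dup_fd < 0)
                return PyErr_SetFromErrno(PyExc_OSError);
        return PyFile_FromFd(dup_fd, "owned", "r", -1,
                             NULL, NULL, NULL, /* closefd */ 1);
}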
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 61a4286a74dc..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
 #include <linux/rbtree.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <errno.h>
 #include "thread.h"
 #include "event.h"
@@ -60,6 +61,7 @@ struct thread_stack_entry {
  * @last_time: last timestamp
  * @crp: call/return processor
  * @comm: current comm
+ * @arr_sz: size of array if this is the first element of an array
  */
 struct thread_stack {
         struct thread_stack_entry *stack;
@@ -71,8 +73,19 @@ struct thread_stack {
         u64 last_time;
         struct call_return_processor *crp;
         struct comm *comm;
+        unsigned int arr_sz;
 };
 
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+        return !(thread->tid || thread->pid_);
+}
+
 static int thread_stack__grow(struct thread_stack *ts)
 {
         struct thread_stack_entry *new_stack;
@@ -91,19 +104,14 @@ static int thread_stack__grow(struct thread_stack *ts)
         return 0;
 }
 
-static struct thread_stack *thread_stack__new(struct thread *thread,
-                                              struct call_return_processor *crp)
+static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
+                              struct call_return_processor *crp)
 {
-        struct thread_stack *ts;
-
-        ts = zalloc(sizeof(struct thread_stack));
-        if (!ts)
-                return NULL;
+        int err;
 
-        if (thread_stack__grow(ts)) {
-                free(ts);
-                return NULL;
-        }
+        err = thread_stack__grow(ts);
+        if (err)
+                return err;
 
         if (thread->mg && thread->mg->machine)
                 ts->kernel_start = machine__kernel_start(thread->mg->machine);
@@ -111,9 +119,72 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
                 ts->kernel_start = 1ULL << 63;
         ts->crp = crp;
 
+        return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
+                                              struct call_return_processor *crp)
+{
+        struct thread_stack *ts = thread->ts, *new_ts;
+        unsigned int old_sz = ts ? ts->arr_sz : 0;
+        unsigned int new_sz = 1;
+
+        if (thread_stack__per_cpu(thread) && cpu > 0)
+                new_sz = roundup_pow_of_two(cpu + 1);
+
+        if (!ts || new_sz > old_sz) {
+                new_ts = calloc(new_sz, sizeof(*ts));
+                if (!new_ts)
+                        return NULL;
+                if (ts)
+                        memcpy(new_ts, ts, old_sz * sizeof(*ts));
+                new_ts->arr_sz = new_sz;
+                zfree(&thread->ts);
+                thread->ts = new_ts;
+                ts = new_ts;
+        }
+
+        if (thread_stack__per_cpu(thread) && cpu > 0 &&
+            (unsigned int)cpu < ts->arr_sz)
+                ts += cpu;
+
+        if (!ts->stack &&
+            thread_stack__init(ts, thread, crp))
+                return NULL;
+
         return ts;
 }
 
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
+{
+        struct thread_stack *ts = thread->ts;
+
+        if (cpu < 0)
+                cpu = 0;
+
+        if (!ts || (unsigned int)cpu >= ts->arr_sz)
+                return NULL;
+
+        ts += cpu;
+
+        if (!ts->stack)
+                return NULL;
+
+        return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+                                                 int cpu)
+{
+        if (!thread)
+                return NULL;
+
+        if (thread_stack__per_cpu(thread))
+                return thread__cpu_stack(thread, cpu);
+
+        return thread->ts;
+}
+
 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
                               bool trace_end)
 {
@@ -226,25 +297,37 @@ static int __thread_stack__flush(struct thread *thread,
 
 int thread_stack__flush(struct thread *thread)
 {
-        if (thread->ts)
-                return __thread_stack__flush(thread, thread->ts);
+        struct thread_stack *ts = thread->ts;
+        unsigned int pos;
+        int err = 0;
 
-        return 0;
+        if (ts) {
+                for (pos = 0; pos < ts->arr_sz; pos++) {
+                        int ret = __thread_stack__flush(thread, ts + pos);
+
+                        if (ret)
+                                err = ret;
+                }
+        }
+
+        return err;
 }
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
                         u64 to_ip, u16 insn_len, u64 trace_nr)
 {
+        struct thread_stack *ts = thread__stack(thread, cpu);
+
         if (!thread)
                 return -EINVAL;
 
-        if (!thread->ts) {
-                thread->ts = thread_stack__new(thread, NULL);
-                if (!thread->ts) {
+        if (!ts) {
+                ts = thread_stack__new(thread, cpu, NULL);
+                if (!ts) {
                         pr_warning("Out of memory: no thread stack\n");
                         return -ENOMEM;
                 }
-                thread->ts->trace_nr = trace_nr;
+                ts->trace_nr = trace_nr;
         }
 
         /*
@@ -252,14 +335,14 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
          * the stack might be completely invalid.  Better to report nothing than
          * to report something misleading, so flush the stack.
          */
-        if (trace_nr != thread->ts->trace_nr) {
-                if (thread->ts->trace_nr)
-                        __thread_stack__flush(thread, thread->ts);
-                thread->ts->trace_nr = trace_nr;
+        if (trace_nr != ts->trace_nr) {
+                if (ts->trace_nr)
+                        __thread_stack__flush(thread, ts);
+                ts->trace_nr = trace_nr;
         }
 
         /* Stop here if thread_stack__process() is in use */
-        if (thread->ts->crp)
+        if (ts->crp)
                 return 0;
 
         if (flags & PERF_IP_FLAG_CALL) {
@@ -270,7 +353,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                 ret_addr = from_ip + insn_len;
                 if (ret_addr == to_ip)
                         return 0; /* Zero-length calls are excluded */
-                return thread_stack__push(thread->ts, ret_addr,
+                return thread_stack__push(ts, ret_addr,
                                           flags & PERF_IP_FLAG_TRACE_END);
         } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
                 /*
@@ -280,32 +363,52 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                  * address, so try to pop that. Also, do not expect a call made
                  * when the trace ended, to return, so pop that.
                  */
-                thread_stack__pop(thread->ts, to_ip);
-                thread_stack__pop_trace_end(thread->ts);
+                thread_stack__pop(ts, to_ip);
+                thread_stack__pop_trace_end(ts);
         } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
-                thread_stack__pop(thread->ts, to_ip);
+                thread_stack__pop(ts, to_ip);
         }
 
         return 0;
 }
 
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
 {
-        if (!thread || !thread->ts)
+        struct thread_stack *ts = thread__stack(thread, cpu);
+
+        if (!ts)
                 return;
 
-        if (trace_nr != thread->ts->trace_nr) {
-                if (thread->ts->trace_nr)
-                        __thread_stack__flush(thread, thread->ts);
-                thread->ts->trace_nr = trace_nr;
+        if (trace_nr != ts->trace_nr) {
+                if (ts->trace_nr)
+                        __thread_stack__flush(thread, ts);
+                ts->trace_nr = trace_nr;
         }
 }
 
+static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
+{
+        __thread_stack__flush(thread, ts);
+        zfree(&ts->stack);
+}
+
+static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
+{
+        unsigned int arr_sz = ts->arr_sz;
+
+        __thread_stack__free(thread, ts);
+        memset(ts, 0, sizeof(*ts));
+        ts->arr_sz = arr_sz;
+}
+
 void thread_stack__free(struct thread *thread)
 {
-        if (thread->ts) {
-                __thread_stack__flush(thread, thread->ts);
-                zfree(&thread->ts->stack);
+        struct thread_stack *ts = thread->ts;
+        unsigned int pos;
+
+        if (ts) {
+                for (pos = 0; pos < ts->arr_sz; pos++)
+                        __thread_stack__free(thread, ts + pos);
                 zfree(&thread->ts);
         }
 }
@@ -315,9 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
         return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
 }
 
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__sample(struct thread *thread, int cpu,
+                          struct ip_callchain *chain,
                           size_t sz, u64 ip, u64 kernel_start)
 {
+        struct thread_stack *ts = thread__stack(thread, cpu);
         u64 context = callchain_context(ip, kernel_start);
         u64 last_context;
         size_t i, j;
@@ -330,15 +435,15 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
         chain->ips[0] = context;
         chain->ips[1] = ip;
 
-        if (!thread || !thread->ts) {
+        if (!ts) {
                 chain->nr = 2;
                 return;
         }
 
         last_context = context;
 
-        for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
-                ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+        for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
+                ip = ts->stack[ts->cnt - j].ret_addr;
                 context = callchain_context(ip, kernel_start);
                 if (context != last_context) {
                         if (i >= sz - 1)
@@ -449,7 +554,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
         return 1;
 }
 
-static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
+static int thread_stack__bottom(struct thread_stack *ts,
                                 struct perf_sample *sample,
                                 struct addr_location *from_al,
                                 struct addr_location *to_al, u64 ref)
@@ -474,7 +579,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
         if (!cp)
                 return -ENOMEM;
 
-        return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
+        return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
                                      true, false);
 }
 
@@ -590,24 +695,19 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
                           struct addr_location *to_al, u64 ref,
                           struct call_return_processor *crp)
 {
-        struct thread_stack *ts = thread->ts;
+        struct thread_stack *ts = thread__stack(thread, sample->cpu);
         int err = 0;
 
-        if (ts) {
-                if (!ts->crp) {
-                        /* Supersede thread_stack__event() */
-                        thread_stack__free(thread);
-                        thread->ts = thread_stack__new(thread, crp);
-                        if (!thread->ts)
-                                return -ENOMEM;
-                        ts = thread->ts;
-                        ts->comm = comm;
-                }
-        } else {
-                thread->ts = thread_stack__new(thread, crp);
-                if (!thread->ts)
+        if (ts && !ts->crp) {
+                /* Supersede thread_stack__event() */
+                thread_stack__reset(thread, ts);
+                ts = NULL;
+        }
+
+        if (!ts) {
+                ts = thread_stack__new(thread, sample->cpu, crp);
+                if (!ts)
                         return -ENOMEM;
-                ts = thread->ts;
                 ts->comm = comm;
         }
 
@@ -621,8 +721,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 
         /* If the stack is empty, put the current symbol on the stack */
         if (!ts->cnt) {
-                err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
-                                           ref);
+                err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
                 if (err)
                         return err;
         }
@@ -671,9 +770,11 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
         return err;
 }
 
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
 {
-        if (!thread->ts)
+        struct thread_stack *ts = thread__stack(thread, cpu);
+
+        if (!ts)
                 return 0;
-        return thread->ts->cnt;
+        return ts->cnt;
 }
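thread_stack__new() above sizes the idle task's stack array with roundup_pow_of_two(cpu + 1), so the array grows geometrically and earlier per-cpu slots survive a regrow via memcpy(). A self-contained sketch of the same grow-and-index pattern, with toy types standing in for the perf structs:

#include <stdlib.h>
#include <string.h>

/* Toy stand-in for roundup_pow_of_two() from <linux/log2.h>. */
static unsigned int roundup_pow_of_two(unsigned int x)
{
        unsigned int r = 1;

        while (r < x)
                r <<= 1;
        return r;
}

struct cpu_slot {
        unsigned int arr_sz;    /* valid only in element 0, as in the patch */
        int data;
};

/* Grow the array to cover 'cpu', preserving any existing slots. */
static struct cpu_slot *slot_for_cpu(struct cpu_slot **arr, int cpu)
{
        struct cpu_slot *a = *arr, *new_a;
        unsigned int old_sz = a ? a->arr_sz : 0;
        unsigned int new_sz = cpu > 0 ? roundup_pow_of_two(cpu + 1) : 1;

        if (!a || new_sz > old_sz) {
                new_a = calloc(new_sz, sizeof(*new_a));
                if (!new_a)
                        return NULL;
                if (a)
                        memcpy(new_a, a, old_sz * sizeof(*new_a));
                new_a->arr_sz = new_sz;
                free(a);
                *arr = a = new_a;
        }
        return a + (cpu > 0 ? cpu : 0);
}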
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index f97c00a8c251..1f626f4a1c40 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -80,14 +80,14 @@ struct call_return_processor {
         void *data;
 };
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
                         u64 to_ip, u16 insn_len, u64 trace_nr);
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
                           size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
-size_t thread_stack__depth(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
 
 struct call_return_processor *
 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
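With these prototypes, every thread-stack entry point that may touch the idle task now takes a cpu so the right per-cpu stack is selected; intel-pt.c and intel-bts.c above pass ptq->cpu and btsq->cpu, and thread_stack__process() uses sample->cpu. An illustrative caller (hypothetical, not part of the patch):

/* Hypothetical tool code showing the updated calling convention. */
static int handle_branch(struct thread *thread, struct perf_sample *sample,
                         u16 insn_len, u64 trace_nr)
{
        int err;

        /* sample->cpu disambiguates the idle task's per-cpu stacks */
        err = thread_stack__event(thread, sample->cpu, sample->flags,
                                  sample->ip, sample->addr, insn_len,
                                  trace_nr);
        if (err)
                return err;

        pr_debug("stack depth now %zu\n",
                 thread_stack__depth(thread, sample->cpu));
        return 0;
}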