summaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/bpf_trace.c11
-rw-r--r--kernel/trace/fgraph.c10
-rw-r--r--kernel/trace/ftrace.c8
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--kernel/trace/trace.c267
-rw-r--r--kernel/trace/trace.h6
-rw-r--r--kernel/trace/trace_eprobe.c5
-rw-r--r--kernel/trace/trace_events.c237
-rw-r--r--kernel/trace/trace_functions.c3
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/trace/trace_output.c6
-rw-r--r--kernel/trace/trace_preemptirq.c43
-rw-r--r--kernel/trace/trace_uprobe.c6
-rw-r--r--kernel/trace/tracing_map.c6
14 files changed, 338 insertions, 278 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 949a3870946c..1b8db5aee9d3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2250,6 +2250,9 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
goto unlock;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
+ if (!old_array)
+ goto put;
+
ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
if (ret < 0) {
bpf_prog_array_delete_safe(old_array, event->prog);
@@ -2258,6 +2261,14 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
bpf_prog_array_free_sleepable(old_array);
}
+put:
+ /*
+ * It could be that the bpf_prog is not sleepable (and will be freed
+ * via normal RCU), but is called from a point that supports sleepable
+ * programs and uses tasks-trace-RCU.
+ */
+ synchronize_rcu_tasks_trace();
+
bpf_prog_put(event->prog);
event->prog = NULL;
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 0bf78517b5d4..30e3ddc8a8a8 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -833,7 +833,7 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
#endif
{
for_each_set_bit(i, &bitmap, sizeof(bitmap) * BITS_PER_BYTE) {
- struct fgraph_ops *gops = fgraph_array[i];
+ struct fgraph_ops *gops = READ_ONCE(fgraph_array[i]);
if (gops == &fgraph_stub)
continue;
@@ -1215,7 +1215,7 @@ void fgraph_update_pid_func(void)
static int start_graph_tracing(void)
{
unsigned long **ret_stack_list;
- int ret;
+ int ret, cpu;
ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
sizeof(*ret_stack_list), GFP_KERNEL);
@@ -1223,6 +1223,12 @@ static int start_graph_tracing(void)
if (!ret_stack_list)
return -ENOMEM;
+ /* The cpu_boot init_task->ret_stack will never be freed */
+ for_each_online_cpu(cpu) {
+ if (!idle_task(cpu)->ret_stack)
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+ }
+
do {
ret = alloc_retstack_tasklist(ret_stack_list);
} while (ret == -EAGAIN);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9b17efb1a87d..2e113f8b13a2 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -902,16 +902,13 @@ static void profile_graph_return(struct ftrace_graph_ret *trace,
}
static struct fgraph_ops fprofiler_ops = {
- .ops = {
- .flags = FTRACE_OPS_FL_INITIALIZED,
- INIT_OPS_HASH(fprofiler_ops.ops)
- },
.entryfunc = &profile_graph_entry,
.retfunc = &profile_graph_return,
};
static int register_ftrace_profiler(void)
{
+ ftrace_ops_set_global_filter(&fprofiler_ops.ops);
return register_ftrace_graph(&fprofiler_ops);
}
@@ -922,12 +919,11 @@ static void unregister_ftrace_profiler(void)
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
.func = function_profile_call,
- .flags = FTRACE_OPS_FL_INITIALIZED,
- INIT_OPS_HASH(ftrace_profile_ops)
};
static int register_ftrace_profiler(void)
{
+ ftrace_ops_set_global_filter(&ftrace_profile_ops);
return register_ftrace_function(&ftrace_profile_ops);
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7e257e855dd1..60210fb5b211 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -7019,7 +7019,11 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
lockdep_assert_held(&cpu_buffer->mapping_lock);
nr_subbufs = cpu_buffer->nr_pages + 1; /* + reader-subbuf */
- nr_pages = ((nr_subbufs + 1) << subbuf_order) - pgoff; /* + meta-page */
+ nr_pages = ((nr_subbufs + 1) << subbuf_order); /* + meta-page */
+ if (nr_pages <= pgoff)
+ return -EINVAL;
+
+ nr_pages -= pgoff;
nr_vma_pages = vma_pages(vma);
if (!nr_vma_pages || nr_vma_pages > nr_pages)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be62f0ea1814..f8aebcb01e62 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3611,17 +3611,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter)
}
/* Returns true if the string is safe to dereference from an event */
-static bool trace_safe_str(struct trace_iterator *iter, const char *str,
- bool star, int len)
+static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
unsigned long addr = (unsigned long)str;
struct trace_event *trace_event;
struct trace_event_call *event;
- /* Ignore strings with no length */
- if (star && !len)
- return true;
-
/* OK if part of the event data */
if ((addr >= (unsigned long)iter->ent) &&
(addr < (unsigned long)iter->ent + iter->ent_size))
@@ -3661,181 +3656,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
return false;
}
-static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
-
-static int test_can_verify_check(const char *fmt, ...)
-{
- char buf[16];
- va_list ap;
- int ret;
-
- /*
- * The verifier is dependent on vsnprintf() modifies the va_list
- * passed to it, where it is sent as a reference. Some architectures
- * (like x86_32) passes it by value, which means that vsnprintf()
- * does not modify the va_list passed to it, and the verifier
- * would then need to be able to understand all the values that
- * vsnprintf can use. If it is passed by value, then the verifier
- * is disabled.
- */
- va_start(ap, fmt);
- vsnprintf(buf, 16, "%d", ap);
- ret = va_arg(ap, int);
- va_end(ap);
-
- return ret;
-}
-
-static void test_can_verify(void)
-{
- if (!test_can_verify_check("%d %d", 0, 1)) {
- pr_info("trace event string verifier disabled\n");
- static_branch_inc(&trace_no_verify);
- }
-}
-
/**
- * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+ * ignore_event - Check dereferenced fields while writing to the seq buffer
* @iter: The iterator that holds the seq buffer and the event being printed
- * @fmt: The format used to print the event
- * @ap: The va_list holding the data to print from @fmt.
*
- * This writes the data into the @iter->seq buffer using the data from
- * @fmt and @ap. If the format has a %s, then the source of the string
- * is examined to make sure it is safe to print, otherwise it will
- * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
- * pointer.
+ * At boot up, test_event_printk() will flag any event that dereferences
+ * a string with "%s" that does exist in the ring buffer. It may still
+ * be valid, as the string may point to a static string in the kernel
+ * rodata that never gets freed. But if the string pointer is pointing
+ * to something that was allocated, there's a chance that it can be freed
+ * by the time the user reads the trace. This would cause a bad memory
+ * access by the kernel and possibly crash the system.
+ *
+ * This function will check if the event has any fields flagged as needing
+ * to be checked at runtime and perform those checks.
+ *
+ * If it is found that a field is unsafe, it will write into the @iter->seq
+ * a message stating what was found to be unsafe.
+ *
+ * @return: true if the event is unsafe and should be ignored,
+ * false otherwise.
*/
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap)
+bool ignore_event(struct trace_iterator *iter)
{
- long text_delta = 0;
- long data_delta = 0;
- const char *p = fmt;
- const char *str;
- bool good;
- int i, j;
+ struct ftrace_event_field *field;
+ struct trace_event *trace_event;
+ struct trace_event_call *event;
+ struct list_head *head;
+ struct trace_seq *seq;
+ const void *ptr;
- if (WARN_ON_ONCE(!fmt))
- return;
+ trace_event = ftrace_find_event(iter->ent->type);
- if (static_branch_unlikely(&trace_no_verify))
- goto print;
+ seq = &iter->seq;
- /*
- * When the kernel is booted with the tp_printk command line
- * parameter, trace events go directly through to printk().
- * It also is checked by this function, but it does not
- * have an associated trace_array (tr) for it.
- */
- if (iter->tr) {
- text_delta = iter->tr->text_delta;
- data_delta = iter->tr->data_delta;
+ if (!trace_event) {
+ trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
+ return true;
}
- /* Don't bother checking when doing a ftrace_dump() */
- if (iter->fmt == static_fmt_buf)
- goto print;
-
- while (*p) {
- bool star = false;
- int len = 0;
-
- j = 0;
-
- /*
- * We only care about %s and variants
- * as well as %p[sS] if delta is non-zero
- */
- for (i = 0; p[i]; i++) {
- if (i + 1 >= iter->fmt_size) {
- /*
- * If we can't expand the copy buffer,
- * just print it.
- */
- if (!trace_iter_expand_format(iter))
- goto print;
- }
-
- if (p[i] == '\\' && p[i+1]) {
- i++;
- continue;
- }
- if (p[i] == '%') {
- /* Need to test cases like %08.*s */
- for (j = 1; p[i+j]; j++) {
- if (isdigit(p[i+j]) ||
- p[i+j] == '.')
- continue;
- if (p[i+j] == '*') {
- star = true;
- continue;
- }
- break;
- }
- if (p[i+j] == 's')
- break;
-
- if (text_delta && p[i+1] == 'p' &&
- ((p[i+2] == 's' || p[i+2] == 'S')))
- break;
-
- star = false;
- }
- j = 0;
- }
- /* If no %s found then just print normally */
- if (!p[i])
- break;
-
- /* Copy up to the %s, and print that */
- strncpy(iter->fmt, p, i);
- iter->fmt[i] = '\0';
- trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+ event = container_of(trace_event, struct trace_event_call, event);
+ if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
+ return false;
- /* Add delta to %pS pointers */
- if (p[i+1] == 'p') {
- unsigned long addr;
- char fmt[4];
+ head = trace_get_fields(event);
+ if (!head) {
+ trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
+ trace_event_name(event));
+ return true;
+ }
- fmt[0] = '%';
- fmt[1] = 'p';
- fmt[2] = p[i+2]; /* Either %ps or %pS */
- fmt[3] = '\0';
+ /* Offsets are from the iter->ent that points to the raw event */
+ ptr = iter->ent;
- addr = va_arg(ap, unsigned long);
- addr += text_delta;
- trace_seq_printf(&iter->seq, fmt, (void *)addr);
+ list_for_each_entry(field, head, link) {
+ const char *str;
+ bool good;
- p += i + 3;
+ if (!field->needs_test)
continue;
- }
-
- /*
- * If iter->seq is full, the above call no longer guarantees
- * that ap is in sync with fmt processing, and further calls
- * to va_arg() can return wrong positional arguments.
- *
- * Ensure that ap is no longer used in this case.
- */
- if (iter->seq.full) {
- p = "";
- break;
- }
- if (star)
- len = va_arg(ap, int);
+ str = *(const char **)(ptr + field->offset);
- /* The ap now points to the string data of the %s */
- str = va_arg(ap, const char *);
-
- good = trace_safe_str(iter, str, star, len);
-
- /* Could be from the last boot */
- if (data_delta && !good) {
- str += data_delta;
- good = trace_safe_str(iter, str, star, len);
- }
+ good = trace_safe_str(iter, str);
/*
* If you hit this warning, it is likely that the
@@ -3846,44 +3729,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
* instead. See samples/trace_events/trace-events-sample.h
* for reference.
*/
- if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
- fmt, seq_buf_str(&iter->seq.seq))) {
- int ret;
-
- /* Try to safely read the string */
- if (star) {
- if (len + 1 > iter->fmt_size)
- len = iter->fmt_size - 1;
- if (len < 0)
- len = 0;
- ret = copy_from_kernel_nofault(iter->fmt, str, len);
- iter->fmt[len] = 0;
- star = false;
- } else {
- ret = strncpy_from_kernel_nofault(iter->fmt, str,
- iter->fmt_size);
- }
- if (ret < 0)
- trace_seq_printf(&iter->seq, "(0x%px)", str);
- else
- trace_seq_printf(&iter->seq, "(0x%px:%s)",
- str, iter->fmt);
- str = "[UNSAFE-MEMORY]";
- strcpy(iter->fmt, "%s");
- } else {
- strncpy(iter->fmt, p + i, j + 1);
- iter->fmt[j+1] = '\0';
+ if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
+ trace_event_name(event), field->name)) {
+ trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
+ trace_event_name(event), field->name);
+ return true;
}
- if (star)
- trace_seq_printf(&iter->seq, iter->fmt, len, str);
- else
- trace_seq_printf(&iter->seq, iter->fmt, str);
-
- p += i + j + 1;
}
- print:
- if (*p)
- trace_seq_vprintf(&iter->seq, p, ap);
+ return false;
}
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
@@ -4353,6 +4206,15 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
if (event) {
if (tr->trace_flags & TRACE_ITER_FIELDS)
return print_event_fields(iter, event);
+ /*
+ * For TRACE_EVENT() events, the print_fmt is not
+ * safe to use if the array has delta offsets
+ * Force printing via the fields.
+ */
+ if ((tr->text_delta || tr->data_delta) &&
+ event->type > __TRACE_LAST_TYPE)
+ return print_event_fields(iter, event);
+
return event->funcs->trace(iter, sym_flags, event);
}
@@ -5225,6 +5087,9 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
cpumask_var_t tracing_cpumask_new;
int err;
+ if (count == 0 || count > KMALLOC_MAX_SIZE)
+ return -EINVAL;
+
if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
return -ENOMEM;
@@ -10777,8 +10642,6 @@ __init static int tracer_alloc_buffers(void)
register_snapshot_cmd();
- test_can_verify();
-
return 0;
out_free_pipe_cpumask:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 266740b4e121..9691b47b5f3d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -667,9 +667,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
bool trace_is_tracepoint_string(const char *str);
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap) __printf(2, 0);
char *trace_iter_expand_format(struct trace_iterator *iter);
+bool ignore_event(struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -1413,7 +1412,8 @@ struct ftrace_event_field {
int filter_type;
int offset;
int size;
- int is_signed;
+ unsigned int is_signed:1;
+ unsigned int needs_test:1;
int len;
};
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index ebda68ee9abf..be8be0c1aaf0 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -963,6 +963,11 @@ static int __trace_eprobe_create(int argc, const char *argv[])
goto error;
}
ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
+ if (ret < 0) {
+ trace_probe_unregister_event_call(&ep->tp);
+ mutex_unlock(&event_mutex);
+ goto error;
+ }
mutex_unlock(&event_mutex);
return ret;
parse_error:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 77e68efbd43e..770e7ed91716 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -82,7 +82,7 @@ static int system_refcount_dec(struct event_subsystem *system)
}
static struct ftrace_event_field *
-__find_event_field(struct list_head *head, char *name)
+__find_event_field(struct list_head *head, const char *name)
{
struct ftrace_event_field *field;
@@ -114,7 +114,8 @@ trace_find_event_field(struct trace_event_call *call, char *name)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
- int is_signed, int filter_type, int len)
+ int is_signed, int filter_type, int len,
+ int need_test)
{
struct ftrace_event_field *field;
@@ -133,6 +134,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field->offset = offset;
field->size = size;
field->is_signed = is_signed;
+ field->needs_test = need_test;
field->len = len;
list_add(&field->link, head);
@@ -151,13 +153,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, 0);
+ is_signed, filter_type, 0, 0);
}
EXPORT_SYMBOL_GPL(trace_define_field);
static int trace_define_field_ext(struct trace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
- int filter_type, int len)
+ int filter_type, int len, int need_test)
{
struct list_head *head;
@@ -166,13 +168,13 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, len);
+ is_signed, filter_type, len, need_test);
}
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
- filter_type, 0); \
+ filter_type, 0, 0); \
if (ret) \
return ret;
@@ -181,7 +183,8 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER, 0); \
+ is_signed_type(type), FILTER_OTHER, \
+ 0, 0); \
if (ret) \
return ret;
@@ -244,19 +247,16 @@ int trace_event_get_offsets(struct trace_event_call *call)
return tail->offset + tail->size;
}
-/*
- * Check if the referenced field is an array and return true,
- * as arrays are OK to dereference.
- */
-static bool test_field(const char *fmt, struct trace_event_call *call)
+
+static struct trace_event_fields *find_event_field(const char *fmt,
+ struct trace_event_call *call)
{
struct trace_event_fields *field = call->class->fields_array;
- const char *array_descriptor;
const char *p = fmt;
int len;
if (!(len = str_has_prefix(fmt, "REC->")))
- return false;
+ return NULL;
fmt += len;
for (p = fmt; *p; p++) {
if (!isalnum(*p) && *p != '_')
@@ -265,16 +265,141 @@ static bool test_field(const char *fmt, struct trace_event_call *call)
len = p - fmt;
for (; field->type; field++) {
- if (strncmp(field->name, fmt, len) ||
- field->name[len])
+ if (strncmp(field->name, fmt, len) || field->name[len])
continue;
- array_descriptor = strchr(field->type, '[');
- /* This is an array and is OK to dereference. */
- return array_descriptor != NULL;
+
+ return field;
+ }
+ return NULL;
+}
+
+/*
+ * Check if the referenced field is an array and return true,
+ * as arrays are OK to dereference.
+ */
+static bool test_field(const char *fmt, struct trace_event_call *call)
+{
+ struct trace_event_fields *field;
+
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* This is an array and is OK to dereference. */
+ return strchr(field->type, '[') != NULL;
+}
+
+/* Look for a string within an argument */
+static bool find_print_string(const char *arg, const char *str, const char *end)
+{
+ const char *r;
+
+ r = strstr(arg, str);
+ return r && r < end;
+}
+
+/* Return true if the argument pointer is safe */
+static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
+{
+ const char *r, *e, *a;
+
+ e = fmt + len;
+
+ /* Find the REC-> in the argument */
+ r = strstr(fmt, "REC->");
+ if (r && r < e) {
+ /*
+ * Addresses of events on the buffer, or an array on the buffer is
+ * OK to dereference. There's ways to fool this, but
+ * this is to catch common mistakes, not malicious code.
+ */
+ a = strchr(fmt, '&');
+ if ((a && (a < r)) || test_field(r, call))
+ return true;
+ } else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_sockaddr(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
+ return true;
}
return false;
}
+/* Return true if the string is safe */
+static bool process_string(const char *fmt, int len, struct trace_event_call *call)
+{
+ struct trace_event_fields *field;
+ const char *r, *e, *s;
+
+ e = fmt + len;
+
+ /*
+ * There are several helper functions that return strings.
+ * If the argument contains a function, then assume its field is valid.
+ * It is considered that the argument has a function if it has:
+ * alphanumeric or '_' before a parenthesis.
+ */
+ s = fmt;
+ do {
+ r = strstr(s, "(");
+ if (!r || r >= e)
+ break;
+ for (int i = 1; r - i >= s; i++) {
+ char ch = *(r - i);
+ if (isspace(ch))
+ continue;
+ if (isalnum(ch) || ch == '_')
+ return true;
+ /* Anything else, this isn't a function */
+ break;
+ }
+ /* A function could be wrapped in parethesis, try the next one */
+ s = r + 1;
+ } while (s < e);
+
+ /*
+ * Check for arrays. If the argument has: foo[REC->val]
+ * then it is very likely that foo is an array of strings
+ * that are safe to use.
+ */
+ r = strstr(s, "[");
+ if (r && r < e) {
+ r = strstr(r, "REC->");
+ if (r && r < e)
+ return true;
+ }
+
+ /*
+ * If there's any strings in the argument consider this arg OK as it
+ * could be: REC->field ? "foo" : "bar" and we don't want to get into
+ * verifying that logic here.
+ */
+ if (find_print_string(fmt, "\"", e))
+ return true;
+
+ /* Dereferenced strings are also valid like any other pointer */
+ if (process_pointer(fmt, len, call))
+ return true;
+
+ /* Make sure the field is found */
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* Test this field's string before printing the event */
+ call->flags |= TRACE_EVENT_FL_TEST_STR;
+ field->needs_test = 1;
+
+ return true;
+}
+
/*
* Examine the print fmt of the event looking for unsafe dereference
* pointers using %p* that could be recorded in the trace event and
@@ -284,13 +409,14 @@ static bool test_field(const char *fmt, struct trace_event_call *call)
static void test_event_printk(struct trace_event_call *call)
{
u64 dereference_flags = 0;
+ u64 string_flags = 0;
bool first = true;
- const char *fmt, *c, *r, *a;
+ const char *fmt;
int parens = 0;
char in_quote = 0;
int start_arg = 0;
int arg = 0;
- int i;
+ int i, e;
fmt = call->print_fmt;
@@ -374,8 +500,16 @@ static void test_event_printk(struct trace_event_call *call)
star = true;
continue;
}
- if ((fmt[i + j] == 's') && star)
- arg++;
+ if ((fmt[i + j] == 's')) {
+ if (star)
+ arg++;
+ if (WARN_ONCE(arg == 63,
+ "Too many args for event: %s",
+ trace_event_name(call)))
+ return;
+ dereference_flags |= 1ULL << arg;
+ string_flags |= 1ULL << arg;
+ }
break;
}
break;
@@ -403,42 +537,47 @@ static void test_event_printk(struct trace_event_call *call)
case ',':
if (in_quote || parens)
continue;
+ e = i;
i++;
while (isspace(fmt[i]))
i++;
- start_arg = i;
- if (!(dereference_flags & (1ULL << arg)))
- goto next_arg;
- /* Find the REC-> in the argument */
- c = strchr(fmt + i, ',');
- r = strstr(fmt + i, "REC->");
- if (r && (!c || r < c)) {
- /*
- * Addresses of events on the buffer,
- * or an array on the buffer is
- * OK to dereference.
- * There's ways to fool this, but
- * this is to catch common mistakes,
- * not malicious code.
- */
- a = strchr(fmt + i, '&');
- if ((a && (a < r)) || test_field(r, call))
+ /*
+ * If start_arg is zero, then this is the start of the
+ * first argument. The processing of the argument happens
+ * when the end of the argument is found, as it needs to
+ * handle paranthesis and such.
+ */
+ if (!start_arg) {
+ start_arg = i;
+ /* Balance out the i++ in the for loop */
+ i--;
+ continue;
+ }
+
+ if (dereference_flags & (1ULL << arg)) {
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(fmt + start_arg, e - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(fmt + start_arg, e - start_arg, call))
dereference_flags &= ~(1ULL << arg);
- } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
- (!c || r < c)) {
- dereference_flags &= ~(1ULL << arg);
- } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
- (!c || r < c)) {
- dereference_flags &= ~(1ULL << arg);
}
- next_arg:
- i--;
+ start_arg = i;
arg++;
+ /* Balance out the i++ in the for loop */
+ i--;
}
}
+ if (dereference_flags & (1ULL << arg)) {
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(fmt + start_arg, i - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(fmt + start_arg, i - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ }
+
/*
* If you triggered the below warning, the trace event reported
* uses an unsafe dereference pointer %p*. As the data stored
@@ -2471,7 +2610,7 @@ event_define_fields(struct trace_event_call *call)
ret = trace_define_field_ext(call, field->type, field->name,
offset, field->size,
field->is_signed, field->filter_type,
- field->len);
+ field->len, field->needs_test);
if (WARN_ON_ONCE(ret)) {
pr_err("error code is %d\n", ret);
break;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 74c353164ca1..d358c9935164 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -176,7 +176,8 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(&tr->array_buffer);
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* fregs are guaranteed not to be NULL if HAVE_DYNAMIC_FTRACE_WITH_ARGS is set */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
static __always_inline unsigned long
function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
{
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 263fac44d3ca..935a886af40c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -725,7 +725,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
static struct notifier_block trace_kprobe_module_nb = {
.notifier_call = trace_kprobe_module_callback,
- .priority = 1 /* Invoked after kprobe module callback */
+ .priority = 2 /* Invoked after kprobe and jump_label module callback */
};
static int trace_kprobe_register_module_notifier(void)
{
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index da748b7cbc4d..03d56f711ad1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -317,10 +317,14 @@ EXPORT_SYMBOL(trace_raw_output_prep);
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
{
+ struct trace_seq *s = &iter->seq;
va_list ap;
+ if (ignore_event(iter))
+ return;
+
va_start(ap, fmt);
- trace_check_vprintf(iter, trace_event_format(iter, fmt), ap);
+ trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);
va_end(ap);
}
EXPORT_SYMBOL(trace_event_printf);
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index 5c03633316a6..0c42b15c3800 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -10,11 +10,42 @@
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
+#include <linux/hardirq.h>
#include "trace.h"
#define CREATE_TRACE_POINTS
#include <trace/events/preemptirq.h>
+/*
+ * Use regular trace points on architectures that implement noinstr
+ * tooling: these calls will only happen with RCU enabled, which can
+ * use a regular tracepoint.
+ *
+ * On older architectures, RCU may not be watching in idle. In that
+ * case, wake up RCU to watch while calling the tracepoint. These
+ * aren't NMI-safe - so exclude NMI contexts:
+ */
+#ifdef CONFIG_ARCH_WANTS_NO_INSTR
+#define trace(point, args) trace_##point(args)
+#else
+#define trace(point, args) \
+ do { \
+ if (trace_##point##_enabled()) { \
+ bool exit_rcu = false; \
+ if (in_nmi()) \
+ break; \
+ if (!IS_ENABLED(CONFIG_TINY_RCU) && \
+ is_idle_task(current)) { \
+ ct_irq_enter(); \
+ exit_rcu = true; \
+ } \
+ trace_##point(args); \
+ if (exit_rcu) \
+ ct_irq_exit(); \
+ } \
+ } while (0)
+#endif
+
#ifdef CONFIG_TRACE_IRQFLAGS
/* Per-cpu variable to prevent redundant calls when IRQs already off */
static DEFINE_PER_CPU(int, tracing_irq_cpu);
@@ -28,7 +59,7 @@ static DEFINE_PER_CPU(int, tracing_irq_cpu);
void trace_hardirqs_on_prepare(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
- trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
+ trace(irq_enable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
@@ -39,7 +70,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_prepare);
void trace_hardirqs_on(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
- trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
+ trace(irq_enable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
@@ -61,7 +92,7 @@ void trace_hardirqs_off_finish(void)
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
- trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
+ trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
}
}
@@ -75,7 +106,7 @@ void trace_hardirqs_off(void)
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
- trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
+ trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
}
}
EXPORT_SYMBOL(trace_hardirqs_off);
@@ -86,13 +117,13 @@ NOKPROBE_SYMBOL(trace_hardirqs_off);
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
- trace_preempt_enable(a0, a1);
+ trace(preempt_enable, TP_ARGS(a0, a1));
tracer_preempt_on(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
- trace_preempt_disable(a0, a1);
+ trace(preempt_disable, TP_ARGS(a0, a1));
tracer_preempt_off(a0, a1);
}
#endif
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index fed382b7881b..4875e7f5de3d 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1402,9 +1402,13 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
#ifdef CONFIG_BPF_EVENTS
if (bpf_prog_array_valid(call)) {
+ const struct bpf_prog_array *array;
u32 ret;
- ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run);
+ rcu_read_lock_trace();
+ array = rcu_dereference_check(call->prog_array, rcu_read_lock_trace_held());
+ ret = bpf_prog_run_array_uprobe(array, regs, bpf_prog_run);
+ rcu_read_unlock_trace();
if (!ret)
return;
}
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 3a56e7c8aa4f..1921ade45be3 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -845,15 +845,11 @@ int tracing_map_init(struct tracing_map *map)
static int cmp_entries_dup(const void *A, const void *B)
{
const struct tracing_map_sort_entry *a, *b;
- int ret = 0;
a = *(const struct tracing_map_sort_entry **)A;
b = *(const struct tracing_map_sort_entry **)B;
- if (memcmp(a->key, b->key, a->elt->map->key_size))
- ret = 1;
-
- return ret;
+ return memcmp(a->key, b->key, a->elt->map->key_size);
}
static int cmp_entries_sum(const void *A, const void *B)