diff options
author | Steven Rostedt (Google) <rostedt@goodmis.org> | 2024-06-13 01:19:37 +0200 |
---|---|---|
committer | Steven Rostedt (Google) <rostedt@goodmis.org> | 2024-06-14 18:28:20 +0200 |
commit | b14d032973d4e6c36e025e96ea3437de53d072d9 (patch) | |
tree | 911e11878d09bb742ce84df6bcadee2c6ffa7901 /kernel/trace | |
parent | ring-buffer: Add ring_buffer_alloc_range() (diff) | |
download | linux-b14d032973d4e6c36e025e96ea3437de53d072d9.tar.xz linux-b14d032973d4e6c36e025e96ea3437de53d072d9.zip |
ring-buffer: Add ring_buffer_meta data
Populate the ring_buffer_meta array. It holds the pointer to the
head_buffer (next to read), the commit_buffer (next to write) the size of
the sub-buffers, number of sub-buffers and an array that keeps track of
the order of the sub-buffers.
This information will be stored in the persistent memory to help on reboot
to reconstruct the ring buffer.
Link: https://lkml.kernel.org/r/20240612232025.530733577@goodmis.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Vincent Donnefort <vdonnefort@google.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineeth Pillai <vineeth@bitbyteword.org>
Cc: Youssef Esmat <youssefesmat@google.com>
Cc: Beau Belgrave <beaub@linux.microsoft.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Ross Zwisler <zwisler@google.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/ring_buffer.c | 209 |
1 files changed, 184 insertions, 25 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6edd70e45807..0e3ed7f1cc5d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -43,6 +43,11 @@ static void update_pages_handler(struct work_struct *work); struct ring_buffer_meta { + unsigned long head_buffer; + unsigned long commit_buffer; + __u32 subbuf_size; + __u32 nr_subbufs; + int buffers[]; }; /* @@ -501,6 +506,7 @@ struct ring_buffer_per_cpu { struct mutex mapping_lock; unsigned long *subbuf_ids; /* ID to subbuf VA */ struct trace_buffer_meta *meta_page; + struct ring_buffer_meta *ring_meta; /* ring buffer pages to update, > 0 to add, < 0 to remove */ long nr_pages_to_update; @@ -1261,6 +1267,11 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) * Set the previous list pointer to have the HEAD flag. */ rb_set_list_to_head(head->list.prev); + + if (cpu_buffer->ring_meta) { + struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + meta->head_buffer = (unsigned long)head->page; + } } static void rb_list_head_clear(struct list_head *list) @@ -1515,51 +1526,127 @@ rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs) } /* - * Return a specific sub-buffer for a given @cpu defined by @idx. + * Return the ring_buffer_meta for a given @cpu. */ -static void *rb_range_buffer(struct trace_buffer *buffer, int cpu, int nr_pages, int idx) +static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu) { - unsigned long ptr; int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE; + unsigned long ptr = buffer->range_addr_start; + struct ring_buffer_meta *meta; int nr_subbufs; - /* Include the reader page */ - nr_subbufs = nr_pages + 1; + if (!ptr) + return NULL; + + /* When nr_pages passed in is zero, the first meta has already been initialized */ + if (!nr_pages) { + meta = (struct ring_buffer_meta *)ptr; + nr_subbufs = meta->nr_subbufs; + } else { + meta = NULL; + /* Include the reader page */ + nr_subbufs = nr_pages + 1; + } /* * The first chunk may not be subbuffer aligned, where as * the rest of the chunks are. */ - ptr = buffer->range_addr_start; - ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs); if (cpu) { - unsigned long p; - - ptr += subbuf_size * nr_subbufs; - - /* Save the beginning of this CPU chunk */ - p = ptr; - ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs); + ptr += subbuf_size * nr_subbufs; /* We can use multiplication to find chunks greater than 1 */ if (cpu > 1) { unsigned long size; + unsigned long p; + /* Save the beginning of this CPU chunk */ + p = ptr; + ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs); ptr += subbuf_size * nr_subbufs; /* Now all chunks after this are the same size */ size = ptr - p; ptr += size * (cpu - 2); - - ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs); } } - if (ptr + subbuf_size * nr_subbufs > buffer->range_addr_end) + return (void *)ptr; +} + +/* Return the start of subbufs given the meta pointer */ +static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta) +{ + int subbuf_size = meta->subbuf_size; + unsigned long ptr; + + ptr = (unsigned long)meta; + ptr = rb_range_align_subbuf(ptr, subbuf_size, meta->nr_subbufs); + + return (void *)ptr; +} + +/* + * Return a specific sub-buffer for a given @cpu defined by @idx. + */ +static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) +{ + struct ring_buffer_meta *meta; + unsigned long ptr; + int subbuf_size; + + meta = rb_range_meta(cpu_buffer->buffer, 0, cpu_buffer->cpu); + if (!meta) + return NULL; + + if (WARN_ON_ONCE(idx >= meta->nr_subbufs)) return NULL; + + subbuf_size = meta->subbuf_size; + + /* Map this buffer to the order that's in meta->buffers[] */ + idx = meta->buffers[idx]; + + ptr = (unsigned long)rb_subbufs_from_meta(meta); + + ptr += subbuf_size * idx; + if (ptr + subbuf_size > cpu_buffer->buffer->range_addr_end) + return NULL; + return (void *)ptr; } +static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) +{ + struct ring_buffer_meta *meta; + void *subbuf; + int cpu; + int i; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + meta = rb_range_meta(buffer, nr_pages, cpu); + + meta->nr_subbufs = nr_pages + 1; + meta->subbuf_size = PAGE_SIZE; + + subbuf = rb_subbufs_from_meta(meta); + + /* + * The buffers[] array holds the order of the sub-buffers + * that are after the meta data. The sub-buffers may + * be swapped out when read and inserted into a different + * location of the ring buffer. Although their addresses + * remain the same, the buffers[] array contains the + * index into the sub-buffers holding their actual order. + */ + for (i = 0; i < meta->nr_subbufs; i++) { + meta->buffers[i] = i; + rb_init_page(subbuf); + subbuf += meta->subbuf_size; + } + } +} + static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, long nr_pages, struct list_head *pages) { @@ -1600,7 +1687,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, set_current_oom_origin(); for (i = 0; i < nr_pages; i++) { struct page *page; - int cpu = cpu_buffer->cpu; bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), mflags, cpu_to_node(cpu_buffer->cpu)); @@ -1617,10 +1703,11 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, if (buffer->range_addr_start) { /* A range was given. Use that for the buffer page */ - bpage->page = rb_range_buffer(buffer, cpu, nr_pages, i + 1); + bpage->page = rb_range_buffer(cpu_buffer, i + 1); if (!bpage->page) goto free_pages; bpage->range = 1; + bpage->id = i + 1; } else { page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags | __GFP_COMP | __GFP_ZERO, @@ -1628,9 +1715,9 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, if (!page) goto free_pages; bpage->page = page_address(page); + rb_init_page(bpage->page); } bpage->order = cpu_buffer->buffer->subbuf_order; - rb_init_page(bpage->page); if (user_thread && fatal_signal_pending(current)) goto free_pages; @@ -1711,7 +1798,13 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) cpu_buffer->reader_page = bpage; if (buffer->range_addr_start) { - bpage->page = rb_range_buffer(buffer, cpu, nr_pages, 0); + /* + * Range mapped buffers have the same restrictions as memory + * mapped ones do. + */ + cpu_buffer->mapped = 1; + cpu_buffer->ring_meta = rb_range_meta(buffer, nr_pages, cpu); + bpage->page = rb_range_buffer(cpu_buffer, 0); if (!bpage->page) goto fail_free_reader; bpage->range = 1; @@ -1722,8 +1815,8 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) if (!page) goto fail_free_reader; bpage->page = page_address(page); + rb_init_page(bpage->page); } - rb_init_page(bpage->page); INIT_LIST_HEAD(&cpu_buffer->reader_page->list); INIT_LIST_HEAD(&cpu_buffer->new_pages); @@ -1737,6 +1830,10 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; rb_head_page_activate(cpu_buffer); + if (cpu_buffer->ring_meta) { + struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + meta->commit_buffer = meta->head_buffer; + } return cpu_buffer; @@ -1856,6 +1953,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, nr_pages--; buffer->range_addr_start = start; buffer->range_addr_end = end; + + rb_range_meta_init(buffer, nr_pages); } else { /* need at least two pages */ @@ -2544,6 +2643,52 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) iter->next_event = 0; } +/* Return the index into the sub-buffers for a given sub-buffer */ +static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf) +{ + void *subbuf_array; + + subbuf_array = (void *)meta + sizeof(int) * meta->nr_subbufs; + subbuf_array = (void *)ALIGN((unsigned long)subbuf_array, meta->subbuf_size); + return (subbuf - subbuf_array) / meta->subbuf_size; +} + +static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *next_page) +{ + struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + unsigned long old_head = (unsigned long)next_page->page; + unsigned long new_head; + + rb_inc_page(&next_page); + new_head = (unsigned long)next_page->page; + + /* + * Only move it forward once, if something else came in and + * moved it forward, then we don't want to touch it. + */ + (void)cmpxchg(&meta->head_buffer, old_head, new_head); +} + +static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *reader) +{ + struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + void *old_reader = cpu_buffer->reader_page->page; + void *new_reader = reader->page; + int id; + + id = reader->id; + cpu_buffer->reader_page->id = id; + reader->id = 0; + + meta->buffers[0] = rb_meta_subbuf_idx(meta, new_reader); + meta->buffers[id] = rb_meta_subbuf_idx(meta, old_reader); + + /* The head pointer is the one after the reader */ + rb_update_meta_head(cpu_buffer, reader); +} + /* * rb_handle_head_page - writer hit the head page * @@ -2593,6 +2738,8 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); + if (cpu_buffer->ring_meta) + rb_update_meta_head(cpu_buffer, next_page); /* * The entries will be zeroed out when we move the * tail page. @@ -3154,6 +3301,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); rb_inc_page(&cpu_buffer->commit_page); + if (cpu_buffer->ring_meta) { + struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + meta->commit_buffer = (unsigned long)cpu_buffer->commit_page->page; + } /* add barrier to keep gcc from optimizing too much */ barrier(); } @@ -4771,6 +4922,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) if (!ret) goto spin; + if (cpu_buffer->ring_meta) + rb_update_meta_reader(cpu_buffer, reader); + /* * Yay! We succeeded in replacing the page. * @@ -5451,11 +5605,16 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->lost_events = 0; cpu_buffer->last_overrun = 0; - if (cpu_buffer->user_mapped) - rb_update_meta_page(cpu_buffer); - rb_head_page_activate(cpu_buffer); cpu_buffer->pages_removed = 0; + + if (cpu_buffer->mapped) { + rb_update_meta_page(cpu_buffer); + if (cpu_buffer->ring_meta) { + struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + meta->commit_buffer = meta->head_buffer; + } + } } /* Must have disabled the cpu buffer then done a synchronize_rcu */ |