Diffstat (limited to 'lib')
52 files changed, 3280 insertions(+), 770 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig index b38849af6f13..5a318f753b2f 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -40,6 +40,18 @@ config PACKING When in doubt, say N. +config PACKING_KUNIT_TEST + tristate "KUnit tests for packing library" if !KUNIT_ALL_TESTS + depends on PACKING && KUNIT + default KUNIT_ALL_TESTS + help + This builds KUnit tests for the packing library. + + For more information on KUnit and unit tests in general, + please refer to the KUnit documentation in Documentation/dev-tools/kunit/. + + When in doubt, say N. + config BITREVERSE tristate @@ -777,3 +789,9 @@ config POLYNOMIAL config FIRMWARE_TABLE bool + +config UNION_FIND + bool + +config MIN_HEAP + bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7312ae7c3cc5..f3d723705879 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -993,6 +993,7 @@ config CODE_TAGGING config MEM_ALLOC_PROFILING bool "Enable memory allocation profiling" default n + depends on MMU depends on PROC_FS depends on !DEBUG_FORCE_WEAK_PER_CPU select CODE_TAGGING @@ -1328,19 +1329,6 @@ config SCHEDSTATS endmenu -config DEBUG_TIMEKEEPING - bool "Enable extra timekeeping sanity checking" - help - This option will enable additional timekeeping sanity checks - which may be helpful when diagnosing issues where timekeeping - problems are suspected. - - This may include checks in the timekeeping hotpaths, so this - option may have a (very small) performance impact to some - workloads. - - If unsure, say N. - config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT @@ -1409,22 +1397,14 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.rst. config PROVE_RAW_LOCK_NESTING - bool "Enable raw_spinlock - spinlock nesting checks" + bool depends on PROVE_LOCKING - default n + default y help Enable the raw_spinlock vs. spinlock nesting checks which ensure that the lock nesting rules for PREEMPT_RT enabled kernels are not violated. - NOTE: There are known nesting problems. So if you enable this - option expect lockdep splats until these problems have been fully - addressed which is work in progress. This config switch allows to - identify and analyze these problems. It will be removed and the - check permanently enabled once the main issues have been fixed. - - If unsure, select N. - config LOCK_STAT bool "Lock usage statistics" depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT @@ -1905,7 +1885,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED - default y if PPC || X86 || ARM64 + default y if PPC || X86 || ARM64 || S390 help If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental @@ -2115,6 +2095,16 @@ config FAIL_SUNRPC Provide fault-injection capability for SunRPC and its consumers. +config FAIL_SKB_REALLOC + bool "Fault-injection capability forcing skb to reallocate" + depends on FAULT_INJECTION_DEBUG_FS + help + Provide fault-injection capability that forces the skb to be + reallocated, catching possible invalid pointers to the skb. 
+ + For more information, check + Documentation/dev-tools/fault-injection/fault-injection.rst + config FAULT_INJECTION_CONFIGFS bool "Configfs interface for fault-injection capabilities" depends on FAULT_INJECTION @@ -2279,6 +2269,7 @@ config TEST_LIST_SORT config TEST_MIN_HEAP tristate "Min heap test" depends on DEBUG_KERNEL || m + select MIN_HEAP help Enable this to turn on min heap function tests. This test is executed only once during system boot (so affects only boot time), @@ -2629,6 +2620,23 @@ config CHECKSUM_KUNIT If unsure, say N. +config UTIL_MACROS_KUNIT + tristate "KUnit test util_macros.h functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test the util_macros.h function at boot. + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config HASH_KUNIT_TEST tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2850,6 +2858,15 @@ config USERCOPY_KUNIT_TEST on the copy_to/from_user infrastructure, making sure basic user/kernel boundary testing is working. +config CRC16_KUNIT_TEST + tristate "KUnit tests for CRC16" + depends on KUNIT + default KUNIT_ALL_TESTS + select CRC16 + help + Enable this option to run unit tests for the kernel's CRC16 + implementation (<linux/crc16.h>). + config TEST_UDELAY tristate "udelay test driver" help @@ -2903,6 +2920,141 @@ config TEST_KMOD If unsure, say N. +config TEST_RUNTIME + bool + +config TEST_RUNTIME_MODULE + bool + +config TEST_KALLSYMS + tristate "module kallsyms find_symbol() test" + depends on m + select TEST_RUNTIME + select TEST_RUNTIME_MODULE + select TEST_KALLSYMS_A + select TEST_KALLSYMS_B + select TEST_KALLSYMS_C + select TEST_KALLSYMS_D + help + This allows us to stress test find_symbol() through the kallsyms + used to place symbols on the kernel ELF kallsyms and modules kallsyms + where we place kernel symbols such as exported symbols. + + We have four test modules: + + A: has KALLSYSMS_NUMSYMS exported symbols + B: uses one of A's symbols + C: adds KALLSYMS_SCALE_FACTOR * KALLSYSMS_NUMSYMS exported + D: adds 2 * the symbols than C + + We stress test find_symbol() through two means: + + 1) Upon load of B it will trigger simplify_symbols() to look for the + one symbol it uses from the module A with tons of symbols. This is an + indirect way for us to have B call resolve_symbol_wait() upon module + load. This will eventually call find_symbol() which will eventually + try to find the symbols used with find_exported_symbol_in_section(). + find_exported_symbol_in_section() uses bsearch() so a binary search + for each symbol. Binary search will at worst be O(log(n)) so the + larger TEST_MODULE_KALLSYSMS the worse the search. + + 2) The selftests should load C first, before B. Upon B's load towards + the end right before we call module B's init routine we get + complete_formation() called on the module. That will first check + for duplicate symbols with the call to verify_exported_symbols(). + That is when we'll force iteration on module C's insane symbol list. + Since it has 10 * KALLSYMS_NUMSYMS it means we can first test + just loading B without C. 
The amount of time it takes to load C Vs + B can give us an idea of the impact growth of the symbol space and + give us projection. Module A only uses one symbol from B so to allow + this scaling in module C to be proportional, if it used more symbols + then the first test would be doing more and increasing just the + search space would be slightly different. The last module, module D + will just increase the search space by twice the number of symbols in + C so to allow for full projects. + + tools/testing/selftests/module/find_symbol.sh + + The current defaults will incur a build delay of about 7 minutes + on an x86_64 with only 8 cores. Enable this only if you want to + stress test find_symbol() with thousands of symbols. At the same + time this is also useful to test building modules with thousands of + symbols, and if BTF is enabled this also stress tests adding BTF + information for each module. Currently enabling many more symbols + will segfault the build system. + + If unsure, say N. + +if TEST_KALLSYMS + +config TEST_KALLSYMS_A + tristate + depends on m + +config TEST_KALLSYMS_B + tristate + depends on m + +config TEST_KALLSYMS_C + tristate + depends on m + +config TEST_KALLSYMS_D + tristate + depends on m + +choice + prompt "Kallsym test range" + default TEST_KALLSYMS_LARGE + help + Selecting something other than "Fast" will enable tests which slow + down the build and may crash your build. + +config TEST_KALLSYMS_FAST + bool "Fast builds" + help + You won't really be testing kallsysms, so this just helps fast builds + when allmodconfig is used.. + +config TEST_KALLSYMS_LARGE + bool "Enable testing kallsyms with large exports" + help + This will enable larger number of symbols. This will slow down + your build considerably. + +config TEST_KALLSYMS_MAX + bool "Known kallsysms limits" + help + This will enable exports to the point we know we'll start crashing + builds. + +endchoice + +config TEST_KALLSYMS_NUMSYMS + int "test kallsyms number of symbols" + range 2 10000 + default 2 if TEST_KALLSYMS_FAST + default 100 if TEST_KALLSYMS_LARGE + default 10000 if TEST_KALLSYMS_MAX + help + The number of symbols to create on TEST_KALLSYMS_A, only one of which + module TEST_KALLSYMS_B will use. This also will be used + for how many symbols TEST_KALLSYMS_C will have, scaled up by + TEST_KALLSYMS_SCALE_FACTOR. Note that setting this to 10,000 will + trigger a segfault today, don't use anything close to it unless + you are aware that this should not be used for automated build tests. + +config TEST_KALLSYMS_SCALE_FACTOR + int "test kallsyms scale factor" + default 8 + help + How many more unusued symbols will TEST_KALLSYSMS_C have than + TEST_KALLSYMS_A. If 8, then module C will have 8 * syms + than module A. Then TEST_KALLSYMS_D will have double the amount + of symbols than C so to allow projections. + +endif # TEST_KALLSYMS + config TEST_DEBUG_VIRTUAL tristate "Test CONFIG_DEBUG_VIRTUAL feature" depends on DEBUG_VIRTUAL @@ -2993,6 +3145,22 @@ config TEST_OBJPOOL If unsure, say N. +config INT_POW_TEST + tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the int_pow function, + which performs integer exponentiation. The test suite is designed to + verify that the implementation of int_pow correctly computes the power + of a given base raised to a given exponent. 
+ + Enabling this option will include tests that check various scenarios + and edge cases to ensure the accuracy and reliability of the exponentiation + function. + + If unsure, say N + endif # RUNTIME_TESTING_MENU config ARCH_USE_MEMTEST @@ -3088,19 +3256,3 @@ config RUST_KERNEL_DOCTESTS endmenu # "Rust" endmenu # Kernel hacking - -config INT_POW_TEST - tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - This option enables the KUnit test suite for the int_pow function, - which performs integer exponentiation. The test suite is designed to - verify that the implementation of int_pow correctly computes the power - of a given base raised to a given exponent. - - Enabling this option will include tests that check various scenarios - and edge cases to ensure the accuracy and reliability of the exponentiation - function. - - If unsure, say N diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 98016e137b7f..f82889a830fa 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -195,13 +195,6 @@ config KASAN_KUNIT_TEST For more information on KUnit and unit tests in general, please refer to the KUnit documentation in Documentation/dev-tools/kunit/. -config KASAN_MODULE_TEST - tristate "KUnit-incompatible tests of KASAN bug detection capabilities" - depends on m && KASAN && !KASAN_HW_TAGS - help - A part of the KASAN test suite that is not integrated with KUnit. - Incompatible with Hardware Tag-Based KASAN. - config KASAN_EXTRA_INFO bool "Record and report more information" depends on KASAN diff --git a/lib/Makefile b/lib/Makefile index 773adf88af41..a8155c972f02 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -35,10 +35,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o union_find.o + buildid.o objpool.o iomem_copy.o +lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_SMP) += cpumask.o +lib-$(CONFIG_MIN_HEAP) += min_heap.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -96,6 +98,7 @@ obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o +obj-$(CONFIG_TEST_RUNTIME) += tests/ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o @@ -154,6 +157,7 @@ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o obj-$(CONFIG_PACKING) += packing.o +obj-$(CONFIG_PACKING_KUNIT_TEST) += packing_test.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o @@ -370,6 +374,7 @@ obj-$(CONFIG_PLDMFW) += pldmfw/ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o +obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o @@ -389,6 +394,7 @@ CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += 
usercopy_kunit.o +obj-$(CONFIG_CRC16_KUNIT_TEST) += crc16_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 81e5f9a70f22..2414a7ee7ec7 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -1,12 +1,26 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/alloc_tag.h> +#include <linux/execmem.h> #include <linux/fs.h> #include <linux/gfp.h> +#include <linux/kallsyms.h> #include <linux/module.h> #include <linux/page_ext.h> #include <linux/proc_fs.h> #include <linux/seq_buf.h> #include <linux/seq_file.h> +#include <linux/vmalloc.h> + +#define ALLOCINFO_FILE_NAME "allocinfo" +#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) +#define SECTION_START(NAME) (CODETAG_SECTION_START_PREFIX NAME) +#define SECTION_STOP(NAME) (CODETAG_SECTION_STOP_PREFIX NAME) + +#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT +static bool mem_profiling_support = true; +#else +static bool mem_profiling_support; +#endif static struct codetag_type *alloc_tag_cttype; @@ -15,6 +29,11 @@ EXPORT_SYMBOL(_shared_alloc_tag); DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, mem_alloc_profiling_key); +DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed); + +struct alloc_tag_kernel_section kernel_tags = { NULL, 0 }; +unsigned long alloc_tag_ref_mask; +int alloc_tag_ref_offs; struct allocinfo_private { struct codetag_iterator iter; @@ -144,44 +163,450 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl return nr; } +void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) +{ + int i; + struct alloc_tag *tag; + unsigned int nr_pages = 1 << new_order; + + if (!mem_alloc_profiling_enabled()) + return; + + tag = pgalloc_tag_get(&folio->page); + if (!tag) + return; + + for (i = nr_pages; i < (1 << old_order); i += nr_pages) { + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) { + /* Set new reference to point to the original tag */ + alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); + } + } +} + +void pgalloc_tag_copy(struct folio *new, struct folio *old) +{ + union pgtag_ref_handle handle; + union codetag_ref ref; + struct alloc_tag *tag; + + tag = pgalloc_tag_get(&old->page); + if (!tag) + return; + + if (!get_page_tag_ref(&new->page, &ref, &handle)) + return; + + /* Clear the old ref to the original allocation tag. */ + clear_page_tag_ref(&old->page); + /* Decrement the counters of the tag on get_new_folio. 
*/ + alloc_tag_sub(&ref, folio_size(new)); + __alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); +} + +static void shutdown_mem_profiling(bool remove_file) +{ + if (mem_alloc_profiling_enabled()) + static_branch_disable(&mem_alloc_profiling_key); + + if (!mem_profiling_support) + return; + + if (remove_file) + remove_proc_entry(ALLOCINFO_FILE_NAME, NULL); + mem_profiling_support = false; +} + static void __init procfs_init(void) { - proc_create_seq("allocinfo", 0400, NULL, &allocinfo_seq_op); + if (!mem_profiling_support) + return; + + if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) { + pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME); + shutdown_mem_profiling(false); + } +} + +void __init alloc_tag_sec_init(void) +{ + struct alloc_tag *last_codetag; + + if (!mem_profiling_support) + return; + + if (!static_key_enabled(&mem_profiling_compressed)) + return; + + kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name( + SECTION_START(ALLOC_TAG_SECTION_NAME)); + last_codetag = (struct alloc_tag *)kallsyms_lookup_name( + SECTION_STOP(ALLOC_TAG_SECTION_NAME)); + kernel_tags.count = last_codetag - kernel_tags.first_tag; + + /* Check if kernel tags fit into page flags */ + if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) { + shutdown_mem_profiling(false); /* allocinfo file does not exist yet */ + pr_err("%lu allocation tags cannot be references using %d available page flag bits. Memory allocation profiling is disabled!\n", + kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS); + return; + } + + alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS); + alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1); + pr_debug("Memory allocation profiling compression is using %d page flag bits!\n", + NR_UNUSED_PAGEFLAG_BITS); +} + +#ifdef CONFIG_MODULES + +static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE); +static struct vm_struct *vm_module_tags; +/* A dummy object used to indicate an unloaded module */ +static struct module unloaded_mod; +/* A dummy object used to indicate a module prepended area */ +static struct module prepend_mod; + +struct alloc_tag_module_section module_tags; + +static inline unsigned long alloc_tag_align(unsigned long val) +{ + if (!static_key_enabled(&mem_profiling_compressed)) { + /* No alignment requirements when we are not indexing the tags */ + return val; + } + + if (val % sizeof(struct alloc_tag) == 0) + return val; + return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag); } -static bool alloc_tag_module_unload(struct codetag_type *cttype, - struct codetag_module *cmod) +static bool ensure_alignment(unsigned long align, unsigned int *prepend) { - struct codetag_iterator iter = codetag_get_ct_iter(cttype); - struct alloc_tag_counters counter; - bool module_unused = true; + if (!static_key_enabled(&mem_profiling_compressed)) { + /* No alignment requirements when we are not indexing the tags */ + return true; + } + + /* + * If alloc_tag size is not a multiple of required alignment, tag + * indexing does not work. 
+ */ + if (!IS_ALIGNED(sizeof(struct alloc_tag), align)) + return false; + + /* Ensure prepend consumes multiple of alloc_tag-sized blocks */ + if (*prepend) + *prepend = alloc_tag_align(*prepend); + + return true; +} + +static inline bool tags_addressable(void) +{ + unsigned long tag_idx_count; + + if (!static_key_enabled(&mem_profiling_compressed)) + return true; /* with page_ext tags are always addressable */ + + tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count + + module_tags.size / sizeof(struct alloc_tag); + + return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS); +} + +static bool needs_section_mem(struct module *mod, unsigned long size) +{ + if (!mem_profiling_support) + return false; + + return size >= sizeof(struct alloc_tag); +} + +static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) +{ + while (from <= to) { + struct alloc_tag_counters counter; + + counter = alloc_tag_read(from); + if (counter.bytes) + return from; + from++; + } + + return NULL; +} + +/* Called with mod_area_mt locked */ +static void clean_unused_module_areas_locked(void) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_for_each(&mas, val, module_tags.size) { + if (val != &unloaded_mod) + continue; + + /* Release area if all tags are unused */ + if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last))) + mas_erase(&mas); + } +} + +/* Called with mod_area_mt locked */ +static bool find_aligned_area(struct ma_state *mas, unsigned long section_size, + unsigned long size, unsigned int prepend, unsigned long align) +{ + bool cleanup_done = false; + +repeat: + /* Try finding exact size and hope the start is aligned */ + if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) { + if (IS_ALIGNED(mas->index + prepend, align)) + return true; + + /* Try finding larger area to align later */ + mas_reset(mas); + if (!mas_empty_area(mas, 0, section_size - 1, + size + prepend + align - 1)) + return true; + } + + /* No free area, try cleanup stale data and repeat the search once */ + if (!cleanup_done) { + clean_unused_module_areas_locked(); + cleanup_done = true; + mas_reset(mas); + goto repeat; + } + + return false; +} + +static int vm_module_tags_populate(void) +{ + unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT; + + if (phys_size < module_tags.size) { + struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; + unsigned long addr = module_tags.start_addr + phys_size; + unsigned long more_pages; + unsigned long nr; + + more_pages = ALIGN(module_tags.size - phys_size, PAGE_SIZE) >> PAGE_SHIFT; + nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages, next_page); + if (nr < more_pages || + vmap_pages_range(addr, addr + (nr << PAGE_SHIFT), PAGE_KERNEL, + next_page, PAGE_SHIFT) < 0) { + /* Clean up and error out */ + for (int i = 0; i < nr; i++) + __free_page(next_page[i]); + return -ENOMEM; + } + vm_module_tags->nr_pages += nr; + } + + return 0; +} + +static void *reserve_module_tags(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align) +{ + unsigned long section_size = module_tags.end_addr - module_tags.start_addr; + MA_STATE(mas, &mod_area_mt, 0, section_size - 1); + unsigned long offset; + void *ret = NULL; + + /* If no tags return error */ + if (size < sizeof(struct alloc_tag)) + return ERR_PTR(-EINVAL); + + /* + * align is always power of 2, so we can use IS_ALIGNED 
and ALIGN. + * align 0 or 1 means no alignment, to simplify set to 1. + */ + if (!align) + align = 1; + + if (!ensure_alignment(align, &prepend)) { + shutdown_mem_profiling(true); + pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n", + mod->name, align); + return ERR_PTR(-EINVAL); + } + + mas_lock(&mas); + if (!find_aligned_area(&mas, section_size, size, prepend, align)) { + ret = ERR_PTR(-ENOMEM); + goto unlock; + } + + /* Mark found area as reserved */ + offset = mas.index; + offset += prepend; + offset = ALIGN(offset, align); + if (offset != mas.index) { + unsigned long pad_start = mas.index; + + mas.last = offset - 1; + mas_store(&mas, &prepend_mod); + if (mas_is_err(&mas)) { + ret = ERR_PTR(xa_err(mas.node)); + goto unlock; + } + mas.index = offset; + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) { + mas.index = pad_start; + mas_erase(&mas); + ret = ERR_PTR(xa_err(mas.node)); + } + } else { + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) + ret = ERR_PTR(xa_err(mas.node)); + } +unlock: + mas_unlock(&mas); + + if (IS_ERR(ret)) + return ret; + + if (module_tags.size < offset + size) { + int grow_res; + + module_tags.size = offset + size; + if (mem_alloc_profiling_enabled() && !tags_addressable()) { + shutdown_mem_profiling(true); + pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n", + mod->name, NR_UNUSED_PAGEFLAG_BITS); + } + + grow_res = vm_module_tags_populate(); + if (grow_res) { + shutdown_mem_profiling(true); + pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n", + mod->name); + return ERR_PTR(grow_res); + } + } + + return (struct alloc_tag *)(module_tags.start_addr + offset); +} + +static void release_module_tags(struct module *mod, bool used) +{ + MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); struct alloc_tag *tag; - struct codetag *ct; + struct module *val; + + mas_lock(&mas); + mas_for_each_rev(&mas, val, 0) + if (val == mod) + break; + + if (!val) /* module not found */ + goto out; + + if (!used) + goto release_area; + + /* Find out if the area is used */ + tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last)); + if (tag) { + struct alloc_tag_counters counter = alloc_tag_read(tag); + + pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", + tag->ct.filename, tag->ct.lineno, tag->ct.modname, + tag->ct.function, counter.bytes); + } else { + used = false; + } +release_area: + mas_store(&mas, used ? 
&unloaded_mod : NULL); + val = mas_prev_range(&mas, 0); + if (val == &prepend_mod) + mas_store(&mas, NULL); +out: + mas_unlock(&mas); +} - for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) { - if (iter.cmod != cmod) +static void replace_module(struct module *mod, struct module *new_mod) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_lock(&mas); + mas_for_each(&mas, val, module_tags.size) { + if (val != mod) continue; - tag = ct_to_alloc_tag(ct); - counter = alloc_tag_read(tag); + mas_store_gfp(&mas, new_mod, GFP_KERNEL); + break; + } + mas_unlock(&mas); +} - if (WARN(counter.bytes, - "%s:%u module %s func:%s has %llu allocated at module unload", - ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes)) - module_unused = false; +static int __init alloc_mod_tags_mem(void) +{ + /* Map space to copy allocation tags */ + vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE); + if (!vm_module_tags) { + pr_err("Failed to map %lu bytes for module allocation tags\n", + MODULE_ALLOC_TAG_VMAP_SIZE); + module_tags.start_addr = 0; + return -ENOMEM; } - return module_unused; + vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT, + sizeof(struct page *), GFP_KERNEL | __GFP_ZERO); + if (!vm_module_tags->pages) { + free_vm_area(vm_module_tags); + return -ENOMEM; + } + + module_tags.start_addr = (unsigned long)vm_module_tags->addr; + module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE; + /* Ensure the base is alloc_tag aligned when required for indexing */ + module_tags.start_addr = alloc_tag_align(module_tags.start_addr); + + return 0; +} + +static void __init free_mod_tags_mem(void) +{ + int i; + + module_tags.start_addr = 0; + for (i = 0; i < vm_module_tags->nr_pages; i++) + __free_page(vm_module_tags->pages[i]); + kfree(vm_module_tags->pages); + free_vm_area(vm_module_tags); } -#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT -static bool mem_profiling_support __meminitdata = true; -#else -static bool mem_profiling_support __meminitdata; -#endif +#else /* CONFIG_MODULES */ + +static inline int alloc_mod_tags_mem(void) { return 0; } +static inline void free_mod_tags_mem(void) {} + +#endif /* CONFIG_MODULES */ +/* See: Documentation/mm/allocation-profiling.rst */ static int __init setup_early_mem_profiling(char *str) { + bool compressed = false; bool enable; if (!str || !str[0]) @@ -190,22 +615,37 @@ static int __init setup_early_mem_profiling(char *str) if (!strncmp(str, "never", 5)) { enable = false; mem_profiling_support = false; + pr_info("Memory allocation profiling is disabled!\n"); } else { - int res; + char *token = strsep(&str, ","); - res = kstrtobool(str, &enable); - if (res) - return res; + if (kstrtobool(token, &enable)) + return -EINVAL; + if (str) { + + if (strcmp(str, "compressed")) + return -EINVAL; + + compressed = true; + } mem_profiling_support = true; + pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n", + compressed ? "with" : "without", enable ? 
"on" : "off"); } - if (enable != static_key_enabled(&mem_alloc_profiling_key)) { + if (enable != mem_alloc_profiling_enabled()) { if (enable) static_branch_enable(&mem_alloc_profiling_key); else static_branch_disable(&mem_alloc_profiling_key); } + if (compressed != static_key_enabled(&mem_profiling_compressed)) { + if (compressed) + static_branch_enable(&mem_profiling_compressed); + else + static_branch_disable(&mem_profiling_compressed); + } return 0; } @@ -213,6 +653,9 @@ early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling); static __init bool need_page_alloc_tagging(void) { + if (static_key_enabled(&mem_profiling_compressed)) + return false; + return mem_profiling_support; } @@ -255,14 +698,26 @@ static inline void sysctl_init(void) {} static int __init alloc_tag_init(void) { const struct codetag_type_desc desc = { - .section = "alloc_tags", - .tag_size = sizeof(struct alloc_tag), - .module_unload = alloc_tag_module_unload, + .section = ALLOC_TAG_SECTION_NAME, + .tag_size = sizeof(struct alloc_tag), +#ifdef CONFIG_MODULES + .needs_section_mem = needs_section_mem, + .alloc_section_mem = reserve_module_tags, + .free_section_mem = release_module_tags, + .module_replaced = replace_module, +#endif }; + int res; + + res = alloc_mod_tags_mem(); + if (res) + return res; alloc_tag_cttype = codetag_register_type(&desc); - if (IS_ERR(alloc_tag_cttype)) + if (IS_ERR(alloc_tag_cttype)) { + free_mod_tags_mem(); return PTR_ERR(alloc_tag_cttype); + } sysctl_init(); procfs_init(); diff --git a/lib/checksum.c b/lib/checksum.c index 6860d6b05a17..025ba546e1ec 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -34,15 +34,6 @@ #include <asm/byteorder.h> #ifndef do_csum -static inline unsigned short from32to16(unsigned int x) -{ - /* add up 16-bit and 16-bit for 16+c bit */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. */ - x = (x & 0xffff) + (x >> 16); - return x; -} - static unsigned int do_csum(const unsigned char *buff, int len) { int odd; @@ -90,7 +81,7 @@ static unsigned int do_csum(const unsigned char *buff, int len) #else result += (*buff << 8); #endif - result = from32to16(result); + result = csum_from32to16(result); if (odd) result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); out: diff --git a/lib/codetag.c b/lib/codetag.c index d1fbbb7c2ec3..42aadd6c1454 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -149,8 +149,8 @@ static struct codetag_range get_section_range(struct module *mod, const char *section) { return (struct codetag_range) { - get_symbol(mod, "__start_", section), - get_symbol(mod, "__stop_", section), + get_symbol(mod, CODETAG_SECTION_START_PREFIX, section), + get_symbol(mod, CODETAG_SECTION_STOP_PREFIX, section), }; } @@ -207,6 +207,94 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) } #ifdef CONFIG_MODULES +#define CODETAG_SECTION_PREFIX ".codetag." 
+ +/* Some codetag types need a separate module section */ +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) +{ + const char *type_name; + struct codetag_type *cttype; + bool ret = false; + + if (strncmp(name, CODETAG_SECTION_PREFIX, strlen(CODETAG_SECTION_PREFIX))) + return false; + + type_name = name + strlen(CODETAG_SECTION_PREFIX); + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (!cttype->desc.needs_section_mem) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.needs_section_mem(mod, size); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) +{ + const char *type_name = name + strlen(CODETAG_SECTION_PREFIX); + struct codetag_type *cttype; + void *ret = ERR_PTR(-EINVAL); + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (WARN_ON(!cttype->desc.alloc_section_mem)) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.alloc_section_mem(mod, size, prepend, align); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void codetag_free_module_sections(struct module *mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.free_section_mem) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.free_section_mem(mod, false); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + +void codetag_module_replaced(struct module *mod, struct module *new_mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.module_replaced) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.module_replaced(mod, new_mod); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + void codetag_load_module(struct module *mod) { struct codetag_type *cttype; @@ -220,13 +308,12 @@ void codetag_load_module(struct module *mod) mutex_unlock(&codetag_lock); } -bool codetag_unload_module(struct module *mod) +void codetag_unload_module(struct module *mod) { struct codetag_type *cttype; - bool unload_ok = true; if (!mod) - return true; + return; /* await any module's kfree_rcu() operations to complete */ kvfree_rcu_barrier(); @@ -246,18 +333,17 @@ bool codetag_unload_module(struct module *mod) } if (found) { if (cttype->desc.module_unload) - if (!cttype->desc.module_unload(cttype, cmod)) - unload_ok = false; + cttype->desc.module_unload(cttype, cmod); cttype->count -= range_size(cttype, &cmod->range); idr_remove(&cttype->mod_idr, mod_id); kfree(cmod); } up_write(&cttype->mod_lock); + if (found && cttype->desc.free_section_mem) + cttype->desc.free_section_mem(mod, true); } mutex_unlock(&codetag_lock); - - return unload_ok; } #endif /* CONFIG_MODULES */ diff --git a/lib/crc16_kunit.c b/lib/crc16_kunit.c new file mode 100644 index 000000000000..0918c98a96d2 --- /dev/null +++ b/lib/crc16_kunit.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnits tests for CRC16. 
+ * + * Copyright (C) 2024, LKCAMP + * Author: Vinicius Peixoto <vpeixoto@lkcamp.dev> + * Author: Fabricio Gasperin <fgasperin@lkcamp.dev> + * Author: Enzo Bertoloti <ebertoloti@lkcamp.dev> + */ +#include <kunit/test.h> +#include <linux/crc16.h> +#include <linux/prandom.h> + +#define CRC16_KUNIT_DATA_SIZE 4096 +#define CRC16_KUNIT_TEST_SIZE 100 +#define CRC16_KUNIT_SEED 0x12345678 + +/** + * struct crc16_test - CRC16 test data + * @crc: initial input value to CRC16 + * @start: Start index within the data buffer + * @length: Length of the data + */ +static struct crc16_test { + u16 crc; + u16 start; + u16 length; +} tests[CRC16_KUNIT_TEST_SIZE]; + +u8 data[CRC16_KUNIT_DATA_SIZE]; + + +/* Naive implementation of CRC16 for validation purposes */ +static inline u16 _crc16_naive_byte(u16 crc, u8 data) +{ + u8 i = 0; + + crc ^= (u16) data; + for (i = 0; i < 8; i++) { + if (crc & 0x01) + crc = (crc >> 1) ^ 0xa001; + else + crc = crc >> 1; + } + + return crc; +} + + +static inline u16 _crc16_naive(u16 crc, u8 *buffer, size_t len) +{ + while (len--) + crc = _crc16_naive_byte(crc, *buffer++); + return crc; +} + + +/* Small helper for generating pseudorandom 16-bit data */ +static inline u16 _rand16(void) +{ + static u32 rand = CRC16_KUNIT_SEED; + + rand = next_pseudo_random32(rand); + return rand & 0xFFFF; +} + + +static int crc16_init_test_data(struct kunit_suite *suite) +{ + size_t i; + + /* Fill the data buffer with random bytes */ + for (i = 0; i < CRC16_KUNIT_DATA_SIZE; i++) + data[i] = _rand16() & 0xFF; + + /* Generate random test data while ensuring the random + * start + length values won't overflow the 4096-byte + * buffer (0x7FF * 2 = 0xFFE < 0x1000) + */ + for (size_t i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + tests[i].crc = _rand16(); + tests[i].start = _rand16() & 0x7FF; + tests[i].length = _rand16() & 0x7FF; + } + + return 0; +} + +static void crc16_test_empty(struct kunit *test) +{ + u16 crc; + + /* The result for empty data should be the same as the + * initial crc + */ + crc = crc16(0x00, data, 0); + KUNIT_EXPECT_EQ(test, crc, 0); + crc = crc16(0xFF, data, 0); + KUNIT_EXPECT_EQ(test, crc, 0xFF); +} + +static void crc16_test_correctness(struct kunit *test) +{ + size_t i; + u16 crc, crc_naive; + + for (i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + /* Compare results with the naive crc16 implementation */ + crc = crc16(tests[i].crc, data + tests[i].start, + tests[i].length); + crc_naive = _crc16_naive(tests[i].crc, data + tests[i].start, + tests[i].length); + KUNIT_EXPECT_EQ(test, crc, crc_naive); + } +} + + +static void crc16_test_combine(struct kunit *test) +{ + size_t i, j; + u16 crc, crc_naive; + + /* Make sure that combining two consecutive crc16 calculations + * yields the same result as calculating the crc16 for the whole thing + */ + for (i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + crc_naive = crc16(tests[i].crc, data + tests[i].start, tests[i].length); + for (j = 0; j < tests[i].length; j++) { + crc = crc16(tests[i].crc, data + tests[i].start, j); + crc = crc16(crc, data + tests[i].start + j, tests[i].length - j); + KUNIT_EXPECT_EQ(test, crc, crc_naive); + } + } +} + + +static struct kunit_case crc16_test_cases[] = { + KUNIT_CASE(crc16_test_empty), + KUNIT_CASE(crc16_test_combine), + KUNIT_CASE(crc16_test_correctness), + {}, +}; + +static struct kunit_suite crc16_test_suite = { + .name = "crc16", + .test_cases = crc16_test_cases, + .suite_init = crc16_init_test_data, +}; +kunit_test_suite(crc16_test_suite); + +MODULE_AUTHOR("Fabricio Gasperin <fgasperin@lkcamp.dev>"); 
+MODULE_AUTHOR("Vinicius Peixoto <vpeixoto@lkcamp.dev>"); +MODULE_AUTHOR("Enzo Bertoloti <ebertoloti@lkcamp.dev>"); +MODULE_DESCRIPTION("Unit tests for crc16"); +MODULE_LICENSE("GPL"); diff --git a/lib/crc32.c b/lib/crc32.c index 5649847d0a8d..ff587fee3893 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -205,7 +205,11 @@ EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +EXPORT_SYMBOL(crc32_le_base); + u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); +EXPORT_SYMBOL(__crc32c_le_base); + u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be); /* diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 969baab8c805..01fac1cd05a1 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -58,3 +58,5 @@ libcurve25519-y += curve25519-selftest.o endif obj-$(CONFIG_MPILIB) += mpi/ + +obj-$(CONFIG_CRYPTO_MANAGER_EXTRA_TESTS) += simd.o diff --git a/lib/crypto/mpi/mpi-bit.c b/lib/crypto/mpi/mpi-bit.c index 835a2f0622a0..934d81311360 100644 --- a/lib/crypto/mpi/mpi-bit.c +++ b/lib/crypto/mpi/mpi-bit.c @@ -95,6 +95,7 @@ int mpi_set_bit(MPI a, unsigned int n) a->d[limbno] |= (A_LIMB_1<<bitno); return 0; } +EXPORT_SYMBOL_GPL(mpi_set_bit); /* * Shift A by N bits to the right. diff --git a/lib/crypto/simd.c b/lib/crypto/simd.c new file mode 100644 index 000000000000..9c36cb3bb49c --- /dev/null +++ b/lib/crypto/simd.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SIMD testing utility functions + * + * Copyright 2024 Google LLC + */ + +#include <crypto/internal/simd.h> + +DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test); +EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 5ce473ad499b..7f50c4480a4e 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -7,25 +7,30 @@ #define pr_fmt(fmt) "ODEBUG: " fmt +#include <linux/cpu.h> #include <linux/debugobjects.h> -#include <linux/interrupt.h> +#include <linux/debugfs.h> +#include <linux/hash.h> +#include <linux/kmemleak.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> #include <linux/sched/task_stack.h> #include <linux/seq_file.h> -#include <linux/debugfs.h> #include <linux/slab.h> -#include <linux/hash.h> -#include <linux/kmemleak.h> -#include <linux/cpu.h> +#include <linux/static_key.h> #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) -#define ODEBUG_POOL_SIZE 1024 -#define ODEBUG_POOL_MIN_LEVEL 256 -#define ODEBUG_POOL_PERCPU_SIZE 64 +/* Must be power of two */ #define ODEBUG_BATCH_SIZE 16 +/* Initial values. Must all be a multiple of batch size */ +#define ODEBUG_POOL_SIZE (64 * ODEBUG_BATCH_SIZE) +#define ODEBUG_POOL_MIN_LEVEL (ODEBUG_POOL_SIZE / 4) + +#define ODEBUG_POOL_PERCPU_SIZE (8 * ODEBUG_BATCH_SIZE) + #define ODEBUG_CHUNK_SHIFT PAGE_SHIFT #define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT) #define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1)) @@ -35,7 +40,7 @@ * frequency of 10Hz and about 1024 objects for each freeing operation. * So it is freeing at most 10k debug objects per second. 
*/ -#define ODEBUG_FREE_WORK_MAX 1024 +#define ODEBUG_FREE_WORK_MAX (1024 / ODEBUG_BATCH_SIZE) #define ODEBUG_FREE_WORK_DELAY DIV_ROUND_UP(HZ, 10) struct debug_bucket { @@ -43,16 +48,24 @@ struct debug_bucket { raw_spinlock_t lock; }; -/* - * Debug object percpu free list - * Access is protected by disabling irq - */ -struct debug_percpu_free { - struct hlist_head free_objs; - int obj_free; +struct pool_stats { + unsigned int cur_used; + unsigned int max_used; + unsigned int min_fill; }; -static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool); +struct obj_pool { + struct hlist_head objects; + unsigned int cnt; + unsigned int min_cnt; + unsigned int max_cnt; + struct pool_stats stats; +} ____cacheline_aligned; + + +static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = { + .max_cnt = ODEBUG_POOL_PERCPU_SIZE, +}; static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; @@ -60,37 +73,32 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_RAW_SPINLOCK(pool_lock); -static HLIST_HEAD(obj_pool); -static HLIST_HEAD(obj_to_free); +static struct obj_pool pool_global = { + .min_cnt = ODEBUG_POOL_MIN_LEVEL, + .max_cnt = ODEBUG_POOL_SIZE, + .stats = { + .min_fill = ODEBUG_POOL_SIZE, + }, +}; -/* - * Because of the presence of percpu free pools, obj_pool_free will - * under-count those in the percpu free pools. Similarly, obj_pool_used - * will over-count those in the percpu free pools. Adjustments will be - * made at debug_stats_show(). Both obj_pool_min_free and obj_pool_max_used - * can be off. - */ -static int __data_racy obj_pool_min_free = ODEBUG_POOL_SIZE; -static int __data_racy obj_pool_free = ODEBUG_POOL_SIZE; -static int obj_pool_used; -static int __data_racy obj_pool_max_used; +static struct obj_pool pool_to_free = { + .max_cnt = UINT_MAX, +}; + +static HLIST_HEAD(pool_boot); + +static unsigned long avg_usage; static bool obj_freeing; -/* The number of objs on the global free list */ -static int obj_nr_tofree; static int __data_racy debug_objects_maxchain __read_mostly; static int __data_racy __maybe_unused debug_objects_maxchecked __read_mostly; static int __data_racy debug_objects_fixups __read_mostly; static int __data_racy debug_objects_warnings __read_mostly; -static int __data_racy debug_objects_enabled __read_mostly +static bool __data_racy debug_objects_enabled __read_mostly = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT; -static int debug_objects_pool_size __ro_after_init - = ODEBUG_POOL_SIZE; -static int debug_objects_pool_min_level __ro_after_init - = ODEBUG_POOL_MIN_LEVEL; -static const struct debug_obj_descr *descr_test __read_mostly; -static struct kmem_cache *obj_cache __ro_after_init; +static const struct debug_obj_descr *descr_test __read_mostly; +static struct kmem_cache *obj_cache __ro_after_init; /* * Track numbers of kmem_cache_alloc()/free() calls done. 
@@ -101,19 +109,20 @@ static int __data_racy debug_objects_freed; static void free_obj_work(struct work_struct *work); static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work); +static DEFINE_STATIC_KEY_FALSE(obj_cache_enabled); + static int __init enable_object_debug(char *str) { - debug_objects_enabled = 1; + debug_objects_enabled = true; return 0; } +early_param("debug_objects", enable_object_debug); static int __init disable_object_debug(char *str) { - debug_objects_enabled = 0; + debug_objects_enabled = false; return 0; } - -early_param("debug_objects", enable_object_debug); early_param("no_debug_objects", disable_object_debug); static const char *obj_states[ODEBUG_STATE_MAX] = { @@ -125,61 +134,280 @@ static const char *obj_states[ODEBUG_STATE_MAX] = { [ODEBUG_STATE_NOTAVAILABLE] = "not available", }; -static void fill_pool(void) +static __always_inline unsigned int pool_count(struct obj_pool *pool) +{ + return READ_ONCE(pool->cnt); +} + +static __always_inline bool pool_should_refill(struct obj_pool *pool) +{ + return pool_count(pool) < pool->min_cnt; +} + +static __always_inline bool pool_must_refill(struct obj_pool *pool) +{ + return pool_count(pool) < pool->min_cnt / 2; +} + +static bool pool_move_batch(struct obj_pool *dst, struct obj_pool *src) { - gfp_t gfp = __GFP_HIGH | __GFP_NOWARN; + struct hlist_node *last, *next_batch, *first_batch; struct debug_obj *obj; - unsigned long flags; - if (likely(READ_ONCE(obj_pool_free) >= debug_objects_pool_min_level)) + if (dst->cnt >= dst->max_cnt || !src->cnt) + return false; + + first_batch = src->objects.first; + obj = hlist_entry(first_batch, typeof(*obj), node); + last = obj->batch_last; + next_batch = last->next; + + /* Move the next batch to the front of the source pool */ + src->objects.first = next_batch; + if (next_batch) + next_batch->pprev = &src->objects.first; + + /* Add the extracted batch to the destination pool */ + last->next = dst->objects.first; + if (last->next) + last->next->pprev = &last->next; + first_batch->pprev = &dst->objects.first; + dst->objects.first = first_batch; + + WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE); + WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE); + return true; +} + +static bool pool_push_batch(struct obj_pool *dst, struct hlist_head *head) +{ + struct hlist_node *last; + struct debug_obj *obj; + + if (dst->cnt >= dst->max_cnt) + return false; + + obj = hlist_entry(head->first, typeof(*obj), node); + last = obj->batch_last; + + hlist_splice_init(head, last, &dst->objects); + WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE); + return true; +} + +static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src) +{ + struct hlist_node *last, *next; + struct debug_obj *obj; + + if (!src->cnt) + return false; + + /* Move the complete list to the head */ + hlist_move_list(&src->objects, head); + + obj = hlist_entry(head->first, typeof(*obj), node); + last = obj->batch_last; + next = last->next; + /* Disconnect the batch from the list */ + last->next = NULL; + + /* Move the node after last back to the source pool. 
*/ + src->objects.first = next; + if (next) + next->pprev = &src->objects.first; + + WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE); + return true; +} + +static struct debug_obj *__alloc_object(struct hlist_head *list) +{ + struct debug_obj *obj; + + if (unlikely(!list->first)) + return NULL; + + obj = hlist_entry(list->first, typeof(*obj), node); + hlist_del(&obj->node); + return obj; +} + +static void pcpu_refill_stats(void) +{ + struct pool_stats *stats = &pool_global.stats; + + WRITE_ONCE(stats->cur_used, stats->cur_used + ODEBUG_BATCH_SIZE); + + if (stats->cur_used > stats->max_used) + stats->max_used = stats->cur_used; + + if (pool_global.cnt < stats->min_fill) + stats->min_fill = pool_global.cnt; +} + +static struct debug_obj *pcpu_alloc(void) +{ + struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu); + + lockdep_assert_irqs_disabled(); + + for (;;) { + struct debug_obj *obj = __alloc_object(&pcp->objects); + + if (likely(obj)) { + pcp->cnt--; + /* + * If this emptied a batch try to refill from the + * free pool. Don't do that if this was the top-most + * batch as pcpu_free() expects the per CPU pool + * to be less than ODEBUG_POOL_PERCPU_SIZE. + */ + if (unlikely(pcp->cnt < (ODEBUG_POOL_PERCPU_SIZE - ODEBUG_BATCH_SIZE) && + !(pcp->cnt % ODEBUG_BATCH_SIZE))) { + /* + * Don't try to allocate from the regular pool here + * to not exhaust it prematurely. + */ + if (pool_count(&pool_to_free)) { + guard(raw_spinlock)(&pool_lock); + pool_move_batch(pcp, &pool_to_free); + pcpu_refill_stats(); + } + } + return obj; + } + + guard(raw_spinlock)(&pool_lock); + if (!pool_move_batch(pcp, &pool_to_free)) { + if (!pool_move_batch(pcp, &pool_global)) + return NULL; + } + pcpu_refill_stats(); + } +} + +static void pcpu_free(struct debug_obj *obj) +{ + struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu); + struct debug_obj *first; + + lockdep_assert_irqs_disabled(); + + if (!(pcp->cnt % ODEBUG_BATCH_SIZE)) { + obj->batch_last = &obj->node; + } else { + first = hlist_entry(pcp->objects.first, typeof(*first), node); + obj->batch_last = first->batch_last; + } + hlist_add_head(&obj->node, &pcp->objects); + pcp->cnt++; + + /* Pool full ? */ + if (pcp->cnt < ODEBUG_POOL_PERCPU_SIZE) return; + /* Remove a batch from the per CPU pool */ + guard(raw_spinlock)(&pool_lock); + /* Try to fit the batch into the pool_global first */ + if (!pool_move_batch(&pool_global, pcp)) + pool_move_batch(&pool_to_free, pcp); + WRITE_ONCE(pool_global.stats.cur_used, pool_global.stats.cur_used - ODEBUG_BATCH_SIZE); +} + +static void free_object_list(struct hlist_head *head) +{ + struct hlist_node *tmp; + struct debug_obj *obj; + int cnt = 0; + + hlist_for_each_entry_safe(obj, tmp, head, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + cnt++; + } + debug_objects_freed += cnt; +} + +static void fill_pool_from_freelist(void) +{ + static unsigned long state; + /* * Reuse objs from the global obj_to_free list; they will be * reinitialized when allocating. - * - * obj_nr_tofree is checked locklessly; the READ_ONCE() pairs with - * the WRITE_ONCE() in pool_lock critical sections. */ - if (READ_ONCE(obj_nr_tofree)) { - raw_spin_lock_irqsave(&pool_lock, flags); - /* - * Recheck with the lock held as the worker thread might have - * won the race and freed the global free list already. 
- */ - while (obj_nr_tofree && (obj_pool_free < debug_objects_pool_min_level)) { - obj = hlist_entry(obj_to_free.first, typeof(*obj), node); - hlist_del(&obj->node); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); - hlist_add_head(&obj->node, &obj_pool); - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); + if (!pool_count(&pool_to_free)) + return; + + /* + * Prevent the context from being scheduled or interrupted after + * setting the state flag; + */ + guard(irqsave)(); + + /* + * Avoid lock contention on &pool_lock and avoid making the cache + * line exclusive by testing the bit before attempting to set it. + */ + if (test_bit(0, &state) || test_and_set_bit(0, &state)) + return; + + /* Avoid taking the lock when there is no work to do */ + while (pool_should_refill(&pool_global) && pool_count(&pool_to_free)) { + guard(raw_spinlock)(&pool_lock); + /* Move a batch if possible */ + pool_move_batch(&pool_global, &pool_to_free); + } + clear_bit(0, &state); +} + +static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp) +{ + struct hlist_node *last = NULL; + struct debug_obj *obj; + + for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) { + obj = kmem_cache_zalloc(cache, gfp); + if (!obj) { + free_object_list(head); + return false; } - raw_spin_unlock_irqrestore(&pool_lock, flags); + debug_objects_allocated++; + + if (!last) + last = &obj->node; + obj->batch_last = last; + + hlist_add_head(&obj->node, head); } + return true; +} + +static void fill_pool(void) +{ + static atomic_t cpus_allocating; - if (unlikely(!obj_cache)) + /* + * Avoid allocation and lock contention when: + * - One other CPU is already allocating + * - the global pool has not reached the critical level yet + */ + if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating)) return; - while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) { - struct debug_obj *new[ODEBUG_BATCH_SIZE]; - int cnt; + atomic_inc(&cpus_allocating); + while (pool_should_refill(&pool_global)) { + HLIST_HEAD(head); - for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) { - new[cnt] = kmem_cache_zalloc(obj_cache, gfp); - if (!new[cnt]) - break; - } - if (!cnt) - return; + if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN)) + break; - raw_spin_lock_irqsave(&pool_lock, flags); - while (cnt) { - hlist_add_head(&new[--cnt]->node, &obj_pool); - debug_objects_allocated++; - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - } - raw_spin_unlock_irqrestore(&pool_lock, flags); + guard(raw_spinlock_irqsave)(&pool_lock); + if (!pool_push_batch(&pool_global, &head)) + pool_push_batch(&pool_to_free, &head); } + atomic_dec(&cpus_allocating); } /* @@ -201,72 +429,37 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) return NULL; } -/* - * Allocate a new object from the hlist - */ -static struct debug_obj *__alloc_object(struct hlist_head *list) +static void calc_usage(void) { - struct debug_obj *obj = NULL; + static DEFINE_RAW_SPINLOCK(avg_lock); + static unsigned long avg_period; + unsigned long cur, now = jiffies; - if (list->first) { - obj = hlist_entry(list->first, typeof(*obj), node); - hlist_del(&obj->node); - } + if (!time_after_eq(now, READ_ONCE(avg_period))) + return; - return obj; + if (!raw_spin_trylock(&avg_lock)) + return; + + WRITE_ONCE(avg_period, now + msecs_to_jiffies(10)); + cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX; + WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur)); + raw_spin_unlock(&avg_lock); } -static struct debug_obj * 
-alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr) +static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b, + const struct debug_obj_descr *descr) { - struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool); struct debug_obj *obj; - if (likely(obj_cache)) { - obj = __alloc_object(&percpu_pool->free_objs); - if (obj) { - percpu_pool->obj_free--; - goto init_obj; - } - } - - raw_spin_lock(&pool_lock); - obj = __alloc_object(&obj_pool); - if (obj) { - obj_pool_used++; - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - - /* - * Looking ahead, allocate one batch of debug objects and - * put them into the percpu free pool. - */ - if (likely(obj_cache)) { - int i; - - for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { - struct debug_obj *obj2; - - obj2 = __alloc_object(&obj_pool); - if (!obj2) - break; - hlist_add_head(&obj2->node, - &percpu_pool->free_objs); - percpu_pool->obj_free++; - obj_pool_used++; - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - } - } - - if (obj_pool_used > obj_pool_max_used) - obj_pool_max_used = obj_pool_used; + calc_usage(); - if (obj_pool_free < obj_pool_min_free) - obj_pool_min_free = obj_pool_free; - } - raw_spin_unlock(&pool_lock); + if (static_branch_likely(&obj_cache_enabled)) + obj = pcpu_alloc(); + else + obj = __alloc_object(&pool_boot); -init_obj: - if (obj) { + if (likely(obj)) { obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; @@ -276,142 +469,58 @@ init_obj: return obj; } -/* - * workqueue function to free objects. - * - * To reduce contention on the global pool_lock, the actual freeing of - * debug objects will be delayed if the pool_lock is busy. - */ +/* workqueue function to free objects. */ static void free_obj_work(struct work_struct *work) { - struct hlist_node *tmp; - struct debug_obj *obj; - unsigned long flags; - HLIST_HEAD(tofree); + static unsigned long last_use_avg; + unsigned long cur_used, last_used, delta; + unsigned int max_free = 0; WRITE_ONCE(obj_freeing, false); - if (!raw_spin_trylock_irqsave(&pool_lock, flags)) - return; - if (obj_pool_free >= debug_objects_pool_size) - goto free_objs; + /* Rate limit freeing based on current use average */ + cur_used = READ_ONCE(avg_usage); + last_used = last_use_avg; + last_use_avg = cur_used; - /* - * The objs on the pool list might be allocated before the work is - * run, so recheck if pool list it full or not, if not fill pool - * list from the global free list. As it is likely that a workload - * may be gearing up to use more and more objects, don't free any - * of them until the next round. - */ - while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) { - obj = hlist_entry(obj_to_free.first, typeof(*obj), node); - hlist_del(&obj->node); - hlist_add_head(&obj->node, &obj_pool); - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); - } - raw_spin_unlock_irqrestore(&pool_lock, flags); - return; + if (!pool_count(&pool_to_free)) + return; -free_objs: - /* - * Pool list is already full and there are still objs on the free - * list. Move remaining free objs to a temporary list to free the - * memory outside the pool_lock held region. 
- */ - if (obj_nr_tofree) { - hlist_move_list(&obj_to_free, &tofree); - debug_objects_freed += obj_nr_tofree; - WRITE_ONCE(obj_nr_tofree, 0); + if (cur_used <= last_used) { + delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX; + max_free = min(delta, ODEBUG_FREE_WORK_MAX); } - raw_spin_unlock_irqrestore(&pool_lock, flags); - hlist_for_each_entry_safe(obj, tmp, &tofree, node) { - hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) { + HLIST_HEAD(tofree); + + /* Acquire and drop the lock for each batch */ + scoped_guard(raw_spinlock_irqsave, &pool_lock) { + if (!pool_to_free.cnt) + return; + + /* Refill the global pool if possible */ + if (pool_move_batch(&pool_global, &pool_to_free)) { + /* Don't free as there seems to be demand */ + max_free = 0; + } else if (max_free) { + pool_pop_batch(&tofree, &pool_to_free); + max_free--; + } else { + return; + } + } + free_object_list(&tofree); } } static void __free_object(struct debug_obj *obj) { - struct debug_obj *objs[ODEBUG_BATCH_SIZE]; - struct debug_percpu_free *percpu_pool; - int lookahead_count = 0; - unsigned long flags; - bool work; - - local_irq_save(flags); - if (!obj_cache) - goto free_to_obj_pool; - - /* - * Try to free it into the percpu pool first. - */ - percpu_pool = this_cpu_ptr(&percpu_obj_pool); - if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) { - hlist_add_head(&obj->node, &percpu_pool->free_objs); - percpu_pool->obj_free++; - local_irq_restore(flags); - return; - } - - /* - * As the percpu pool is full, look ahead and pull out a batch - * of objects from the percpu pool and free them as well. - */ - for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) { - objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs); - if (!objs[lookahead_count]) - break; - percpu_pool->obj_free--; - } - -free_to_obj_pool: - raw_spin_lock(&pool_lock); - work = (obj_pool_free > debug_objects_pool_size) && obj_cache && - (obj_nr_tofree < ODEBUG_FREE_WORK_MAX); - obj_pool_used--; - - if (work) { - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); - hlist_add_head(&obj->node, &obj_to_free); - if (lookahead_count) { - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count); - obj_pool_used -= lookahead_count; - while (lookahead_count) { - hlist_add_head(&objs[--lookahead_count]->node, - &obj_to_free); - } - } - - if ((obj_pool_free > debug_objects_pool_size) && - (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) { - int i; - - /* - * Free one more batch of objects from obj_pool. 
- */ - for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { - obj = __alloc_object(&obj_pool); - hlist_add_head(&obj->node, &obj_to_free); - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); - } - } - } else { - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - hlist_add_head(&obj->node, &obj_pool); - if (lookahead_count) { - WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count); - obj_pool_used -= lookahead_count; - while (lookahead_count) { - hlist_add_head(&objs[--lookahead_count]->node, - &obj_pool); - } - } - } - raw_spin_unlock(&pool_lock); - local_irq_restore(flags); + guard(irqsave)(); + if (static_branch_likely(&obj_cache_enabled)) + pcpu_free(obj); + else + hlist_add_head(&obj->node, &pool_boot); } /* @@ -421,63 +530,52 @@ free_to_obj_pool: static void free_object(struct debug_obj *obj) { __free_object(obj); - if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { + if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } } -#ifdef CONFIG_HOTPLUG_CPU -static int object_cpu_offline(unsigned int cpu) +static void put_objects(struct hlist_head *list) { - struct debug_percpu_free *percpu_pool; struct hlist_node *tmp; struct debug_obj *obj; - unsigned long flags; - /* Remote access is safe as the CPU is dead already */ - percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu); - hlist_for_each_entry_safe(obj, tmp, &percpu_pool->free_objs, node) { + /* + * Using free_object() puts the objects into reuse or schedules + * them for freeing and it get's all the accounting correct. + */ + hlist_for_each_entry_safe(obj, tmp, list, node) { hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + free_object(obj); } +} - raw_spin_lock_irqsave(&pool_lock, flags); - obj_pool_used -= percpu_pool->obj_free; - debug_objects_freed += percpu_pool->obj_free; - raw_spin_unlock_irqrestore(&pool_lock, flags); - - percpu_pool->obj_free = 0; +#ifdef CONFIG_HOTPLUG_CPU +static int object_cpu_offline(unsigned int cpu) +{ + /* Remote access is safe as the CPU is dead already */ + struct obj_pool *pcp = per_cpu_ptr(&pool_pcpu, cpu); + put_objects(&pcp->objects); + pcp->cnt = 0; return 0; } #endif -/* - * We run out of memory. That means we probably have tons of objects - * allocated. - */ +/* Out of memory. Free all objects from hash */ static void debug_objects_oom(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *tmp; HLIST_HEAD(freelist); - struct debug_obj *obj; - unsigned long flags; - int i; pr_warn("Out of memory. ODEBUG disabled\n"); - for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { - raw_spin_lock_irqsave(&db->lock, flags); - hlist_move_list(&db->list, &freelist); - raw_spin_unlock_irqrestore(&db->lock, flags); + for (int i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { + scoped_guard(raw_spinlock_irqsave, &db->lock) + hlist_move_list(&db->list, &freelist); - /* Now free them */ - hlist_for_each_entry_safe(obj, tmp, &freelist, node) { - hlist_del(&obj->node); - free_object(obj); - } + put_objects(&freelist); } } @@ -592,12 +690,24 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket } /* Out of memory. 
Do the cleanup outside of the locked region */ - debug_objects_enabled = 0; + debug_objects_enabled = false; return NULL; } static void debug_objects_fill_pool(void) { + if (!static_branch_likely(&obj_cache_enabled)) + return; + + if (likely(!pool_should_refill(&pool_global))) + return; + + /* Try reusing objects from obj_to_free_list */ + fill_pool_from_freelist(); + + if (likely(!pool_should_refill(&pool_global))) + return; + /* * On RT enabled kernels the pool refill must happen in preemptible * context -- for !RT kernels we rely on the fact that spinlock_t and @@ -1007,7 +1117,7 @@ repeat: debug_objects_maxchecked = objs_checked; /* Schedule work to actually kmem_cache_free() objects */ - if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { + if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } @@ -1024,23 +1134,33 @@ void debug_check_no_obj_freed(const void *address, unsigned long size) static int debug_stats_show(struct seq_file *m, void *v) { - int cpu, obj_percpu_free = 0; + unsigned int cpu, pool_used, pcp_free = 0; + /* + * pool_global.stats.cur_used is the number of batches currently + * handed out to per CPU pools. Convert it to number of objects + * and subtract the number of free objects in the per CPU pools. + * As this is lockless the number is an estimate. + */ for_each_possible_cpu(cpu) - obj_percpu_free += per_cpu(percpu_obj_pool.obj_free, cpu); - - seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); - seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); - seq_printf(m, "warnings :%d\n", debug_objects_warnings); - seq_printf(m, "fixups :%d\n", debug_objects_fixups); - seq_printf(m, "pool_free :%d\n", READ_ONCE(obj_pool_free) + obj_percpu_free); - seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free); - seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); - seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free); - seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); - seq_printf(m, "on_free_list :%d\n", READ_ONCE(obj_nr_tofree)); - seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); - seq_printf(m, "objs_freed :%d\n", debug_objects_freed); + pcp_free += per_cpu(pool_pcpu.cnt, cpu); + + pool_used = READ_ONCE(pool_global.stats.cur_used); + pcp_free = min(pool_used, pcp_free); + pool_used -= pcp_free; + + seq_printf(m, "max_chain : %d\n", debug_objects_maxchain); + seq_printf(m, "max_checked : %d\n", debug_objects_maxchecked); + seq_printf(m, "warnings : %d\n", debug_objects_warnings); + seq_printf(m, "fixups : %d\n", debug_objects_fixups); + seq_printf(m, "pool_free : %u\n", pool_count(&pool_global) + pcp_free); + seq_printf(m, "pool_pcp_free : %u\n", pcp_free); + seq_printf(m, "pool_min_free : %u\n", data_race(pool_global.stats.min_fill)); + seq_printf(m, "pool_used : %u\n", pool_used); + seq_printf(m, "pool_max_used : %u\n", data_race(pool_global.stats.max_used)); + seq_printf(m, "on_free_list : %u\n", pool_count(&pool_to_free)); + seq_printf(m, "objs_allocated: %d\n", debug_objects_allocated); + seq_printf(m, "objs_freed : %d\n", debug_objects_freed); return 0; } DEFINE_SHOW_ATTRIBUTE(debug_stats); @@ -1194,7 +1314,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) out: raw_spin_unlock_irqrestore(&db->lock, flags); if (res) - debug_objects_enabled = 0; + debug_objects_enabled = false; return res; } @@ -1209,7 +1329,7 @@ static __initconst const struct debug_obj_descr descr_type_test = 
{ static __initdata struct self_test obj = { .static_init = 0 }; -static void __init debug_objects_selftest(void) +static bool __init debug_objects_selftest(void) { int fixups, oldfixups, warnings, oldwarnings; unsigned long flags; @@ -1278,9 +1398,10 @@ out: descr_test = NULL; local_irq_restore(flags); + return debug_objects_enabled; } #else -static inline void debug_objects_selftest(void) { } +static inline bool debug_objects_selftest(void) { return true; } #endif /* @@ -1295,65 +1416,54 @@ void __init debug_objects_early_init(void) for (i = 0; i < ODEBUG_HASH_SIZE; i++) raw_spin_lock_init(&obj_hash[i].lock); + /* Keep early boot simple and add everything to the boot list */ for (i = 0; i < ODEBUG_POOL_SIZE; i++) - hlist_add_head(&obj_static_pool[i].node, &obj_pool); + hlist_add_head(&obj_static_pool[i].node, &pool_boot); } /* - * Convert the statically allocated objects to dynamic ones: + * Convert the statically allocated objects to dynamic ones. + * debug_objects_mem_init() is called early so only one CPU is up and + * interrupts are disabled, which means it is safe to replace the active + * object references. */ -static int __init debug_objects_replace_static_objects(void) +static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache) { struct debug_bucket *db = obj_hash; struct hlist_node *tmp; - struct debug_obj *obj, *new; + struct debug_obj *obj; HLIST_HEAD(objects); - int i, cnt = 0; + int i; - for (i = 0; i < ODEBUG_POOL_SIZE; i++) { - obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); - if (!obj) + for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) { + if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL)) goto free; - hlist_add_head(&obj->node, &objects); + pool_push_batch(&pool_global, &objects); } - debug_objects_allocated += i; - - /* - * debug_objects_mem_init() is now called early that only one CPU is up - * and interrupts have been disabled, so it is safe to replace the - * active object references. - */ - - /* Remove the statically allocated objects from the pool */ - hlist_for_each_entry_safe(obj, tmp, &obj_pool, node) - hlist_del(&obj->node); - /* Move the allocated objects to the pool */ - hlist_move_list(&objects, &obj_pool); + /* Disconnect the boot pool. */ + pool_boot.first = NULL; /* Replace the active object references */ for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { hlist_move_list(&db->list, &objects); hlist_for_each_entry(obj, &objects, node) { - new = hlist_entry(obj_pool.first, typeof(*obj), node); - hlist_del(&new->node); + struct debug_obj *new = pcpu_alloc(); + /* copy object data */ *new = *obj; hlist_add_head(&new->node, &db->list); - cnt++; } } - - pr_debug("%d of %d active objects replaced\n", - cnt, obj_pool_used); - return 0; + return true; free: - hlist_for_each_entry_safe(obj, tmp, &objects, node) { + /* Can't use free_object_list() as the cache is not populated yet */ + hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) { hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + kmem_cache_free(cache, obj); } - return -ENOMEM; + return false; } /* @@ -1364,43 +1474,40 @@ free: */ void __init debug_objects_mem_init(void) { - int cpu, extras; + struct kmem_cache *cache; + int extras; if (!debug_objects_enabled) return; - /* - * Initialize the percpu object pools - * - * Initialization is not strictly necessary, but was done for - * completeness. 
- */ - for_each_possible_cpu(cpu) - INIT_HLIST_HEAD(&per_cpu(percpu_obj_pool.free_objs, cpu)); + if (!debug_objects_selftest()) + return; - obj_cache = kmem_cache_create("debug_objects_cache", - sizeof (struct debug_obj), 0, - SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, - NULL); + cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0, + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, NULL); - if (!obj_cache || debug_objects_replace_static_objects()) { - debug_objects_enabled = 0; - kmem_cache_destroy(obj_cache); - pr_warn("out of memory.\n"); + if (!cache || !debug_objects_replace_static_objects(cache)) { + debug_objects_enabled = false; + pr_warn("Out of memory.\n"); return; - } else - debug_objects_selftest(); - -#ifdef CONFIG_HOTPLUG_CPU - cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, - object_cpu_offline); -#endif + } /* - * Increase the thresholds for allocating and freeing objects - * according to the number of possible CPUs available in the system. + * Adjust the thresholds for allocating and freeing objects + * according to the number of possible CPUs available in the + * system. */ extras = num_possible_cpus() * ODEBUG_BATCH_SIZE; - debug_objects_pool_size += extras; - debug_objects_pool_min_level += extras; + pool_global.max_cnt += extras; + pool_global.min_cnt += extras; + + /* Everything worked. Expose the cache */ + obj_cache = cache; + static_branch_enable(&obj_cache_enabled); + +#ifdef CONFIG_HOTPLUG_CPU + cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, + object_cpu_offline); +#endif + return; } diff --git a/lib/dim/dim.c b/lib/dim/dim.c index 83b65ac74d73..97c3d084ebf0 100644 --- a/lib/dim/dim.c +++ b/lib/dim/dim.c @@ -54,7 +54,8 @@ void dim_park_tired(struct dim *dim) } EXPORT_SYMBOL(dim_park_tired); -bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(const struct dim_sample *start, + const struct dim_sample *end, struct dim_stats *curr_stats) { /* u32 holds up to 71 minutes, should be enough */ diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index d7e7028e9b19..d6aa09a979b3 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -347,7 +347,7 @@ static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) return dim->profile_ix != prev_ix; } -void net_dim(struct dim *dim, struct dim_sample end_sample) +void net_dim(struct dim *dim, const struct dim_sample *end_sample) { struct dim_stats curr_stats; u16 nevents; @@ -355,11 +355,11 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) switch (dim->state) { case DIM_MEASURE_IN_PROGRESS: nevents = BIT_GAP(BITS_PER_TYPE(u16), - end_sample.event_ctr, + end_sample->event_ctr, dim->start_sample.event_ctr); if (nevents < DIM_NEVENTS) break; - if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats)) + if (!dim_calc_stats(&dim->start_sample, end_sample, &curr_stats)) break; if (net_dim_decision(&curr_stats, dim)) { dim->state = DIM_APPLY_NEW_PROFILE; @@ -368,8 +368,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) } fallthrough; case DIM_START_MEASURE: - dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, - end_sample.byte_ctr, &dim->start_sample); + dim_update_sample(end_sample->event_ctr, end_sample->pkt_ctr, + end_sample->byte_ctr, &dim->start_sample); dim->state = DIM_MEASURE_IN_PROGRESS; break; case DIM_APPLY_NEW_PROFILE: diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index e49deddd3de9..c1b7638a594a 100644 --- a/lib/dynamic_queue_limits.c +++ 
b/lib/dynamic_queue_limits.c @@ -179,7 +179,7 @@ void dql_completed(struct dql *dql, unsigned int count) dql->adj_limit = limit + completed; dql->prev_ovlimit = ovlimit; - dql->prev_last_obj_cnt = dql->last_obj_cnt; + dql->prev_last_obj_cnt = READ_ONCE(dql->last_obj_cnt); dql->num_completed = completed; dql->prev_num_queued = num_queued; diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index f37f4d44faa9..837064b83a6c 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -2,7 +2,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/interval_tree.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <asm/timex.h> diff --git a/lib/iomem_copy.c b/lib/iomem_copy.c new file mode 100644 index 000000000000..dec7eaea60e0 --- /dev/null +++ b/lib/iomem_copy.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Kalray, Inc. All Rights Reserved. + */ + +#include <linux/align.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/types.h> +#include <linux/unaligned.h> + +#ifndef memset_io +/** + * memset_io() - Set a range of I/O memory to a constant value + * @addr: The beginning of the I/O-memory range to set + * @val: The value to set the memory to + * @count: The number of bytes to set + * + * Set a range of I/O memory to a given value. + */ +void memset_io(volatile void __iomem *addr, int val, size_t count) +{ + long qc = (u8)val; + + qc *= ~0UL / 0xff; + + while (count && !IS_ALIGNED((long)addr, sizeof(long))) { + __raw_writeb(val, addr); + addr++; + count--; + } + + while (count >= sizeof(long)) { +#ifdef CONFIG_64BIT + __raw_writeq(qc, addr); +#else + __raw_writel(qc, addr); +#endif + + addr += sizeof(long); + count -= sizeof(long); + } + + while (count) { + __raw_writeb(val, addr); + addr++; + count--; + } +} +EXPORT_SYMBOL(memset_io); +#endif + +#ifndef memcpy_fromio +/** + * memcpy_fromio() - Copy a block of data from I/O memory + * @dst: The (RAM) destination for the copy + * @src: The (I/O memory) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data from I/O memory. + */ +void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) +{ + while (count && !IS_ALIGNED((long)src, sizeof(long))) { + *(u8 *)dst = __raw_readb(src); + src++; + dst++; + count--; + } + + while (count >= sizeof(long)) { +#ifdef CONFIG_64BIT + long val = __raw_readq(src); +#else + long val = __raw_readl(src); +#endif + put_unaligned(val, (long *)dst); + + + src += sizeof(long); + dst += sizeof(long); + count -= sizeof(long); + } + + while (count) { + *(u8 *)dst = __raw_readb(src); + src++; + dst++; + count--; + } +} +EXPORT_SYMBOL(memcpy_fromio); +#endif + +#ifndef memcpy_toio +/** + * memcpy_toio() -Copy a block of data into I/O memory + * @dst: The (I/O memory) destination for the copy + * @src: The (RAM) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data to I/O memory. 
+ */ +void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) +{ + while (count && !IS_ALIGNED((long)dst, sizeof(long))) { + __raw_writeb(*(u8 *)src, dst); + src++; + dst++; + count--; + } + + while (count >= sizeof(long)) { + long val = get_unaligned((long *)src); +#ifdef CONFIG_64BIT + __raw_writeq(val, dst); +#else + __raw_writel(val, dst); +#endif + + src += sizeof(long); + dst += sizeof(long); + count -= sizeof(long); + } + + while (count) { + __raw_writeb(*(u8 *)src, dst); + src++; + dst++; + count--; + } +} +EXPORT_SYMBOL(memcpy_toio); +#endif + + diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 908e75a28d90..9ec806f989f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1682,8 +1682,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i, } /* - * Extract a list of contiguous pages from an ITER_BVEC iterator. This does - * not get references on the pages, nor does it get a pin on them. + * Extract a list of virtually contiguous pages from an ITER_BVEC iterator. + * This does not get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, @@ -1691,35 +1691,59 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, iov_iter_extraction_t extraction_flags, size_t *offset0) { - struct page **p, *page; - size_t skip = i->iov_offset, offset, size; - int k; + size_t skip = i->iov_offset, size = 0; + struct bvec_iter bi; + int k = 0; - for (;;) { - if (i->nr_segs == 0) - return 0; - size = min(maxsize, i->bvec->bv_len - skip); - if (size) - break; + if (i->nr_segs == 0) + return 0; + + if (i->iov_offset == i->bvec->bv_len) { i->iov_offset = 0; i->nr_segs--; i->bvec++; skip = 0; } + bi.bi_idx = 0; + bi.bi_size = maxsize; + bi.bi_bvec_done = skip; + + maxpages = want_pages_array(pages, maxsize, skip, maxpages); + + while (bi.bi_size && bi.bi_idx < i->nr_segs) { + struct bio_vec bv = bvec_iter_bvec(i->bvec, bi); + + /* + * The iov_iter_extract_pages interface only allows an offset + * into the first page. Break out of the loop if we see an + * offset into subsequent pages, the caller will have to call + * iov_iter_extract_pages again for the reminder. + */ + if (k) { + if (bv.bv_offset) + break; + } else { + *offset0 = bv.bv_offset; + } - skip += i->bvec->bv_offset; - page = i->bvec->bv_page + skip / PAGE_SIZE; - offset = skip % PAGE_SIZE; - *offset0 = offset; + (*pages)[k++] = bv.bv_page; + size += bv.bv_len; - maxpages = want_pages_array(pages, size, offset, maxpages); - if (!maxpages) - return -ENOMEM; - p = *pages; - for (k = 0; k < maxpages; k++) - p[k] = page + k; + if (k >= maxpages) + break; + + /* + * We are done when the end of the bvec doesn't align to a page + * boundary as that would create a hole in the returned space. + * The caller will handle this with another call to + * iov_iter_extract_pages. + */ + if (bv.bv_offset + bv.bv_len != PAGE_SIZE) + break; + + bvec_iter_advance_single(i->bvec, &bi, bv.bv_len); + } - size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); iov_iter_advance(i, size); return size; } diff --git a/lib/kunit/debugfs.c b/lib/kunit/debugfs.c index d548750a325a..af71911f4a07 100644 --- a/lib/kunit/debugfs.c +++ b/lib/kunit/debugfs.c @@ -181,7 +181,7 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) * successfully. 
*/ stream = alloc_string_stream(GFP_KERNEL); - if (IS_ERR_OR_NULL(stream)) + if (IS_ERR(stream)) return; string_stream_set_append_newlines(stream, true); @@ -189,7 +189,7 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) kunit_suite_for_each_test_case(suite, test_case) { stream = alloc_string_stream(GFP_KERNEL); - if (IS_ERR_OR_NULL(stream)) + if (IS_ERR(stream)) goto err; string_stream_set_append_newlines(stream, true); @@ -212,8 +212,11 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) err: string_stream_destroy(suite->log); - kunit_suite_for_each_test_case(suite, test_case) + suite->log = NULL; + kunit_suite_for_each_test_case(suite, test_case) { string_stream_destroy(test_case->log); + test_case->log = NULL; + } } void kunit_debugfs_destroy_suite(struct kunit_suite *suite) diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 37e02be1e710..d9c781c859fd 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -805,6 +805,8 @@ static void kunit_device_driver_test(struct kunit *test) struct device *test_device; struct driver_test_state *test_state = kunit_kzalloc(test, sizeof(*test_state), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_state); + test->priv = test_state; test_driver = kunit_driver_create(test, "my_driver"); diff --git a/lib/kunit/string-stream-test.c b/lib/kunit/string-stream-test.c index 7511442ea98f..7734e33156f9 100644 --- a/lib/kunit/string-stream-test.c +++ b/lib/kunit/string-stream-test.c @@ -9,6 +9,7 @@ #include <kunit/static_stub.h> #include <kunit/test.h> #include <linux/ktime.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/timekeeping.h> diff --git a/lib/list-test.c b/lib/list-test.c index e207c4c98d70..9135cdc1bb39 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -412,6 +412,8 @@ static void list_test_list_cut_position(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_cut_before(struct kunit *test) @@ -440,6 +442,8 @@ static void list_test_list_cut_before(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_splice(struct kunit *test) diff --git a/lib/list_sort.c b/lib/list_sort.c index 0fb59e92ca2d..8d3f623536fe 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/bug.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/string.h> #include <linux/list_sort.h> #include <linux/list.h> diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 6f6a5fc85b42..6e0c019f71b6 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -2710,6 +2710,43 @@ static void local_lock_3B(void) } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem) +{ + return rwsem->dep_map.name; +} +#else +static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem) +{ + return NULL; +} +#endif + +static void test_lockdep_set_subclass_name(void) +{ + const char *name_before = rw_semaphore_lockdep_name(&rwsem_X1); + const char *name_after; + + lockdep_set_subclass(&rwsem_X1, 1); + name_after = rw_semaphore_lockdep_name(&rwsem_X1); + DEBUG_LOCKS_WARN_ON(name_before != name_after); +} + +/* + * lockdep_set_subclass() should reuse the existing lock class name instead + * of creating a new one. 
+ */ +static void lockdep_set_subclass_name_test(void) +{ + printk(" --------------------------------------------------------------------------\n"); + printk(" | lockdep_set_subclass() name test|\n"); + printk(" -----------------------------------\n"); + + print_testname("compare name before and after"); + dotest(test_lockdep_set_subclass_name, SUCCESS, LOCKTYPE_RWSEM); + pr_cont("\n"); +} + static void local_lock_tests(void) { printk(" --------------------------------------------------------------------------\n"); @@ -2920,6 +2957,8 @@ void locking_selftest(void) dotest(hardirq_deadlock_softirq_not_deadlock, FAILURE, LOCKTYPE_SPECIAL); pr_cont("\n"); + lockdep_set_subclass_name_test(); + if (unexpected_testcase_failures) { printk("-----------------------------------------------------------------\n"); debug_locks = 0; diff --git a/lib/logic_pio.c b/lib/logic_pio.c index 2ea564a40064..e29496a38d06 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -122,7 +122,7 @@ void logic_pio_unregister_range(struct logic_pio_hwaddr *range) * * Traverse the io_range_list to find the registered node for @fwnode. */ -struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) +struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode) { struct logic_pio_hwaddr *range, *found_range = NULL; @@ -186,7 +186,7 @@ resource_size_t logic_pio_to_hwaddr(unsigned long pio) * * Returns Logical PIO value if successful, ~0UL otherwise */ -unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, +unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode, resource_size_t addr, resource_size_t size) { struct logic_pio_hwaddr *range; diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 3619301dda2e..d0ae808f3a14 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -64,6 +64,21 @@ #define CREATE_TRACE_POINTS #include <trace/events/maple_tree.h> +/* + * Kernel pointer hashing renders much of the maple tree dump useless as tagged + * pointers get hashed to arbitrary values. + * + * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is + * permissible to bypass this. Otherwise remain cautious and retain the hashing. + * + * Userland doesn't know about %px so also use %p there. 
+ */ +#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE) +#define PTR_FMT "%px" +#else +#define PTR_FMT "%p" +#endif + #define MA_ROOT_PARENT 1 /* @@ -120,7 +135,6 @@ static const unsigned char mt_min_slots[] = { #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) struct maple_big_node { - struct maple_pnode *parent; unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; union { struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; @@ -1193,19 +1207,17 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) reuse->request_count = 0; reuse->node_count = 0; - if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { - head->slot[head->node_count++] = reuse; - head->total++; - goto done; - } - - reuse->total = 1; - if ((head) && !((unsigned long)head & 0x1)) { + if (count) { + if (head->node_count < MAPLE_ALLOC_SLOTS) { + head->slot[head->node_count++] = reuse; + head->total++; + goto done; + } reuse->slot[0] = head; reuse->node_count = 1; - reuse->total += head->total; } + reuse->total = count + 1; mas->alloc = reuse; done: if (requested > 1) @@ -1251,11 +1263,11 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) mas->alloc = node; node->total = ++allocated; + node->request_count = 0; requested--; } node = mas->alloc; - node->request_count = 0; while (requested) { max_req = MAPLE_ALLOC_SLOTS - node->node_count; slots = (void **)&node->slot[node->node_count]; @@ -1271,7 +1283,10 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) node->node_count += count; allocated += count; - node = node->slot[0]; + /* find a non-full node*/ + do { + node = node->slot[0]; + } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS)); requested -= count; } mas->alloc->total = allocated; @@ -1280,10 +1295,9 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) nomem_bulk: /* Clean up potential freed allocations on bulk failure */ memset(slots, 0, max_req * sizeof(unsigned long)); + mas->alloc->total = allocated; nomem_one: mas_set_alloc_req(mas, requested); - if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) - mas->alloc->total = allocated; mas_set_err(mas, -ENOMEM); } @@ -1943,14 +1957,13 @@ static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, for (; i < piv_end; i++, j++) { b_node->pivot[j] = pivots[i]; if (unlikely(!b_node->pivot[j])) - break; + goto complete; if (unlikely(mas->max == b_node->pivot[j])) goto complete; } - if (likely(i <= mas_end)) - b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); + b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); complete: b_node->b_end = ++j; @@ -2139,9 +2152,7 @@ static inline bool mas_prev_sibling(struct ma_state *mas) { unsigned int p_slot = mte_parent_slot(mas->node); - if (mte_is_root(mas->node)) - return false; - + /* For root node, p_slot is set to 0 by mte_parent_slot(). 
*/ if (!p_slot) return false; @@ -3159,10 +3170,7 @@ static inline void mast_fill_bnode(struct maple_subtree_state *mast, bool cp = true; unsigned char split; - memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); - memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); - memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); - mast->bn->b_end = 0; + memset(mast->bn, 0, sizeof(struct maple_big_node)); if (mte_is_root(mas->node)) { cp = false; @@ -3400,7 +3408,7 @@ static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, * @mas: The maple state * @entry: The entry to store into the tree */ -static inline int mas_root_expand(struct ma_state *mas, void *entry) +static inline void mas_root_expand(struct ma_state *mas, void *entry) { void *contents = mas_root_locked(mas); enum maple_type type = maple_leaf_64; @@ -3436,12 +3444,23 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry) ma_set_meta(node, maple_leaf_64, 0, slot); /* swap the new root into the tree */ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); - return slot; + return; } +/* + * mas_store_root() - Storing value into root. + * @mas: The maple state + * @entry: The entry to store. + * + * There is no root node now and we are storing a value into the root - this + * function either assigns the pointer or expands into a node. + */ static inline void mas_store_root(struct ma_state *mas, void *entry) { - if (likely((mas->last != 0) || (mas->index != 0))) + if (!entry) { + if (!mas->index) + rcu_assign_pointer(mas->tree->ma_root, NULL); + } else if (likely((mas->last != 0) || (mas->index != 0))) mas_root_expand(mas, entry); else if (((unsigned long) (entry) & 3) == 2) mas_root_expand(mas, entry); @@ -3662,7 +3681,9 @@ static inline void mas_new_root(struct ma_state *mas, void *entry) void __rcu **slots; unsigned long *pivots; - if (!entry && !mas->index && mas->last == ULONG_MAX) { + WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); + + if (!entry) { mas->depth = 0; mas_set_height(mas); rcu_assign_pointer(mas->tree->ma_root, entry); @@ -3889,7 +3910,8 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) wr_mas->pivots[offset] = mas->index - 1; mas->offset++; /* Keep mas accurate. */ } - } else if (!mt_in_rcu(mas->tree)) { + } else { + WARN_ON_ONCE(mt_in_rcu(mas->tree)); /* * Expand the range, only partially overwriting the previous and * next ranges @@ -3899,8 +3921,6 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) wr_mas->pivots[offset] = mas->index - 1; wr_mas->pivots[offset + 1] = mas->last; mas->offset++; /* Keep mas accurate. */ - } else { - return; } trace_ma_write(__func__, mas, 0, wr_mas->entry); @@ -4181,75 +4201,53 @@ static inline int mas_prealloc_calc(struct ma_state *mas, void *entry) } /* - * mas_wr_store_type() - Set the store type for a given + * mas_wr_store_type() - Determine the store type for a given * store operation. 
* @wr_mas: The maple write state + * + * Return: the type of store needed for the operation */ -static inline void mas_wr_store_type(struct ma_wr_state *wr_mas) +static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char new_end; - if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) { - mas->store_type = wr_store_root; - return; - } + if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) + return wr_store_root; - if (unlikely(!mas_wr_walk(wr_mas))) { - mas->store_type = wr_spanning_store; - return; - } + if (unlikely(!mas_wr_walk(wr_mas))) + return wr_spanning_store; /* At this point, we are at the leaf node that needs to be altered. */ mas_wr_end_piv(wr_mas); if (!wr_mas->entry) mas_wr_extend_null(wr_mas); - new_end = mas_wr_new_end(wr_mas); - if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) { - mas->store_type = wr_exact_fit; - return; - } + if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) + return wr_exact_fit; - if (unlikely(!mas->index && mas->last == ULONG_MAX)) { - mas->store_type = wr_new_root; - return; - } + if (unlikely(!mas->index && mas->last == ULONG_MAX)) + return wr_new_root; + new_end = mas_wr_new_end(wr_mas); /* Potential spanning rebalance collapsing a node */ if (new_end < mt_min_slots[wr_mas->type]) { - if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) { - mas->store_type = wr_rebalance; - return; - } - mas->store_type = wr_node_store; - return; + if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) + return wr_rebalance; + return wr_node_store; } - if (new_end >= mt_slots[wr_mas->type]) { - mas->store_type = wr_split_store; - return; - } + if (new_end >= mt_slots[wr_mas->type]) + return wr_split_store; - if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) { - mas->store_type = wr_append; - return; - } + if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) + return wr_append; if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || - (wr_mas->offset_end - mas->offset == 1))) { - mas->store_type = wr_slot_store; - return; - } - - if (mte_is_root(mas->node) || (new_end >= mt_min_slots[wr_mas->type]) || - (mas->mas_flags & MA_STATE_BULK)) { - mas->store_type = wr_node_store; - return; - } + (wr_mas->offset_end - mas->offset == 1))) + return wr_slot_store; - mas->store_type = wr_invalid; - MAS_WARN_ON(mas, 1); + return wr_node_store; } /** @@ -4264,7 +4262,7 @@ static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) int request; mas_wr_prealloc_setup(wr_mas); - mas_wr_store_type(wr_mas); + mas->store_type = mas_wr_store_type(wr_mas); request = mas_prealloc_calc(mas, entry); if (!request) return; @@ -5419,7 +5417,8 @@ void *mas_store(struct ma_state *mas, void *entry) trace_ma_write(__func__, mas, 0, entry); #ifdef CONFIG_DEBUG_MAPLE_TREE if (MAS_WARN_ON(mas, mas->index > mas->last)) - pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); + pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, + entry); if (mas->index > mas->last) { mas_set_err(mas, -EINVAL); @@ -5435,7 +5434,7 @@ void *mas_store(struct ma_state *mas, void *entry) * overwrite multiple entries within a self-balancing B-Tree. 
*/ mas_wr_prealloc_setup(&wr_mas); - mas_wr_store_type(&wr_mas); + mas->store_type = mas_wr_store_type(&wr_mas); if (mas->mas_flags & MA_STATE_PREALLOC) { mas_wr_store_entry(&wr_mas); MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); @@ -5538,7 +5537,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) int request; mas_wr_prealloc_setup(&wr_mas); - mas_wr_store_type(&wr_mas); + mas->store_type = mas_wr_store_type(&wr_mas); request = mas_prealloc_calc(mas, entry); if (!request) return ret; @@ -7124,14 +7123,14 @@ static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, mt_dump_range(min, max, depth, format); if (xa_is_value(entry)) - pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), - xa_to_value(entry), entry); + pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), + xa_to_value(entry), entry); else if (xa_is_zero(entry)) pr_cont("zero (%ld)\n", xa_to_internal(entry)); else if (mt_is_reserved(entry)) - pr_cont("UNKNOWN ENTRY (%p)\n", entry); + pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); else - pr_cont("%p\n", entry); + pr_cont(PTR_FMT "\n", entry); } static void mt_dump_range64(const struct maple_tree *mt, void *entry, @@ -7147,13 +7146,13 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { switch(format) { case mt_dump_hex: - pr_cont("%p %lX ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: - pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); } } - pr_cont("%p\n", node->slot[i]); + pr_cont(PTR_FMT "\n", node->slot[i]); for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { unsigned long last = max; @@ -7175,11 +7174,11 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, if (last > max) { switch(format) { case mt_dump_hex: - pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); break; case mt_dump_dec: - pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } } @@ -7209,13 +7208,13 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { switch (format) { case mt_dump_hex: - pr_cont("%p %lX ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: - pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); } } - pr_cont("%p\n", node->slot[i]); + pr_cont(PTR_FMT "\n", node->slot[i]); for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { unsigned long last = max; @@ -7234,11 +7233,11 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, if (last > max) { switch(format) { case mt_dump_hex: - pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); break; case mt_dump_dec: - pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } } @@ -7256,8 +7255,8 @@ static void mt_dump_node(const struct maple_tree *mt, void *entry, mt_dump_range(min, max, depth, format); - pr_cont("node %p depth %d type %d parent %p", node, depth, type, - node ? 
node->parent : NULL); + pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, + depth, type, node ? node->parent : NULL); switch (type) { case maple_dense: pr_cont("\n"); @@ -7285,12 +7284,14 @@ void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) { void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); - pr_info("maple_tree(%p) flags %X, height %u root %p\n", + pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", mt, mt->ma_flags, mt_height(mt), entry); - if (!xa_is_node(entry)) - mt_dump_entry(entry, 0, 0, 0, format); - else if (entry) + if (xa_is_node(entry)) mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); + else if (entry) + mt_dump_entry(entry, 0, 0, 0, format); + else + pr_info("(empty)\n"); } EXPORT_SYMBOL_GPL(mt_dump); @@ -7337,7 +7338,7 @@ static void mas_validate_gaps(struct ma_state *mas) MT_BUG_ON(mas->tree, !entry); if (gap > p_end - p_start + 1) { - pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", + pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", mas_mn(mas), i, gap, p_end, p_start, p_end - p_start + 1); MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); @@ -7357,19 +7358,19 @@ counted: MT_BUG_ON(mas->tree, !gaps); offset = ma_meta_gap(node); if (offset > i) { - pr_err("gap offset %p[%u] is invalid\n", node, offset); + pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); MT_BUG_ON(mas->tree, 1); } if (gaps[offset] != max_gap) { - pr_err("gap %p[%u] is not the largest gap %lu\n", + pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", node, offset, max_gap); MT_BUG_ON(mas->tree, 1); } for (i++ ; i < mt_slot_count(mte); i++) { if (gaps[i] != 0) { - pr_err("gap %p[%u] beyond node limit != 0\n", + pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", node, i); MT_BUG_ON(mas->tree, 1); } @@ -7383,7 +7384,7 @@ counted: p_mn = mte_parent(mte); MT_BUG_ON(mas->tree, max_gap > mas->max); if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { - pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); + pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); mt_dump(mas->tree, mt_dump_hex); MT_BUG_ON(mas->tree, 1); } @@ -7413,11 +7414,11 @@ static void mas_validate_parent_slot(struct ma_state *mas) node = mas_slot(mas, slots, i); if (i == p_slot) { if (node != mas->node) - pr_err("parent %p[%u] does not have %p\n", + pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", parent, i, mas_mn(mas)); MT_BUG_ON(mas->tree, node != mas->node); } else if (node == mas->node) { - pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", + pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", mas_mn(mas), parent, i, p_slot); MT_BUG_ON(mas->tree, node == mas->node); } @@ -7439,20 +7440,20 @@ static void mas_validate_child_slot(struct ma_state *mas) child = mas_slot(mas, slots, i); if (!child) { - pr_err("Non-leaf node lacks child at %p[%u]\n", + pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", mas_mn(mas), i); MT_BUG_ON(mas->tree, 1); } if (mte_parent_slot(child) != i) { - pr_err("Slot error at %p[%u]: child %p has pslot %u\n", + pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", mas_mn(mas), i, mte_to_node(child), mte_parent_slot(child)); MT_BUG_ON(mas->tree, 1); } if (mte_parent(child) != mte_to_node(mas->node)) { - pr_err("child %p has parent %p not %p\n", + pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", mte_to_node(child), mte_parent(child), mte_to_node(mas->node)); MT_BUG_ON(mas->tree, 1); @@ -7482,24 +7483,24 @@ 
static void mas_validate_limits(struct ma_state *mas) piv = mas_safe_pivot(mas, pivots, i, type); if (!piv && (i != 0)) { - pr_err("Missing node limit pivot at %p[%u]", + pr_err("Missing node limit pivot at " PTR_FMT "[%u]", mas_mn(mas), i); MAS_WARN_ON(mas, 1); } if (prev_piv > piv) { - pr_err("%p[%u] piv %lu < prev_piv %lu\n", + pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", mas_mn(mas), i, piv, prev_piv); MAS_WARN_ON(mas, piv < prev_piv); } if (piv < mas->min) { - pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, + pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, piv, mas->min); MAS_WARN_ON(mas, piv < mas->min); } if (piv > mas->max) { - pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, + pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, piv, mas->max); MAS_WARN_ON(mas, piv > mas->max); } @@ -7509,7 +7510,7 @@ static void mas_validate_limits(struct ma_state *mas) } if (mas_data_end(mas) != i) { - pr_err("node%p: data_end %u != the last slot offset %u\n", + pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", mas_mn(mas), mas_data_end(mas), i); MT_BUG_ON(mas->tree, 1); } @@ -7518,8 +7519,8 @@ static void mas_validate_limits(struct ma_state *mas) void *entry = mas_slot(mas, slots, i); if (entry && (i != mt_slots[type] - 1)) { - pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), - i, entry); + pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", + mas_mn(mas), i, entry); MT_BUG_ON(mas->tree, entry != NULL); } @@ -7529,7 +7530,7 @@ static void mas_validate_limits(struct ma_state *mas) if (!piv) continue; - pr_err("%p[%u] should not have piv %lu\n", + pr_err(PTR_FMT "[%u] should not have piv %lu\n", mas_mn(mas), i, piv); MAS_WARN_ON(mas, i < mt_pivots[type] - 1); } @@ -7554,7 +7555,7 @@ static void mt_validate_nulls(struct maple_tree *mt) do { entry = mas_slot(&mas, slots, offset); if (!last && !entry) { - pr_err("Sequential nulls end at %p[%u]\n", + pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", mas_mn(&mas), offset); } MT_BUG_ON(mt, !last && !entry); @@ -7596,7 +7597,8 @@ void mt_validate(struct maple_tree *mt) end = mas_data_end(&mas); if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && (mas.max != ULONG_MAX))) { - pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); + pr_err("Invalid size %u of " PTR_FMT "\n", + end, mas_mn(&mas)); } mas_validate_parent_slot(&mas); @@ -7612,7 +7614,8 @@ EXPORT_SYMBOL_GPL(mt_validate); void mas_dump(const struct ma_state *mas) { - pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); + pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", + mas->tree, mas->node); switch (mas->status) { case ma_active: pr_err("(ma_active)"); @@ -7676,7 +7679,7 @@ void mas_dump(const struct ma_state *mas) pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, mas->index, mas->last); - pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", + pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n", mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); if (mas->index > mas->last) pr_err("Check index & last\n"); @@ -7685,7 +7688,7 @@ EXPORT_SYMBOL_GPL(mas_dump); void mas_wr_dump(const struct ma_wr_state *wr_mas) { - pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", + pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", wr_mas->node, wr_mas->r_min, wr_mas->r_max); pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, diff --git a/lib/math/test_div64.c b/lib/math/test_div64.c index c15edd688dd2..3cd699b654d9 100644 --- a/lib/math/test_div64.c +++ 
b/lib/math/test_div64.c @@ -26,6 +26,9 @@ static const u64 test_div64_dividends[] = { 0x0072db27380dd689, 0x0842f488162e2284, 0xf66745411d8ab063, + 0xfffffffffffffffb, + 0xfffffffffffffffc, + 0xffffffffffffffff, }; #define SIZE_DIV64_DIVIDENDS ARRAY_SIZE(test_div64_dividends) @@ -37,7 +40,10 @@ static const u64 test_div64_dividends[] = { #define TEST_DIV64_DIVISOR_5 0x0008a880 #define TEST_DIV64_DIVISOR_6 0x003fd3ae #define TEST_DIV64_DIVISOR_7 0x0b658fac -#define TEST_DIV64_DIVISOR_8 0xdc08b349 +#define TEST_DIV64_DIVISOR_8 0x80000001 +#define TEST_DIV64_DIVISOR_9 0xdc08b349 +#define TEST_DIV64_DIVISOR_A 0xfffffffe +#define TEST_DIV64_DIVISOR_B 0xffffffff static const u32 test_div64_divisors[] = { TEST_DIV64_DIVISOR_0, @@ -49,13 +55,16 @@ static const u32 test_div64_divisors[] = { TEST_DIV64_DIVISOR_6, TEST_DIV64_DIVISOR_7, TEST_DIV64_DIVISOR_8, + TEST_DIV64_DIVISOR_9, + TEST_DIV64_DIVISOR_A, + TEST_DIV64_DIVISOR_B, }; #define SIZE_DIV64_DIVISORS ARRAY_SIZE(test_div64_divisors) static const struct { u64 quotient; u32 remainder; -} test_div64_results[SIZE_DIV64_DIVISORS][SIZE_DIV64_DIVIDENDS] = { +} test_div64_results[SIZE_DIV64_DIVIDENDS][SIZE_DIV64_DIVISORS] = { { { 0x0000000013045e47, 0x00000001 }, { 0x000000000161596c, 0x00000030 }, @@ -65,6 +74,9 @@ static const struct { { 0x00000000000013c4, 0x0004ce80 }, { 0x00000000000002ae, 0x001e143c }, { 0x000000000000000f, 0x0033e56c }, + { 0x0000000000000001, 0x2b27507f }, + { 0x0000000000000000, 0xab275080 }, + { 0x0000000000000000, 0xab275080 }, { 0x0000000000000000, 0xab275080 }, }, { { 0x00000001c45c02d1, 0x00000000 }, @@ -75,7 +87,10 @@ static const struct { { 0x000000000001d637, 0x0004e5d9 }, { 0x0000000000003fc9, 0x000713bb }, { 0x0000000000000165, 0x029abe7d }, + { 0x000000000000001f, 0x673c193a }, { 0x0000000000000012, 0x6e9f7e37 }, + { 0x000000000000000f, 0xe73c1977 }, + { 0x000000000000000f, 0xe73c1968 }, }, { { 0x000000197a3a0cf7, 0x00000002 }, { 0x00000001d9632e5c, 0x00000021 }, @@ -85,7 +100,10 @@ static const struct { { 0x00000000001a7bb3, 0x00072331 }, { 0x00000000000397ad, 0x0002c61b }, { 0x000000000000141e, 0x06ea2e89 }, + { 0x00000000000001ca, 0x4c0a72e7 }, { 0x000000000000010a, 0xab002ad7 }, + { 0x00000000000000e5, 0x4c0a767b }, + { 0x00000000000000e5, 0x4c0a7596 }, }, { { 0x0000017949e37538, 0x00000001 }, { 0x0000001b62441f37, 0x00000055 }, @@ -95,7 +113,10 @@ static const struct { { 0x0000000001882ec6, 0x0005cbf9 }, { 0x000000000035333b, 0x0017abdf }, { 0x00000000000129f1, 0x0ab4520d }, + { 0x0000000000001a87, 0x18ff0472 }, { 0x0000000000000f6e, 0x8ac0ce9b }, + { 0x0000000000000d43, 0x98ff397f }, + { 0x0000000000000d43, 0x98ff2c3c }, }, { { 0x000011f321a74e49, 0x00000006 }, { 0x0000014d8481d211, 0x0000005b }, @@ -105,7 +126,10 @@ static const struct { { 0x0000000012a88828, 0x00036c97 }, { 0x000000000287f16f, 0x002c2a25 }, { 0x00000000000e2cc7, 0x02d581e3 }, + { 0x0000000000014318, 0x2ee07d7f }, { 0x000000000000bbf4, 0x1ba08c03 }, + { 0x000000000000a18c, 0x2ee303af }, + { 0x000000000000a18c, 0x2ee26223 }, }, { { 0x0000d8db8f72935d, 0x00000005 }, { 0x00000fbd5aed7a2e, 0x00000002 }, @@ -115,7 +139,10 @@ static const struct { { 0x00000000e16b20fa, 0x0002a14a }, { 0x000000001e940d22, 0x00353b2e }, { 0x0000000000ab40ac, 0x06fba6ba }, + { 0x00000000000f3f70, 0x0af7eeda }, { 0x000000000008debd, 0x72d98365 }, + { 0x0000000000079fb8, 0x0b166dba }, + { 0x0000000000079fb8, 0x0b0ece02 }, }, { { 0x000cc3045b8fc281, 0x00000000 }, { 0x0000ed1f48b5c9fc, 0x00000079 }, @@ -125,7 +152,10 @@ static const struct { { 0x0000000d43fce827, 
0x00082b09 }, { 0x00000001ccaba11a, 0x0037e8dd }, { 0x000000000a13f729, 0x0566dffd }, + { 0x0000000000e5b64e, 0x3728203b }, { 0x000000000085a14b, 0x23d36726 }, + { 0x000000000072db27, 0x38f38cd7 }, + { 0x000000000072db27, 0x3880b1b0 }, }, { { 0x00eafeb9c993592b, 0x00000001 }, { 0x00110e5befa9a991, 0x00000048 }, @@ -135,7 +165,10 @@ static const struct { { 0x000000f4459740fc, 0x00084484 }, { 0x0000002122c47bf9, 0x002ca446 }, { 0x00000000b9936290, 0x004979c4 }, + { 0x000000001085e910, 0x05a83974 }, { 0x00000000099ca89d, 0x9db446bf }, + { 0x000000000842f488, 0x26b40b94 }, + { 0x000000000842f488, 0x1e71170c }, }, { { 0x1b60cece589da1d2, 0x00000001 }, { 0x01fcb42be1453f5b, 0x0000004f }, @@ -145,7 +178,49 @@ static const struct { { 0x00001c757dfab350, 0x00048863 }, { 0x000003dc4979c652, 0x00224ea7 }, { 0x000000159edc3144, 0x06409ab3 }, + { 0x00000001ecce8a7e, 0x30bc25e5 }, { 0x000000011eadfee3, 0xa99c48a8 }, + { 0x00000000f6674543, 0x0a593ae9 }, + { 0x00000000f6674542, 0x13f1f5a5 }, + }, { + { 0x1c71c71c71c71c71, 0x00000002 }, + { 0x0210842108421084, 0x0000000b }, + { 0x007f01fc07f01fc0, 0x000000fb }, + { 0x00014245eabf1f9a, 0x0000a63d }, + { 0x0000ffffffffffff, 0x0000fffb }, + { 0x00001d913cecc509, 0x0007937b }, + { 0x00000402c70c678f, 0x0005bfc9 }, + { 0x00000016766cb70b, 0x045edf97 }, + { 0x00000001fffffffb, 0x80000000 }, + { 0x0000000129d84b3a, 0xa2e8fe71 }, + { 0x0000000100000001, 0xfffffffd }, + { 0x0000000100000000, 0xfffffffb }, + }, { + { 0x1c71c71c71c71c71, 0x00000003 }, + { 0x0210842108421084, 0x0000000c }, + { 0x007f01fc07f01fc0, 0x000000fc }, + { 0x00014245eabf1f9a, 0x0000a63e }, + { 0x0000ffffffffffff, 0x0000fffc }, + { 0x00001d913cecc509, 0x0007937c }, + { 0x00000402c70c678f, 0x0005bfca }, + { 0x00000016766cb70b, 0x045edf98 }, + { 0x00000001fffffffc, 0x00000000 }, + { 0x0000000129d84b3a, 0xa2e8fe72 }, + { 0x0000000100000002, 0x00000000 }, + { 0x0000000100000000, 0xfffffffc }, + }, { + { 0x1c71c71c71c71c71, 0x00000006 }, + { 0x0210842108421084, 0x0000000f }, + { 0x007f01fc07f01fc0, 0x000000ff }, + { 0x00014245eabf1f9a, 0x0000a641 }, + { 0x0000ffffffffffff, 0x0000ffff }, + { 0x00001d913cecc509, 0x0007937f }, + { 0x00000402c70c678f, 0x0005bfcd }, + { 0x00000016766cb70b, 0x045edf9b }, + { 0x00000001fffffffc, 0x00000003 }, + { 0x0000000129d84b3a, 0xa2e8fe75 }, + { 0x0000000100000002, 0x00000003 }, + { 0x0000000100000001, 0x00000000 }, }, }; @@ -208,6 +283,12 @@ static bool __init test_div64(void) return false; if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_8, i, 8)) return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_9, i, 9)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_A, i, 10)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_B, i, 11)) + return false; for (j = 0; j < SIZE_DIV64_DIVISORS; j++) { if (!test_div64_one(dividend, test_div64_divisors[j], i, j)) diff --git a/lib/min_heap.c b/lib/min_heap.c new file mode 100644 index 000000000000..4485372ff3b1 --- /dev/null +++ b/lib/min_heap.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include <linux/min_heap.h> + +void __min_heap_init(min_heap_char *heap, void *data, int size) +{ + __min_heap_init_inline(heap, data, size); +} +EXPORT_SYMBOL(__min_heap_init); + +void *__min_heap_peek(struct min_heap_char *heap) +{ + return __min_heap_peek_inline(heap); +} +EXPORT_SYMBOL(__min_heap_peek); + +bool __min_heap_full(min_heap_char *heap) +{ + return __min_heap_full_inline(heap); +} +EXPORT_SYMBOL(__min_heap_full); + +void 
__min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_down_inline(heap, pos, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_down); + +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_up); + +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heapify_all_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heapify_all); + +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_pop_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop); + +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_pop_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop_push); + +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_push); + +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_del_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_del); diff --git a/lib/objpool.c b/lib/objpool.c index fd108fe0d095..b998b720c732 100644 --- a/lib/objpool.c +++ b/lib/objpool.c @@ -74,15 +74,21 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs, * warm caches and TLB hits. in default vmalloc is used to * reduce the pressure of kernel slab system. as we know, * mimimal size of vmalloc is one page since vmalloc would - * always align the requested size to page size + * always align the requested size to page size. + * but if vmalloc fails or it is not available (e.g. GFP_ATOMIC) + * allocate percpu slot with kmalloc. 
*/ - if ((pool->gfp & GFP_ATOMIC) == GFP_ATOMIC) - slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); - else + slot = NULL; + + if ((pool->gfp & (GFP_ATOMIC | GFP_KERNEL)) != GFP_ATOMIC) slot = __vmalloc_node(size, sizeof(void *), pool->gfp, cpu_to_node(i), __builtin_return_address(0)); - if (!slot) - return -ENOMEM; + + if (!slot) { + slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); + if (!slot) + return -ENOMEM; + } memset(slot, 0, size); pool->cpu_slots[i] = slot; diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 2abc78367dd1..5222c6393f11 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -1187,7 +1187,7 @@ static void DEFINE_FLEX_test(struct kunit *test) { /* Using _RAW_ on a __counted_by struct will initialize "counter" to zero */ DEFINE_RAW_FLEX(struct foo, two_but_zero, array, 2); -#if __has_attribute(__counted_by__) +#ifdef CONFIG_CC_HAS_COUNTED_BY int expected_raw_size = sizeof(struct foo); #else int expected_raw_size = sizeof(struct foo) + 2 * sizeof(s16); diff --git a/lib/packing.c b/lib/packing.c index 3f656167c17e..793942745e34 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -9,52 +9,53 @@ #include <linux/types.h> #include <linux/bitrev.h> -static int get_le_offset(int offset) +/** + * calculate_box_addr - Determine physical location of byte in buffer + * @box: Index of byte within buffer seen as a logical big-endian big number + * @len: Size of buffer in bytes + * @quirks: mask of QUIRK_LSW32_IS_FIRST and QUIRK_LITTLE_ENDIAN + * + * Function interprets the buffer as a @len byte sized big number, and returns + * the physical offset of the @box logical octet within it. Internally, it + * treats the big number as groups of 4 bytes. If @len is not a multiple of 4, + * the last group may be shorter. + * + * @QUIRK_LSW32_IS_FIRST gives the ordering of groups of 4 octets relative to + * each other. If set, the most significant group of 4 octets is last in the + * buffer (and may be truncated if @len is not a multiple of 4). + * + * @QUIRK_LITTLE_ENDIAN gives the ordering of bytes within each group of 4. + * If set, the most significant byte is last in the group. If @len takes the + * form of 4k+3, the last group will only be able to represent 24 bits, and its + * most significant octet is byte 2. + * + * Return: the physical offset into the buffer corresponding to the logical box. 
+ */ +static size_t calculate_box_addr(size_t box, size_t len, u8 quirks) { - int closest_multiple_of_4; + size_t offset_of_group, offset_in_group, this_group = box / 4; + size_t group_size; - closest_multiple_of_4 = (offset / 4) * 4; - offset -= closest_multiple_of_4; - return closest_multiple_of_4 + (3 - offset); -} + if (quirks & QUIRK_LSW32_IS_FIRST) + offset_of_group = this_group * 4; + else + offset_of_group = len - ((this_group + 1) * 4); -static int get_reverse_lsw32_offset(int offset, size_t len) -{ - int closest_multiple_of_4; - int word_index; - - word_index = offset / 4; - closest_multiple_of_4 = word_index * 4; - offset -= closest_multiple_of_4; - word_index = (len / 4) - word_index - 1; - return word_index * 4 + offset; -} + group_size = min(4, len - offset_of_group); -static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, - int *box_end_bit, u8 *box_mask) -{ - int box_bit_width = *box_start_bit - *box_end_bit + 1; - int new_box_start_bit, new_box_end_bit; - - *to_write >>= *box_end_bit; - *to_write = bitrev8(*to_write) >> (8 - box_bit_width); - *to_write <<= *box_end_bit; - - new_box_end_bit = box_bit_width - *box_start_bit - 1; - new_box_start_bit = box_bit_width - *box_end_bit - 1; - *box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit); - *box_start_bit = new_box_start_bit; - *box_end_bit = new_box_end_bit; + if (quirks & QUIRK_LITTLE_ENDIAN) + offset_in_group = box - this_group * 4; + else + offset_in_group = group_size - (box - this_group * 4) - 1; + + return offset_of_group + offset_in_group; } /** - * packing - Convert numbers (currently u64) between a packed and an unpacked - * format. Unpacked means laid out in memory in the CPU's native - * understanding of integers, while packed means anything else that - * requires translation. + * pack - Pack u64 number into bitfield of buffer. * * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. + * @uval: CPU-readable unpacked value to pack. * @startbit: The index (in logical notation, compensated for quirks) where * the packed value starts within pbuf. Must be larger than, or * equal to, endbit. @@ -62,79 +63,179 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, * the packed value ends within pbuf. Must be smaller than, or equal * to, startbit. * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. - * @op: If PACK, then uval will be treated as const pointer and copied (packed) - * into pbuf, between startbit and endbit. - * If UNPACK, then pbuf will be treated as const pointer and the logical - * value between startbit and endbit will be copied (unpacked) to uval. * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and * QUIRK_MSB_ON_THE_RIGHT. * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. - * If op is PACK, pbuf is modified. - * If op is UNPACK, uval is modified. + * correct usage, return code may be discarded. The @pbuf memory will + * be modified on success. */ -int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, - enum packing_op op, u8 quirks) +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks) { - /* Number of bits for storing "uval" - * also width of the field to access in the pbuf - */ - u64 value_width; /* Logical byte indices corresponding to the * start and end of the field. 
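To make the group arithmetic of calculate_box_addr() concrete, here is a small userspace re-derivation for an 8-byte buffer. It is a sketch only: it mirrors the logic described in the kernel-doc rather than calling the (static) kernel function, and the Q_* constants are local stand-ins for the kernel quirk flags, not their real values.

#include <stdio.h>
#include <stddef.h>

#define Q_LSW32_IS_FIRST	(1u << 0)	/* local stand-in, not the kernel value */
#define Q_LITTLE_ENDIAN		(1u << 1)	/* local stand-in, not the kernel value */

/* Mirrors the mapping rules above; only exercised here with a length
 * that is a multiple of 4, so the truncated-last-group case is not hit.
 */
static size_t box_addr(size_t box, size_t len, unsigned int quirks)
{
	size_t group = box / 4;
	size_t offset_of_group, offset_in_group, group_size;

	offset_of_group = (quirks & Q_LSW32_IS_FIRST) ?
			  group * 4 : len - (group + 1) * 4;

	group_size = len - offset_of_group;
	if (group_size > 4)
		group_size = 4;

	offset_in_group = (quirks & Q_LITTLE_ENDIAN) ?
			  box - group * 4 :
			  group_size - (box - group * 4) - 1;

	return offset_of_group + offset_in_group;
}

int main(void)
{
	/* For len == 8: no quirks maps box i to byte 7 - i (plain big endian),
	 * LSW32_IS_FIRST swaps the two 4-byte groups, LITTLE_ENDIAN reverses
	 * the bytes inside each group, and both together give the identity.
	 */
	for (size_t box = 0; box < 8; box++)
		printf("box %zu -> %zu %zu %zu %zu\n", box,
		       box_addr(box, 8, 0),
		       box_addr(box, 8, Q_LSW32_IS_FIRST),
		       box_addr(box, 8, Q_LITTLE_ENDIAN),
		       box_addr(box, 8, Q_LSW32_IS_FIRST | Q_LITTLE_ENDIAN));
	return 0;
}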
*/ int plogical_first_u8, plogical_last_u8, box; + /* width of the field to access in the pbuf */ + u64 value_width; - /* startbit is expected to be larger than endbit */ - if (startbit < endbit) + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. + */ + if (unlikely(startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen)) /* Invalid function call */ return -EINVAL; value_width = startbit - endbit + 1; - if (value_width > 64) + if (unlikely(value_width > 64)) return -ERANGE; /* Check if "uval" fits in "value_width" bits. * If value_width is 64, the check will fail, but any * 64-bit uval will surely fit. */ - if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width))) + if (unlikely(value_width < 64 && uval >= (1ull << value_width))) /* Cannot store "uval" inside "value_width" bits. * Truncating "uval" is most certainly not desirable, * so simply erroring out is appropriate. */ return -ERANGE; + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. + */ + plogical_first_u8 = startbit / BITS_PER_BYTE; + plogical_last_u8 = endbit / BITS_PER_BYTE; + + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + size_t box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + size_t proj_start_bit, proj_end_bit; + u64 proj_mask; + u64 pval; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % BITS_PER_BYTE; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % BITS_PER_BYTE; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. + */ + proj_start_bit = ((box * BITS_PER_BYTE) + box_start_bit) - endbit; + proj_end_bit = ((box * BITS_PER_BYTE) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). + */ + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Write to pbuf, read from uval */ + pval = uval & proj_mask; + pval >>= proj_end_bit; + pval <<= box_end_bit; + + if (quirks & QUIRK_MSB_ON_THE_RIGHT) { + pval = bitrev8(pval); + box_mask = bitrev8(box_mask); + } + + ((u8 *)pbuf)[box_addr] &= ~box_mask; + ((u8 *)pbuf)[box_addr] |= pval; + } + return 0; +} +EXPORT_SYMBOL(pack); + +/** + * unpack - Unpack u64 number from packed buffer. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. 
+ * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. The @uval will be + * modified on success. + */ +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8, plogical_last_u8, box; + /* width of the field to access in the pbuf */ + u64 value_width; + + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. + */ + if (unlikely(startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen)) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (unlikely(value_width > 64)) + return -ERANGE; + /* Initialize parameter */ - if (op == UNPACK) - *uval = 0; + *uval = 0; /* Iterate through an idealistic view of the pbuf as an u64 with * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low * logical bit significance. "box" denotes the current logical u8. */ - plogical_first_u8 = startbit / 8; - plogical_last_u8 = endbit / 8; + plogical_first_u8 = startbit / BITS_PER_BYTE; + plogical_last_u8 = endbit / BITS_PER_BYTE; for (box = plogical_first_u8; box >= plogical_last_u8; box--) { /* Bit indices into the currently accessed 8-bit box */ - int box_start_bit, box_end_bit, box_addr; + size_t box_start_bit, box_end_bit, box_addr; u8 box_mask; /* Corresponding bits from the unpacked u64 parameter */ - int proj_start_bit, proj_end_bit; + size_t proj_start_bit, proj_end_bit; u64 proj_mask; + u64 pval; /* This u8 may need to be accessed in its entirety * (from bit 7 to bit 0), or not, depending on the * input arguments startbit and endbit. */ if (box == plogical_first_u8) - box_start_bit = startbit % 8; + box_start_bit = startbit % BITS_PER_BYTE; else box_start_bit = 7; if (box == plogical_last_u8) - box_end_bit = endbit % 8; + box_end_bit = endbit % BITS_PER_BYTE; else box_end_bit = 0; @@ -145,57 +246,72 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, * box is u8, the projection is u64 because it may fall * anywhere within the unpacked u64. */ - proj_start_bit = ((box * 8) + box_start_bit) - endbit; - proj_end_bit = ((box * 8) + box_end_bit) - endbit; + proj_start_bit = ((box * BITS_PER_BYTE) + box_start_bit) - endbit; + proj_end_bit = ((box * BITS_PER_BYTE) + box_end_bit) - endbit; proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); - box_mask = GENMASK_ULL(box_start_bit, box_end_bit); + box_mask = GENMASK(box_start_bit, box_end_bit); /* Determine the offset of the u8 box inside the pbuf, * adjusted for quirks. The adjusted box_addr will be used for * effective addressing inside the pbuf (so it's not * logical any longer). 
*/ - box_addr = pbuflen - box - 1; - if (quirks & QUIRK_LITTLE_ENDIAN) - box_addr = get_le_offset(box_addr); - if (quirks & QUIRK_LSW32_IS_FIRST) - box_addr = get_reverse_lsw32_offset(box_addr, - pbuflen); - - if (op == UNPACK) { - u64 pval; - - /* Read from pbuf, write to uval */ - pval = ((u8 *)pbuf)[box_addr] & box_mask; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval >>= box_end_bit; - pval <<= proj_end_bit; - *uval &= ~proj_mask; - *uval |= pval; - } else { - u64 pval; - - /* Write to pbuf, read from uval */ - pval = (*uval) & proj_mask; - pval >>= proj_end_bit; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval <<= box_end_bit; - ((u8 *)pbuf)[box_addr] &= ~box_mask; - ((u8 *)pbuf)[box_addr] |= pval; - } + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Read from pbuf, write to uval */ + pval = ((u8 *)pbuf)[box_addr]; + + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + pval = bitrev8(pval); + + pval &= box_mask; + + pval >>= box_end_bit; + pval <<= proj_end_bit; + *uval &= ~proj_mask; + *uval |= pval; } return 0; } +EXPORT_SYMBOL(unpack); + +/** + * packing - Convert numbers (currently u64) between a packed and an unpacked + * format. Unpacked means laid out in memory in the CPU's native + * understanding of integers, while packed means anything else that + * requires translation. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @op: If PACK, then uval will be treated as const pointer and copied (packed) + * into pbuf, between startbit and endbit. + * If UNPACK, then pbuf will be treated as const pointer and the logical + * value between startbit and endbit will be copied (unpacked) to uval. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Note: this is deprecated, prefer to use pack() or unpack() in new code. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. + * If op is PACK, pbuf is modified. + * If op is UNPACK, uval is modified. + */ +int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, + enum packing_op op, u8 quirks) +{ + if (op == PACK) + return pack(pbuf, *uval, startbit, endbit, pbuflen, quirks); + + return unpack(pbuf, uval, startbit, endbit, pbuflen, quirks); +} EXPORT_SYMBOL(packing); MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); diff --git a/lib/packing_test.c b/lib/packing_test.c new file mode 100644 index 000000000000..b38ea43c03fd --- /dev/null +++ b/lib/packing_test.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Vladimir Oltean <olteanv@gmail.com> + * Copyright (c) 2024, Intel Corporation. 
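A quick usage sketch of the two new entry points, pack() and unpack(). packing_demo() is an invented name; the expected buffer contents are taken from the "no quirks, 16 bytes" vector defined in the test file below, and error handling is reduced to a single check.

#include <linux/packing.h>
#include <linux/bug.h>

static int packing_demo(void)
{
	u8 buf[16] = {};
	u64 val = 0xcafedeadbeefcafe;
	u64 readback;
	int err;

	/* Pack the 64-bit value into logical bits 95..32 of the buffer. */
	err = pack(buf, val, 95, 32, sizeof(buf), 0);
	if (err)
		return err;
	/* buf now holds 00 00 00 00 ca fe de ad be ef ca fe 00 00 00 00 */

	/* Read the same field back. */
	err = unpack(buf, &readback, 95, 32, sizeof(buf), 0);
	if (err)
		return err;

	WARN_ON(readback != val);
	return 0;
}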
+ */ +#include <kunit/test.h> +#include <linux/packing.h> + +struct packing_test_case { + const char *desc; + const u8 *pbuf; + size_t pbuf_size; + u64 uval; + size_t start_bit; + size_t end_bit; + u8 quirks; +}; + +#define NO_QUIRKS 0 + +/** + * PBUF - Initialize .pbuf and .pbuf_size + * @array: elements of constant physical buffer + * + * Initializes the .pbuf and .pbuf_size fields of a struct packing_test_case + * with a constant array of the specified elements. + */ +#define PBUF(array...) \ + .pbuf = (const u8[]){ array }, \ + .pbuf_size = sizeof((const u8 []){ array }) + +static const struct packing_test_case cases[] = { + /* These tests pack and unpack a magic 64-bit value + * (0xcafedeadbeefcafe) at a fixed logical offset (32) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. + */ + { + .desc = "no quirks, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, + 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xbe, 0xef, 0xca, 0xfe, + 0xca, 0xfe, 0xde, 0xad, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xad, 0xde, 0xfe, 0xca, + 0xfe, 0xca, 0xef, 0xbe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x53, 0x7f, 0x7b, 0xb5, + 0x7d, 0xf7, 0x53, 0x7f, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x7d, 0xf7, 0x53, 0x7f, + 0x53, 0x7f, 0x7b, 0xb5, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xb5, 0x7b, 0x7f, 0x53, + 0x7f, 0x53, 0xf7, 0x7d, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x7f, 0x53, 0xf7, 0x7d, + 0xb5, 0x7b, 0x7f, 0x53, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a magic 64-bit value + * (0xcafedeadbeefcafe) at a fixed logical offset (32) within an + * otherwise zero array of varying size from 18 bytes to 24 bytes. 
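As an aside, the compound-literal trick in the PBUF() helper above expands to roughly the following (a sketch using the struct packing_test_case fields defined earlier):

static const struct packing_test_case pbuf_expansion_example = {
	.desc = "what PBUF(0xca, 0xfe) expands to",
	.pbuf = (const u8[]){ 0xca, 0xfe },			/* anonymous constant array */
	.pbuf_size = sizeof((const u8[]){ 0xca, 0xfe }),	/* == 2 bytes */
};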
+ */ + { + .desc = "no quirks, 18 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, + 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, + 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 19 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, + 0xfe, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x00, + 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 20 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xca, 0xfe, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, + 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 22 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, 0xbe, 0xef, + 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 24 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, + 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first + little endian words, 18 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 19 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 20 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 22 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 24 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a magic 64-bit value + * (0x1122334455667788) at an odd starting bit (43) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. 
+ */ + { + .desc = "no quirks, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x89, 0x11, 0x9a, 0x22, 0xab, + 0x33, 0xbc, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x33, 0xbc, 0x40, 0x00, + 0x11, 0x9a, 0x22, 0xab, 0x00, 0x00, 0x00, 0x89), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes, non-aligned", + PBUF(0x89, 0x00, 0x00, 0x00, 0xab, 0x22, 0x9a, 0x11, + 0x00, 0x40, 0xbc, 0x33, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0xbc, 0x33, + 0xab, 0x22, 0x9a, 0x11, 0x89, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x91, 0x88, 0x59, 0x44, 0xd5, + 0xcc, 0x3d, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0xcc, 0x3d, 0x02, 0x00, + 0x88, 0x59, 0x44, 0xd5, 0x00, 0x00, 0x00, 0x91), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes, non-aligned", + PBUF(0x91, 0x00, 0x00, 0x00, 0xd5, 0x44, 0x59, 0x88, + 0x00, 0x02, 0x3d, 0xcc, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x3d, 0xcc, + 0xd5, 0x44, 0x59, 0x88, 0x91, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a u64 with all bits set + * (0xffffffffffffffff) at an odd starting bit (43) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. 
+ */ + { + .desc = "no quirks, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xf8, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x07, 0xff), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0xff, 0x07, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x1f, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xe0, 0xff), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0xff, 0xe0, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x1f, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(packing, cases, desc); + +static void packing_test_pack(struct kunit *test) +{ + const struct packing_test_case *params = test->param_value; + u8 *pbuf; + int err; + + pbuf = kunit_kzalloc(test, params->pbuf_size, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, pbuf); + + err = pack(pbuf, params->uval, params->start_bit, params->end_bit, + params->pbuf_size, params->quirks); + + KUNIT_EXPECT_EQ_MSG(test, err, 0, "pack() returned %pe\n", ERR_PTR(err)); + KUNIT_EXPECT_MEMEQ(test, pbuf, params->pbuf, params->pbuf_size); +} + +static void packing_test_unpack(struct kunit *test) +{ + const struct packing_test_case *params = test->param_value; + u64 uval; + int err; + + err = unpack(params->pbuf, &uval, params->start_bit, params->end_bit, + params->pbuf_size, params->quirks); + KUNIT_EXPECT_EQ_MSG(test, err, 0, "unpack() returned %pe\n", ERR_PTR(err)); + KUNIT_EXPECT_EQ(test, uval, params->uval); +} + +static struct kunit_case packing_test_cases[] = { + KUNIT_CASE_PARAM(packing_test_pack, packing_gen_params), + KUNIT_CASE_PARAM(packing_test_unpack, packing_gen_params), + {}, +}; + +static struct kunit_suite packing_test_suite = { + .name = "packing", + 
.test_cases = packing_test_cases, +}; + +kunit_test_suite(packing_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for packing library"); diff --git a/lib/percpu_test.c b/lib/percpu_test.c index 4a3d70bbc1a0..ce7124b16dab 100644 --- a/lib/percpu_test.c +++ b/lib/percpu_test.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/limits.h> #include <linux/module.h> /* validate @native and @pcp counter values match @expected */ @@ -24,8 +25,9 @@ static int __init percpu_test_init(void) * +ul_one/-ul_one below would replace with inc/dec instructions. */ volatile unsigned int ui_one = 1; - long l = 0; + unsigned long long ull = 0; unsigned long ul = 0; + long l = 0; pr_info("percpu test start\n"); @@ -112,6 +114,13 @@ static int __init percpu_test_init(void) CHECK(ul, ulong_counter, -1); CHECK(ul, ulong_counter, ULONG_MAX); + ul = ull = 0; + __this_cpu_write(ulong_counter, 0); + + ul = ull += UINT_MAX; + __this_cpu_add(ulong_counter, ull); + CHECK(ul, ulong_counter, UINT_MAX); + ul = 3; __this_cpu_write(ulong_counter, 3); diff --git a/lib/random32.c b/lib/random32.c index 0a5a0e3600c8..24e7acd9343f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -36,7 +36,7 @@ #include <linux/percpu.h> #include <linux/export.h> #include <linux/jiffies.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/sched.h> #include <linux/bitops.h> #include <linux/slab.h> diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 41ae3c7570d3..8655a76d29a1 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -2,7 +2,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/rbtree_augmented.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <asm/timex.h> diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 473b2646f71c..5bb6b8aff232 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -474,14 +474,14 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, return -EOPNOTSUPP; if (sgt_append->prv) { - unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) + - sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE; + unsigned long next_pfn; if (WARN_ON(offset)) return -EINVAL; /* Merge contiguous pages into the last SG */ prv_len = sgt_append->prv->length; + next_pfn = (sg_phys(sgt_append->prv) + prv_len) / PAGE_SIZE; if (page_to_pfn(pages[0]) == next_pfn) { last_pg = pfn_to_page(next_pfn - 1); while (n_pages && pages_are_mergeable(pages[0], last_pg)) { diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 33564f965958..f11691315c2f 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -192,6 +192,47 @@ static void test_leak_destroy(struct kunit *test) KUNIT_EXPECT_EQ(test, 2, slab_errors); } +static void test_krealloc_redzone_zeroing(struct kunit *test) +{ + u8 *p; + int i; + struct kmem_cache *s = test_kmem_cache_create("TestSlub_krealloc", 64, + SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE); + + p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 48)); + memset(p, 0xff, 48); + + kasan_disable_current(); + OPTIMIZER_HIDE_VAR(p); + + /* Test shrink */ + p = krealloc(p, 40, GFP_KERNEL | __GFP_ZERO); + for (i = 40; i < 64; i++) + KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE); + + /* Test grow within the same 64B kmalloc object */ + p = krealloc(p, 56, GFP_KERNEL | __GFP_ZERO); + for (i = 40; i < 56; i++) + KUNIT_EXPECT_EQ(test, p[i], 0); + for (i = 56; i < 64; i++) + KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE); + + validate_slab_cache(s); + 
KUNIT_EXPECT_EQ(test, 0, slab_errors); + + memset(p, 0xff, 56); + /* Test grow with allocating a bigger 128B object */ + p = krealloc(p, 112, GFP_KERNEL | __GFP_ZERO); + for (i = 0; i < 56; i++) + KUNIT_EXPECT_EQ(test, p[i], 0xff); + for (i = 56; i < 112; i++) + KUNIT_EXPECT_EQ(test, p[i], 0); + + kfree(p); + kasan_enable_current(); + kmem_cache_destroy(s); +} + static int test_init(struct kunit *test) { slab_errors = 0; @@ -214,6 +255,7 @@ static struct kunit_case test_cases[] = { KUNIT_CASE(test_kmalloc_redzone_access), KUNIT_CASE(test_kfree_rcu), KUNIT_CASE(test_leak_destroy), + KUNIT_CASE(test_krealloc_redzone_zeroing), {} }; diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 4f887aa62fa0..91fa37b5c510 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -57,7 +57,7 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, static const unsigned int rounding[] = { 500, 50, 5 }; int i = 0, j; u32 remainder = 0, sf_cap; - char tmp[8]; + char tmp[12]; const char *unit; tmp[0] = '\0'; diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 989a12a67872..6dc234913dd5 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -120,6 +120,9 @@ long strncpy_from_user(char *dst, const char __user *src, long count) if (unlikely(count <= 0)) return 0; + kasan_check_write(dst, count); + check_object_size(dst, count, false); + if (can_do_masked_user_access()) { long retval; @@ -142,8 +145,6 @@ long strncpy_from_user(char *dst, const char __user *src, long count) if (max > count) max = count; - kasan_check_write(dst, count); - check_object_size(dst, count, false); if (user_read_access_begin(src, max)) { retval = do_strncpy_from_user(dst, src, count, max); user_read_access_end(); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index fa5edd6ef7f7..2eed1ad958e9 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14,7 +14,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/highmem.h> #include <linux/sched.h> diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 31561e0e1a0d..704cb1093ae8 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -1387,6 +1387,92 @@ static noinline void __init check_prev_entry(struct maple_tree *mt) mas_unlock(&mas); } +static noinline void __init check_store_null(struct maple_tree *mt) +{ + MA_STATE(mas, mt, 0, ULONG_MAX); + + /* + * Store NULL at range [0, ULONG_MAX] to an empty tree should result + * in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at any range to an empty tree should result in an empty + * tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set_range(&mas, 3, 10); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, ULONG_MAX] to a single entry tree should + * result in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); + mas_set_range(&mas, 0, ULONG_MAX); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, n] to a single entry tree should + * result in an empty tree + */ + 
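The same store-NULL invariants can also be exercised through the plain mtree_*() wrappers. The sketch below is illustrative only (store_null_sketch() is an invented name, return values are ignored for brevity) and is not part of the test above, which deliberately uses the advanced mas_*() API under the lock.

#include <linux/maple_tree.h>
#include <linux/gfp.h>
#include <linux/limits.h>
#include <linux/bug.h>

static void store_null_sketch(void)
{
	DEFINE_MTREE(mt);
	static int item;

	/* A single-entry tree overwritten with NULL over the full range
	 * should end up empty, mirroring the MT_BUG_ON() checks above.
	 */
	mtree_store_range(&mt, 0, 0, &item, GFP_KERNEL);
	mtree_store_range(&mt, 0, ULONG_MAX, NULL, GFP_KERNEL);
	WARN_ON(!mtree_empty(&mt));
	mtree_destroy(&mt);
}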
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); + mas_set_range(&mas, 0, 5); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [m, n] where m > 0 to a single entry tree + * should still be a single entry tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); + mas_set_range(&mas, 2, 5); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, mtree_empty(mt)); +// MT_BUG_ON(mt, xa_is_node(mas_root(&mas))); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, ULONG_MAX] to a tree with node should + * result in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set_range(&mas, 1, 3); + mas_store_gfp(&mas, &mas, GFP_KERNEL); +// MT_BUG_ON(mt, !xa_is_node(mas_root(&mas))); + mas_set_range(&mas, 0, ULONG_MAX); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); +} + static noinline void __init check_root_expand(struct maple_tree *mt) { MA_STATE(mas, mt, 0, 0); @@ -3711,6 +3797,10 @@ static int __init maple_tree_seed(void) #endif mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_store_null(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_root_expand(&tree); mtree_destroy(&tree); diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c index 64c877e73b64..e6fbb798558b 100644 --- a/lib/test_min_heap.c +++ b/lib/test_min_heap.c @@ -23,14 +23,6 @@ static __init bool greater_than(const void *lhs, const void *rhs, void __always_ return *(int *)lhs > *(int *)rhs; } -static __init void swap_ints(void *lhs, void *rhs, void __always_unused *args) -{ - int temp = *(int *)lhs; - - *(int *)lhs = *(int *)rhs; - *(int *)rhs = temp; -} - static __init int pop_verify_heap(bool min_heap, struct min_heap_test *heap, const struct min_heap_callbacks *funcs) @@ -72,7 +64,7 @@ static __init int test_heapify_all(bool min_heap) }; struct min_heap_callbacks funcs = { .less = min_heap ? less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, err; @@ -104,7 +96,7 @@ static __init int test_heap_push(bool min_heap) }; struct min_heap_callbacks funcs = { .less = min_heap ? less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, temp, err; @@ -136,7 +128,7 @@ static __init int test_heap_pop_push(bool min_heap) }; struct min_heap_callbacks funcs = { .less = min_heap ? less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, temp, err; @@ -175,7 +167,7 @@ static __init int test_heap_del(bool min_heap) heap.nr = ARRAY_SIZE(values); struct min_heap_callbacks funcs = { .less = min_heap ? 
less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, err; diff --git a/lib/test_parman.c b/lib/test_parman.c index 35e32243693c..f9b97426a337 100644 --- a/lib/test_parman.c +++ b/lib/test_parman.c @@ -39,7 +39,7 @@ #include <linux/slab.h> #include <linux/bitops.h> #include <linux/err.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/parman.h> #define TEST_PARMAN_PRIO_SHIFT 7 /* defines number of prios for testing */ diff --git a/lib/test_printf.c b/lib/test_printf.c index 8448b6d02bd9..59dbe4f9a4cb 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -386,6 +386,66 @@ kernel_ptr(void) static void __init struct_resource(void) { + struct resource test_resource = { + .start = 0xc0ffee00, + .end = 0xc0ffee00, + .flags = IORESOURCE_MEM, + }; + + test("[mem 0xc0ffee00 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xc0ffee, + .end = 0xba5eba11, + .flags = IORESOURCE_MEM, + }; + test("[mem 0x00c0ffee-0xba5eba11 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba5eba11, + .end = 0xc0ffee, + .flags = IORESOURCE_MEM, + }; + test("[mem 0xba5eba11-0x00c0ffee flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba5eba11, + .end = 0xba5eca11, + .flags = IORESOURCE_MEM, + }; + + test("[mem 0xba5eba11-0xba5eca11 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba11, + .end = 0xca10, + .flags = IORESOURCE_IO | + IORESOURCE_DISABLED | + IORESOURCE_UNSET, + }; + + test("[io size 0x1000 disabled]", + "%pR", &test_resource); +} + +static void __init +struct_range(void) +{ + struct range test_range = DEFINE_RANGE(0xc0ffee00ba5eba11, + 0xc0ffee00ba5eba11); + test("[range 0xc0ffee00ba5eba11]", "%pra", &test_range); + + test_range = DEFINE_RANGE(0xc0ffee, 0xba5eba11); + test("[range 0x0000000000c0ffee-0x00000000ba5eba11]", + "%pra", &test_range); + + test_range = DEFINE_RANGE(0xba5eba11, 0xc0ffee); + test("[range 0x00000000ba5eba11-0x0000000000c0ffee]", + "%pra", &test_range); } static void __init @@ -763,6 +823,7 @@ test_pointer(void) symbol_ptr(); kernel_ptr(); struct_resource(); + struct_range(); addr(); escaped_str(); hex_string(); diff --git a/lib/test_scanf.c b/lib/test_scanf.c index 7257b1768545..44f8508c9d88 100644 --- a/lib/test_scanf.c +++ b/lib/test_scanf.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/overflow.h> #include <linux/printk.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/string.h> diff --git a/lib/tests/Makefile b/lib/tests/Makefile new file mode 100644 index 000000000000..8e4f42cb9c54 --- /dev/null +++ b/lib/tests/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_TEST_RUNTIME_MODULE) += module/ diff --git a/lib/tests/module/.gitignore b/lib/tests/module/.gitignore new file mode 100644 index 000000000000..8be7891b250f --- /dev/null +++ b/lib/tests/module/.gitignore @@ -0,0 +1,4 @@ +test_kallsyms_a.c +test_kallsyms_b.c +test_kallsyms_c.c +test_kallsyms_d.c diff --git a/lib/tests/module/Makefile b/lib/tests/module/Makefile new file mode 100644 index 000000000000..2f3e1a772c2c --- /dev/null +++ b/lib/tests/module/Makefile @@ -0,0 +1,14 @@ +obj-$(CONFIG_TEST_KALLSYMS_A) += test_kallsyms_a.o +obj-$(CONFIG_TEST_KALLSYMS_B) += test_kallsyms_b.o +obj-$(CONFIG_TEST_KALLSYMS_C) += test_kallsyms_c.o +obj-$(CONFIG_TEST_KALLSYMS_D) += test_kallsyms_d.o + +quiet_cmd_gen_test_kallsyms = GEN $@ + cmd_gen_test_kallsyms = $< $@ \ + 
$(CONFIG_TEST_KALLSYMS_NUMSYMS) \ + $(CONFIG_TEST_KALLSYMS_SCALE_FACTOR) + +$(obj)/%.c: $(src)/gen_test_kallsyms.sh FORCE + $(call if_changed,gen_test_kallsyms) + +targets += $(foreach x, a b c d, test_kallsyms_$(x).c) diff --git a/lib/tests/module/gen_test_kallsyms.sh b/lib/tests/module/gen_test_kallsyms.sh new file mode 100755 index 000000000000..561dcac0f359 --- /dev/null +++ b/lib/tests/module/gen_test_kallsyms.sh @@ -0,0 +1,134 @@ +#!/bin/bash + +TARGET=$(basename $1) +DIR=lib/tests/module +TARGET="$DIR/$TARGET" +NUM_SYMS=$2 +SCALE_FACTOR=$3 +TEST_TYPE=$(echo $TARGET | sed -e 's|lib/tests/module/test_kallsyms_||g') +TEST_TYPE=$(echo $TEST_TYPE | sed -e 's|.c||g') +FIRST_B_LOOKUP=1 + +if [[ $NUM_SYMS -gt 2 ]]; then + FIRST_B_LOOKUP=$((NUM_SYMS/2)) +fi + +gen_template_module_header() +{ + cat <<____END_MODULE +// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +/* + * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> + * + * Automatically generated code for testing, do not edit manually. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> + +____END_MODULE +} + +gen_num_syms() +{ + PREFIX=$1 + NUM=$2 + for i in $(seq 1 $NUM); do + printf "int auto_test_%s_%010d = 0;\n" $PREFIX $i + printf "EXPORT_SYMBOL_GPL(auto_test_%s_%010d);\n" $PREFIX $i + done + echo +} + +gen_template_module_data_a() +{ + gen_num_syms a $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_data_b() +{ + printf "\nextern int auto_test_a_%010d;\n\n" $FIRST_B_LOOKUP + echo "static int auto_runtime_test(void)" + echo "{" + printf "\nreturn auto_test_a_%010d;\n" $FIRST_B_LOOKUP + echo "}" +} + +gen_template_module_data_c() +{ + gen_num_syms c $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_data_d() +{ + gen_num_syms d $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_exit() +{ + cat <<____END_MODULE +static int __init auto_test_module_init(void) +{ + return auto_runtime_test(); +} +module_init(auto_test_module_init); + +static void __exit auto_test_module_exit(void) +{ +} +module_exit(auto_test_module_exit); + +MODULE_AUTHOR("Luis Chamberlain <mcgrof@kernel.org>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module for kallsyms"); +____END_MODULE +} + +case $TEST_TYPE in + a) + gen_template_module_header > $TARGET + gen_template_module_data_a $NUM_SYMS >> $TARGET + gen_template_module_exit >> $TARGET + ;; + b) + gen_template_module_header > $TARGET + gen_template_module_data_b >> $TARGET + gen_template_module_exit >> $TARGET + ;; + c) + gen_template_module_header > $TARGET + gen_template_module_data_c $((NUM_SYMS * SCALE_FACTOR)) >> $TARGET + gen_template_module_exit >> $TARGET + ;; + d) + gen_template_module_header > $TARGET + gen_template_module_data_d $((NUM_SYMS * SCALE_FACTOR * 2)) >> $TARGET + gen_template_module_exit >> $TARGET + ;; + *) + ;; +esac diff --git a/lib/util_macros_kunit.c b/lib/util_macros_kunit.c new file mode 100644 index 000000000000..94cc9f0de50a --- /dev/null +++ b/lib/util_macros_kunit.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for bitfield helpers. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/util_macros.h> + +#define FIND_CLOSEST_RANGE_CHECK(from, to, array, exp_idx) \ +{ \ + int i; \ + for (i = from; i <= to; i++) { \ + int found = find_closest(i, array, ARRAY_SIZE(array)); \ + KUNIT_ASSERT_EQ(ctx, exp_idx, found); \ + } \ +} + +static void test_find_closest(struct kunit *ctx) +{ + /* This will test a few arrays that are found in drivers */ + static const int ina226_avg_tab[] = { 1, 4, 16, 64, 128, 256, 512, 1024 }; + static const unsigned int ad7616_oversampling_avail[] = { + 1, 2, 4, 8, 16, 32, 64, 128, + }; + static u32 wd_timeout_table[] = { 2, 4, 6, 8, 16, 32, 48, 64 }; + static int array_prog1a[] = { 1, 2, 3, 4, 5 }; + static u32 array_prog1b[] = { 2, 3, 4, 5, 6 }; + static int array_prog1mix[] = { -2, -1, 0, 1, 2 }; + static int array_prog2a[] = { 1, 3, 5, 7 }; + static u32 array_prog2b[] = { 2, 4, 6, 8 }; + static int array_prog3a[] = { 1, 4, 7, 10 }; + static u32 array_prog3b[] = { 2, 5, 8, 11 }; + static int array_prog4a[] = { 1, 5, 9, 13 }; + static u32 array_prog4b[] = { 2, 6, 10, 14 }; + + FIND_CLOSEST_RANGE_CHECK(-3, 2, ina226_avg_tab, 0); + FIND_CLOSEST_RANGE_CHECK(3, 10, ina226_avg_tab, 1); + FIND_CLOSEST_RANGE_CHECK(11, 40, ina226_avg_tab, 2); + FIND_CLOSEST_RANGE_CHECK(41, 96, ina226_avg_tab, 3); + FIND_CLOSEST_RANGE_CHECK(97, 192, ina226_avg_tab, 4); + FIND_CLOSEST_RANGE_CHECK(193, 384, ina226_avg_tab, 5); + FIND_CLOSEST_RANGE_CHECK(385, 768, ina226_avg_tab, 6); + FIND_CLOSEST_RANGE_CHECK(769, 2048, ina226_avg_tab, 7); + + /* The array that found the bug that caused this kunit to exist */ + FIND_CLOSEST_RANGE_CHECK(-3, 1, ad7616_oversampling_avail, 0); + FIND_CLOSEST_RANGE_CHECK(2, 3, ad7616_oversampling_avail, 1); + FIND_CLOSEST_RANGE_CHECK(4, 6, ad7616_oversampling_avail, 2); + FIND_CLOSEST_RANGE_CHECK(7, 12, ad7616_oversampling_avail, 3); + FIND_CLOSEST_RANGE_CHECK(13, 24, ad7616_oversampling_avail, 4); + FIND_CLOSEST_RANGE_CHECK(25, 48, ad7616_oversampling_avail, 5); + FIND_CLOSEST_RANGE_CHECK(49, 96, ad7616_oversampling_avail, 6); + FIND_CLOSEST_RANGE_CHECK(97, 256, ad7616_oversampling_avail, 7); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, wd_timeout_table, 0); + FIND_CLOSEST_RANGE_CHECK(4, 5, wd_timeout_table, 1); + FIND_CLOSEST_RANGE_CHECK(6, 7, wd_timeout_table, 2); + FIND_CLOSEST_RANGE_CHECK(8, 12, wd_timeout_table, 3); + FIND_CLOSEST_RANGE_CHECK(13, 24, wd_timeout_table, 4); + FIND_CLOSEST_RANGE_CHECK(25, 40, wd_timeout_table, 5); + FIND_CLOSEST_RANGE_CHECK(41, 56, wd_timeout_table, 6); + FIND_CLOSEST_RANGE_CHECK(57, 128, wd_timeout_table, 7); + + /* One could argue that find_closest() should not be used for monotonic + * arrays (like 1,2,3,4,5), but even so, it should work as long as the + * array is sorted ascending. 
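To show what these range checks encode in driver terms, here is a hedged sketch using find_closest() with the ina226 averaging table from the test above. pick_avg_setting() is a name invented for illustration.

#include <linux/kernel.h>
#include <linux/util_macros.h>

static const int avg_tab[] = { 1, 4, 16, 64, 128, 256, 512, 1024 };

/* Map a requested averaging factor to the index of the closest supported one. */
static int pick_avg_setting(int requested)
{
	return find_closest(requested, avg_tab, ARRAY_SIZE(avg_tab));
}

/* Per the ranges verified above, pick_avg_setting(100) returns index 4
 * (value 128) because 100 falls in the 97..192 bucket, and
 * pick_avg_setting(3) returns index 1 (value 4).
 */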
*/ + FIND_CLOSEST_RANGE_CHECK(-3, 1, array_prog1a, 0); + FIND_CLOSEST_RANGE_CHECK(2, 2, array_prog1a, 1); + FIND_CLOSEST_RANGE_CHECK(3, 3, array_prog1a, 2); + FIND_CLOSEST_RANGE_CHECK(4, 4, array_prog1a, 3); + FIND_CLOSEST_RANGE_CHECK(5, 8, array_prog1a, 4); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog1b, 0); + FIND_CLOSEST_RANGE_CHECK(3, 3, array_prog1b, 1); + FIND_CLOSEST_RANGE_CHECK(4, 4, array_prog1b, 2); + FIND_CLOSEST_RANGE_CHECK(5, 5, array_prog1b, 3); + FIND_CLOSEST_RANGE_CHECK(6, 8, array_prog1b, 4); + + FIND_CLOSEST_RANGE_CHECK(-4, -2, array_prog1mix, 0); + FIND_CLOSEST_RANGE_CHECK(-1, -1, array_prog1mix, 1); + FIND_CLOSEST_RANGE_CHECK(0, 0, array_prog1mix, 2); + FIND_CLOSEST_RANGE_CHECK(1, 1, array_prog1mix, 3); + FIND_CLOSEST_RANGE_CHECK(2, 5, array_prog1mix, 4); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog2a, 0); + FIND_CLOSEST_RANGE_CHECK(3, 4, array_prog2a, 1); + FIND_CLOSEST_RANGE_CHECK(5, 6, array_prog2a, 2); + FIND_CLOSEST_RANGE_CHECK(7, 10, array_prog2a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog2b, 0); + FIND_CLOSEST_RANGE_CHECK(4, 5, array_prog2b, 1); + FIND_CLOSEST_RANGE_CHECK(6, 7, array_prog2b, 2); + FIND_CLOSEST_RANGE_CHECK(8, 10, array_prog2b, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog3a, 0); + FIND_CLOSEST_RANGE_CHECK(3, 5, array_prog3a, 1); + FIND_CLOSEST_RANGE_CHECK(6, 8, array_prog3a, 2); + FIND_CLOSEST_RANGE_CHECK(9, 20, array_prog3a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog3b, 0); + FIND_CLOSEST_RANGE_CHECK(4, 6, array_prog3b, 1); + FIND_CLOSEST_RANGE_CHECK(7, 9, array_prog3b, 2); + FIND_CLOSEST_RANGE_CHECK(10, 20, array_prog3b, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog4a, 0); + FIND_CLOSEST_RANGE_CHECK(4, 7, array_prog4a, 1); + FIND_CLOSEST_RANGE_CHECK(8, 11, array_prog4a, 2); + FIND_CLOSEST_RANGE_CHECK(12, 20, array_prog4a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 4, array_prog4b, 0); + FIND_CLOSEST_RANGE_CHECK(5, 8, array_prog4b, 1); + FIND_CLOSEST_RANGE_CHECK(9, 12, array_prog4b, 2); + FIND_CLOSEST_RANGE_CHECK(13, 20, array_prog4b, 3); +} + +#define FIND_CLOSEST_DESC_RANGE_CHECK(from, to, array, exp_idx) \ +{ \ + int i; \ + for (i = from; i <= to; i++) { \ + int found = find_closest_descending(i, array, \ + ARRAY_SIZE(array)); \ + KUNIT_ASSERT_EQ(ctx, exp_idx, found); \ + } \ +} + +static void test_find_closest_descending(struct kunit *ctx) +{ + /* Same arrays as 'test_find_closest' but reversed */ + static const int ina226_avg_tab[] = { 1024, 512, 256, 128, 64, 16, 4, 1 }; + static const unsigned int ad7616_oversampling_avail[] = { + 128, 64, 32, 16, 8, 4, 2, 1 + }; + static u32 wd_timeout_table[] = { 64, 48, 32, 16, 8, 6, 4, 2 }; + static int array_prog1a[] = { 5, 4, 3, 2, 1 }; + static u32 array_prog1b[] = { 6, 5, 4, 3, 2 }; + static int array_prog1mix[] = { 2, 1, 0, -1, -2 }; + static int array_prog2a[] = { 7, 5, 3, 1 }; + static u32 array_prog2b[] = { 8, 6, 4, 2 }; + static int array_prog3a[] = { 10, 7, 4, 1 }; + static u32 array_prog3b[] = { 11, 8, 5, 2 }; + static int array_prog4a[] = { 13, 9, 5, 1 }; + static u32 array_prog4b[] = { 14, 10, 6, 2 }; + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, ina226_avg_tab, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 10, ina226_avg_tab, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(11, 40, ina226_avg_tab, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(41, 96, ina226_avg_tab, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(97, 192, ina226_avg_tab, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(193, 384, ina226_avg_tab, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(385, 768, ina226_avg_tab, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(769, 2048, 
ina226_avg_tab, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 1, ad7616_oversampling_avail, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 3, ad7616_oversampling_avail, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 6, ad7616_oversampling_avail, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 12, ad7616_oversampling_avail, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 24, ad7616_oversampling_avail, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(25, 48, ad7616_oversampling_avail, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(49, 96, ad7616_oversampling_avail, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(97, 256, ad7616_oversampling_avail, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, wd_timeout_table, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 5, wd_timeout_table, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 7, wd_timeout_table, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 12, wd_timeout_table, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 24, wd_timeout_table, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(25, 40, wd_timeout_table, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(41, 56, wd_timeout_table, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(57, 128, wd_timeout_table, 0); + + /* One could argue that find_closest_descending() should not be used + * for monotonic arrays (like 5,4,3,2,1), but even so, it should still + * it should work as long as the array is sorted descending. */ + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 1, array_prog1a, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 2, array_prog1a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 3, array_prog1a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 4, array_prog1a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 8, array_prog1a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog1b, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 3, array_prog1b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 4, array_prog1b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 5, array_prog1b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 8, array_prog1b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-4, -2, array_prog1mix, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(-1, -1, array_prog1mix, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(0, 0, array_prog1mix, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(1, 1, array_prog1mix, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 5, array_prog1mix, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog2a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 4, array_prog2a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 6, array_prog2a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 10, array_prog2a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog2b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 5, array_prog2b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 7, array_prog2b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 10, array_prog2b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog3a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 5, array_prog3a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 8, array_prog3a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(9, 20, array_prog3a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog3b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 6, array_prog3b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 9, array_prog3b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(10, 20, array_prog3b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog4a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 7, array_prog4a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 11, array_prog4a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(12, 20, array_prog4a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 4, array_prog4b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 8, array_prog4b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(9, 12, array_prog4b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 20, array_prog4b, 0); +} + +static struct kunit_case 
__refdata util_macros_test_cases[] = { + KUNIT_CASE(test_find_closest), + KUNIT_CASE(test_find_closest_descending), + {} +}; + +static struct kunit_suite util_macros_test_suite = { + .name = "util_macros.h", + .test_cases = util_macros_test_cases, +}; + +kunit_test_suites(&util_macros_test_suite); + +MODULE_AUTHOR("Alexandru Ardelean <aardelean@baylibre.com>"); +MODULE_DESCRIPTION("Test cases for util_macros.h helpers"); +MODULE_LICENSE("GPL"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c5e2ec9303c5..6ac02bbb7df1 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1040,6 +1040,20 @@ static const struct printf_spec default_dec04_spec = { }; static noinline_for_stack +char *hex_range(char *buf, char *end, u64 start_val, u64 end_val, + struct printf_spec spec) +{ + buf = number(buf, end, start_val, spec); + if (start_val == end_val) + return buf; + + if (buf < end) + *buf = '-'; + ++buf; + return number(buf, end, end_val, spec); +} + +static noinline_for_stack char *resource_string(char *buf, char *end, struct resource *res, struct printf_spec spec, const char *fmt) { @@ -1115,11 +1129,7 @@ char *resource_string(char *buf, char *end, struct resource *res, p = string_nocheck(p, pend, "size ", str_spec); p = number(p, pend, resource_size(res), *specp); } else { - p = number(p, pend, res->start, *specp); - if (res->start != res->end) { - *p++ = '-'; - p = number(p, pend, res->end, *specp); - } + p = hex_range(p, pend, res->start, res->end, *specp); } if (decode) { if (res->flags & IORESOURCE_MEM_64) @@ -1141,6 +1151,31 @@ char *resource_string(char *buf, char *end, struct resource *res, } static noinline_for_stack +char *range_string(char *buf, char *end, const struct range *range, + struct printf_spec spec, const char *fmt) +{ + char sym[sizeof("[range 0x0123456789abcdef-0x0123456789abcdef]")]; + char *p = sym, *pend = sym + sizeof(sym); + + struct printf_spec range_spec = { + .field_width = 2 + 2 * sizeof(range->start), /* 0x + 2 * 8 */ + .flags = SPECIAL | SMALL | ZEROPAD, + .base = 16, + .precision = -1, + }; + + if (check_pointer(&buf, end, range, spec)) + return buf; + + p = string_nocheck(p, pend, "[range ", default_str_spec); + p = hex_range(p, pend, range->start, range->end, range_spec); + *p++ = ']'; + *p = '\0'; + + return string_nocheck(buf, end, sym, spec); +} + +static noinline_for_stack char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) { @@ -2229,6 +2264,15 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +static noinline_for_stack +char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) +{ + if (*fmt == 'r' && fmt[1] == 'a') + return range_string(buf, end, ptr, spec, fmt); + return resource_string(buf, end, ptr, spec, fmt); +} + int __init no_hash_pointers_enable(char *str) { if (no_hash_pointers) @@ -2277,6 +2321,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr); * - 'Bb' as above with module build ID (for use in backtraces) * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] + * - 'ra' For struct ranges, e.g., [range 0x0000000000000000 - 0x00000000000000ff] * - 'b[l]' For a bitmap, the number of bits is determined by the field * width which must be explicitly specified either as part of the * format string '%32b[l]' or through '%*b[l]', [l] selects @@ -2401,7 +2446,7 @@ char *pointer(const char *fmt, char *buf, 
char *end, void *ptr, return symbol_string(buf, end, ptr, spec, fmt); case 'R': case 'r': - return resource_string(buf, end, ptr, spec, fmt); + return resource_or_range(fmt, buf, end, ptr, spec); case 'h': return hex_string(buf, end, ptr, spec, fmt); case 'b':
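Closing the loop with the struct_range() cases added to test_printf.c above, a short usage sketch of the new %pra specifier. print_range_demo() is an invented name; DEFINE_RANGE() is used exactly as in the test, and the expected output strings are the ones the test asserts.

#include <linux/range.h>
#include <linux/printk.h>

static void print_range_demo(void)
{
	struct range r = DEFINE_RANGE(0xc0ffee, 0xba5eba11);

	/* Prints: [range 0x0000000000c0ffee-0x00000000ba5eba11] */
	pr_info("%pra\n", &r);

	/* A degenerate range collapses to a single value:
	 * [range 0xc0ffee00ba5eba11]
	 */
	r = DEFINE_RANGE(0xc0ffee00ba5eba11, 0xc0ffee00ba5eba11);
	pr_info("%pra\n", &r);
}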