Diffstat (limited to 'lib')
52 files changed, 3280 insertions(+), 770 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig index b38849af6f13..5a318f753b2f 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -40,6 +40,18 @@ config PACKING When in doubt, say N. +config PACKING_KUNIT_TEST + tristate "KUnit tests for packing library" if !KUNIT_ALL_TESTS + depends on PACKING && KUNIT + default KUNIT_ALL_TESTS + help + This builds KUnit tests for the packing library. + + For more information on KUnit and unit tests in general, + please refer to the KUnit documentation in Documentation/dev-tools/kunit/. + + When in doubt, say N. + config BITREVERSE tristate @@ -777,3 +789,9 @@ config POLYNOMIAL config FIRMWARE_TABLE bool + +config UNION_FIND + bool + +config MIN_HEAP + bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7312ae7c3cc5..f3d723705879 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -993,6 +993,7 @@ config CODE_TAGGING config MEM_ALLOC_PROFILING bool "Enable memory allocation profiling" default n + depends on MMU depends on PROC_FS depends on !DEBUG_FORCE_WEAK_PER_CPU select CODE_TAGGING @@ -1328,19 +1329,6 @@ config SCHEDSTATS endmenu -config DEBUG_TIMEKEEPING - bool "Enable extra timekeeping sanity checking" - help - This option will enable additional timekeeping sanity checks - which may be helpful when diagnosing issues where timekeeping - problems are suspected. - - This may include checks in the timekeeping hotpaths, so this - option may have a (very small) performance impact to some - workloads. - - If unsure, say N. - config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT @@ -1409,22 +1397,14 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.rst. config PROVE_RAW_LOCK_NESTING - bool "Enable raw_spinlock - spinlock nesting checks" + bool depends on PROVE_LOCKING - default n + default y help Enable the raw_spinlock vs. spinlock nesting checks which ensure that the lock nesting rules for PREEMPT_RT enabled kernels are not violated. - NOTE: There are known nesting problems. So if you enable this - option expect lockdep splats until these problems have been fully - addressed which is work in progress. This config switch allows to - identify and analyze these problems. It will be removed and the - check permanently enabled once the main issues have been fixed. - - If unsure, select N. - config LOCK_STAT bool "Lock usage statistics" depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT @@ -1905,7 +1885,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED - default y if PPC || X86 || ARM64 + default y if PPC || X86 || ARM64 || S390 help If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental @@ -2115,6 +2095,16 @@ config FAIL_SUNRPC Provide fault-injection capability for SunRPC and its consumers. +config FAIL_SKB_REALLOC + bool "Fault-injection capability forcing skb to reallocate" + depends on FAULT_INJECTION_DEBUG_FS + help + Provide fault-injection capability that forces the skb to be + reallocated, catching possible invalid pointers to the skb. 
+ + For more information, check + Documentation/dev-tools/fault-injection/fault-injection.rst + config FAULT_INJECTION_CONFIGFS bool "Configfs interface for fault-injection capabilities" depends on FAULT_INJECTION @@ -2279,6 +2269,7 @@ config TEST_LIST_SORT config TEST_MIN_HEAP tristate "Min heap test" depends on DEBUG_KERNEL || m + select MIN_HEAP help Enable this to turn on min heap function tests. This test is executed only once during system boot (so affects only boot time), @@ -2629,6 +2620,23 @@ config CHECKSUM_KUNIT If unsure, say N. +config UTIL_MACROS_KUNIT + tristate "KUnit test util_macros.h functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test the util_macros.h function at boot. + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config HASH_KUNIT_TEST tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2850,6 +2858,15 @@ config USERCOPY_KUNIT_TEST on the copy_to/from_user infrastructure, making sure basic user/kernel boundary testing is working. +config CRC16_KUNIT_TEST + tristate "KUnit tests for CRC16" + depends on KUNIT + default KUNIT_ALL_TESTS + select CRC16 + help + Enable this option to run unit tests for the kernel's CRC16 + implementation (<linux/crc16.h>). + config TEST_UDELAY tristate "udelay test driver" help @@ -2903,6 +2920,141 @@ config TEST_KMOD If unsure, say N. +config TEST_RUNTIME + bool + +config TEST_RUNTIME_MODULE + bool + +config TEST_KALLSYMS + tristate "module kallsyms find_symbol() test" + depends on m + select TEST_RUNTIME + select TEST_RUNTIME_MODULE + select TEST_KALLSYMS_A + select TEST_KALLSYMS_B + select TEST_KALLSYMS_C + select TEST_KALLSYMS_D + help + This allows us to stress test find_symbol() through the kallsyms + used to place symbols on the kernel ELF kallsyms and modules kallsyms + where we place kernel symbols such as exported symbols. + + We have four test modules: + + A: has KALLSYSMS_NUMSYMS exported symbols + B: uses one of A's symbols + C: adds KALLSYMS_SCALE_FACTOR * KALLSYSMS_NUMSYMS exported + D: adds 2 * the symbols than C + + We stress test find_symbol() through two means: + + 1) Upon load of B it will trigger simplify_symbols() to look for the + one symbol it uses from the module A with tons of symbols. This is an + indirect way for us to have B call resolve_symbol_wait() upon module + load. This will eventually call find_symbol() which will eventually + try to find the symbols used with find_exported_symbol_in_section(). + find_exported_symbol_in_section() uses bsearch() so a binary search + for each symbol. Binary search will at worst be O(log(n)) so the + larger TEST_MODULE_KALLSYSMS the worse the search. + + 2) The selftests should load C first, before B. Upon B's load towards + the end right before we call module B's init routine we get + complete_formation() called on the module. That will first check + for duplicate symbols with the call to verify_exported_symbols(). + That is when we'll force iteration on module C's insane symbol list. + Since it has 10 * KALLSYMS_NUMSYMS it means we can first test + just loading B without C. 
The amount of time it takes to load C Vs + B can give us an idea of the impact growth of the symbol space and + give us projection. Module A only uses one symbol from B so to allow + this scaling in module C to be proportional, if it used more symbols + then the first test would be doing more and increasing just the + search space would be slightly different. The last module, module D + will just increase the search space by twice the number of symbols in + C so to allow for full projects. + + tools/testing/selftests/module/find_symbol.sh + + The current defaults will incur a build delay of about 7 minutes + on an x86_64 with only 8 cores. Enable this only if you want to + stress test find_symbol() with thousands of symbols. At the same + time this is also useful to test building modules with thousands of + symbols, and if BTF is enabled this also stress tests adding BTF + information for each module. Currently enabling many more symbols + will segfault the build system. + + If unsure, say N. + +if TEST_KALLSYMS + +config TEST_KALLSYMS_A + tristate + depends on m + +config TEST_KALLSYMS_B + tristate + depends on m + +config TEST_KALLSYMS_C + tristate + depends on m + +config TEST_KALLSYMS_D + tristate + depends on m + +choice + prompt "Kallsym test range" + default TEST_KALLSYMS_LARGE + help + Selecting something other than "Fast" will enable tests which slow + down the build and may crash your build. + +config TEST_KALLSYMS_FAST + bool "Fast builds" + help + You won't really be testing kallsysms, so this just helps fast builds + when allmodconfig is used.. + +config TEST_KALLSYMS_LARGE + bool "Enable testing kallsyms with large exports" + help + This will enable larger number of symbols. This will slow down + your build considerably. + +config TEST_KALLSYMS_MAX + bool "Known kallsysms limits" + help + This will enable exports to the point we know we'll start crashing + builds. + +endchoice + +config TEST_KALLSYMS_NUMSYMS + int "test kallsyms number of symbols" + range 2 10000 + default 2 if TEST_KALLSYMS_FAST + default 100 if TEST_KALLSYMS_LARGE + default 10000 if TEST_KALLSYMS_MAX + help + The number of symbols to create on TEST_KALLSYMS_A, only one of which + module TEST_KALLSYMS_B will use. This also will be used + for how many symbols TEST_KALLSYMS_C will have, scaled up by + TEST_KALLSYMS_SCALE_FACTOR. Note that setting this to 10,000 will + trigger a segfault today, don't use anything close to it unless + you are aware that this should not be used for automated build tests. + +config TEST_KALLSYMS_SCALE_FACTOR + int "test kallsyms scale factor" + default 8 + help + How many more unusued symbols will TEST_KALLSYSMS_C have than + TEST_KALLSYMS_A. If 8, then module C will have 8 * syms + than module A. Then TEST_KALLSYMS_D will have double the amount + of symbols than C so to allow projections. + +endif # TEST_KALLSYMS + config TEST_DEBUG_VIRTUAL tristate "Test CONFIG_DEBUG_VIRTUAL feature" depends on DEBUG_VIRTUAL @@ -2993,6 +3145,22 @@ config TEST_OBJPOOL If unsure, say N. +config INT_POW_TEST + tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the int_pow function, + which performs integer exponentiation. The test suite is designed to + verify that the implementation of int_pow correctly computes the power + of a given base raised to a given exponent. 
+ + Enabling this option will include tests that check various scenarios + and edge cases to ensure the accuracy and reliability of the exponentiation + function. + + If unsure, say N + endif # RUNTIME_TESTING_MENU config ARCH_USE_MEMTEST @@ -3088,19 +3256,3 @@ config RUST_KERNEL_DOCTESTS endmenu # "Rust" endmenu # Kernel hacking - -config INT_POW_TEST - tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - This option enables the KUnit test suite for the int_pow function, - which performs integer exponentiation. The test suite is designed to - verify that the implementation of int_pow correctly computes the power - of a given base raised to a given exponent. - - Enabling this option will include tests that check various scenarios - and edge cases to ensure the accuracy and reliability of the exponentiation - function. - - If unsure, say N diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 98016e137b7f..f82889a830fa 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -195,13 +195,6 @@ config KASAN_KUNIT_TEST For more information on KUnit and unit tests in general, please refer to the KUnit documentation in Documentation/dev-tools/kunit/. -config KASAN_MODULE_TEST - tristate "KUnit-incompatible tests of KASAN bug detection capabilities" - depends on m && KASAN && !KASAN_HW_TAGS - help - A part of the KASAN test suite that is not integrated with KUnit. - Incompatible with Hardware Tag-Based KASAN. - config KASAN_EXTRA_INFO bool "Record and report more information" depends on KASAN diff --git a/lib/Makefile b/lib/Makefile index 773adf88af41..a8155c972f02 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -35,10 +35,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o union_find.o + buildid.o objpool.o iomem_copy.o +lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_SMP) += cpumask.o +lib-$(CONFIG_MIN_HEAP) += min_heap.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -96,6 +98,7 @@ obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o +obj-$(CONFIG_TEST_RUNTIME) += tests/ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o @@ -154,6 +157,7 @@ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o obj-$(CONFIG_PACKING) += packing.o +obj-$(CONFIG_PACKING_KUNIT_TEST) += packing_test.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o @@ -370,6 +374,7 @@ obj-$(CONFIG_PLDMFW) += pldmfw/ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o +obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o @@ -389,6 +394,7 @@ CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += 
usercopy_kunit.o +obj-$(CONFIG_CRC16_KUNIT_TEST) += crc16_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 81e5f9a70f22..2414a7ee7ec7 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -1,12 +1,26 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/alloc_tag.h> +#include <linux/execmem.h> #include <linux/fs.h> #include <linux/gfp.h> +#include <linux/kallsyms.h> #include <linux/module.h> #include <linux/page_ext.h> #include <linux/proc_fs.h> #include <linux/seq_buf.h> #include <linux/seq_file.h> +#include <linux/vmalloc.h> + +#define ALLOCINFO_FILE_NAME "allocinfo" +#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) +#define SECTION_START(NAME) (CODETAG_SECTION_START_PREFIX NAME) +#define SECTION_STOP(NAME) (CODETAG_SECTION_STOP_PREFIX NAME) + +#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT +static bool mem_profiling_support = true; +#else +static bool mem_profiling_support; +#endif static struct codetag_type *alloc_tag_cttype; @@ -15,6 +29,11 @@ EXPORT_SYMBOL(_shared_alloc_tag); DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, mem_alloc_profiling_key); +DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed); + +struct alloc_tag_kernel_section kernel_tags = { NULL, 0 }; +unsigned long alloc_tag_ref_mask; +int alloc_tag_ref_offs; struct allocinfo_private { struct codetag_iterator iter; @@ -144,44 +163,450 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl return nr; } +void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) +{ + int i; + struct alloc_tag *tag; + unsigned int nr_pages = 1 << new_order; + + if (!mem_alloc_profiling_enabled()) + return; + + tag = pgalloc_tag_get(&folio->page); + if (!tag) + return; + + for (i = nr_pages; i < (1 << old_order); i += nr_pages) { + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) { + /* Set new reference to point to the original tag */ + alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); + } + } +} + +void pgalloc_tag_copy(struct folio *new, struct folio *old) +{ + union pgtag_ref_handle handle; + union codetag_ref ref; + struct alloc_tag *tag; + + tag = pgalloc_tag_get(&old->page); + if (!tag) + return; + + if (!get_page_tag_ref(&new->page, &ref, &handle)) + return; + + /* Clear the old ref to the original allocation tag. */ + clear_page_tag_ref(&old->page); + /* Decrement the counters of the tag on get_new_folio. 
*/ + alloc_tag_sub(&ref, folio_size(new)); + __alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); +} + +static void shutdown_mem_profiling(bool remove_file) +{ + if (mem_alloc_profiling_enabled()) + static_branch_disable(&mem_alloc_profiling_key); + + if (!mem_profiling_support) + return; + + if (remove_file) + remove_proc_entry(ALLOCINFO_FILE_NAME, NULL); + mem_profiling_support = false; +} + static void __init procfs_init(void) { - proc_create_seq("allocinfo", 0400, NULL, &allocinfo_seq_op); + if (!mem_profiling_support) + return; + + if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) { + pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME); + shutdown_mem_profiling(false); + } +} + +void __init alloc_tag_sec_init(void) +{ + struct alloc_tag *last_codetag; + + if (!mem_profiling_support) + return; + + if (!static_key_enabled(&mem_profiling_compressed)) + return; + + kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name( + SECTION_START(ALLOC_TAG_SECTION_NAME)); + last_codetag = (struct alloc_tag *)kallsyms_lookup_name( + SECTION_STOP(ALLOC_TAG_SECTION_NAME)); + kernel_tags.count = last_codetag - kernel_tags.first_tag; + + /* Check if kernel tags fit into page flags */ + if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) { + shutdown_mem_profiling(false); /* allocinfo file does not exist yet */ + pr_err("%lu allocation tags cannot be references using %d available page flag bits. Memory allocation profiling is disabled!\n", + kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS); + return; + } + + alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS); + alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1); + pr_debug("Memory allocation profiling compression is using %d page flag bits!\n", + NR_UNUSED_PAGEFLAG_BITS); +} + +#ifdef CONFIG_MODULES + +static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE); +static struct vm_struct *vm_module_tags; +/* A dummy object used to indicate an unloaded module */ +static struct module unloaded_mod; +/* A dummy object used to indicate a module prepended area */ +static struct module prepend_mod; + +struct alloc_tag_module_section module_tags; + +static inline unsigned long alloc_tag_align(unsigned long val) +{ + if (!static_key_enabled(&mem_profiling_compressed)) { + /* No alignment requirements when we are not indexing the tags */ + return val; + } + + if (val % sizeof(struct alloc_tag) == 0) + return val; + return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag); } -static bool alloc_tag_module_unload(struct codetag_type *cttype, - struct codetag_module *cmod) +static bool ensure_alignment(unsigned long align, unsigned int *prepend) { - struct codetag_iterator iter = codetag_get_ct_iter(cttype); - struct alloc_tag_counters counter; - bool module_unused = true; + if (!static_key_enabled(&mem_profiling_compressed)) { + /* No alignment requirements when we are not indexing the tags */ + return true; + } + + /* + * If alloc_tag size is not a multiple of required alignment, tag + * indexing does not work. 
+ */ + if (!IS_ALIGNED(sizeof(struct alloc_tag), align)) + return false; + + /* Ensure prepend consumes multiple of alloc_tag-sized blocks */ + if (*prepend) + *prepend = alloc_tag_align(*prepend); + + return true; +} + +static inline bool tags_addressable(void) +{ + unsigned long tag_idx_count; + + if (!static_key_enabled(&mem_profiling_compressed)) + return true; /* with page_ext tags are always addressable */ + + tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count + + module_tags.size / sizeof(struct alloc_tag); + + return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS); +} + +static bool needs_section_mem(struct module *mod, unsigned long size) +{ + if (!mem_profiling_support) + return false; + + return size >= sizeof(struct alloc_tag); +} + +static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) +{ + while (from <= to) { + struct alloc_tag_counters counter; + + counter = alloc_tag_read(from); + if (counter.bytes) + return from; + from++; + } + + return NULL; +} + +/* Called with mod_area_mt locked */ +static void clean_unused_module_areas_locked(void) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_for_each(&mas, val, module_tags.size) { + if (val != &unloaded_mod) + continue; + + /* Release area if all tags are unused */ + if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last))) + mas_erase(&mas); + } +} + +/* Called with mod_area_mt locked */ +static bool find_aligned_area(struct ma_state *mas, unsigned long section_size, + unsigned long size, unsigned int prepend, unsigned long align) +{ + bool cleanup_done = false; + +repeat: + /* Try finding exact size and hope the start is aligned */ + if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) { + if (IS_ALIGNED(mas->index + prepend, align)) + return true; + + /* Try finding larger area to align later */ + mas_reset(mas); + if (!mas_empty_area(mas, 0, section_size - 1, + size + prepend + align - 1)) + return true; + } + + /* No free area, try cleanup stale data and repeat the search once */ + if (!cleanup_done) { + clean_unused_module_areas_locked(); + cleanup_done = true; + mas_reset(mas); + goto repeat; + } + + return false; +} + +static int vm_module_tags_populate(void) +{ + unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT; + + if (phys_size < module_tags.size) { + struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; + unsigned long addr = module_tags.start_addr + phys_size; + unsigned long more_pages; + unsigned long nr; + + more_pages = ALIGN(module_tags.size - phys_size, PAGE_SIZE) >> PAGE_SHIFT; + nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages, next_page); + if (nr < more_pages || + vmap_pages_range(addr, addr + (nr << PAGE_SHIFT), PAGE_KERNEL, + next_page, PAGE_SHIFT) < 0) { + /* Clean up and error out */ + for (int i = 0; i < nr; i++) + __free_page(next_page[i]); + return -ENOMEM; + } + vm_module_tags->nr_pages += nr; + } + + return 0; +} + +static void *reserve_module_tags(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align) +{ + unsigned long section_size = module_tags.end_addr - module_tags.start_addr; + MA_STATE(mas, &mod_area_mt, 0, section_size - 1); + unsigned long offset; + void *ret = NULL; + + /* If no tags return error */ + if (size < sizeof(struct alloc_tag)) + return ERR_PTR(-EINVAL); + + /* + * align is always power of 2, so we can use IS_ALIGNED 
and ALIGN. + * align 0 or 1 means no alignment, to simplify set to 1. + */ + if (!align) + align = 1; + + if (!ensure_alignment(align, &prepend)) { + shutdown_mem_profiling(true); + pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n", + mod->name, align); + return ERR_PTR(-EINVAL); + } + + mas_lock(&mas); + if (!find_aligned_area(&mas, section_size, size, prepend, align)) { + ret = ERR_PTR(-ENOMEM); + goto unlock; + } + + /* Mark found area as reserved */ + offset = mas.index; + offset += prepend; + offset = ALIGN(offset, align); + if (offset != mas.index) { + unsigned long pad_start = mas.index; + + mas.last = offset - 1; + mas_store(&mas, &prepend_mod); + if (mas_is_err(&mas)) { + ret = ERR_PTR(xa_err(mas.node)); + goto unlock; + } + mas.index = offset; + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) { + mas.index = pad_start; + mas_erase(&mas); + ret = ERR_PTR(xa_err(mas.node)); + } + } else { + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) + ret = ERR_PTR(xa_err(mas.node)); + } +unlock: + mas_unlock(&mas); + + if (IS_ERR(ret)) + return ret; + + if (module_tags.size < offset + size) { + int grow_res; + + module_tags.size = offset + size; + if (mem_alloc_profiling_enabled() && !tags_addressable()) { + shutdown_mem_profiling(true); + pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n", + mod->name, NR_UNUSED_PAGEFLAG_BITS); + } + + grow_res = vm_module_tags_populate(); + if (grow_res) { + shutdown_mem_profiling(true); + pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n", + mod->name); + return ERR_PTR(grow_res); + } + } + + return (struct alloc_tag *)(module_tags.start_addr + offset); +} + +static void release_module_tags(struct module *mod, bool used) +{ + MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); struct alloc_tag *tag; - struct codetag *ct; + struct module *val; + + mas_lock(&mas); + mas_for_each_rev(&mas, val, 0) + if (val == mod) + break; + + if (!val) /* module not found */ + goto out; + + if (!used) + goto release_area; + + /* Find out if the area is used */ + tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last)); + if (tag) { + struct alloc_tag_counters counter = alloc_tag_read(tag); + + pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", + tag->ct.filename, tag->ct.lineno, tag->ct.modname, + tag->ct.function, counter.bytes); + } else { + used = false; + } +release_area: + mas_store(&mas, used ? 
&unloaded_mod : NULL); + val = mas_prev_range(&mas, 0); + if (val == &prepend_mod) + mas_store(&mas, NULL); +out: + mas_unlock(&mas); +} - for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) { - if (iter.cmod != cmod) +static void replace_module(struct module *mod, struct module *new_mod) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_lock(&mas); + mas_for_each(&mas, val, module_tags.size) { + if (val != mod) continue; - tag = ct_to_alloc_tag(ct); - counter = alloc_tag_read(tag); + mas_store_gfp(&mas, new_mod, GFP_KERNEL); + break; + } + mas_unlock(&mas); +} - if (WARN(counter.bytes, - "%s:%u module %s func:%s has %llu allocated at module unload", - ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes)) - module_unused = false; +static int __init alloc_mod_tags_mem(void) +{ + /* Map space to copy allocation tags */ + vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE); + if (!vm_module_tags) { + pr_err("Failed to map %lu bytes for module allocation tags\n", + MODULE_ALLOC_TAG_VMAP_SIZE); + module_tags.start_addr = 0; + return -ENOMEM; } - return module_unused; + vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT, + sizeof(struct page *), GFP_KERNEL | __GFP_ZERO); + if (!vm_module_tags->pages) { + free_vm_area(vm_module_tags); + return -ENOMEM; + } + + module_tags.start_addr = (unsigned long)vm_module_tags->addr; + module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE; + /* Ensure the base is alloc_tag aligned when required for indexing */ + module_tags.start_addr = alloc_tag_align(module_tags.start_addr); + + return 0; +} + +static void __init free_mod_tags_mem(void) +{ + int i; + + module_tags.start_addr = 0; + for (i = 0; i < vm_module_tags->nr_pages; i++) + __free_page(vm_module_tags->pages[i]); + kfree(vm_module_tags->pages); + free_vm_area(vm_module_tags); } -#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT -static bool mem_profiling_support __meminitdata = true; -#else -static bool mem_profiling_support __meminitdata; -#endif +#else /* CONFIG_MODULES */ + +static inline int alloc_mod_tags_mem(void) { return 0; } +static inline void free_mod_tags_mem(void) {} + +#endif /* CONFIG_MODULES */ +/* See: Documentation/mm/allocation-profiling.rst */ static int __init setup_early_mem_profiling(char *str) { + bool compressed = false; bool enable; if (!str || !str[0]) @@ -190,22 +615,37 @@ static int __init setup_early_mem_profiling(char *str) if (!strncmp(str, "never", 5)) { enable = false; mem_profiling_support = false; + pr_info("Memory allocation profiling is disabled!\n"); } else { - int res; + char *token = strsep(&str, ","); - res = kstrtobool(str, &enable); - if (res) - return res; + if (kstrtobool(token, &enable)) + return -EINVAL; + if (str) { + + if (strcmp(str, "compressed")) + return -EINVAL; + + compressed = true; + } mem_profiling_support = true; + pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n", + compressed ? "with" : "without", enable ? 
"on" : "off"); } - if (enable != static_key_enabled(&mem_alloc_profiling_key)) { + if (enable != mem_alloc_profiling_enabled()) { if (enable) static_branch_enable(&mem_alloc_profiling_key); else static_branch_disable(&mem_alloc_profiling_key); } + if (compressed != static_key_enabled(&mem_profiling_compressed)) { + if (compressed) + static_branch_enable(&mem_profiling_compressed); + else + static_branch_disable(&mem_profiling_compressed); + } return 0; } @@ -213,6 +653,9 @@ early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling); static __init bool need_page_alloc_tagging(void) { + if (static_key_enabled(&mem_profiling_compressed)) + return false; + return mem_profiling_support; } @@ -255,14 +698,26 @@ static inline void sysctl_init(void) {} static int __init alloc_tag_init(void) { const struct codetag_type_desc desc = { - .section = "alloc_tags", - .tag_size = sizeof(struct alloc_tag), - .module_unload = alloc_tag_module_unload, + .section = ALLOC_TAG_SECTION_NAME, + .tag_size = sizeof(struct alloc_tag), +#ifdef CONFIG_MODULES + .needs_section_mem = needs_section_mem, + .alloc_section_mem = reserve_module_tags, + .free_section_mem = release_module_tags, + .module_replaced = replace_module, +#endif }; + int res; + + res = alloc_mod_tags_mem(); + if (res) + return res; alloc_tag_cttype = codetag_register_type(&desc); - if (IS_ERR(alloc_tag_cttype)) + if (IS_ERR(alloc_tag_cttype)) { + free_mod_tags_mem(); return PTR_ERR(alloc_tag_cttype); + } sysctl_init(); procfs_init(); diff --git a/lib/checksum.c b/lib/checksum.c index 6860d6b05a17..025ba546e1ec 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -34,15 +34,6 @@ #include <asm/byteorder.h> #ifndef do_csum -static inline unsigned short from32to16(unsigned int x) -{ - /* add up 16-bit and 16-bit for 16+c bit */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. */ - x = (x & 0xffff) + (x >> 16); - return x; -} - static unsigned int do_csum(const unsigned char *buff, int len) { int odd; @@ -90,7 +81,7 @@ static unsigned int do_csum(const unsigned char *buff, int len) #else result += (*buff << 8); #endif - result = from32to16(result); + result = csum_from32to16(result); if (odd) result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); out: diff --git a/lib/codetag.c b/lib/codetag.c index d1fbbb7c2ec3..42aadd6c1454 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -149,8 +149,8 @@ static struct codetag_range get_section_range(struct module *mod, const char *section) { return (struct codetag_range) { - get_symbol(mod, "__start_", section), - get_symbol(mod, "__stop_", section), + get_symbol(mod, CODETAG_SECTION_START_PREFIX, section), + get_symbol(mod, CODETAG_SECTION_STOP_PREFIX, section), }; } @@ -207,6 +207,94 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) } #ifdef CONFIG_MODULES +#define CODETAG_SECTION_PREFIX ".codetag." 
+ +/* Some codetag types need a separate module section */ +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) +{ + const char *type_name; + struct codetag_type *cttype; + bool ret = false; + + if (strncmp(name, CODETAG_SECTION_PREFIX, strlen(CODETAG_SECTION_PREFIX))) + return false; + + type_name = name + strlen(CODETAG_SECTION_PREFIX); + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (!cttype->desc.needs_section_mem) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.needs_section_mem(mod, size); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) +{ + const char *type_name = name + strlen(CODETAG_SECTION_PREFIX); + struct codetag_type *cttype; + void *ret = ERR_PTR(-EINVAL); + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (WARN_ON(!cttype->desc.alloc_section_mem)) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.alloc_section_mem(mod, size, prepend, align); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void codetag_free_module_sections(struct module *mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.free_section_mem) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.free_section_mem(mod, false); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + +void codetag_module_replaced(struct module *mod, struct module *new_mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.module_replaced) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.module_replaced(mod, new_mod); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + void codetag_load_module(struct module *mod) { struct codetag_type *cttype; @@ -220,13 +308,12 @@ void codetag_load_module(struct module *mod) mutex_unlock(&codetag_lock); } -bool codetag_unload_module(struct module *mod) +void codetag_unload_module(struct module *mod) { struct codetag_type *cttype; - bool unload_ok = true; if (!mod) - return true; + return; /* await any module's kfree_rcu() operations to complete */ kvfree_rcu_barrier(); @@ -246,18 +333,17 @@ bool codetag_unload_module(struct module *mod) } if (found) { if (cttype->desc.module_unload) - if (!cttype->desc.module_unload(cttype, cmod)) - unload_ok = false; + cttype->desc.module_unload(cttype, cmod); cttype->count -= range_size(cttype, &cmod->range); idr_remove(&cttype->mod_idr, mod_id); kfree(cmod); } up_write(&cttype->mod_lock); + if (found && cttype->desc.free_section_mem) + cttype->desc.free_section_mem(mod, true); } mutex_unlock(&codetag_lock); - - return unload_ok; } #endif /* CONFIG_MODULES */ diff --git a/lib/crc16_kunit.c b/lib/crc16_kunit.c new file mode 100644 index 000000000000..0918c98a96d2 --- /dev/null +++ b/lib/crc16_kunit.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnits tests for CRC16. 
+ * + * Copyright (C) 2024, LKCAMP + * Author: Vinicius Peixoto <vpeixoto@lkcamp.dev> + * Author: Fabricio Gasperin <fgasperin@lkcamp.dev> + * Author: Enzo Bertoloti <ebertoloti@lkcamp.dev> + */ +#include <kunit/test.h> +#include <linux/crc16.h> +#include <linux/prandom.h> + +#define CRC16_KUNIT_DATA_SIZE 4096 +#define CRC16_KUNIT_TEST_SIZE 100 +#define CRC16_KUNIT_SEED 0x12345678 + +/** + * struct crc16_test - CRC16 test data + * @crc: initial input value to CRC16 + * @start: Start index within the data buffer + * @length: Length of the data + */ +static struct crc16_test { + u16 crc; + u16 start; + u16 length; +} tests[CRC16_KUNIT_TEST_SIZE]; + +u8 data[CRC16_KUNIT_DATA_SIZE]; + + +/* Naive implementation of CRC16 for validation purposes */ +static inline u16 _crc16_naive_byte(u16 crc, u8 data) +{ + u8 i = 0; + + crc ^= (u16) data; + for (i = 0; i < 8; i++) { + if (crc & 0x01) + crc = (crc >> 1) ^ 0xa001; + else + crc = crc >> 1; + } + + return crc; +} + + +static inline u16 _crc16_naive(u16 crc, u8 *buffer, size_t len) +{ + while (len--) + crc = _crc16_naive_byte(crc, *buffer++); + return crc; +} + + +/* Small helper for generating pseudorandom 16-bit data */ +static inline u16 _rand16(void) +{ + static u32 rand = CRC16_KUNIT_SEED; + + rand = next_pseudo_random32(rand); + return rand & 0xFFFF; +} + + +static int crc16_init_test_data(struct kunit_suite *suite) +{ + size_t i; + + /* Fill the data buffer with random bytes */ + for (i = 0; i < CRC16_KUNIT_DATA_SIZE; i++) + data[i] = _rand16() & 0xFF; + + /* Generate random test data while ensuring the random + * start + length values won't overflow the 4096-byte + * buffer (0x7FF * 2 = 0xFFE < 0x1000) + */ + for (size_t i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + tests[i].crc = _rand16(); + tests[i].start = _rand16() & 0x7FF; + tests[i].length = _rand16() & 0x7FF; + } + + return 0; +} + +static void crc16_test_empty(struct kunit *test) +{ + u16 crc; + + /* The result for empty data should be the same as the + * initial crc + */ + crc = crc16(0x00, data, 0); + KUNIT_EXPECT_EQ(test, crc, 0); + crc = crc16(0xFF, data, 0); + KUNIT_EXPECT_EQ(test, crc, 0xFF); +} + +static void crc16_test_correctness(struct kunit *test) +{ + size_t i; + u16 crc, crc_naive; + + for (i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + /* Compare results with the naive crc16 implementation */ + crc = crc16(tests[i].crc, data + tests[i].start, + tests[i].length); + crc_naive = _crc16_naive(tests[i].crc, data + tests[i].start, + tests[i].length); + KUNIT_EXPECT_EQ(test, crc, crc_naive); + } +} + + +static void crc16_test_combine(struct kunit *test) +{ + size_t i, j; + u16 crc, crc_naive; + + /* Make sure that combining two consecutive crc16 calculations + * yields the same result as calculating the crc16 for the whole thing + */ + for (i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + crc_naive = crc16(tests[i].crc, data + tests[i].start, tests[i].length); + for (j = 0; j < tests[i].length; j++) { + crc = crc16(tests[i].crc, data + tests[i].start, j); + crc = crc16(crc, data + tests[i].start + j, tests[i].length - j); + KUNIT_EXPECT_EQ(test, crc, crc_naive); + } + } +} + + +static struct kunit_case crc16_test_cases[] = { + KUNIT_CASE(crc16_test_empty), + KUNIT_CASE(crc16_test_combine), + KUNIT_CASE(crc16_test_correctness), + {}, +}; + +static struct kunit_suite crc16_test_suite = { + .name = "crc16", + .test_cases = crc16_test_cases, + .suite_init = crc16_init_test_data, +}; +kunit_test_suite(crc16_test_suite); + +MODULE_AUTHOR("Fabricio Gasperin <fgasperin@lkcamp.dev>"); 
+MODULE_AUTHOR("Vinicius Peixoto <vpeixoto@lkcamp.dev>"); +MODULE_AUTHOR("Enzo Bertoloti <ebertoloti@lkcamp.dev>"); +MODULE_DESCRIPTION("Unit tests for crc16"); +MODULE_LICENSE("GPL"); diff --git a/lib/crc32.c b/lib/crc32.c index 5649847d0a8d..ff587fee3893 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -205,7 +205,11 @@ EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +EXPORT_SYMBOL(crc32_le_base); + u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); +EXPORT_SYMBOL(__crc32c_le_base); + u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be); /* diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 969baab8c805..01fac1cd05a1 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -58,3 +58,5 @@ libcurve25519-y += curve25519-selftest.o endif obj-$(CONFIG_MPILIB) += mpi/ + +obj-$(CONFIG_CRYPTO_MANAGER_EXTRA_TESTS) += simd.o diff --git a/lib/crypto/mpi/mpi-bit.c b/lib/crypto/mpi/mpi-bit.c index 835a2f0622a0..934d81311360 100644 --- a/lib/crypto/mpi/mpi-bit.c +++ b/lib/crypto/mpi/mpi-bit.c @@ -95,6 +95,7 @@ int mpi_set_bit(MPI a, unsigned int n) a->d[limbno] |= (A_LIMB_1<<bitno); return 0; } +EXPORT_SYMBOL_GPL(mpi_set_bit); /* * Shift A by N bits to the right. diff --git a/lib/crypto/simd.c b/lib/crypto/simd.c new file mode 100644 index 000000000000..9c36cb3bb49c --- /dev/null +++ b/lib/crypto/simd.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SIMD testing utility functions + * + * Copyright 2024 Google LLC + */ + +#include <crypto/internal/simd.h> + +DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test); +EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 5ce473ad499b..7f50c4480a4e 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -7,25 +7,30 @@ #define pr_fmt(fmt) "ODEBUG: " fmt +#include <linux/cpu.h> #include <linux/debugobjects.h> -#include <linux/interrupt.h> +#include <linux/debugfs.h> +#include <linux/hash.h> +#include <linux/kmemleak.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> #include <linux/sched/task_stack.h> #include <linux/seq_file.h> -#include <linux/debugfs.h> #include <linux/slab.h> -#include <linux/hash.h> -#include <linux/kmemleak.h> -#include <linux/cpu.h> +#include <linux/static_key.h> #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) -#define ODEBUG_POOL_SIZE 1024 -#define ODEBUG_POOL_MIN_LEVEL 256 -#define ODEBUG_POOL_PERCPU_SIZE 64 +/* Must be power of two */ #define ODEBUG_BATCH_SIZE 16 +/* Initial values. Must all be a multiple of batch size */ +#define ODEBUG_POOL_SIZE (64 * ODEBUG_BATCH_SIZE) +#define ODEBUG_POOL_MIN_LEVEL (ODEBUG_POOL_SIZE / 4) + +#define ODEBUG_POOL_PERCPU_SIZE (8 * ODEBUG_BATCH_SIZE) + #define ODEBUG_CHUNK_SHIFT PAGE_SHIFT #define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT) #define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1)) @@ -35,7 +40,7 @@ * frequency of 10Hz and about 1024 objects for each freeing operation. * So it is freeing at most 10k debug objects per second. 
*/ -#define ODEBUG_FREE_WORK_MAX 1024 +#define ODEBUG_FREE_WORK_MAX (1024 / ODEBUG_BATCH_SIZE) #define ODEBUG_FREE_WORK_DELAY DIV_ROUND_UP(HZ, 10) struct debug_bucket { @@ -43,16 +48,24 @@ struct debug_bucket { raw_spinlock_t lock; }; -/* - * Debug object percpu free list - * Access is protected by disabling irq - */ -struct debug_percpu_free { - struct hlist_head free_objs; - int obj_free; +struct pool_stats { + unsigned int cur_used; + unsigned int max_used; + unsigned int min_fill; }; -static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool); +struct obj_pool { + struct hlist_head objects; + unsigned int cnt; + unsigned int min_cnt; + unsigned int max_cnt; + struct pool_stats stats; +} ____cacheline_aligned; + + +static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = { + .max_cnt = ODEBUG_POOL_PERCPU_SIZE, +}; static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; @@ -60,37 +73,32 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_RAW_SPINLOCK(pool_lock); -static HLIST_HEAD(obj_pool); -static HLIST_HEAD(obj_to_free); +static struct obj_pool pool_global = { + .min_cnt = ODEBUG_POOL_MIN_LEVEL, + .max_cnt = ODEBUG_POOL_SIZE, + .stats = { + .min_fill = ODEBUG_POOL_SIZE, + }, +}; -/* - * Because of the presence of percpu free pools, obj_pool_free will - * under-count those in the percpu free pools. Similarly, obj_pool_used - * will over-count those in the percpu free pools. Adjustments will be - * made at debug_stats_show(). Both obj_pool_min_free and obj_pool_max_used - * can be off. - */ -static int __data_racy obj_pool_min_free = ODEBUG_POOL_SIZE; -static int __data_racy obj_pool_free = ODEBUG_POOL_SIZE; -static int obj_pool_used; -static int __data_racy obj_pool_max_used; +static struct obj_pool pool_to_free = { + .max_cnt = UINT_MAX, +}; + +static HLIST_HEAD(pool_boot); + +static unsigned long avg_usage; static bool obj_freeing; -/* The number of objs on the global free list */ -static int obj_nr_tofree; static int __data_racy debug_objects_maxchain __read_mostly; static int __data_racy __maybe_unused debug_objects_maxchecked __read_mostly; static int __data_racy debug_objects_fixups __read_mostly; static int __data_racy debug_objects_warnings __read_mostly; -static int __data_racy debug_objects_enabled __read_mostly +static bool __data_racy debug_objects_enabled __read_mostly = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT; -static int debug_objects_pool_size __ro_after_init - = ODEBUG_POOL_SIZE; -static int debug_objects_pool_min_level __ro_after_init - = ODEBUG_POOL_MIN_LEVEL; -static const struct debug_obj_descr *descr_test __read_mostly; -static struct kmem_cache *obj_cache __ro_after_init; +static const struct debug_obj_descr *descr_test __read_mostly; +static struct kmem_cache *obj_cache __ro_after_init; /* * Track numbers of kmem_cache_alloc()/free() calls done. 
@@ -101,19 +109,20 @@ static int __data_racy debug_objects_freed; static void free_obj_work(struct work_struct *work); static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work); +static DEFINE_STATIC_KEY_FALSE(obj_cache_enabled); + static int __init enable_object_debug(char *str) { - debug_objects_enabled = 1; + debug_objects_enabled = true; return 0; } +early_param("debug_objects", enable_object_debug); static int __init disable_object_debug(char *str) { - debug_objects_enabled = 0; + debug_objects_enabled = false; return 0; } - -early_param("debug_objects", enable_object_debug); early_param("no_debug_objects", disable_object_debug); static const char *obj_states[ODEBUG_STATE_MAX] = { @@ -125,61 +134,280 @@ static const char *obj_states[ODEBUG_STATE_MAX] = { [ODEBUG_STATE_NOTAVAILABLE] = "not available", }; -static void fill_pool(void) +static __always_inline unsigned int pool_count(struct obj_pool *pool) +{ + return READ_ONCE(pool->cnt); +} + +static __always_inline bool pool_should_refill(struct obj_pool *pool) +{ + return pool_count(pool) < pool->min_cnt; +} + +static __always_inline bool pool_must_refill(struct obj_pool *pool) +{ + return pool_count(pool) < pool->min_cnt / 2; +} + +static bool pool_move_batch(struct obj_pool *dst, struct obj_pool *src) { - gfp_t gfp = __GFP_HIGH | __GFP_NOWARN; + struct hlist_node *last, *next_batch, *first_batch; struct debug_obj *obj; - unsigned long flags; - if (likely(READ_ONCE(obj_pool_free) >= debug_objects_pool_min_level)) + if (dst->cnt >= dst->max_cnt || !src->cnt) + return false; + + first_batch = src->objects.first; + obj = hlist_entry(first_batch, typeof(*obj), node); + last = obj->batch_last; + next_batch = last->next; + + /* Move the next batch to the front of the source pool */ + src->objects.first = next_batch; + if (next_batch) + next_batch->pprev = &src->objects.first; + + /* Add the extracted batch to the destination pool */ + last->next = dst->objects.first; + if (last->next) + last->next->pprev = &last->next; + first_batch->pprev = &dst->objects.first; + dst->objects.first = first_batch; + + WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE); + WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE); + return true; +} + +static bool pool_push_batch(struct obj_pool *dst, struct hlist_head *head) +{ + struct hlist_node *last; + struct debug_obj *obj; + + if (dst->cnt >= dst->max_cnt) + return false; + + obj = hlist_entry(head->first, typeof(*obj), node); + last = obj->batch_last; + + hlist_splice_init(head, last, &dst->objects); + WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE); + return true; +} + +static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src) +{ + struct hlist_node *last, *next; + struct debug_obj *obj; + + if (!src->cnt) + return false; + + /* Move the complete list to the head */ + hlist_move_list(&src->objects, head); + + obj = hlist_entry(head->first, typeof(*obj), node); + last = obj->batch_last; + next = last->next; + /* Disconnect the batch from the list */ + last->next = NULL; + + /* Move the node after last back to the source pool. 
*/ + src->objects.first = next; + if (next) + next->pprev = &src->objects.first; + + WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE); + return true; +} + +static struct debug_obj *__alloc_object(struct hlist_head *list) +{ + struct debug_obj *obj; + + if (unlikely(!list->first)) + return NULL; + + obj = hlist_entry(list->first, typeof(*obj), node); + hlist_del(&obj->node); + return obj; +} + +static void pcpu_refill_stats(void) +{ + struct pool_stats *stats = &pool_global.stats; + + WRITE_ONCE(stats->cur_used, stats->cur_used + ODEBUG_BATCH_SIZE); + + if (stats->cur_used > stats->max_used) + stats->max_used = stats->cur_used; + + if (pool_global.cnt < stats->min_fill) + stats->min_fill = pool_global.cnt; +} + +static struct debug_obj *pcpu_alloc(void) +{ + struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu); + + lockdep_assert_irqs_disabled(); + + for (;;) { + struct debug_obj *obj = __alloc_object(&pcp->objects); + + if (likely(obj)) { + pcp->cnt--; + /* + * If this emptied a batch try to refill from the + * free pool. Don't do that if this was the top-most + * batch as pcpu_free() expects the per CPU pool + * to be less than ODEBUG_POOL_PERCPU_SIZE. + */ + if (unlikely(pcp->cnt < (ODEBUG_POOL_PERCPU_SIZE - ODEBUG_BATCH_SIZE) && + !(pcp->cnt % ODEBUG_BATCH_SIZE))) { + /* + * Don't try to allocate from the regular pool here + * to not exhaust it prematurely. + */ + if (pool_count(&pool_to_free)) { + guard(raw_spinlock)(&pool_lock); + pool_move_batch(pcp, &pool_to_free); + pcpu_refill_stats(); + } + } + return obj; + } + + guard(raw_spinlock)(&pool_lock); + if (!pool_move_batch(pcp, &pool_to_free)) { + if (!pool_move_batch(pcp, &pool_global)) + return NULL; + } + pcpu_refill_stats(); + } +} + +static void pcpu_free(struct debug_obj *obj) +{ + struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu); + struct debug_obj *first; + + lockdep_assert_irqs_disabled(); + + if (!(pcp->cnt % ODEBUG_BATCH_SIZE)) { + obj->batch_last = &obj->node; + } else { + first = hlist_entry(pcp->objects.first, typeof(*first), node); + obj->batch_last = first->batch_last; + } + hlist_add_head(&obj->node, &pcp->objects); + pcp->cnt++; + + /* Pool full ? */ + if (pcp->cnt < ODEBUG_POOL_PERCPU_SIZE) return; + /* Remove a batch from the per CPU pool */ + guard(raw_spinlock)(&pool_lock); + /* Try to fit the batch into the pool_global first */ + if (!pool_move_batch(&pool_global, pcp)) + pool_move_batch(&pool_to_free, pcp); + WRITE_ONCE(pool_global.stats.cur_used, pool_global.stats.cur_used - ODEBUG_BATCH_SIZE); +} + +static void free_object_list(struct hlist_head *head) +{ + struct hlist_node *tmp; + struct debug_obj *obj; + int cnt = 0; + + hlist_for_each_entry_safe(obj, tmp, head, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + cnt++; + } + debug_objects_freed += cnt; +} + +static void fill_pool_from_freelist(void) +{ + static unsigned long state; + /* * Reuse objs from the global obj_to_free list; they will be * reinitialized when allocating. - * - * obj_nr_tofree is checked locklessly; the READ_ONCE() pairs with - * the WRITE_ONCE() in pool_lock critical sections. */ - if (READ_ONCE(obj_nr_tofree)) { - raw_spin_lock_irqsave(&pool_lock, flags); - /* - * Recheck with the lock held as the worker thread might have - * won the race and freed the global free list already. 
- */ - while (obj_nr_tofree && (obj_pool_free < debug_objects_pool_min_level)) { - obj = hlist_entry(obj_to_free.first, typeof(*obj), node); - hlist_del(&obj->node); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); - hlist_add_head(&obj->node, &obj_pool); - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); + if (!pool_count(&pool_to_free)) + return; + + /* + * Prevent the context from being scheduled or interrupted after + * setting the state flag; + */ + guard(irqsave)(); + + /* + * Avoid lock contention on &pool_lock and avoid making the cache + * line exclusive by testing the bit before attempting to set it. + */ + if (test_bit(0, &state) || test_and_set_bit(0, &state)) + return; + + /* Avoid taking the lock when there is no work to do */ + while (pool_should_refill(&pool_global) && pool_count(&pool_to_free)) { + guard(raw_spinlock)(&pool_lock); + /* Move a batch if possible */ + pool_move_batch(&pool_global, &pool_to_free); + } + clear_bit(0, &state); +} + +static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp) +{ + struct hlist_node *last = NULL; + struct debug_obj *obj; + + for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) { + obj = kmem_cache_zalloc(cache, gfp); + if (!obj) { + free_object_list(head); + return false; } - raw_spin_unlock_irqrestore(&pool_lock, flags); + debug_objects_allocated++; + + if (!last) + last = &obj->node; + obj->batch_last = last; + + hlist_add_head(&obj->node, head); } + return true; +} + +static void fill_pool(void) +{ + static atomic_t cpus_allocating; - if (unlikely(!obj_cache)) + /* + * Avoid allocation and lock contention when: + * - One other CPU is already allocating + * - the global pool has not reached the critical level yet + */ + if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating)) return; - while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) { - struct debug_obj *new[ODEBUG_BATCH_SIZE]; - int cnt; + atomic_inc(&cpus_allocating); + while (pool_should_refill(&pool_global)) { + HLIST_HEAD(head); - for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) { - new[cnt] = kmem_cache_zalloc(obj_cache, gfp); - if (!new[cnt]) - break; - } - if (!cnt) - return; + if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN)) + break; - raw_spin_lock_irqsave(&pool_lock, flags); - while (cnt) { - hlist_add_head(&new[--cnt]->node, &obj_pool); - debug_objects_allocated++; - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - } - raw_spin_unlock_irqrestore(&pool_lock, flags); + guard(raw_spinlock_irqsave)(&pool_lock); + if (!pool_push_batch(&pool_global, &head)) + pool_push_batch(&pool_to_free, &head); } + atomic_dec(&cpus_allocating); } /* @@ -201,72 +429,37 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) return NULL; } -/* - * Allocate a new object from the hlist - */ -static struct debug_obj *__alloc_object(struct hlist_head *list) +static void calc_usage(void) { - struct debug_obj *obj = NULL; + static DEFINE_RAW_SPINLOCK(avg_lock); + static unsigned long avg_period; + unsigned long cur, now = jiffies; - if (list->first) { - obj = hlist_entry(list->first, typeof(*obj), node); - hlist_del(&obj->node); - } + if (!time_after_eq(now, READ_ONCE(avg_period))) + return; - return obj; + if (!raw_spin_trylock(&avg_lock)) + return; + + WRITE_ONCE(avg_period, now + msecs_to_jiffies(10)); + cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX; + WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur)); + raw_spin_unlock(&avg_lock); } -static struct debug_obj * 
-alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr) +static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b, + const struct debug_obj_descr *descr) { - struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool); struct debug_obj *obj; - if (likely(obj_cache)) { - obj = __alloc_object(&percpu_pool->free_objs); - if (obj) { - percpu_pool->obj_free--; - goto init_obj; - } - } - - raw_spin_lock(&pool_lock); - obj = __alloc_object(&obj_pool); - if (obj) { - obj_pool_used++; - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - - /* - * Looking ahead, allocate one batch of debug objects and - * put them into the percpu free pool. - */ - if (likely(obj_cache)) { - int i; - - for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { - struct debug_obj *obj2; - - obj2 = __alloc_object(&obj_pool); - if (!obj2) - break; - hlist_add_head(&obj2->node, - &percpu_pool->free_objs); - percpu_pool->obj_free++; - obj_pool_used++; - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - } - } - - if (obj_pool_used > obj_pool_max_used) - obj_pool_max_used = obj_pool_used; + calc_usage(); - if (obj_pool_free < obj_pool_min_free) - obj_pool_min_free = obj_pool_free; - } - raw_spin_unlock(&pool_lock); + if (static_branch_likely(&obj_cache_enabled)) + obj = pcpu_alloc(); + else + obj = __alloc_object(&pool_boot); -init_obj: - if (obj) { + if (likely(obj)) { obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; @@ -276,142 +469,58 @@ init_obj: return obj; } -/* - * workqueue function to free objects. - * - * To reduce contention on the global pool_lock, the actual freeing of - * debug objects will be delayed if the pool_lock is busy. - */ +/* workqueue function to free objects. */ static void free_obj_work(struct work_struct *work) { - struct hlist_node *tmp; - struct debug_obj *obj; - unsigned long flags; - HLIST_HEAD(tofree); + static unsigned long last_use_avg; + unsigned long cur_used, last_used, delta; + unsigned int max_free = 0; WRITE_ONCE(obj_freeing, false); - if (!raw_spin_trylock_irqsave(&pool_lock, flags)) - return; - if (obj_pool_free >= debug_objects_pool_size) - goto free_objs; + /* Rate limit freeing based on current use average */ + cur_used = READ_ONCE(avg_usage); + last_used = last_use_avg; + last_use_avg = cur_used; - /* - * The objs on the pool list might be allocated before the work is - * run, so recheck if pool list it full or not, if not fill pool - * list from the global free list. As it is likely that a workload - * may be gearing up to use more and more objects, don't free any - * of them until the next round. - */ - while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) { - obj = hlist_entry(obj_to_free.first, typeof(*obj), node); - hlist_del(&obj->node); - hlist_add_head(&obj->node, &obj_pool); - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); - } - raw_spin_unlock_irqrestore(&pool_lock, flags); - return; + if (!pool_count(&pool_to_free)) + return; -free_objs: - /* - * Pool list is already full and there are still objs on the free - * list. Move remaining free objs to a temporary list to free the - * memory outside the pool_lock held region. 
- */ - if (obj_nr_tofree) { - hlist_move_list(&obj_to_free, &tofree); - debug_objects_freed += obj_nr_tofree; - WRITE_ONCE(obj_nr_tofree, 0); + if (cur_used <= last_used) { + delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX; + max_free = min(delta, ODEBUG_FREE_WORK_MAX); } - raw_spin_unlock_irqrestore(&pool_lock, flags); - hlist_for_each_entry_safe(obj, tmp, &tofree, node) { - hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) { + HLIST_HEAD(tofree); + + /* Acquire and drop the lock for each batch */ + scoped_guard(raw_spinlock_irqsave, &pool_lock) { + if (!pool_to_free.cnt) + return; + + /* Refill the global pool if possible */ + if (pool_move_batch(&pool_global, &pool_to_free)) { + /* Don't free as there seems to be demand */ + max_free = 0; + } else if (max_free) { + pool_pop_batch(&tofree, &pool_to_free); + max_free--; + } else { + return; + } + } + free_object_list(&tofree); } } static void __free_object(struct debug_obj *obj) { - struct debug_obj *objs[ODEBUG_BATCH_SIZE]; - struct debug_percpu_free *percpu_pool; - int lookahead_count = 0; - unsigned long flags; - bool work; - - local_irq_save(flags); - if (!obj_cache) - goto free_to_obj_pool; - - /* - * Try to free it into the percpu pool first. - */ - percpu_pool = this_cpu_ptr(&percpu_obj_pool); - if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) { - hlist_add_head(&obj->node, &percpu_pool->free_objs); - percpu_pool->obj_free++; - local_irq_restore(flags); - return; - } - - /* - * As the percpu pool is full, look ahead and pull out a batch - * of objects from the percpu pool and free them as well. - */ - for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) { - objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs); - if (!objs[lookahead_count]) - break; - percpu_pool->obj_free--; - } - -free_to_obj_pool: - raw_spin_lock(&pool_lock); - work = (obj_pool_free > debug_objects_pool_size) && obj_cache && - (obj_nr_tofree < ODEBUG_FREE_WORK_MAX); - obj_pool_used--; - - if (work) { - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); - hlist_add_head(&obj->node, &obj_to_free); - if (lookahead_count) { - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count); - obj_pool_used -= lookahead_count; - while (lookahead_count) { - hlist_add_head(&objs[--lookahead_count]->node, - &obj_to_free); - } - } - - if ((obj_pool_free > debug_objects_pool_size) && - (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) { - int i; - - /* - * Free one more batch of objects from obj_pool. 
- */ - for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { - obj = __alloc_object(&obj_pool); - hlist_add_head(&obj->node, &obj_to_free); - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); - } - } - } else { - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - hlist_add_head(&obj->node, &obj_pool); - if (lookahead_count) { - WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count); - obj_pool_used -= lookahead_count; - while (lookahead_count) { - hlist_add_head(&objs[--lookahead_count]->node, - &obj_pool); - } - } - } - raw_spin_unlock(&pool_lock); - local_irq_restore(flags); + guard(irqsave)(); + if (static_branch_likely(&obj_cache_enabled)) + pcpu_free(obj); + else + hlist_add_head(&obj->node, &pool_boot); } /* @@ -421,63 +530,52 @@ free_to_obj_pool: static void free_object(struct debug_obj *obj) { __free_object(obj); - if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { + if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } } -#ifdef CONFIG_HOTPLUG_CPU -static int object_cpu_offline(unsigned int cpu) +static void put_objects(struct hlist_head *list) { - struct debug_percpu_free *percpu_pool; struct hlist_node *tmp; struct debug_obj *obj; - unsigned long flags; - /* Remote access is safe as the CPU is dead already */ - percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu); - hlist_for_each_entry_safe(obj, tmp, &percpu_pool->free_objs, node) { + /* + * Using free_object() puts the objects into reuse or schedules + * them for freeing and it get's all the accounting correct. + */ + hlist_for_each_entry_safe(obj, tmp, list, node) { hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + free_object(obj); } +} - raw_spin_lock_irqsave(&pool_lock, flags); - obj_pool_used -= percpu_pool->obj_free; - debug_objects_freed += percpu_pool->obj_free; - raw_spin_unlock_irqrestore(&pool_lock, flags); - - percpu_pool->obj_free = 0; +#ifdef CONFIG_HOTPLUG_CPU +static int object_cpu_offline(unsigned int cpu) +{ + /* Remote access is safe as the CPU is dead already */ + struct obj_pool *pcp = per_cpu_ptr(&pool_pcpu, cpu); + put_objects(&pcp->objects); + pcp->cnt = 0; return 0; } #endif -/* - * We run out of memory. That means we probably have tons of objects - * allocated. - */ +/* Out of memory. Free all objects from hash */ static void debug_objects_oom(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *tmp; HLIST_HEAD(freelist); - struct debug_obj *obj; - unsigned long flags; - int i; pr_warn("Out of memory. ODEBUG disabled\n"); - for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { - raw_spin_lock_irqsave(&db->lock, flags); - hlist_move_list(&db->list, &freelist); - raw_spin_unlock_irqrestore(&db->lock, flags); + for (int i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { + scoped_guard(raw_spinlock_irqsave, &db->lock) + hlist_move_list(&db->list, &freelist); - /* Now free them */ - hlist_for_each_entry_safe(obj, tmp, &freelist, node) { - hlist_del(&obj->node); - free_object(obj); - } + put_objects(&freelist); } } @@ -592,12 +690,24 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket } /* Out of memory. 
Do the cleanup outside of the locked region */ - debug_objects_enabled = 0; + debug_objects_enabled = false; return NULL; } static void debug_objects_fill_pool(void) { + if (!static_branch_likely(&obj_cache_enabled)) + return; + + if (likely(!pool_should_refill(&pool_global))) + return; + + /* Try reusing objects from obj_to_free_list */ + fill_pool_from_freelist(); + + if (likely(!pool_should_refill(&pool_global))) + return; + /* * On RT enabled kernels the pool refill must happen in preemptible * context -- for !RT kernels we rely on the fact that spinlock_t and @@ -1007,7 +1117,7 @@ repeat: debug_objects_maxchecked = objs_checked; /* Schedule work to actually kmem_cache_free() objects */ - if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { + if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } @@ -1024,23 +1134,33 @@ void debug_check_no_obj_freed(const void *address, unsigned long size) static int debug_stats_show(struct seq_file *m, void *v) { - int cpu, obj_percpu_free = 0; + unsigned int cpu, pool_used, pcp_free = 0; + /* + * pool_global.stats.cur_used is the number of batches currently + * handed out to per CPU pools. Convert it to number of objects + * and subtract the number of free objects in the per CPU pools. + * As this is lockless the number is an estimate. + */ for_each_possible_cpu(cpu) - obj_percpu_free += per_cpu(percpu_obj_pool.obj_free, cpu); - - seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); - seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); - seq_printf(m, "warnings :%d\n", debug_objects_warnings); - seq_printf(m, "fixups :%d\n", debug_objects_fixups); - seq_printf(m, "pool_free :%d\n", READ_ONCE(obj_pool_free) + obj_percpu_free); - seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free); - seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); - seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free); - seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); - seq_printf(m, "on_free_list :%d\n", READ_ONCE(obj_nr_tofree)); - seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); - seq_printf(m, "objs_freed :%d\n", debug_objects_freed); + pcp_free += per_cpu(pool_pcpu.cnt, cpu); + + pool_used = READ_ONCE(pool_global.stats.cur_used); + pcp_free = min(pool_used, pcp_free); + pool_used -= pcp_free; + + seq_printf(m, "max_chain : %d\n", debug_objects_maxchain); + seq_printf(m, "max_checked : %d\n", debug_objects_maxchecked); + seq_printf(m, "warnings : %d\n", debug_objects_warnings); + seq_printf(m, "fixups : %d\n", debug_objects_fixups); + seq_printf(m, "pool_free : %u\n", pool_count(&pool_global) + pcp_free); + seq_printf(m, "pool_pcp_free : %u\n", pcp_free); + seq_printf(m, "pool_min_free : %u\n", data_race(pool_global.stats.min_fill)); + seq_printf(m, "pool_used : %u\n", pool_used); + seq_printf(m, "pool_max_used : %u\n", data_race(pool_global.stats.max_used)); + seq_printf(m, "on_free_list : %u\n", pool_count(&pool_to_free)); + seq_printf(m, "objs_allocated: %d\n", debug_objects_allocated); + seq_printf(m, "objs_freed : %d\n", debug_objects_freed); return 0; } DEFINE_SHOW_ATTRIBUTE(debug_stats); @@ -1194,7 +1314,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) out: raw_spin_unlock_irqrestore(&db->lock, flags); if (res) - debug_objects_enabled = 0; + debug_objects_enabled = false; return res; } @@ -1209,7 +1329,7 @@ static __initconst const struct debug_obj_descr descr_type_test = 
{ static __initdata struct self_test obj = { .static_init = 0 }; -static void __init debug_objects_selftest(void) +static bool __init debug_objects_selftest(void) { int fixups, oldfixups, warnings, oldwarnings; unsigned long flags; @@ -1278,9 +1398,10 @@ out: descr_test = NULL; local_irq_restore(flags); + return debug_objects_enabled; } #else -static inline void debug_objects_selftest(void) { } +static inline bool debug_objects_selftest(void) { return true; } #endif /* @@ -1295,65 +1416,54 @@ void __init debug_objects_early_init(void) for (i = 0; i < ODEBUG_HASH_SIZE; i++) raw_spin_lock_init(&obj_hash[i].lock); + /* Keep early boot simple and add everything to the boot list */ for (i = 0; i < ODEBUG_POOL_SIZE; i++) - hlist_add_head(&obj_static_pool[i].node, &obj_pool); + hlist_add_head(&obj_static_pool[i].node, &pool_boot); } /* - * Convert the statically allocated objects to dynamic ones: + * Convert the statically allocated objects to dynamic ones. + * debug_objects_mem_init() is called early so only one CPU is up and + * interrupts are disabled, which means it is safe to replace the active + * object references. */ -static int __init debug_objects_replace_static_objects(void) +static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache) { struct debug_bucket *db = obj_hash; struct hlist_node *tmp; - struct debug_obj *obj, *new; + struct debug_obj *obj; HLIST_HEAD(objects); - int i, cnt = 0; + int i; - for (i = 0; i < ODEBUG_POOL_SIZE; i++) { - obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); - if (!obj) + for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) { + if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL)) goto free; - hlist_add_head(&obj->node, &objects); + pool_push_batch(&pool_global, &objects); } - debug_objects_allocated += i; - - /* - * debug_objects_mem_init() is now called early that only one CPU is up - * and interrupts have been disabled, so it is safe to replace the - * active object references. - */ - - /* Remove the statically allocated objects from the pool */ - hlist_for_each_entry_safe(obj, tmp, &obj_pool, node) - hlist_del(&obj->node); - /* Move the allocated objects to the pool */ - hlist_move_list(&objects, &obj_pool); + /* Disconnect the boot pool. */ + pool_boot.first = NULL; /* Replace the active object references */ for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { hlist_move_list(&db->list, &objects); hlist_for_each_entry(obj, &objects, node) { - new = hlist_entry(obj_pool.first, typeof(*obj), node); - hlist_del(&new->node); + struct debug_obj *new = pcpu_alloc(); + /* copy object data */ *new = *obj; hlist_add_head(&new->node, &db->list); - cnt++; } } - - pr_debug("%d of %d active objects replaced\n", - cnt, obj_pool_used); - return 0; + return true; free: - hlist_for_each_entry_safe(obj, tmp, &objects, node) { + /* Can't use free_object_list() as the cache is not populated yet */ + hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) { hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + kmem_cache_free(cache, obj); } - return -ENOMEM; + return false; } /* @@ -1364,43 +1474,40 @@ free: */ void __init debug_objects_mem_init(void) { - int cpu, extras; + struct kmem_cache *cache; + int extras; if (!debug_objects_enabled) return; - /* - * Initialize the percpu object pools - * - * Initialization is not strictly necessary, but was done for - * completeness. 
- */ - for_each_possible_cpu(cpu) - INIT_HLIST_HEAD(&per_cpu(percpu_obj_pool.free_objs, cpu)); + if (!debug_objects_selftest()) + return; - obj_cache = kmem_cache_create("debug_objects_cache", - sizeof (struct debug_obj), 0, - SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, - NULL); + cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0, + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, NULL); - if (!obj_cache || debug_objects_replace_static_objects()) { - debug_objects_enabled = 0; - kmem_cache_destroy(obj_cache); - pr_warn("out of memory.\n"); + if (!cache || !debug_objects_replace_static_objects(cache)) { + debug_objects_enabled = false; + pr_warn("Out of memory.\n"); return; - } else - debug_objects_selftest(); - -#ifdef CONFIG_HOTPLUG_CPU - cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, - object_cpu_offline); -#endif + } /* - * Increase the thresholds for allocating and freeing objects - * according to the number of possible CPUs available in the system. + * Adjust the thresholds for allocating and freeing objects + * according to the number of possible CPUs available in the + * system. */ extras = num_possible_cpus() * ODEBUG_BATCH_SIZE; - debug_objects_pool_size += extras; - debug_objects_pool_min_level += extras; + pool_global.max_cnt += extras; + pool_global.min_cnt += extras; + + /* Everything worked. Expose the cache */ + obj_cache = cache; + static_branch_enable(&obj_cache_enabled); + +#ifdef CONFIG_HOTPLUG_CPU + cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, + object_cpu_offline); +#endif + return; } diff --git a/lib/dim/dim.c b/lib/dim/dim.c index 83b65ac74d73..97c3d084ebf0 100644 --- a/lib/dim/dim.c +++ b/lib/dim/dim.c @@ -54,7 +54,8 @@ void dim_park_tired(struct dim *dim) } EXPORT_SYMBOL(dim_park_tired); -bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(const struct dim_sample *start, + const struct dim_sample *end, struct dim_stats *curr_stats) { /* u32 holds up to 71 minutes, should be enough */ diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index d7e7028e9b19..d6aa09a979b3 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -347,7 +347,7 @@ static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) return dim->profile_ix != prev_ix; } -void net_dim(struct dim *dim, struct dim_sample end_sample) +void net_dim(struct dim *dim, const struct dim_sample *end_sample) { struct dim_stats curr_stats; u16 nevents; @@ -355,11 +355,11 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) switch (dim->state) { case DIM_MEASURE_IN_PROGRESS: nevents = BIT_GAP(BITS_PER_TYPE(u16), - end_sample.event_ctr, + end_sample->event_ctr, dim->start_sample.event_ctr); if (nevents < DIM_NEVENTS) break; - if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats)) + if (!dim_calc_stats(&dim->start_sample, end_sample, &curr_stats)) break; if (net_dim_decision(&curr_stats, dim)) { dim->state = DIM_APPLY_NEW_PROFILE; @@ -368,8 +368,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) } fallthrough; case DIM_START_MEASURE: - dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, - end_sample.byte_ctr, &dim->start_sample); + dim_update_sample(end_sample->event_ctr, end_sample->pkt_ctr, + end_sample->byte_ctr, &dim->start_sample); dim->state = DIM_MEASURE_IN_PROGRESS; break; case DIM_APPLY_NEW_PROFILE: diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index e49deddd3de9..c1b7638a594a 100644 --- a/lib/dynamic_queue_limits.c +++ 
b/lib/dynamic_queue_limits.c @@ -179,7 +179,7 @@ void dql_completed(struct dql *dql, unsigned int count) dql->adj_limit = limit + completed; dql->prev_ovlimit = ovlimit; - dql->prev_last_obj_cnt = dql->last_obj_cnt; + dql->prev_last_obj_cnt = READ_ONCE(dql->last_obj_cnt); dql->num_completed = completed; dql->prev_num_queued = num_queued; diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index f37f4d44faa9..837064b83a6c 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -2,7 +2,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/interval_tree.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <asm/timex.h> diff --git a/lib/iomem_copy.c b/lib/iomem_copy.c new file mode 100644 index 000000000000..dec7eaea60e0 --- /dev/null +++ b/lib/iomem_copy.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Kalray, Inc. All Rights Reserved. + */ + +#include <linux/align.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/types.h> +#include <linux/unaligned.h> + +#ifndef memset_io +/** + * memset_io() - Set a range of I/O memory to a constant value + * @addr: The beginning of the I/O-memory range to set + * @val: The value to set the memory to + * @count: The number of bytes to set + * + * Set a range of I/O memory to a given value. + */ +void memset_io(volatile void __iomem *addr, int val, size_t count) +{ + long qc = (u8)val; + + qc *= ~0UL / 0xff; + + while (count && !IS_ALIGNED((long)addr, sizeof(long))) { + __raw_writeb(val, addr); + addr++; + count--; + } + + while (count >= sizeof(long)) { +#ifdef CONFIG_64BIT + __raw_writeq(qc, addr); +#else + __raw_writel(qc, addr); +#endif + + addr += sizeof(long); + count -= sizeof(long); + } + + while (count) { + __raw_writeb(val, addr); + addr++; + count--; + } +} +EXPORT_SYMBOL(memset_io); +#endif + +#ifndef memcpy_fromio +/** + * memcpy_fromio() - Copy a block of data from I/O memory + * @dst: The (RAM) destination for the copy + * @src: The (I/O memory) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data from I/O memory. + */ +void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) +{ + while (count && !IS_ALIGNED((long)src, sizeof(long))) { + *(u8 *)dst = __raw_readb(src); + src++; + dst++; + count--; + } + + while (count >= sizeof(long)) { +#ifdef CONFIG_64BIT + long val = __raw_readq(src); +#else + long val = __raw_readl(src); +#endif + put_unaligned(val, (long *)dst); + + + src += sizeof(long); + dst += sizeof(long); + count -= sizeof(long); + } + + while (count) { + *(u8 *)dst = __raw_readb(src); + src++; + dst++; + count--; + } +} +EXPORT_SYMBOL(memcpy_fromio); +#endif + +#ifndef memcpy_toio +/** + * memcpy_toio() -Copy a block of data into I/O memory + * @dst: The (I/O memory) destination for the copy + * @src: The (RAM) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data to I/O memory. 
+ */ +void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) +{ + while (count && !IS_ALIGNED((long)dst, sizeof(long))) { + __raw_writeb(*(u8 *)src, dst); + src++; + dst++; + count--; + } + + while (count >= sizeof(long)) { + long val = get_unaligned((long *)src); +#ifdef CONFIG_64BIT + __raw_writeq(val, dst); +#else + __raw_writel(val, dst); +#endif + + src += sizeof(long); + dst += sizeof(long); + count -= sizeof(long); + } + + while (count) { + __raw_writeb(*(u8 *)src, dst); + src++; + dst++; + count--; + } +} +EXPORT_SYMBOL(memcpy_toio); +#endif + + diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 908e75a28d90..9ec806f989f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1682,8 +1682,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i, } /* - * Extract a list of contiguous pages from an ITER_BVEC iterator. This does - * not get references on the pages, nor does it get a pin on them. + * Extract a list of virtually contiguous pages from an ITER_BVEC iterator. + * This does not get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, @@ -1691,35 +1691,59 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, iov_iter_extraction_t extraction_flags, size_t *offset0) { - struct page **p, *page; - size_t skip = i->iov_offset, offset, size; - int k; + size_t skip = i->iov_offset, size = 0; + struct bvec_iter bi; + int k = 0; - for (;;) { - if (i->nr_segs == 0) - return 0; - size = min(maxsize, i->bvec->bv_len - skip); - if (size) - break; + if (i->nr_segs == 0) + return 0; + + if (i->iov_offset == i->bvec->bv_len) { i->iov_offset = 0; i->nr_segs--; i->bvec++; skip = 0; } + bi.bi_idx = 0; + bi.bi_size = maxsize; + bi.bi_bvec_done = skip; + + maxpages = want_pages_array(pages, maxsize, skip, maxpages); + + while (bi.bi_size && bi.bi_idx < i->nr_segs) { + struct bio_vec bv = bvec_iter_bvec(i->bvec, bi); + + /* + * The iov_iter_extract_pages interface only allows an offset + * into the first page. Break out of the loop if we see an + * offset into subsequent pages, the caller will have to call + * iov_iter_extract_pages again for the reminder. + */ + if (k) { + if (bv.bv_offset) + break; + } else { + *offset0 = bv.bv_offset; + } - skip += i->bvec->bv_offset; - page = i->bvec->bv_page + skip / PAGE_SIZE; - offset = skip % PAGE_SIZE; - *offset0 = offset; + (*pages)[k++] = bv.bv_page; + size += bv.bv_len; - maxpages = want_pages_array(pages, size, offset, maxpages); - if (!maxpages) - return -ENOMEM; - p = *pages; - for (k = 0; k < maxpages; k++) - p[k] = page + k; + if (k >= maxpages) + break; + + /* + * We are done when the end of the bvec doesn't align to a page + * boundary as that would create a hole in the returned space. + * The caller will handle this with another call to + * iov_iter_extract_pages. + */ + if (bv.bv_offset + bv.bv_len != PAGE_SIZE) + break; + + bvec_iter_advance_single(i->bvec, &bi, bv.bv_len); + } - size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); iov_iter_advance(i, size); return size; } diff --git a/lib/kunit/debugfs.c b/lib/kunit/debugfs.c index d548750a325a..af71911f4a07 100644 --- a/lib/kunit/debugfs.c +++ b/lib/kunit/debugfs.c @@ -181,7 +181,7 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) * successfully. 
*/ stream = alloc_string_stream(GFP_KERNEL); - if (IS_ERR_OR_NULL(stream)) + if (IS_ERR(stream)) return; string_stream_set_append_newlines(stream, true); @@ -189,7 +189,7 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) kunit_suite_for_each_test_case(suite, test_case) { stream = alloc_string_stream(GFP_KERNEL); - if (IS_ERR_OR_NULL(stream)) + if (IS_ERR(stream)) goto err; string_stream_set_append_newlines(stream, true); @@ -212,8 +212,11 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) err: string_stream_destroy(suite->log); - kunit_suite_for_each_test_case(suite, test_case) + suite->log = NULL; + kunit_suite_for_each_test_case(suite, test_case) { string_stream_destroy(test_case->log); + test_case->log = NULL; + } } void kunit_debugfs_destroy_suite(struct kunit_suite *suite) diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 37e02be1e710..d9c781c859fd 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -805,6 +805,8 @@ static void kunit_device_driver_test(struct kunit *test) struct device *test_device; struct driver_test_state *test_state = kunit_kzalloc(test, sizeof(*test_state), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_state); + test->priv = test_state; test_driver = kunit_driver_create(test, "my_driver"); diff --git a/lib/kunit/string-stream-test.c b/lib/kunit/string-stream-test.c index 7511442ea98f..7734e33156f9 100644 --- a/lib/kunit/string-stream-test.c +++ b/lib/kunit/string-stream-test.c @@ -9,6 +9,7 @@ #include <kunit/static_stub.h> #include <kunit/test.h> #include <linux/ktime.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/timekeeping.h> diff --git a/lib/list-test.c b/lib/list-test.c index e207c4c98d70..9135cdc1bb39 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -412,6 +412,8 @@ static void list_test_list_cut_position(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_cut_before(struct kunit *test) @@ -440,6 +442,8 @@ static void list_test_list_cut_before(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_splice(struct kunit *test) diff --git a/lib/list_sort.c b/lib/list_sort.c index 0fb59e92ca2d..8d3f623536fe 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/bug.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/string.h> #include <linux/list_sort.h> #include <linux/list.h> diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 6f6a5fc85b42..6e0c019f71b6 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -2710,6 +2710,43 @@ static void local_lock_3B(void) } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem) +{ + return rwsem->dep_map.name; +} +#else +static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem) +{ + return NULL; +} +#endif + +static void test_lockdep_set_subclass_name(void) +{ + const char *name_before = rw_semaphore_lockdep_name(&rwsem_X1); + const char *name_after; + + lockdep_set_subclass(&rwsem_X1, 1); + name_after = rw_semaphore_lockdep_name(&rwsem_X1); + DEBUG_LOCKS_WARN_ON(name_before != name_after); +} + +/* + * lockdep_set_subclass() should reuse the existing lock class name instead + * of creating a new one. 
+ */ +static void lockdep_set_subclass_name_test(void) +{ + printk(" --------------------------------------------------------------------------\n"); + printk(" | lockdep_set_subclass() name test|\n"); + printk(" -----------------------------------\n"); + + print_testname("compare name before and after"); + dotest(test_lockdep_set_subclass_name, SUCCESS, LOCKTYPE_RWSEM); + pr_cont("\n"); +} + static void local_lock_tests(void) { printk(" --------------------------------------------------------------------------\n"); @@ -2920,6 +2957,8 @@ void locking_selftest(void) dotest(hardirq_deadlock_softirq_not_deadlock, FAILURE, LOCKTYPE_SPECIAL); pr_cont("\n"); + lockdep_set_subclass_name_test(); + if (unexpected_testcase_failures) { printk("-----------------------------------------------------------------\n"); debug_locks = 0; diff --git a/lib/logic_pio.c b/lib/logic_pio.c index 2ea564a40064..e29496a38d06 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -122,7 +122,7 @@ void logic_pio_unregister_range(struct logic_pio_hwaddr *range) * * Traverse the io_range_list to find the registered node for @fwnode. */ -struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) +struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode) { struct logic_pio_hwaddr *range, *found_range = NULL; @@ -186,7 +186,7 @@ resource_size_t logic_pio_to_hwaddr(unsigned long pio) * * Returns Logical PIO value if successful, ~0UL otherwise */ -unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, +unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode, resource_size_t addr, resource_size_t size) { struct logic_pio_hwaddr *range; diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 3619301dda2e..d0ae808f3a14 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -64,6 +64,21 @@ #define CREATE_TRACE_POINTS #include <trace/events/maple_tree.h> +/* + * Kernel pointer hashing renders much of the maple tree dump useless as tagged + * pointers get hashed to arbitrary values. + * + * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is + * permissible to bypass this. Otherwise remain cautious and retain the hashing. + * + * Userland doesn't know about %px so also use %p there. 
+ */ +#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE) +#define PTR_FMT "%px" +#else +#define PTR_FMT "%p" +#endif + #define MA_ROOT_PARENT 1 /* @@ -120,7 +135,6 @@ static const unsigned char mt_min_slots[] = { #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) struct maple_big_node { - struct maple_pnode *parent; unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; union { struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; @@ -1193,19 +1207,17 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) reuse->request_count = 0; reuse->node_count = 0; - if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { - head->slot[head->node_count++] = reuse; - head->total++; - goto done; - } - - reuse->total = 1; - if ((head) && !((unsigned long)head & 0x1)) { + if (count) { + if (head->node_count < MAPLE_ALLOC_SLOTS) { + head->slot[head->node_count++] = reuse; + head->total++; + goto done; + } reuse->slot[0] = head; reuse->node_count = 1; - reuse->total += head->total; } + reuse->total = count + 1; mas->alloc = reuse; done: if (requested > 1) @@ -1251,11 +1263,11 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) mas->alloc = node; node->total = ++allocated; + node->request_count = 0; requested--; } node = mas->alloc; - node->request_count = 0; while (requested) { max_req = MAPLE_ALLOC_SLOTS - node->node_count; slots = (void **)&node->slot[node->node_count]; @@ -1271,7 +1283,10 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) node->node_count += count; allocated += count; - node = node->slot[0]; + /* find a non-full node*/ + do { + node = node->slot[0]; + } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS)); requested -= count; } mas->alloc->total = allocated; @@ -1280,10 +1295,9 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) nomem_bulk: /* Clean up potential freed allocations on bulk failure */ memset(slots, 0, max_req * sizeof(unsigned long)); + mas->alloc->total = allocated; nomem_one: mas_set_alloc_req(mas, requested); - if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) - mas->alloc->total = allocated; mas_set_err(mas, -ENOMEM); } @@ -1943,14 +1957,13 @@ static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, for (; i < piv_end; i++, j++) { b_node->pivot[j] = pivots[i]; if (unlikely(!b_node->pivot[j])) - break; + goto complete; if (unlikely(mas->max == b_node->pivot[j])) goto complete; } - if (likely(i <= mas_end)) - b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); + b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); complete: b_node->b_end = ++j; @@ -2139,9 +2152,7 @@ static inline bool mas_prev_sibling(struct ma_state *mas) { unsigned int p_slot = mte_parent_slot(mas->node); - if (mte_is_root(mas->node)) - return false; - + /* For root node, p_slot is set to 0 by mte_parent_slot(). 
*/ if (!p_slot) return false; @@ -3159,10 +3170,7 @@ static inline void mast_fill_bnode(struct maple_subtree_state *mast, bool cp = true; unsigned char split; - memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); - memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); - memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); - mast->bn->b_end = 0; + memset(mast->bn, 0, sizeof(struct maple_big_node)); if (mte_is_root(mas->node)) { cp = false; @@ -3400,7 +3408,7 @@ static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, * @mas: The maple state * @entry: The entry to store into the tree */ -static inline int mas_root_expand(struct ma_state *mas, void *entry) +static inline void mas_root_expand(struct ma_state *mas, void *entry) { void *contents = mas_root_locked(mas); enum maple_type type = maple_leaf_64; @@ -3436,12 +3444,23 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry) ma_set_meta(node, maple_leaf_64, 0, slot); /* swap the new root into the tree */ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); - return slot; + return; } +/* + * mas_store_root() - Storing value into root. + * @mas: The maple state + * @entry: The entry to store. + * + * There is no root node now and we are storing a value into the root - this + * function either assigns the pointer or expands into a node. + */ static inline void mas_store_root(struct ma_state *mas, void *entry) { - if (likely((mas->last != 0) || (mas->index != 0))) + if (!entry) { + if (!mas->index) + rcu_assign_pointer(mas->tree->ma_root, NULL); + } else if (likely((mas->last != 0) || (mas->index != 0))) mas_root_expand(mas, entry); else if (((unsigned long) (entry) & 3) == 2) mas_root_expand(mas, entry); @@ -3662,7 +3681,9 @@ static inline void mas_new_root(struct ma_state *mas, void *entry) void __rcu **slots; unsigned long *pivots; - if (!entry && !mas->index && mas->last == ULONG_MAX) { + WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); + + if (!entry) { mas->depth = 0; mas_set_height(mas); rcu_assign_pointer(mas->tree->ma_root, entry); @@ -3889,7 +3910,8 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) wr_mas->pivots[offset] = mas->index - 1; mas->offset++; /* Keep mas accurate. */ } - } else if (!mt_in_rcu(mas->tree)) { + } else { + WARN_ON_ONCE(mt_in_rcu(mas->tree)); /* * Expand the range, only partially overwriting the previous and * next ranges @@ -3899,8 +3921,6 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) wr_mas->pivots[offset] = mas->index - 1; wr_mas->pivots[offset + 1] = mas->last; mas->offset++; /* Keep mas accurate. */ - } else { - return; } trace_ma_write(__func__, mas, 0, wr_mas->entry); @@ -4181,75 +4201,53 @@ static inline int mas_prealloc_calc(struct ma_state *mas, void *entry) } /* - * mas_wr_store_type() - Set the store type for a given + * mas_wr_store_type() - Determine the store type for a given * store operation. 
* @wr_mas: The maple write state + * + * Return: the type of store needed for the operation */ -static inline void mas_wr_store_type(struct ma_wr_state *wr_mas) +static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char new_end; - if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) { - mas->store_type = wr_store_root; - return; - } + if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) + return wr_store_root; - if (unlikely(!mas_wr_walk(wr_mas))) { - mas->store_type = wr_spanning_store; - return; - } + if (unlikely(!mas_wr_walk(wr_mas))) + return wr_spanning_store; /* At this point, we are at the leaf node that needs to be altered. */ mas_wr_end_piv(wr_mas); if (!wr_mas->entry) mas_wr_extend_null(wr_mas); - new_end = mas_wr_new_end(wr_mas); - if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) { - mas->store_type = wr_exact_fit; - return; - } + if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) + return wr_exact_fit; - if (unlikely(!mas->index && mas->last == ULONG_MAX)) { - mas->store_type = wr_new_root; - return; - } + if (unlikely(!mas->index && mas->last == ULONG_MAX)) + return wr_new_root; + new_end = mas_wr_new_end(wr_mas); /* Potential spanning rebalance collapsing a node */ if (new_end < mt_min_slots[wr_mas->type]) { - if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) { - mas->store_type = wr_rebalance; - return; - } - mas->store_type = wr_node_store; - return; + if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) + return wr_rebalance; + return wr_node_store; } - if (new_end >= mt_slots[wr_mas->type]) { - mas->store_type = wr_split_store; - return; - } + if (new_end >= mt_slots[wr_mas->type]) + return wr_split_store; - if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) { - mas->store_type = wr_append; - return; - } + if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) + return wr_append; if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || - (wr_mas->offset_end - mas->offset == 1))) { - mas->store_type = wr_slot_store; - return; - } - - if (mte_is_root(mas->node) || (new_end >= mt_min_slots[wr_mas->type]) || - (mas->mas_flags & MA_STATE_BULK)) { - mas->store_type = wr_node_store; - return; - } + (wr_mas->offset_end - mas->offset == 1))) + return wr_slot_store; - mas->store_type = wr_invalid; - MAS_WARN_ON(mas, 1); + return wr_node_store; } /** @@ -4264,7 +4262,7 @@ static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) int request; mas_wr_prealloc_setup(wr_mas); - mas_wr_store_type(wr_mas); + mas->store_type = mas_wr_store_type(wr_mas); request = mas_prealloc_calc(mas, entry); if (!request) return; @@ -5419,7 +5417,8 @@ void *mas_store(struct ma_state *mas, void *entry) trace_ma_write(__func__, mas, 0, entry); #ifdef CONFIG_DEBUG_MAPLE_TREE if (MAS_WARN_ON(mas, mas->index > mas->last)) - pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); + pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, + entry); if (mas->index > mas->last) { mas_set_err(mas, -EINVAL); @@ -5435,7 +5434,7 @@ void *mas_store(struct ma_state *mas, void *entry) * overwrite multiple entries within a self-balancing B-Tree. 
*/ mas_wr_prealloc_setup(&wr_mas); - mas_wr_store_type(&wr_mas); + mas->store_type = mas_wr_store_type(&wr_mas); if (mas->mas_flags & MA_STATE_PREALLOC) { mas_wr_store_entry(&wr_mas); MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); @@ -5538,7 +5537,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) int request; mas_wr_prealloc_setup(&wr_mas); - mas_wr_store_type(&wr_mas); + mas->store_type = mas_wr_store_type(&wr_mas); request = mas_prealloc_calc(mas, entry); if (!request) return ret; @@ -7124,14 +7123,14 @@ static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, mt_dump_range(min, max, depth, format); if (xa_is_value(entry)) - pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), - xa_to_value(entry), entry); + pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), + xa_to_value(entry), entry); else if (xa_is_zero(entry)) pr_cont("zero (%ld)\n", xa_to_internal(entry)); else if (mt_is_reserved(entry)) - pr_cont("UNKNOWN ENTRY (%p)\n", entry); + pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); else - pr_cont("%p\n", entry); + pr_cont(PTR_FMT "\n", entry); } static void mt_dump_range64(const struct maple_tree *mt, void *entry, @@ -7147,13 +7146,13 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { switch(format) { case mt_dump_hex: - pr_cont("%p %lX ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: - pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); } } - pr_cont("%p\n", node->slot[i]); + pr_cont(PTR_FMT "\n", node->slot[i]); for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { unsigned long last = max; @@ -7175,11 +7174,11 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, if (last > max) { switch(format) { case mt_dump_hex: - pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); break; case mt_dump_dec: - pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } } @@ -7209,13 +7208,13 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { switch (format) { case mt_dump_hex: - pr_cont("%p %lX ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: - pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); } } - pr_cont("%p\n", node->slot[i]); + pr_cont(PTR_FMT "\n", node->slot[i]); for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { unsigned long last = max; @@ -7234,11 +7233,11 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, if (last > max) { switch(format) { case mt_dump_hex: - pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); break; case mt_dump_dec: - pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } } @@ -7256,8 +7255,8 @@ static void mt_dump_node(const struct maple_tree *mt, void *entry, mt_dump_range(min, max, depth, format); - pr_cont("node %p depth %d type %d parent %p", node, depth, type, - node ? 
node->parent : NULL); + pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, + depth, type, node ? node->parent : NULL); switch (type) { case maple_dense: pr_cont("\n"); @@ -7285,12 +7284,14 @@ void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) { void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); - pr_info("maple_tree(%p) flags %X, height %u root %p\n", + pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", mt, mt->ma_flags, mt_height(mt), entry); - if (!xa_is_node(entry)) - mt_dump_entry(entry, 0, 0, 0, format); - else if (entry) + if (xa_is_node(entry)) mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); + else if (entry) + mt_dump_entry(entry, 0, 0, 0, format); + else + pr_info("(empty)\n"); } EXPORT_SYMBOL_GPL(mt_dump); @@ -7337,7 +7338,7 @@ static void mas_validate_gaps(struct ma_state *mas) MT_BUG_ON(mas->tree, !entry); if (gap > p_end - p_start + 1) { - pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", + pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", mas_mn(mas), i, gap, p_end, p_start, p_end - p_start + 1); MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); @@ -7357,19 +7358,19 @@ counted: MT_BUG_ON(mas->tree, !gaps); offset = ma_meta_gap(node); if (offset > i) { - pr_err("gap offset %p[%u] is invalid\n", node, offset); + pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); MT_BUG_ON(mas->tree, 1); } if (gaps[offset] != max_gap) { - pr_err("gap %p[%u] is not the largest gap %lu\n", + pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", node, offset, max_gap); MT_BUG_ON(mas->tree, 1); } for (i++ ; i < mt_slot_count(mte); i++) { if (gaps[i] != 0) { - pr_err("gap %p[%u] beyond node limit != 0\n", + pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", node, i); MT_BUG_ON(mas->tree, 1); } @@ -7383,7 +7384,7 @@ counted: p_mn = mte_parent(mte); MT_BUG_ON(mas->tree, max_gap > mas->max); if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { - pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); + pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); mt_dump(mas->tree, mt_dump_hex); MT_BUG_ON(mas->tree, 1); } @@ -7413,11 +7414,11 @@ static void mas_validate_parent_slot(struct ma_state *mas) node = mas_slot(mas, slots, i); if (i == p_slot) { if (node != mas->node) - pr_err("parent %p[%u] does not have %p\n", + pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", parent, i, mas_mn(mas)); MT_BUG_ON(mas->tree, node != mas->node); } else if (node == mas->node) { - pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", + pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", mas_mn(mas), parent, i, p_slot); MT_BUG_ON(mas->tree, node == mas->node); } @@ -7439,20 +7440,20 @@ static void mas_validate_child_slot(struct ma_state *mas) child = mas_slot(mas, slots, i); if (!child) { - pr_err("Non-leaf node lacks child at %p[%u]\n", + pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", mas_mn(mas), i); MT_BUG_ON(mas->tree, 1); } if (mte_parent_slot(child) != i) { - pr_err("Slot error at %p[%u]: child %p has pslot %u\n", + pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", mas_mn(mas), i, mte_to_node(child), mte_parent_slot(child)); MT_BUG_ON(mas->tree, 1); } if (mte_parent(child) != mte_to_node(mas->node)) { - pr_err("child %p has parent %p not %p\n", + pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", mte_to_node(child), mte_parent(child), mte_to_node(mas->node)); MT_BUG_ON(mas->tree, 1); @@ -7482,24 +7483,24 @@ 
static void mas_validate_limits(struct ma_state *mas) piv = mas_safe_pivot(mas, pivots, i, type); if (!piv && (i != 0)) { - pr_err("Missing node limit pivot at %p[%u]", + pr_err("Missing node limit pivot at " PTR_FMT "[%u]", mas_mn(mas), i); MAS_WARN_ON(mas, 1); } if (prev_piv > piv) { - pr_err("%p[%u] piv %lu < prev_piv %lu\n", + pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", mas_mn(mas), i, piv, prev_piv); MAS_WARN_ON(mas, piv < prev_piv); } if (piv < mas->min) { - pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, + pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, piv, mas->min); MAS_WARN_ON(mas, piv < mas->min); } if (piv > mas->max) { - pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, + pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, piv, mas->max); MAS_WARN_ON(mas, piv > mas->max); } @@ -7509,7 +7510,7 @@ static void mas_validate_limits(struct ma_state *mas) } if (mas_data_end(mas) != i) { - pr_err("node%p: data_end %u != the last slot offset %u\n", + pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", mas_mn(mas), mas_data_end(mas), i); MT_BUG_ON(mas->tree, 1); } @@ -7518,8 +7519,8 @@ static void mas_validate_limits(struct ma_state *mas) void *entry = mas_slot(mas, slots, i); if (entry && (i != mt_slots[type] - 1)) { - pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), - i, entry); + pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", + mas_mn(mas), i, entry); MT_BUG_ON(mas->tree, entry != NULL); } @@ -7529,7 +7530,7 @@ static void mas_validate_limits(struct ma_state *mas) if (!piv) continue; - pr_err("%p[%u] should not have piv %lu\n", + pr_err(PTR_FMT "[%u] should not have piv %lu\n", mas_mn(mas), i, piv); MAS_WARN_ON(mas, i < mt_pivots[type] - 1); } @@ -7554,7 +7555,7 @@ static void mt_validate_nulls(struct maple_tree *mt) do { entry = mas_slot(&mas, slots, offset); if (!last && !entry) { - pr_err("Sequential nulls end at %p[%u]\n", + pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", mas_mn(&mas), offset); } MT_BUG_ON(mt, !last && !entry); @@ -7596,7 +7597,8 @@ void mt_validate(struct maple_tree *mt) end = mas_data_end(&mas); if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && (mas.max != ULONG_MAX))) { - pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); + pr_err("Invalid size %u of " PTR_FMT "\n", + end, mas_mn(&mas)); } mas_validate_parent_slot(&mas); @@ -7612,7 +7614,8 @@ EXPORT_SYMBOL_GPL(mt_validate); void mas_dump(const struct ma_state *mas) { - pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); + pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", + mas->tree, mas->node); switch (mas->status) { case ma_active: pr_err("(ma_active)"); @@ -7676,7 +7679,7 @@ void mas_dump(const struct ma_state *mas) pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, mas->index, mas->last); - pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", + pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n", mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); if (mas->index > mas->last) pr_err("Check index & last\n"); @@ -7685,7 +7688,7 @@ EXPORT_SYMBOL_GPL(mas_dump); void mas_wr_dump(const struct ma_wr_state *wr_mas) { - pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", + pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", wr_mas->node, wr_mas->r_min, wr_mas->r_max); pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, diff --git a/lib/math/test_div64.c b/lib/math/test_div64.c index c15edd688dd2..3cd699b654d9 100644 --- a/lib/math/test_div64.c +++ 
b/lib/math/test_div64.c @@ -26,6 +26,9 @@ static const u64 test_div64_dividends[] = { 0x0072db27380dd689, 0x0842f488162e2284, 0xf66745411d8ab063, + 0xfffffffffffffffb, + 0xfffffffffffffffc, + 0xffffffffffffffff, }; #define SIZE_DIV64_DIVIDENDS ARRAY_SIZE(test_div64_dividends) @@ -37,7 +40,10 @@ static const u64 test_div64_dividends[] = { #define TEST_DIV64_DIVISOR_5 0x0008a880 #define TEST_DIV64_DIVISOR_6 0x003fd3ae #define TEST_DIV64_DIVISOR_7 0x0b658fac -#define TEST_DIV64_DIVISOR_8 0xdc08b349 +#define TEST_DIV64_DIVISOR_8 0x80000001 +#define TEST_DIV64_DIVISOR_9 0xdc08b349 +#define TEST_DIV64_DIVISOR_A 0xfffffffe +#define TEST_DIV64_DIVISOR_B 0xffffffff static const u32 test_div64_divisors[] = { TEST_DIV64_DIVISOR_0, @@ -49,13 +55,16 @@ static const u32 test_div64_divisors[] = { TEST_DIV64_DIVISOR_6, TEST_DIV64_DIVISOR_7, TEST_DIV64_DIVISOR_8, + TEST_DIV64_DIVISOR_9, + TEST_DIV64_DIVISOR_A, + TEST_DIV64_DIVISOR_B, }; #define SIZE_DIV64_DIVISORS ARRAY_SIZE(test_div64_divisors) static const struct { u64 quotient; u32 remainder; -} test_div64_results[SIZE_DIV64_DIVISORS][SIZE_DIV64_DIVIDENDS] = { +} test_div64_results[SIZE_DIV64_DIVIDENDS][SIZE_DIV64_DIVISORS] = { { { 0x0000000013045e47, 0x00000001 }, { 0x000000000161596c, 0x00000030 }, @@ -65,6 +74,9 @@ static const struct { { 0x00000000000013c4, 0x0004ce80 }, { 0x00000000000002ae, 0x001e143c }, { 0x000000000000000f, 0x0033e56c }, + { 0x0000000000000001, 0x2b27507f }, + { 0x0000000000000000, 0xab275080 }, + { 0x0000000000000000, 0xab275080 }, { 0x0000000000000000, 0xab275080 }, }, { { 0x00000001c45c02d1, 0x00000000 }, @@ -75,7 +87,10 @@ static const struct { { 0x000000000001d637, 0x0004e5d9 }, { 0x0000000000003fc9, 0x000713bb }, { 0x0000000000000165, 0x029abe7d }, + { 0x000000000000001f, 0x673c193a }, { 0x0000000000000012, 0x6e9f7e37 }, + { 0x000000000000000f, 0xe73c1977 }, + { 0x000000000000000f, 0xe73c1968 }, }, { { 0x000000197a3a0cf7, 0x00000002 }, { 0x00000001d9632e5c, 0x00000021 }, @@ -85,7 +100,10 @@ static const struct { { 0x00000000001a7bb3, 0x00072331 }, { 0x00000000000397ad, 0x0002c61b }, { 0x000000000000141e, 0x06ea2e89 }, + { 0x00000000000001ca, 0x4c0a72e7 }, { 0x000000000000010a, 0xab002ad7 }, + { 0x00000000000000e5, 0x4c0a767b }, + { 0x00000000000000e5, 0x4c0a7596 }, }, { { 0x0000017949e37538, 0x00000001 }, { 0x0000001b62441f37, 0x00000055 }, @@ -95,7 +113,10 @@ static const struct { { 0x0000000001882ec6, 0x0005cbf9 }, { 0x000000000035333b, 0x0017abdf }, { 0x00000000000129f1, 0x0ab4520d }, + { 0x0000000000001a87, 0x18ff0472 }, { 0x0000000000000f6e, 0x8ac0ce9b }, + { 0x0000000000000d43, 0x98ff397f }, + { 0x0000000000000d43, 0x98ff2c3c }, }, { { 0x000011f321a74e49, 0x00000006 }, { 0x0000014d8481d211, 0x0000005b }, @@ -105,7 +126,10 @@ static const struct { { 0x0000000012a88828, 0x00036c97 }, { 0x000000000287f16f, 0x002c2a25 }, { 0x00000000000e2cc7, 0x02d581e3 }, + { 0x0000000000014318, 0x2ee07d7f }, { 0x000000000000bbf4, 0x1ba08c03 }, + { 0x000000000000a18c, 0x2ee303af }, + { 0x000000000000a18c, 0x2ee26223 }, }, { { 0x0000d8db8f72935d, 0x00000005 }, { 0x00000fbd5aed7a2e, 0x00000002 }, @@ -115,7 +139,10 @@ static const struct { { 0x00000000e16b20fa, 0x0002a14a }, { 0x000000001e940d22, 0x00353b2e }, { 0x0000000000ab40ac, 0x06fba6ba }, + { 0x00000000000f3f70, 0x0af7eeda }, { 0x000000000008debd, 0x72d98365 }, + { 0x0000000000079fb8, 0x0b166dba }, + { 0x0000000000079fb8, 0x0b0ece02 }, }, { { 0x000cc3045b8fc281, 0x00000000 }, { 0x0000ed1f48b5c9fc, 0x00000079 }, @@ -125,7 +152,10 @@ static const struct { { 0x0000000d43fce827, 
0x00082b09 }, { 0x00000001ccaba11a, 0x0037e8dd }, { 0x000000000a13f729, 0x0566dffd }, + { 0x0000000000e5b64e, 0x3728203b }, { 0x000000000085a14b, 0x23d36726 }, + { 0x000000000072db27, 0x38f38cd7 }, + { 0x000000000072db27, 0x3880b1b0 }, }, { { 0x00eafeb9c993592b, 0x00000001 }, { 0x00110e5befa9a991, 0x00000048 }, @@ -135,7 +165,10 @@ static const struct { { 0x000000f4459740fc, 0x00084484 }, { 0x0000002122c47bf9, 0x002ca446 }, { 0x00000000b9936290, 0x004979c4 }, + { 0x000000001085e910, 0x05a83974 }, { 0x00000000099ca89d, 0x9db446bf }, + { 0x000000000842f488, 0x26b40b94 }, + { 0x000000000842f488, 0x1e71170c }, }, { { 0x1b60cece589da1d2, 0x00000001 }, { 0x01fcb42be1453f5b, 0x0000004f }, @@ -145,7 +178,49 @@ static const struct { { 0x00001c757dfab350, 0x00048863 }, { 0x000003dc4979c652, 0x00224ea7 }, { 0x000000159edc3144, 0x06409ab3 }, + { 0x00000001ecce8a7e, 0x30bc25e5 }, { 0x000000011eadfee3, 0xa99c48a8 }, + { 0x00000000f6674543, 0x0a593ae9 }, + { 0x00000000f6674542, 0x13f1f5a5 }, + }, { + { 0x1c71c71c71c71c71, 0x00000002 }, + { 0x0210842108421084, 0x0000000b }, + { 0x007f01fc07f01fc0, 0x000000fb }, + { 0x00014245eabf1f9a, 0x0000a63d }, + { 0x0000ffffffffffff, 0x0000fffb }, + { 0x00001d913cecc509, 0x0007937b }, + { 0x00000402c70c678f, 0x0005bfc9 }, + { 0x00000016766cb70b, 0x045edf97 }, + { 0x00000001fffffffb, 0x80000000 }, + { 0x0000000129d84b3a, 0xa2e8fe71 }, + { 0x0000000100000001, 0xfffffffd }, + { 0x0000000100000000, 0xfffffffb }, + }, { + { 0x1c71c71c71c71c71, 0x00000003 }, + { 0x0210842108421084, 0x0000000c }, + { 0x007f01fc07f01fc0, 0x000000fc }, + { 0x00014245eabf1f9a, 0x0000a63e }, + { 0x0000ffffffffffff, 0x0000fffc }, + { 0x00001d913cecc509, 0x0007937c }, + { 0x00000402c70c678f, 0x0005bfca }, + { 0x00000016766cb70b, 0x045edf98 }, + { 0x00000001fffffffc, 0x00000000 }, + { 0x0000000129d84b3a, 0xa2e8fe72 }, + { 0x0000000100000002, 0x00000000 }, + { 0x0000000100000000, 0xfffffffc }, + }, { + { 0x1c71c71c71c71c71, 0x00000006 }, + { 0x0210842108421084, 0x0000000f }, + { 0x007f01fc07f01fc0, 0x000000ff }, + { 0x00014245eabf1f9a, 0x0000a641 }, + { 0x0000ffffffffffff, 0x0000ffff }, + { 0x00001d913cecc509, 0x0007937f }, + { 0x00000402c70c678f, 0x0005bfcd }, + { 0x00000016766cb70b, 0x045edf9b }, + { 0x00000001fffffffc, 0x00000003 }, + { 0x0000000129d84b3a, 0xa2e8fe75 }, + { 0x0000000100000002, 0x00000003 }, + { 0x0000000100000001, 0x00000000 }, }, }; @@ -208,6 +283,12 @@ static bool __init test_div64(void) return false; if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_8, i, 8)) return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_9, i, 9)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_A, i, 10)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_B, i, 11)) + return false; for (j = 0; j < SIZE_DIV64_DIVISORS; j++) { if (!test_div64_one(dividend, test_div64_divisors[j], i, j)) diff --git a/lib/min_heap.c b/lib/min_heap.c new file mode 100644 index 000000000000..4485372ff3b1 --- /dev/null +++ b/lib/min_heap.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include <linux/min_heap.h> + +void __min_heap_init(min_heap_char *heap, void *data, int size) +{ + __min_heap_init_inline(heap, data, size); +} +EXPORT_SYMBOL(__min_heap_init); + +void *__min_heap_peek(struct min_heap_char *heap) +{ + return __min_heap_peek_inline(heap); +} +EXPORT_SYMBOL(__min_heap_peek); + +bool __min_heap_full(min_heap_char *heap) +{ + return __min_heap_full_inline(heap); +} +EXPORT_SYMBOL(__min_heap_full); + +void 
__min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_down_inline(heap, pos, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_down); + +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_up); + +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heapify_all_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heapify_all); + +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_pop_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop); + +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_pop_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop_push); + +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_push); + +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_del_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_del); diff --git a/lib/objpool.c b/lib/objpool.c index fd108fe0d095..b998b720c732 100644 --- a/lib/objpool.c +++ b/lib/objpool.c @@ -74,15 +74,21 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs, * warm caches and TLB hits. in default vmalloc is used to * reduce the pressure of kernel slab system. as we know, * mimimal size of vmalloc is one page since vmalloc would - * always align the requested size to page size + * always align the requested size to page size. + * but if vmalloc fails or it is not available (e.g. GFP_ATOMIC) + * allocate percpu slot with kmalloc. 
*/ - if ((pool->gfp & GFP_ATOMIC) == GFP_ATOMIC) - slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); - else + slot = NULL; + + if ((pool->gfp & (GFP_ATOMIC | GFP_KERNEL)) != GFP_ATOMIC) slot = __vmalloc_node(size, sizeof(void *), pool->gfp, cpu_to_node(i), __builtin_return_address(0)); - if (!slot) - return -ENOMEM; + + if (!slot) { + slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); + if (!slot) + return -ENOMEM; + } memset(slot, 0, size); pool->cpu_slots[i] = slot; diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 2abc78367dd1..5222c6393f11 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -1187,7 +1187,7 @@ static void DEFINE_FLEX_test(struct kunit *test) { /* Using _RAW_ on a __counted_by struct will initialize "counter" to zero */ DEFINE_RAW_FLEX(struct foo, two_but_zero, array, 2); -#if __has_attribute(__counted_by__) +#ifdef CONFIG_CC_HAS_COUNTED_BY int expected_raw_size = sizeof(struct foo); #else int expected_raw_size = sizeof(struct foo) + 2 * sizeof(s16); diff --git a/lib/packing.c b/lib/packing.c index 3f656167c17e..793942745e34 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -9,52 +9,53 @@ #include <linux/types.h> #include <linux/bitrev.h> -static int get_le_offset(int offset) +/** + * calculate_box_addr - Determine physical location of byte in buffer + * @box: Index of byte within buffer seen as a logical big-endian big number + * @len: Size of buffer in bytes + * @quirks: mask of QUIRK_LSW32_IS_FIRST and QUIRK_LITTLE_ENDIAN + * + * Function interprets the buffer as a @len byte sized big number, and returns + * the physical offset of the @box logical octet within it. Internally, it + * treats the big number as groups of 4 bytes. If @len is not a multiple of 4, + * the last group may be shorter. + * + * @QUIRK_LSW32_IS_FIRST gives the ordering of groups of 4 octets relative to + * each other. If set, the most significant group of 4 octets is last in the + * buffer (and may be truncated if @len is not a multiple of 4). + * + * @QUIRK_LITTLE_ENDIAN gives the ordering of bytes within each group of 4. + * If set, the most significant byte is last in the group. If @len takes the + * form of 4k+3, the last group will only be able to represent 24 bits, and its + * most significant octet is byte 2. + * + * Return: the physical offset into the buffer corresponding to the logical box. 
+ */ +static size_t calculate_box_addr(size_t box, size_t len, u8 quirks) { - int closest_multiple_of_4; + size_t offset_of_group, offset_in_group, this_group = box / 4; + size_t group_size; - closest_multiple_of_4 = (offset / 4) * 4; - offset -= closest_multiple_of_4; - return closest_multiple_of_4 + (3 - offset); -} + if (quirks & QUIRK_LSW32_IS_FIRST) + offset_of_group = this_group * 4; + else + offset_of_group = len - ((this_group + 1) * 4); -static int get_reverse_lsw32_offset(int offset, size_t len) -{ - int closest_multiple_of_4; - int word_index; - - word_index = offset / 4; - closest_multiple_of_4 = word_index * 4; - offset -= closest_multiple_of_4; - word_index = (len / 4) - word_index - 1; - return word_index * 4 + offset; -} + group_size = min(4, len - offset_of_group); -static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, - int *box_end_bit, u8 *box_mask) -{ - int box_bit_width = *box_start_bit - *box_end_bit + 1; - int new_box_start_bit, new_box_end_bit; - - *to_write >>= *box_end_bit; - *to_write = bitrev8(*to_write) >> (8 - box_bit_width); - *to_write <<= *box_end_bit; - - new_box_end_bit = box_bit_width - *box_start_bit - 1; - new_box_start_bit = box_bit_width - *box_end_bit - 1; - *box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit); - *box_start_bit = new_box_start_bit; - *box_end_bit = new_box_end_bit; + if (quirks & QUIRK_LITTLE_ENDIAN) + offset_in_group = box - this_group * 4; + else + offset_in_group = group_size - (box - this_group * 4) - 1; + + return offset_of_group + offset_in_group; } /** - * packing - Convert numbers (currently u64) between a packed and an unpacked - * format. Unpacked means laid out in memory in the CPU's native - * understanding of integers, while packed means anything else that - * requires translation. + * pack - Pack u64 number into bitfield of buffer. * * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. + * @uval: CPU-readable unpacked value to pack. * @startbit: The index (in logical notation, compensated for quirks) where * the packed value starts within pbuf. Must be larger than, or * equal to, endbit. @@ -62,79 +63,179 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, * the packed value ends within pbuf. Must be smaller than, or equal * to, startbit. * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. - * @op: If PACK, then uval will be treated as const pointer and copied (packed) - * into pbuf, between startbit and endbit. - * If UNPACK, then pbuf will be treated as const pointer and the logical - * value between startbit and endbit will be copied (unpacked) to uval. * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and * QUIRK_MSB_ON_THE_RIGHT. * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. - * If op is PACK, pbuf is modified. - * If op is UNPACK, uval is modified. + * correct usage, return code may be discarded. The @pbuf memory will + * be modified on success. */ -int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, - enum packing_op op, u8 quirks) +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks) { - /* Number of bits for storing "uval" - * also width of the field to access in the pbuf - */ - u64 value_width; /* Logical byte indices corresponding to the * start and end of the field. 
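To make the group arithmetic of calculate_box_addr() concrete, here is a small userspace re-derivation for an 8-byte buffer. It is a sketch only: it mirrors the logic described in the kernel-doc rather than calling the (static) kernel function, and the Q_* constants are local stand-ins for the kernel quirk flags, not their real values.

#include <stdio.h>
#include <stddef.h>

#define Q_LSW32_IS_FIRST	(1u << 0)	/* local stand-in, not the kernel value */
#define Q_LITTLE_ENDIAN		(1u << 1)	/* local stand-in, not the kernel value */

/* Mirrors the mapping rules above; only exercised here with a length
 * that is a multiple of 4, so the truncated-last-group case is not hit.
 */
static size_t box_addr(size_t box, size_t len, unsigned int quirks)
{
	size_t group = box / 4;
	size_t offset_of_group, offset_in_group, group_size;

	offset_of_group = (quirks & Q_LSW32_IS_FIRST) ?
			  group * 4 : len - (group + 1) * 4;

	group_size = len - offset_of_group;
	if (group_size > 4)
		group_size = 4;

	offset_in_group = (quirks & Q_LITTLE_ENDIAN) ?
			  box - group * 4 :
			  group_size - (box - group * 4) - 1;

	return offset_of_group + offset_in_group;
}

int main(void)
{
	/* For len == 8: no quirks maps box i to byte 7 - i (plain big endian),
	 * LSW32_IS_FIRST swaps the two 4-byte groups, LITTLE_ENDIAN reverses
	 * the bytes inside each group, and both together give the identity.
	 */
	for (size_t box = 0; box < 8; box++)
		printf("box %zu -> %zu %zu %zu %zu\n", box,
		       box_addr(box, 8, 0),
		       box_addr(box, 8, Q_LSW32_IS_FIRST),
		       box_addr(box, 8, Q_LITTLE_ENDIAN),
		       box_addr(box, 8, Q_LSW32_IS_FIRST | Q_LITTLE_ENDIAN));
	return 0;
}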
*/ int plogical_first_u8, plogical_last_u8, box; + /* width of the field to access in the pbuf */ + u64 value_width; - /* startbit is expected to be larger than endbit */ - if (startbit < endbit) + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. + */ + if (unlikely(startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen)) /* Invalid function call */ return -EINVAL; value_width = startbit - endbit + 1; - if (value_width > 64) + if (unlikely(value_width > 64)) return -ERANGE; /* Check if "uval" fits in "value_width" bits. * If value_width is 64, the check will fail, but any * 64-bit uval will surely fit. */ - if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width))) + if (unlikely(value_width < 64 && uval >= (1ull << value_width))) /* Cannot store "uval" inside "value_width" bits. * Truncating "uval" is most certainly not desirable, * so simply erroring out is appropriate. */ return -ERANGE; + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. + */ + plogical_first_u8 = startbit / BITS_PER_BYTE; + plogical_last_u8 = endbit / BITS_PER_BYTE; + + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + size_t box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + size_t proj_start_bit, proj_end_bit; + u64 proj_mask; + u64 pval; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % BITS_PER_BYTE; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % BITS_PER_BYTE; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. + */ + proj_start_bit = ((box * BITS_PER_BYTE) + box_start_bit) - endbit; + proj_end_bit = ((box * BITS_PER_BYTE) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). + */ + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Write to pbuf, read from uval */ + pval = uval & proj_mask; + pval >>= proj_end_bit; + pval <<= box_end_bit; + + if (quirks & QUIRK_MSB_ON_THE_RIGHT) { + pval = bitrev8(pval); + box_mask = bitrev8(box_mask); + } + + ((u8 *)pbuf)[box_addr] &= ~box_mask; + ((u8 *)pbuf)[box_addr] |= pval; + } + return 0; +} +EXPORT_SYMBOL(pack); + +/** + * unpack - Unpack u64 number from packed buffer. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. 
+ * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. The @uval will be + * modified on success. + */ +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8, plogical_last_u8, box; + /* width of the field to access in the pbuf */ + u64 value_width; + + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. + */ + if (unlikely(startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen)) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (unlikely(value_width > 64)) + return -ERANGE; + /* Initialize parameter */ - if (op == UNPACK) - *uval = 0; + *uval = 0; /* Iterate through an idealistic view of the pbuf as an u64 with * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low * logical bit significance. "box" denotes the current logical u8. */ - plogical_first_u8 = startbit / 8; - plogical_last_u8 = endbit / 8; + plogical_first_u8 = startbit / BITS_PER_BYTE; + plogical_last_u8 = endbit / BITS_PER_BYTE; for (box = plogical_first_u8; box >= plogical_last_u8; box--) { /* Bit indices into the currently accessed 8-bit box */ - int box_start_bit, box_end_bit, box_addr; + size_t box_start_bit, box_end_bit, box_addr; u8 box_mask; /* Corresponding bits from the unpacked u64 parameter */ - int proj_start_bit, proj_end_bit; + size_t proj_start_bit, proj_end_bit; u64 proj_mask; + u64 pval; /* This u8 may need to be accessed in its entirety * (from bit 7 to bit 0), or not, depending on the * input arguments startbit and endbit. */ if (box == plogical_first_u8) - box_start_bit = startbit % 8; + box_start_bit = startbit % BITS_PER_BYTE; else box_start_bit = 7; if (box == plogical_last_u8) - box_end_bit = endbit % 8; + box_end_bit = endbit % BITS_PER_BYTE; else box_end_bit = 0; @@ -145,57 +246,72 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, * box is u8, the projection is u64 because it may fall * anywhere within the unpacked u64. */ - proj_start_bit = ((box * 8) + box_start_bit) - endbit; - proj_end_bit = ((box * 8) + box_end_bit) - endbit; + proj_start_bit = ((box * BITS_PER_BYTE) + box_start_bit) - endbit; + proj_end_bit = ((box * BITS_PER_BYTE) + box_end_bit) - endbit; proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); - box_mask = GENMASK_ULL(box_start_bit, box_end_bit); + box_mask = GENMASK(box_start_bit, box_end_bit); /* Determine the offset of the u8 box inside the pbuf, * adjusted for quirks. The adjusted box_addr will be used for * effective addressing inside the pbuf (so it's not * logical any longer). 
*/ - box_addr = pbuflen - box - 1; - if (quirks & QUIRK_LITTLE_ENDIAN) - box_addr = get_le_offset(box_addr); - if (quirks & QUIRK_LSW32_IS_FIRST) - box_addr = get_reverse_lsw32_offset(box_addr, - pbuflen); - - if (op == UNPACK) { - u64 pval; - - /* Read from pbuf, write to uval */ - pval = ((u8 *)pbuf)[box_addr] & box_mask; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval >>= box_end_bit; - pval <<= proj_end_bit; - *uval &= ~proj_mask; - *uval |= pval; - } else { - u64 pval; - - /* Write to pbuf, read from uval */ - pval = (*uval) & proj_mask; - pval >>= proj_end_bit; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval <<= box_end_bit; - ((u8 *)pbuf)[box_addr] &= ~box_mask; - ((u8 *)pbuf)[box_addr] |= pval; - } + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Read from pbuf, write to uval */ + pval = ((u8 *)pbuf)[box_addr]; + + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + pval = bitrev8(pval); + + pval &= box_mask; + + pval >>= box_end_bit; + pval <<= proj_end_bit; + *uval &= ~proj_mask; + *uval |= pval; } return 0; } +EXPORT_SYMBOL(unpack); + +/** + * packing - Convert numbers (currently u64) between a packed and an unpacked + * format. Unpacked means laid out in memory in the CPU's native + * understanding of integers, while packed means anything else that + * requires translation. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @op: If PACK, then uval will be treated as const pointer and copied (packed) + * into pbuf, between startbit and endbit. + * If UNPACK, then pbuf will be treated as const pointer and the logical + * value between startbit and endbit will be copied (unpacked) to uval. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Note: this is deprecated, prefer to use pack() or unpack() in new code. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. + * If op is PACK, pbuf is modified. + * If op is UNPACK, uval is modified. + */ +int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, + enum packing_op op, u8 quirks) +{ + if (op == PACK) + return pack(pbuf, *uval, startbit, endbit, pbuflen, quirks); + + return unpack(pbuf, uval, startbit, endbit, pbuflen, quirks); +} EXPORT_SYMBOL(packing); MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); diff --git a/lib/packing_test.c b/lib/packing_test.c new file mode 100644 index 000000000000..b38ea43c03fd --- /dev/null +++ b/lib/packing_test.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Vladimir Oltean <olteanv@gmail.com> + * Copyright (c) 2024, Intel Corporation. 
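A quick usage sketch of the two new entry points, pack() and unpack(). packing_demo() is an invented name; the expected buffer contents are taken from the "no quirks, 16 bytes" vector defined in the test file below, and error handling is reduced to a single check.

#include <linux/packing.h>
#include <linux/bug.h>

static int packing_demo(void)
{
	u8 buf[16] = {};
	u64 val = 0xcafedeadbeefcafe;
	u64 readback;
	int err;

	/* Pack the 64-bit value into logical bits 95..32 of the buffer. */
	err = pack(buf, val, 95, 32, sizeof(buf), 0);
	if (err)
		return err;
	/* buf now holds 00 00 00 00 ca fe de ad be ef ca fe 00 00 00 00 */

	/* Read the same field back. */
	err = unpack(buf, &readback, 95, 32, sizeof(buf), 0);
	if (err)
		return err;

	WARN_ON(readback != val);
	return 0;
}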
+ */ +#include <kunit/test.h> +#include <linux/packing.h> + +struct packing_test_case { + const char *desc; + const u8 *pbuf; + size_t pbuf_size; + u64 uval; + size_t start_bit; + size_t end_bit; + u8 quirks; +}; + +#define NO_QUIRKS 0 + +/** + * PBUF - Initialize .pbuf and .pbuf_size + * @array: elements of constant physical buffer + * + * Initializes the .pbuf and .pbuf_size fields of a struct packing_test_case + * with a constant array of the specified elements. + */ +#define PBUF(array...) \ + .pbuf = (const u8[]){ array }, \ + .pbuf_size = sizeof((const u8 []){ array }) + +static const struct packing_test_case cases[] = { + /* These tests pack and unpack a magic 64-bit value + * (0xcafedeadbeefcafe) at a fixed logical offset (32) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. + */ + { + .desc = "no quirks, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, + 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xbe, 0xef, 0xca, 0xfe, + 0xca, 0xfe, 0xde, 0xad, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xad, 0xde, 0xfe, 0xca, + 0xfe, 0xca, 0xef, 0xbe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x53, 0x7f, 0x7b, 0xb5, + 0x7d, 0xf7, 0x53, 0x7f, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x7d, 0xf7, 0x53, 0x7f, + 0x53, 0x7f, 0x7b, 0xb5, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xb5, 0x7b, 0x7f, 0x53, + 0x7f, 0x53, 0xf7, 0x7d, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x7f, 0x53, 0xf7, 0x7d, + 0xb5, 0x7b, 0x7f, 0x53, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a magic 64-bit value + * (0xcafedeadbeefcafe) at a fixed logical offset (32) within an + * otherwise zero array of varying size from 18 bytes to 24 bytes. 
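As an aside, the compound-literal trick in the PBUF() helper above expands to roughly the following (a sketch using the struct packing_test_case fields defined earlier):

static const struct packing_test_case pbuf_expansion_example = {
	.desc = "what PBUF(0xca, 0xfe) expands to",
	.pbuf = (const u8[]){ 0xca, 0xfe },			/* anonymous constant array */
	.pbuf_size = sizeof((const u8[]){ 0xca, 0xfe }),	/* == 2 bytes */
};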
+ */ + { + .desc = "no quirks, 18 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, + 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, + 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 19 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, + 0xfe, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x00, + 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 20 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xca, 0xfe, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, + 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 22 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, 0xbe, 0xef, + 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 24 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, + 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first + little endian words, 18 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 19 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 20 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 22 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 24 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a magic 64-bit value + * (0x1122334455667788) at an odd starting bit (43) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. 
+ */ + { + .desc = "no quirks, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x89, 0x11, 0x9a, 0x22, 0xab, + 0x33, 0xbc, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x33, 0xbc, 0x40, 0x00, + 0x11, 0x9a, 0x22, 0xab, 0x00, 0x00, 0x00, 0x89), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes, non-aligned", + PBUF(0x89, 0x00, 0x00, 0x00, 0xab, 0x22, 0x9a, 0x11, + 0x00, 0x40, 0xbc, 0x33, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0xbc, 0x33, + 0xab, 0x22, 0x9a, 0x11, 0x89, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x91, 0x88, 0x59, 0x44, 0xd5, + 0xcc, 0x3d, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0xcc, 0x3d, 0x02, 0x00, + 0x88, 0x59, 0x44, 0xd5, 0x00, 0x00, 0x00, 0x91), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes, non-aligned", + PBUF(0x91, 0x00, 0x00, 0x00, 0xd5, 0x44, 0x59, 0x88, + 0x00, 0x02, 0x3d, 0xcc, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x3d, 0xcc, + 0xd5, 0x44, 0x59, 0x88, 0x91, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a u64 with all bits set + * (0xffffffffffffffff) at an odd starting bit (43) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. 
+ */ + { + .desc = "no quirks, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xf8, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x07, 0xff), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0xff, 0x07, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x1f, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xe0, 0xff), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0xff, 0xe0, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x1f, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(packing, cases, desc); + +static void packing_test_pack(struct kunit *test) +{ + const struct packing_test_case *params = test->param_value; + u8 *pbuf; + int err; + + pbuf = kunit_kzalloc(test, params->pbuf_size, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, pbuf); + + err = pack(pbuf, params->uval, params->start_bit, params->end_bit, + params->pbuf_size, params->quirks); + + KUNIT_EXPECT_EQ_MSG(test, err, 0, "pack() returned %pe\n", ERR_PTR(err)); + KUNIT_EXPECT_MEMEQ(test, pbuf, params->pbuf, params->pbuf_size); +} + +static void packing_test_unpack(struct kunit *test) +{ + const struct packing_test_case *params = test->param_value; + u64 uval; + int err; + + err = unpack(params->pbuf, &uval, params->start_bit, params->end_bit, + params->pbuf_size, params->quirks); + KUNIT_EXPECT_EQ_MSG(test, err, 0, "unpack() returned %pe\n", ERR_PTR(err)); + KUNIT_EXPECT_EQ(test, uval, params->uval); +} + +static struct kunit_case packing_test_cases[] = { + KUNIT_CASE_PARAM(packing_test_pack, packing_gen_params), + KUNIT_CASE_PARAM(packing_test_unpack, packing_gen_params), + {}, +}; + +static struct kunit_suite packing_test_suite = { + .name = "packing", + 
.test_cases = packing_test_cases, +}; + +kunit_test_suite(packing_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for packing library"); diff --git a/lib/percpu_test.c b/lib/percpu_test.c index 4a3d70bbc1a0..ce7124b16dab 100644 --- a/lib/percpu_test.c +++ b/lib/percpu_test.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/limits.h> #include <linux/module.h> /* validate @native and @pcp counter values match @expected */ @@ -24,8 +25,9 @@ static int __init percpu_test_init(void) * +ul_one/-ul_one below would replace with inc/dec instructions. */ volatile unsigned int ui_one = 1; - long l = 0; + unsigned long long ull = 0; unsigned long ul = 0; + long l = 0; pr_info("percpu test start\n"); @@ -112,6 +114,13 @@ static int __init percpu_test_init(void) CHECK(ul, ulong_counter, -1); CHECK(ul, ulong_counter, ULONG_MAX); + ul = ull = 0; + __this_cpu_write(ulong_counter, 0); + + ul = ull += UINT_MAX; + __this_cpu_add(ulong_counter, ull); + CHECK(ul, ulong_counter, UINT_MAX); + ul = 3; __this_cpu_write(ulong_counter, 3); diff --git a/lib/random32.c b/lib/random32.c index 0a5a0e3600c8..24e7acd9343f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -36,7 +36,7 @@ #include <linux/percpu.h> #include <linux/export.h> #include <linux/jiffies.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/sched.h> #include <linux/bitops.h> #include <linux/slab.h> diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 41ae3c7570d3..8655a76d29a1 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -2,7 +2,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/rbtree_augmented.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <asm/timex.h> diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 473b2646f71c..5bb6b8aff232 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -474,14 +474,14 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, return -EOPNOTSUPP; if (sgt_append->prv) { - unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) + - sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE; + unsigned long next_pfn; if (WARN_ON(offset)) return -EINVAL; /* Merge contiguous pages into the last SG */ prv_len = sgt_append->prv->length; + next_pfn = (sg_phys(sgt_append->prv) + prv_len) / PAGE_SIZE; if (page_to_pfn(pages[0]) == next_pfn) { last_pg = pfn_to_page(next_pfn - 1); while (n_pages && pages_are_mergeable(pages[0], last_pg)) { diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 33564f965958..f11691315c2f 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -192,6 +192,47 @@ static void test_leak_destroy(struct kunit *test) KUNIT_EXPECT_EQ(test, 2, slab_errors); } +static void test_krealloc_redzone_zeroing(struct kunit *test) +{ + u8 *p; + int i; + struct kmem_cache *s = test_kmem_cache_create("TestSlub_krealloc", 64, + SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE); + + p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 48)); + memset(p, 0xff, 48); + + kasan_disable_current(); + OPTIMIZER_HIDE_VAR(p); + + /* Test shrink */ + p = krealloc(p, 40, GFP_KERNEL | __GFP_ZERO); + for (i = 40; i < 64; i++) + KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE); + + /* Test grow within the same 64B kmalloc object */ + p = krealloc(p, 56, GFP_KERNEL | __GFP_ZERO); + for (i = 40; i < 56; i++) + KUNIT_EXPECT_EQ(test, p[i], 0); + for (i = 56; i < 64; i++) + KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE); + + validate_slab_cache(s); + 
KUNIT_EXPECT_EQ(test, 0, slab_errors); + + memset(p, 0xff, 56); + /* Test grow with allocating a bigger 128B object */ + p = krealloc(p, 112, GFP_KERNEL | __GFP_ZERO); + for (i = 0; i < 56; i++) + KUNIT_EXPECT_EQ(test, p[i], 0xff); + for (i = 56; i < 112; i++) + KUNIT_EXPECT_EQ(test, p[i], 0); + + kfree(p); + kasan_enable_current(); + kmem_cache_destroy(s); +} + static int test_init(struct kunit *test) { slab_errors = 0; @@ -214,6 +255,7 @@ static struct kunit_case test_cases[] = { KUNIT_CASE(test_kmalloc_redzone_access), KUNIT_CASE(test_kfree_rcu), KUNIT_CASE(test_leak_destroy), + KUNIT_CASE(test_krealloc_redzone_zeroing), {} }; diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 4f887aa62fa0..91fa37b5c510 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -57,7 +57,7 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, static const unsigned int rounding[] = { 500, 50, 5 }; int i = 0, j; u32 remainder = 0, sf_cap; - char tmp[8]; + char tmp[12]; const char *unit; tmp[0] = '\0'; diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 989a12a67872..6dc234913dd5 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -120,6 +120,9 @@ long strncpy_from_user(char *dst, const char __user *src, long count) if (unlikely(count <= 0)) return 0; + kasan_check_write(dst, count); + check_object_size(dst, count, false); + if (can_do_masked_user_access()) { long retval; @@ -142,8 +145,6 @@ long strncpy_from_user(char *dst, const char __user *src, long count) if (max > count) max = count; - kasan_check_write(dst, count); - check_object_size(dst, count, false); if (user_read_access_begin(src, max)) { retval = do_strncpy_from_user(dst, src, count, max); user_read_access_end(); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index fa5edd6ef7f7..2eed1ad958e9 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14,7 +14,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/highmem.h> #include <linux/sched.h> diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 31561e0e1a0d..704cb1093ae8 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -1387,6 +1387,92 @@ static noinline void __init check_prev_entry(struct maple_tree *mt) mas_unlock(&mas); } +static noinline void __init check_store_null(struct maple_tree *mt) +{ + MA_STATE(mas, mt, 0, ULONG_MAX); + + /* + * Store NULL at range [0, ULONG_MAX] to an empty tree should result + * in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at any range to an empty tree should result in an empty + * tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set_range(&mas, 3, 10); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, ULONG_MAX] to a single entry tree should + * result in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); + mas_set_range(&mas, 0, ULONG_MAX); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, n] to a single entry tree should + * result in an empty tree + */ + 
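The same store-NULL invariants can also be exercised through the plain mtree_*() wrappers. The sketch below is illustrative only (store_null_sketch() is an invented name, return values are ignored for brevity) and is not part of the test above, which deliberately uses the advanced mas_*() API under the lock.

#include <linux/maple_tree.h>
#include <linux/gfp.h>
#include <linux/limits.h>
#include <linux/bug.h>

static void store_null_sketch(void)
{
	DEFINE_MTREE(mt);
	static int item;

	/* A single-entry tree overwritten with NULL over the full range
	 * should end up empty, mirroring the MT_BUG_ON() checks above.
	 */
	mtree_store_range(&mt, 0, 0, &item, GFP_KERNEL);
	mtree_store_range(&mt, 0, ULONG_MAX, NULL, GFP_KERNEL);
	WARN_ON(!mtree_empty(&mt));
	mtree_destroy(&mt);
}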
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); + mas_set_range(&mas, 0, 5); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [m, n] where m > 0 to a single entry tree + * should still be a single entry tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); + mas_set_range(&mas, 2, 5); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, mtree_empty(mt)); +// MT_BUG_ON(mt, xa_is_node(mas_root(&mas))); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, ULONG_MAX] to a tree with node should + * result in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set_range(&mas, 1, 3); + mas_store_gfp(&mas, &mas, GFP_KERNEL); +// MT_BUG_ON(mt, !xa_is_node(mas_root(&mas))); + mas_set_range(&mas, 0, ULONG_MAX); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); +} + static noinline void __init check_root_expand(struct maple_tree *mt) { MA_STATE(mas, mt, 0, 0); @@ -3711,6 +3797,10 @@ static int __init maple_tree_seed(void) #endif mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_store_null(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_root_expand(&tree); mtree_destroy(&tree); diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c index 64c877e73b64..e6fbb798558b 100644 --- a/lib/test_min_heap.c +++ b/lib/test_min_heap.c @@ -23,14 +23,6 @@ static __init bool greater_than(const void *lhs, const void *rhs, void __always_ return *(int *)lhs > *(int *)rhs; } -static __init void swap_ints(void *lhs, void *rhs, void __always_unused *args) -{ - int temp = *(int *)lhs; - - *(int *)lhs = *(int *)rhs; - *(int *)rhs = temp; -} - static __init int pop_verify_heap(bool min_heap, struct min_heap_test *heap, const struct min_heap_callbacks *funcs) @@ -72,7 +64,7 @@ static __init int test_heapify_all(bool min_heap) }; struct min_heap_callbacks funcs = { .less = min_heap ? less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, err; @@ -104,7 +96,7 @@ static __init int test_heap_push(bool min_heap) }; struct min_heap_callbacks funcs = { .less = min_heap ? less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, temp, err; @@ -136,7 +128,7 @@ static __init int test_heap_pop_push(bool min_heap) }; struct min_heap_callbacks funcs = { .less = min_heap ? less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, temp, err; @@ -175,7 +167,7 @@ static __init int test_heap_del(bool min_heap) heap.nr = ARRAY_SIZE(values); struct min_heap_callbacks funcs = { .less = min_heap ? 
less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, err; diff --git a/lib/test_parman.c b/lib/test_parman.c index 35e32243693c..f9b97426a337 100644 --- a/lib/test_parman.c +++ b/lib/test_parman.c @@ -39,7 +39,7 @@ #include <linux/slab.h> #include <linux/bitops.h> #include <linux/err.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/parman.h> #define TEST_PARMAN_PRIO_SHIFT 7 /* defines number of prios for testing */ diff --git a/lib/test_printf.c b/lib/test_printf.c index 8448b6d02bd9..59dbe4f9a4cb 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -386,6 +386,66 @@ kernel_ptr(void) static void __init struct_resource(void) { + struct resource test_resource = { + .start = 0xc0ffee00, + .end = 0xc0ffee00, + .flags = IORESOURCE_MEM, + }; + + test("[mem 0xc0ffee00 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xc0ffee, + .end = 0xba5eba11, + .flags = IORESOURCE_MEM, + }; + test("[mem 0x00c0ffee-0xba5eba11 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba5eba11, + .end = 0xc0ffee, + .flags = IORESOURCE_MEM, + }; + test("[mem 0xba5eba11-0x00c0ffee flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba5eba11, + .end = 0xba5eca11, + .flags = IORESOURCE_MEM, + }; + + test("[mem 0xba5eba11-0xba5eca11 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba11, + .end = 0xca10, + .flags = IORESOURCE_IO | + IORESOURCE_DISABLED | + IORESOURCE_UNSET, + }; + + test("[io size 0x1000 disabled]", + "%pR", &test_resource); +} + +static void __init +struct_range(void) +{ + struct range test_range = DEFINE_RANGE(0xc0ffee00ba5eba11, + 0xc0ffee00ba5eba11); + test("[range 0xc0ffee00ba5eba11]", "%pra", &test_range); + + test_range = DEFINE_RANGE(0xc0ffee, 0xba5eba11); + test("[range 0x0000000000c0ffee-0x00000000ba5eba11]", + "%pra", &test_range); + + test_range = DEFINE_RANGE(0xba5eba11, 0xc0ffee); + test("[range 0x00000000ba5eba11-0x0000000000c0ffee]", + "%pra", &test_range); } static void __init @@ -763,6 +823,7 @@ test_pointer(void) symbol_ptr(); kernel_ptr(); struct_resource(); + struct_range(); addr(); escaped_str(); hex_string(); diff --git a/lib/test_scanf.c b/lib/test_scanf.c index 7257b1768545..44f8508c9d88 100644 --- a/lib/test_scanf.c +++ b/lib/test_scanf.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/overflow.h> #include <linux/printk.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/string.h> diff --git a/lib/tests/Makefile b/lib/tests/Makefile new file mode 100644 index 000000000000..8e4f42cb9c54 --- /dev/null +++ b/lib/tests/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_TEST_RUNTIME_MODULE) += module/ diff --git a/lib/tests/module/.gitignore b/lib/tests/module/.gitignore new file mode 100644 index 000000000000..8be7891b250f --- /dev/null +++ b/lib/tests/module/.gitignore @@ -0,0 +1,4 @@ +test_kallsyms_a.c +test_kallsyms_b.c +test_kallsyms_c.c +test_kallsyms_d.c diff --git a/lib/tests/module/Makefile b/lib/tests/module/Makefile new file mode 100644 index 000000000000..2f3e1a772c2c --- /dev/null +++ b/lib/tests/module/Makefile @@ -0,0 +1,14 @@ +obj-$(CONFIG_TEST_KALLSYMS_A) += test_kallsyms_a.o +obj-$(CONFIG_TEST_KALLSYMS_B) += test_kallsyms_b.o +obj-$(CONFIG_TEST_KALLSYMS_C) += test_kallsyms_c.o +obj-$(CONFIG_TEST_KALLSYMS_D) += test_kallsyms_d.o + +quiet_cmd_gen_test_kallsyms = GEN $@ + cmd_gen_test_kallsyms = $< $@ \ + 
$(CONFIG_TEST_KALLSYMS_NUMSYMS) \ + $(CONFIG_TEST_KALLSYMS_SCALE_FACTOR) + +$(obj)/%.c: $(src)/gen_test_kallsyms.sh FORCE + $(call if_changed,gen_test_kallsyms) + +targets += $(foreach x, a b c d, test_kallsyms_$(x).c) diff --git a/lib/tests/module/gen_test_kallsyms.sh b/lib/tests/module/gen_test_kallsyms.sh new file mode 100755 index 000000000000..561dcac0f359 --- /dev/null +++ b/lib/tests/module/gen_test_kallsyms.sh @@ -0,0 +1,134 @@ +#!/bin/bash + +TARGET=$(basename $1) +DIR=lib/tests/module +TARGET="$DIR/$TARGET" +NUM_SYMS=$2 +SCALE_FACTOR=$3 +TEST_TYPE=$(echo $TARGET | sed -e 's|lib/tests/module/test_kallsyms_||g') +TEST_TYPE=$(echo $TEST_TYPE | sed -e 's|.c||g') +FIRST_B_LOOKUP=1 + +if [[ $NUM_SYMS -gt 2 ]]; then + FIRST_B_LOOKUP=$((NUM_SYMS/2)) +fi + +gen_template_module_header() +{ + cat <<____END_MODULE +// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +/* + * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> + * + * Automatically generated code for testing, do not edit manually. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> + +____END_MODULE +} + +gen_num_syms() +{ + PREFIX=$1 + NUM=$2 + for i in $(seq 1 $NUM); do + printf "int auto_test_%s_%010d = 0;\n" $PREFIX $i + printf "EXPORT_SYMBOL_GPL(auto_test_%s_%010d);\n" $PREFIX $i + done + echo +} + +gen_template_module_data_a() +{ + gen_num_syms a $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_data_b() +{ + printf "\nextern int auto_test_a_%010d;\n\n" $FIRST_B_LOOKUP + echo "static int auto_runtime_test(void)" + echo "{" + printf "\nreturn auto_test_a_%010d;\n" $FIRST_B_LOOKUP + echo "}" +} + +gen_template_module_data_c() +{ + gen_num_syms c $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_data_d() +{ + gen_num_syms d $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_exit() +{ + cat <<____END_MODULE +static int __init auto_test_module_init(void) +{ + return auto_runtime_test(); +} +module_init(auto_test_module_init); + +static void __exit auto_test_module_exit(void) +{ +} +module_exit(auto_test_module_exit); + +MODULE_AUTHOR("Luis Chamberlain <mcgrof@kernel.org>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module for kallsyms"); +____END_MODULE +} + +case $TEST_TYPE in + a) + gen_template_module_header > $TARGET + gen_template_module_data_a $NUM_SYMS >> $TARGET + gen_template_module_exit >> $TARGET + ;; + b) + gen_template_module_header > $TARGET + gen_template_module_data_b >> $TARGET + gen_template_module_exit >> $TARGET + ;; + c) + gen_template_module_header > $TARGET + gen_template_module_data_c $((NUM_SYMS * SCALE_FACTOR)) >> $TARGET + gen_template_module_exit >> $TARGET + ;; + d) + gen_template_module_header > $TARGET + gen_template_module_data_d $((NUM_SYMS * SCALE_FACTOR * 2)) >> $TARGET + gen_template_module_exit >> $TARGET + ;; + *) + ;; +esac diff --git a/lib/util_macros_kunit.c b/lib/util_macros_kunit.c new file mode 100644 index 000000000000..94cc9f0de50a --- /dev/null +++ b/lib/util_macros_kunit.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for bitfield helpers. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/util_macros.h> + +#define FIND_CLOSEST_RANGE_CHECK(from, to, array, exp_idx) \ +{ \ + int i; \ + for (i = from; i <= to; i++) { \ + int found = find_closest(i, array, ARRAY_SIZE(array)); \ + KUNIT_ASSERT_EQ(ctx, exp_idx, found); \ + } \ +} + +static void test_find_closest(struct kunit *ctx) +{ + /* This will test a few arrays that are found in drivers */ + static const int ina226_avg_tab[] = { 1, 4, 16, 64, 128, 256, 512, 1024 }; + static const unsigned int ad7616_oversampling_avail[] = { + 1, 2, 4, 8, 16, 32, 64, 128, + }; + static u32 wd_timeout_table[] = { 2, 4, 6, 8, 16, 32, 48, 64 }; + static int array_prog1a[] = { 1, 2, 3, 4, 5 }; + static u32 array_prog1b[] = { 2, 3, 4, 5, 6 }; + static int array_prog1mix[] = { -2, -1, 0, 1, 2 }; + static int array_prog2a[] = { 1, 3, 5, 7 }; + static u32 array_prog2b[] = { 2, 4, 6, 8 }; + static int array_prog3a[] = { 1, 4, 7, 10 }; + static u32 array_prog3b[] = { 2, 5, 8, 11 }; + static int array_prog4a[] = { 1, 5, 9, 13 }; + static u32 array_prog4b[] = { 2, 6, 10, 14 }; + + FIND_CLOSEST_RANGE_CHECK(-3, 2, ina226_avg_tab, 0); + FIND_CLOSEST_RANGE_CHECK(3, 10, ina226_avg_tab, 1); + FIND_CLOSEST_RANGE_CHECK(11, 40, ina226_avg_tab, 2); + FIND_CLOSEST_RANGE_CHECK(41, 96, ina226_avg_tab, 3); + FIND_CLOSEST_RANGE_CHECK(97, 192, ina226_avg_tab, 4); + FIND_CLOSEST_RANGE_CHECK(193, 384, ina226_avg_tab, 5); + FIND_CLOSEST_RANGE_CHECK(385, 768, ina226_avg_tab, 6); + FIND_CLOSEST_RANGE_CHECK(769, 2048, ina226_avg_tab, 7); + + /* The array that found the bug that caused this kunit to exist */ + FIND_CLOSEST_RANGE_CHECK(-3, 1, ad7616_oversampling_avail, 0); + FIND_CLOSEST_RANGE_CHECK(2, 3, ad7616_oversampling_avail, 1); + FIND_CLOSEST_RANGE_CHECK(4, 6, ad7616_oversampling_avail, 2); + FIND_CLOSEST_RANGE_CHECK(7, 12, ad7616_oversampling_avail, 3); + FIND_CLOSEST_RANGE_CHECK(13, 24, ad7616_oversampling_avail, 4); + FIND_CLOSEST_RANGE_CHECK(25, 48, ad7616_oversampling_avail, 5); + FIND_CLOSEST_RANGE_CHECK(49, 96, ad7616_oversampling_avail, 6); + FIND_CLOSEST_RANGE_CHECK(97, 256, ad7616_oversampling_avail, 7); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, wd_timeout_table, 0); + FIND_CLOSEST_RANGE_CHECK(4, 5, wd_timeout_table, 1); + FIND_CLOSEST_RANGE_CHECK(6, 7, wd_timeout_table, 2); + FIND_CLOSEST_RANGE_CHECK(8, 12, wd_timeout_table, 3); + FIND_CLOSEST_RANGE_CHECK(13, 24, wd_timeout_table, 4); + FIND_CLOSEST_RANGE_CHECK(25, 40, wd_timeout_table, 5); + FIND_CLOSEST_RANGE_CHECK(41, 56, wd_timeout_table, 6); + FIND_CLOSEST_RANGE_CHECK(57, 128, wd_timeout_table, 7); + + /* One could argue that find_closest() should not be used for monotonic + * arrays (like 1,2,3,4,5), but even so, it should work as long as the + * array is sorted ascending. 
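To show what these range checks encode in driver terms, here is a hedged sketch using find_closest() with the ina226 averaging table from the test above. pick_avg_setting() is a name invented for illustration.

#include <linux/kernel.h>
#include <linux/util_macros.h>

static const int avg_tab[] = { 1, 4, 16, 64, 128, 256, 512, 1024 };

/* Map a requested averaging factor to the index of the closest supported one. */
static int pick_avg_setting(int requested)
{
	return find_closest(requested, avg_tab, ARRAY_SIZE(avg_tab));
}

/* Per the ranges verified above, pick_avg_setting(100) returns index 4
 * (value 128) because 100 falls in the 97..192 bucket, and
 * pick_avg_setting(3) returns index 1 (value 4).
 */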
*/ + FIND_CLOSEST_RANGE_CHECK(-3, 1, array_prog1a, 0); + FIND_CLOSEST_RANGE_CHECK(2, 2, array_prog1a, 1); + FIND_CLOSEST_RANGE_CHECK(3, 3, array_prog1a, 2); + FIND_CLOSEST_RANGE_CHECK(4, 4, array_prog1a, 3); + FIND_CLOSEST_RANGE_CHECK(5, 8, array_prog1a, 4); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog1b, 0); + FIND_CLOSEST_RANGE_CHECK(3, 3, array_prog1b, 1); + FIND_CLOSEST_RANGE_CHECK(4, 4, array_prog1b, 2); + FIND_CLOSEST_RANGE_CHECK(5, 5, array_prog1b, 3); + FIND_CLOSEST_RANGE_CHECK(6, 8, array_prog1b, 4); + + FIND_CLOSEST_RANGE_CHECK(-4, -2, array_prog1mix, 0); + FIND_CLOSEST_RANGE_CHECK(-1, -1, array_prog1mix, 1); + FIND_CLOSEST_RANGE_CHECK(0, 0, array_prog1mix, 2); + FIND_CLOSEST_RANGE_CHECK(1, 1, array_prog1mix, 3); + FIND_CLOSEST_RANGE_CHECK(2, 5, array_prog1mix, 4); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog2a, 0); + FIND_CLOSEST_RANGE_CHECK(3, 4, array_prog2a, 1); + FIND_CLOSEST_RANGE_CHECK(5, 6, array_prog2a, 2); + FIND_CLOSEST_RANGE_CHECK(7, 10, array_prog2a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog2b, 0); + FIND_CLOSEST_RANGE_CHECK(4, 5, array_prog2b, 1); + FIND_CLOSEST_RANGE_CHECK(6, 7, array_prog2b, 2); + FIND_CLOSEST_RANGE_CHECK(8, 10, array_prog2b, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog3a, 0); + FIND_CLOSEST_RANGE_CHECK(3, 5, array_prog3a, 1); + FIND_CLOSEST_RANGE_CHECK(6, 8, array_prog3a, 2); + FIND_CLOSEST_RANGE_CHECK(9, 20, array_prog3a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog3b, 0); + FIND_CLOSEST_RANGE_CHECK(4, 6, array_prog3b, 1); + FIND_CLOSEST_RANGE_CHECK(7, 9, array_prog3b, 2); + FIND_CLOSEST_RANGE_CHECK(10, 20, array_prog3b, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog4a, 0); + FIND_CLOSEST_RANGE_CHECK(4, 7, array_prog4a, 1); + FIND_CLOSEST_RANGE_CHECK(8, 11, array_prog4a, 2); + FIND_CLOSEST_RANGE_CHECK(12, 20, array_prog4a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 4, array_prog4b, 0); + FIND_CLOSEST_RANGE_CHECK(5, 8, array_prog4b, 1); + FIND_CLOSEST_RANGE_CHECK(9, 12, array_prog4b, 2); + FIND_CLOSEST_RANGE_CHECK(13, 20, array_prog4b, 3); +} + +#define FIND_CLOSEST_DESC_RANGE_CHECK(from, to, array, exp_idx) \ +{ \ + int i; \ + for (i = from; i <= to; i++) { \ + int found = find_closest_descending(i, array, \ + ARRAY_SIZE(array)); \ + KUNIT_ASSERT_EQ(ctx, exp_idx, found); \ + } \ +} + +static void test_find_closest_descending(struct kunit *ctx) +{ + /* Same arrays as 'test_find_closest' but reversed */ + static const int ina226_avg_tab[] = { 1024, 512, 256, 128, 64, 16, 4, 1 }; + static const unsigned int ad7616_oversampling_avail[] = { + 128, 64, 32, 16, 8, 4, 2, 1 + }; + static u32 wd_timeout_table[] = { 64, 48, 32, 16, 8, 6, 4, 2 }; + static int array_prog1a[] = { 5, 4, 3, 2, 1 }; + static u32 array_prog1b[] = { 6, 5, 4, 3, 2 }; + static int array_prog1mix[] = { 2, 1, 0, -1, -2 }; + static int array_prog2a[] = { 7, 5, 3, 1 }; + static u32 array_prog2b[] = { 8, 6, 4, 2 }; + static int array_prog3a[] = { 10, 7, 4, 1 }; + static u32 array_prog3b[] = { 11, 8, 5, 2 }; + static int array_prog4a[] = { 13, 9, 5, 1 }; + static u32 array_prog4b[] = { 14, 10, 6, 2 }; + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, ina226_avg_tab, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 10, ina226_avg_tab, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(11, 40, ina226_avg_tab, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(41, 96, ina226_avg_tab, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(97, 192, ina226_avg_tab, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(193, 384, ina226_avg_tab, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(385, 768, ina226_avg_tab, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(769, 2048, 
ina226_avg_tab, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 1, ad7616_oversampling_avail, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 3, ad7616_oversampling_avail, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 6, ad7616_oversampling_avail, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 12, ad7616_oversampling_avail, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 24, ad7616_oversampling_avail, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(25, 48, ad7616_oversampling_avail, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(49, 96, ad7616_oversampling_avail, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(97, 256, ad7616_oversampling_avail, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, wd_timeout_table, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 5, wd_timeout_table, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 7, wd_timeout_table, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 12, wd_timeout_table, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 24, wd_timeout_table, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(25, 40, wd_timeout_table, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(41, 56, wd_timeout_table, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(57, 128, wd_timeout_table, 0); + + /* One could argue that find_closest_descending() should not be used + * for monotonic arrays (like 5,4,3,2,1), but even so, it should still + * it should work as long as the array is sorted descending. */ + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 1, array_prog1a, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 2, array_prog1a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 3, array_prog1a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 4, array_prog1a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 8, array_prog1a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog1b, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 3, array_prog1b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 4, array_prog1b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 5, array_prog1b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 8, array_prog1b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-4, -2, array_prog1mix, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(-1, -1, array_prog1mix, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(0, 0, array_prog1mix, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(1, 1, array_prog1mix, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 5, array_prog1mix, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog2a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 4, array_prog2a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 6, array_prog2a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 10, array_prog2a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog2b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 5, array_prog2b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 7, array_prog2b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 10, array_prog2b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog3a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 5, array_prog3a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 8, array_prog3a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(9, 20, array_prog3a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog3b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 6, array_prog3b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 9, array_prog3b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(10, 20, array_prog3b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog4a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 7, array_prog4a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 11, array_prog4a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(12, 20, array_prog4a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 4, array_prog4b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 8, array_prog4b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(9, 12, array_prog4b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 20, array_prog4b, 0); +} + +static struct kunit_case 
__refdata util_macros_test_cases[] = { + KUNIT_CASE(test_find_closest), + KUNIT_CASE(test_find_closest_descending), + {} +}; + +static struct kunit_suite util_macros_test_suite = { + .name = "util_macros.h", + .test_cases = util_macros_test_cases, +}; + +kunit_test_suites(&util_macros_test_suite); + +MODULE_AUTHOR("Alexandru Ardelean <aardelean@baylibre.com>"); +MODULE_DESCRIPTION("Test cases for util_macros.h helpers"); +MODULE_LICENSE("GPL"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c5e2ec9303c5..6ac02bbb7df1 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1040,6 +1040,20 @@ static const struct printf_spec default_dec04_spec = { }; static noinline_for_stack +char *hex_range(char *buf, char *end, u64 start_val, u64 end_val, + struct printf_spec spec) +{ + buf = number(buf, end, start_val, spec); + if (start_val == end_val) + return buf; + + if (buf < end) + *buf = '-'; + ++buf; + return number(buf, end, end_val, spec); +} + +static noinline_for_stack char *resource_string(char *buf, char *end, struct resource *res, struct printf_spec spec, const char *fmt) { @@ -1115,11 +1129,7 @@ char *resource_string(char *buf, char *end, struct resource *res, p = string_nocheck(p, pend, "size ", str_spec); p = number(p, pend, resource_size(res), *specp); } else { - p = number(p, pend, res->start, *specp); - if (res->start != res->end) { - *p++ = '-'; - p = number(p, pend, res->end, *specp); - } + p = hex_range(p, pend, res->start, res->end, *specp); } if (decode) { if (res->flags & IORESOURCE_MEM_64) @@ -1141,6 +1151,31 @@ char *resource_string(char *buf, char *end, struct resource *res, } static noinline_for_stack +char *range_string(char *buf, char *end, const struct range *range, + struct printf_spec spec, const char *fmt) +{ + char sym[sizeof("[range 0x0123456789abcdef-0x0123456789abcdef]")]; + char *p = sym, *pend = sym + sizeof(sym); + + struct printf_spec range_spec = { + .field_width = 2 + 2 * sizeof(range->start), /* 0x + 2 * 8 */ + .flags = SPECIAL | SMALL | ZEROPAD, + .base = 16, + .precision = -1, + }; + + if (check_pointer(&buf, end, range, spec)) + return buf; + + p = string_nocheck(p, pend, "[range ", default_str_spec); + p = hex_range(p, pend, range->start, range->end, range_spec); + *p++ = ']'; + *p = '\0'; + + return string_nocheck(buf, end, sym, spec); +} + +static noinline_for_stack char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) { @@ -2229,6 +2264,15 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +static noinline_for_stack +char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) +{ + if (*fmt == 'r' && fmt[1] == 'a') + return range_string(buf, end, ptr, spec, fmt); + return resource_string(buf, end, ptr, spec, fmt); +} + int __init no_hash_pointers_enable(char *str) { if (no_hash_pointers) @@ -2277,6 +2321,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr); * - 'Bb' as above with module build ID (for use in backtraces) * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] + * - 'ra' For struct ranges, e.g., [range 0x0000000000000000 - 0x00000000000000ff] * - 'b[l]' For a bitmap, the number of bits is determined by the field * width which must be explicitly specified either as part of the * format string '%32b[l]' or through '%*b[l]', [l] selects @@ -2401,7 +2446,7 @@ char *pointer(const char *fmt, char *buf, 
char *end, void *ptr, return symbol_string(buf, end, ptr, spec, fmt); case 'R': case 'r': - return resource_string(buf, end, ptr, spec, fmt); + return resource_or_range(fmt, buf, end, ptr, spec); case 'h': return hex_string(buf, end, ptr, spec, fmt); case 'b':
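Closing the loop with the struct_range() cases added to test_printf.c above, a short usage sketch of the new %pra specifier. print_range_demo() is an invented name; DEFINE_RANGE() is used exactly as in the test, and the expected output strings are the ones the test asserts.

#include <linux/range.h>
#include <linux/printk.h>

static void print_range_demo(void)
{
	struct range r = DEFINE_RANGE(0xc0ffee, 0xba5eba11);

	/* Prints: [range 0x0000000000c0ffee-0x00000000ba5eba11] */
	pr_info("%pra\n", &r);

	/* A degenerate range collapses to a single value:
	 * [range 0xc0ffee00ba5eba11]
	 */
	r = DEFINE_RANGE(0xc0ffee00ba5eba11, 0xc0ffee00ba5eba11);
	pr_info("%pra\n", &r);
}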