summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-05 20:31:23 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-05 20:31:23 +0200
commit063df71a574b88e94391a3a719cf66d1b46df884 (patch)
tree64c3443d62b4a8ee4d63a77a450cf735f93bc2f5 /arch
parentEnable '-Werror' by default for all kernel builds (diff)
parentriscv: add support for hugepage migration (diff)
downloadlinux-063df71a574b88e94391a3a719cf66d1b46df884.tar.xz
linux-063df71a574b88e94391a3a719cf66d1b46df884.zip
Merge tag 'riscv-for-linus-5.15-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V updates from Palmer Dabbelt: - support PC-relative instructions (auipc and branches) in kprobes - support for forced IRQ threading - support for the hlt/nohlt kernel command line options, via the generic idle loop - show the edge/level triggered behavior of interrupts in /proc/interrupts - a handful of cleanups to our address mapping mechanisms - support for allocating gigantic hugepages via CMA - support for the undefined behavior sanitizer (UBSAN) - a handful of cleanups to the VDSO that allow the kernel to build with LLD. - support for hugepage migration * tag 'riscv-for-linus-5.15-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (21 commits) riscv: add support for hugepage migration RISC-V: Fix VDSO build for !MMU riscv: use strscpy to replace strlcpy riscv: explicitly use symbol offsets for VDSO riscv: Enable Undefined Behavior Sanitizer UBSAN riscv: Keep the riscv Kconfig selects sorted riscv: Support allocating gigantic hugepages using CMA riscv: fix the global name pfn_base confliction error riscv: Move early fdt mapping creation in its own function riscv: Simplify BUILTIN_DTB device tree mapping handling riscv: Use __maybe_unused instead of #ifdefs around variable declarations riscv: Get rid of map_size parameter to create_kernel_page_table riscv: Introduce va_kernel_pa_offset for 32-bit kernel riscv: Optimize kernel virtual address conversion macro dt-bindings: riscv: add starfive jh7100 bindings riscv: Enable GENERIC_IRQ_SHOW_LEVEL riscv: Enable idle generic idle loop riscv: Allow forced irq threading riscv: Implement thread_struct whitelist for hardened usercopy riscv: kprobes: implement the branch instructions ...
Diffstat (limited to 'arch')
-rw-r--r--arch/riscv/Kconfig12
-rw-r--r--arch/riscv/Makefile6
-rw-r--r--arch/riscv/include/asm/page.h21
-rw-r--r--arch/riscv/include/asm/processor.h8
-rw-r--r--arch/riscv/include/asm/vdso.h23
-rw-r--r--arch/riscv/kernel/probes/decode-insn.c5
-rw-r--r--arch/riscv/kernel/probes/simulate-insn.c112
-rw-r--r--arch/riscv/kernel/setup.c2
-rw-r--r--arch/riscv/kernel/vdso/Makefile26
-rwxr-xr-xarch/riscv/kernel/vdso/gen_vdso_offsets.sh5
-rwxr-xr-xarch/riscv/kernel/vdso/so2s.sh6
-rw-r--r--arch/riscv/mm/init.c130
12 files changed, 229 insertions, 127 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 1452584f83be..baf60fc350a4 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -13,9 +13,7 @@ config 32BIT
config RISCV
def_bool y
select ARCH_CLOCKSOURCE_INIT
- select ARCH_SUPPORTS_ATOMIC_RMW
- select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
- select ARCH_STACKWALK
+ select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_VIRTUAL if MMU
@@ -31,8 +29,12 @@ config RISCV
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+ select ARCH_STACKWALK
+ select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
select ARCH_USE_MEMTEST
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
@@ -48,9 +50,11 @@ config RISCV
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_EARLY_IOREMAP
select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
+ select GENERIC_IDLE_POLL_SETUP
select GENERIC_IOREMAP
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_IRQ_SHOW
+ select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_LIB_DEVMEM_IS_ALLOWED
select GENERIC_PCI_IOMAP
select GENERIC_PTDUMP if MMU
@@ -70,6 +74,7 @@ config RISCV
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+ select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
select HAVE_ASM_MODVERSIONS
select HAVE_CONTEXT_TRACKING
@@ -95,6 +100,7 @@ config RISCV
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
+ select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES
select OF
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index dcfbd2a87d41..01906a90c501 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -108,6 +108,12 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
+ifeq ($(CONFIG_MMU),y)
+prepare: vdso_prepare
+vdso_prepare: prepare0
+ $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h
+endif
+
ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
KBUILD_IMAGE := $(boot)/loader.bin
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index b0ca5058e7ae..109c97e991a6 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -79,8 +79,8 @@ typedef struct page *pgtable_t;
#endif
#ifdef CONFIG_MMU
-extern unsigned long pfn_base;
-#define ARCH_PFN_OFFSET (pfn_base)
+extern unsigned long riscv_pfn_base;
+#define ARCH_PFN_OFFSET (riscv_pfn_base)
#else
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
@@ -91,10 +91,8 @@ struct kernel_mapping {
uintptr_t size;
/* Offset between linear mapping virtual address and kernel load address */
unsigned long va_pa_offset;
-#ifdef CONFIG_64BIT
/* Offset between kernel mapping virtual address and kernel load address */
unsigned long va_kernel_pa_offset;
-#endif
unsigned long va_kernel_xip_pa_offset;
#ifdef CONFIG_XIP_KERNEL
uintptr_t xiprom;
@@ -105,11 +103,11 @@ struct kernel_mapping {
extern struct kernel_mapping kernel_map;
extern phys_addr_t phys_ram_base;
-#ifdef CONFIG_64BIT
#define is_kernel_mapping(x) \
((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
+
#define is_linear_mapping(x) \
- ((x) >= PAGE_OFFSET && (x) < kernel_map.virt_addr)
+ ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr))
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
#define kernel_mapping_pa_to_va(y) ({ \
@@ -123,7 +121,7 @@ extern phys_addr_t phys_ram_base;
#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset)
#define kernel_mapping_va_to_pa(y) ({ \
unsigned long _y = y; \
- (_y < kernel_map.virt_addr + XIP_OFFSET) ? \
+ (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \
((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) : \
((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \
})
@@ -133,15 +131,6 @@ extern phys_addr_t phys_ram_base;
is_linear_mapping(_x) ? \
linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \
})
-#else
-#define is_kernel_mapping(x) \
- ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
-#define is_linear_mapping(x) \
- ((x) >= PAGE_OFFSET)
-
-#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + kernel_map.va_pa_offset))
-#define __va_to_pa_nodebug(x) ((unsigned long)(x) - kernel_map.va_pa_offset)
-#endif /* CONFIG_64BIT */
#ifdef CONFIG_DEBUG_VIRTUAL
extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 021ed64ee608..46b492c78cbb 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -37,6 +37,14 @@ struct thread_struct {
unsigned long bad_cause;
};
+/* Whitelist the fstate from the task_struct for hardened usercopy */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ *offset = offsetof(struct thread_struct, fstate);
+ *size = sizeof_field(struct thread_struct, fstate);
+}
+
#define INIT_THREAD { \
.sp = sizeof(init_stack) + (long)&init_stack, \
}
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 1453a2f563bc..893e47195e30 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -8,26 +8,25 @@
#ifndef _ASM_RISCV_VDSO_H
#define _ASM_RISCV_VDSO_H
+
+/*
+ * All systems with an MMU have a VDSO, but systems without an MMU don't
+ * support shared libraries and therefor don't have one.
+ */
+#ifdef CONFIG_MMU
+
#include <linux/types.h>
+#include <generated/vdso-offsets.h>
#ifndef CONFIG_GENERIC_TIME_VSYSCALL
struct vdso_data {
};
#endif
-/*
- * The VDSO symbols are mapped into Linux so we can just use regular symbol
- * addressing to get their offsets in userspace. The symbols are mapped at an
- * offset of 0, but since the linker must support setting weak undefined
- * symbols to the absolute address 0 it also happens to support other low
- * addresses even when the code model suggests those low addresses would not
- * otherwise be availiable.
- */
#define VDSO_SYMBOL(base, name) \
-({ \
- extern const char __vdso_##name[]; \
- (void __user *)((unsigned long)(base) + __vdso_##name); \
-})
+ (void __user *)((unsigned long)(base) + __vdso_##name##_offset)
+
+#endif /* CONFIG_MMU */
asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
index 0ed043acc882..64f6183b4717 100644
--- a/arch/riscv/kernel/probes/decode-insn.c
+++ b/arch/riscv/kernel/probes/decode-insn.c
@@ -38,11 +38,10 @@ riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
RISCV_INSN_REJECTED(c_ebreak, insn);
#endif
- RISCV_INSN_REJECTED(auipc, insn);
- RISCV_INSN_REJECTED(branch, insn);
-
RISCV_INSN_SET_SIMULATE(jal, insn);
RISCV_INSN_SET_SIMULATE(jalr, insn);
+ RISCV_INSN_SET_SIMULATE(auipc, insn);
+ RISCV_INSN_SET_SIMULATE(branch, insn);
return INSN_GOOD;
}
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
index 2519ce26377d..d73e96f6ed7c 100644
--- a/arch/riscv/kernel/probes/simulate-insn.c
+++ b/arch/riscv/kernel/probes/simulate-insn.c
@@ -83,3 +83,115 @@ bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *reg
return ret;
}
+
+#define auipc_rd_idx(opcode) \
+ ((opcode >> 7) & 0x1f)
+
+#define auipc_imm(opcode) \
+ ((((opcode) >> 12) & 0xfffff) << 12)
+
+#if __riscv_xlen == 64
+#define auipc_offset(opcode) sign_extend64(auipc_imm(opcode), 31)
+#elif __riscv_xlen == 32
+#define auipc_offset(opcode) auipc_imm(opcode)
+#else
+#error "Unexpected __riscv_xlen"
+#endif
+
+bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * auipc instruction:
+ * 31 12 11 7 6 0
+ * | imm[31:12] | rd | opcode |
+ * 20 5 7
+ */
+
+ u32 rd_idx = auipc_rd_idx(opcode);
+ unsigned long rd_val = addr + auipc_offset(opcode);
+
+ if (!rv_insn_reg_set_val(regs, rd_idx, rd_val))
+ return false;
+
+ instruction_pointer_set(regs, addr + 4);
+
+ return true;
+}
+
+#define branch_rs1_idx(opcode) \
+ (((opcode) >> 15) & 0x1f)
+
+#define branch_rs2_idx(opcode) \
+ (((opcode) >> 20) & 0x1f)
+
+#define branch_funct3(opcode) \
+ (((opcode) >> 12) & 0x7)
+
+#define branch_imm(opcode) \
+ (((((opcode) >> 8) & 0xf ) << 1) | \
+ ((((opcode) >> 25) & 0x3f) << 5) | \
+ ((((opcode) >> 7) & 0x1 ) << 11) | \
+ ((((opcode) >> 31) & 0x1 ) << 12))
+
+#define branch_offset(opcode) \
+ sign_extend32((branch_imm(opcode)), 12)
+
+#define BRANCH_BEQ 0x0
+#define BRANCH_BNE 0x1
+#define BRANCH_BLT 0x4
+#define BRANCH_BGE 0x5
+#define BRANCH_BLTU 0x6
+#define BRANCH_BGEU 0x7
+
+bool __kprobes simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * branch instructions:
+ * 31 30 25 24 20 19 15 14 12 11 8 7 6 0
+ * | imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode |
+ * 1 6 5 5 3 4 1 7
+ * imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ
+ * imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE
+ * imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT
+ * imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE
+ * imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU
+ * imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU
+ */
+
+ s32 offset;
+ s32 offset_tmp;
+ unsigned long rs1_val;
+ unsigned long rs2_val;
+
+ if (!rv_insn_reg_get_val(regs, branch_rs1_idx(opcode), &rs1_val) ||
+ !rv_insn_reg_get_val(regs, branch_rs2_idx(opcode), &rs2_val))
+ return false;
+
+ offset_tmp = branch_offset(opcode);
+ switch (branch_funct3(opcode)) {
+ case BRANCH_BEQ:
+ offset = (rs1_val == rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BNE:
+ offset = (rs1_val != rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BLT:
+ offset = ((long)rs1_val < (long)rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BGE:
+ offset = ((long)rs1_val >= (long)rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BLTU:
+ offset = (rs1_val < rs2_val) ? offset_tmp : 4;
+ break;
+ case BRANCH_BGEU:
+ offset = (rs1_val >= rs2_val) ? offset_tmp : 4;
+ break;
+ default:
+ return false;
+ }
+
+ instruction_pointer_set(regs, addr + offset);
+
+ return true;
+}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 120b2f6f71bc..b9620e5f00ba 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -255,7 +255,7 @@ static void __init parse_dtb(void)
pr_err("No DTB passed to the kernel\n");
#ifdef CONFIG_CMDLINE_FORCE
- strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
pr_info("Forcing kernel command line to: %s\n", boot_command_line);
#endif
}
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 24d936c147cd..f2e065671e4d 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -23,10 +23,10 @@ ifneq ($(c-gettimeofday-y),)
endif
# Build rules
-targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-syms.S
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-obj-y += vdso.o vdso-syms.o
+obj-y += vdso.o
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
# Disable -pg to prevent insert call site
@@ -36,6 +36,7 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
# Force dependency
$(obj)/vdso.o: $(obj)/vdso.so
@@ -43,20 +44,22 @@ $(obj)/vdso.o: $(obj)/vdso.so
# link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold)
-LDFLAGS_vdso.so.dbg = -shared -s -soname=linux-vdso.so.1 \
+LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \
--build-id=sha1 --hash-style=both --eh-frame-hdr
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO. With ld --just-symbols we can then
-# refer to these symbols in the kernel code rather than hand-coded addresses.
-$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE
- $(call if_changed,so2s)
-
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
+
# actual build commands
# The DSO images are built using a special linker script
# Make sure only to export the intended __vdso_xxx symbol offsets.
@@ -65,11 +68,6 @@ quiet_cmd_vdsold = VDSOLD $@
$(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
rm $@.tmp
-# Extracts symbol offsets from the VDSO, converting them into an assembly file
-# that contains the same symbols at the same offsets.
-quiet_cmd_so2s = SO2S $@
- cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@
-
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
diff --git a/arch/riscv/kernel/vdso/gen_vdso_offsets.sh b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..c2e5613f3495
--- /dev/null
+++ b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+LC_ALL=C
+sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define \2_offset\t0x\1/p'
diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh
deleted file mode 100755
index e64cb6d9440e..000000000000
--- a/arch/riscv/kernel/vdso/so2s.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0+
-# Copyright 2020 Palmer Dabbelt <palmerdabbelt@google.com>
-
-sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \
-| grep '^\.'
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index fc818c86cc9b..c0cddf0fc22d 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -19,6 +19,7 @@
#include <linux/set_memory.h>
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
+#include <linux/hugetlb.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
@@ -222,6 +223,8 @@ static void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem();
dma_contiguous_reserve(dma32_phys_limit);
+ if (IS_ENABLED(CONFIG_64BIT))
+ hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
memblock_allow_resize();
}
@@ -234,14 +237,15 @@ static struct pt_alloc_ops _pt_ops __initdata;
#define pt_ops _pt_ops
#endif
-unsigned long pfn_base __ro_after_init;
-EXPORT_SYMBOL(pfn_base);
+unsigned long riscv_pfn_base __ro_after_init;
+EXPORT_SYMBOL(riscv_pfn_base);
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
@@ -322,7 +326,6 @@ static void __init create_pte_mapping(pte_t *ptep,
static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
-static pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
@@ -408,6 +411,7 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next fixmap_pte
+#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
#endif
void __init create_pgd_mapping(pgd_t *pgdp,
@@ -515,49 +519,80 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
#endif
#ifdef CONFIG_XIP_KERNEL
-static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size,
+static void __init create_kernel_page_table(pgd_t *pgdir,
__always_unused bool early)
{
uintptr_t va, end_va;
/* Map the flash resident part */
end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
- for (va = kernel_map.virt_addr; va < end_va; va += map_size)
+ for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
create_pgd_mapping(pgdir, va,
kernel_map.xiprom + (va - kernel_map.virt_addr),
- map_size, PAGE_KERNEL_EXEC);
+ PMD_SIZE, PAGE_KERNEL_EXEC);
/* Map the data in RAM */
end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
- for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += map_size)
+ for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
create_pgd_mapping(pgdir, va,
kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
- map_size, PAGE_KERNEL);
+ PMD_SIZE, PAGE_KERNEL);
}
#else
-static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size,
- bool early)
+static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
{
uintptr_t va, end_va;
end_va = kernel_map.virt_addr + kernel_map.size;
- for (va = kernel_map.virt_addr; va < end_va; va += map_size)
+ for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
create_pgd_mapping(pgdir, va,
kernel_map.phys_addr + (va - kernel_map.virt_addr),
- map_size,
+ PMD_SIZE,
early ?
PAGE_KERNEL_EXEC : pgprot_from_va(va));
}
#endif
-asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+/*
+ * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
+ * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
+ * entry.
+ */
+static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
{
- uintptr_t __maybe_unused pa;
- uintptr_t map_size;
-#ifndef __PAGETABLE_PMD_FOLDED
- pmd_t fix_bmap_spmd, fix_bmap_epmd;
+#ifndef CONFIG_BUILTIN_DTB
+ uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
+
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+ IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
+ PGDIR_SIZE,
+ IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+ pa, PMD_SIZE, PAGE_KERNEL);
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+ pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+ }
+
+ dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else
+ /*
+ * For 64-bit kernel, __va can't be used since it would return a linear
+ * mapping address whereas dtb_early_va will be used before
+ * setup_vm_final installs the linear mapping. For 32-bit kernel, as the
+ * kernel is mapped in the linear mapping, that makes no difference.
+ */
+ dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
#endif
+ dtb_early_pa = dtb_pa;
+}
+
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+ pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
+
kernel_map.virt_addr = KERNEL_LINK_ADDR;
#ifdef CONFIG_XIP_KERNEL
@@ -573,23 +608,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.phys_addr = (uintptr_t)(&_start);
kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif
-
kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
-#ifdef CONFIG_64BIT
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
-#endif
-
- pfn_base = PFN_DOWN(kernel_map.phys_addr);
- /*
- * Enforce boot alignment requirements of RV32 and
- * RV64 by only allowing PMD or PGD mappings.
- */
- map_size = PMD_SIZE;
+ riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
- BUG_ON((kernel_map.phys_addr % map_size) != 0);
+ BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
#ifdef CONFIG_64BIT
/*
@@ -634,50 +660,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
* us to reach paging_init(). We map all memory banks later
* in setup_vm_final() below.
*/
- create_kernel_page_table(early_pg_dir, map_size, true);
+ create_kernel_page_table(early_pg_dir, true);
-#ifndef __PAGETABLE_PMD_FOLDED
- /* Setup early PMD for DTB */
- create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
- (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
-#ifndef CONFIG_BUILTIN_DTB
- /* Create two consecutive PMD mappings for FDT early scan */
- pa = dtb_pa & ~(PMD_SIZE - 1);
- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
- pa, PMD_SIZE, PAGE_KERNEL);
- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
- pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
- dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
-#else /* CONFIG_BUILTIN_DTB */
-#ifdef CONFIG_64BIT
- /*
- * __va can't be used since it would return a linear mapping address
- * whereas dtb_early_va will be used before setup_vm_final installs
- * the linear mapping.
- */
- dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
-#else
- dtb_early_va = __va(dtb_pa);
-#endif /* CONFIG_64BIT */
-#endif /* CONFIG_BUILTIN_DTB */
-#else
-#ifndef CONFIG_BUILTIN_DTB
- /* Create two consecutive PGD mappings for FDT early scan */
- pa = dtb_pa & ~(PGDIR_SIZE - 1);
- create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
- pa, PGDIR_SIZE, PAGE_KERNEL);
- create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
- pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
- dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
-#else /* CONFIG_BUILTIN_DTB */
-#ifdef CONFIG_64BIT
- dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
-#else
- dtb_early_va = __va(dtb_pa);
-#endif /* CONFIG_64BIT */
-#endif /* CONFIG_BUILTIN_DTB */
-#endif
- dtb_early_pa = dtb_pa;
+ /* Setup early mapping for FDT early scan */
+ create_fdt_early_page_table(early_pg_dir, dtb_pa);
/*
* Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
@@ -752,7 +738,7 @@ static void __init setup_vm_final(void)
#ifdef CONFIG_64BIT
/* Map the kernel */
- create_kernel_page_table(swapper_pg_dir, PMD_SIZE, false);
+ create_kernel_page_table(swapper_pg_dir, false);
#endif
/* Clear fixmap PTE and PMD mappings */