# Base git commit: 94f6f0550c62
# (Linux 6.6-rc5)
#
# Author: Russell King (Oracle) (Fri 27 May 13:27:19 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:48 BST 2023)
#
# arm64: text replication: verify kernel text
#
# Verify that the replicated kernel image for the non-boot nodes matches
# the boot kernel image, and report any differences found. This ensures
# that the non-boot nodes are running an identical copy of the kernel.
#
# Signed-off-by: Russell King (Oracle)
#
# 76d031e02302639c357c61a23423a32f8e624dd0
#  arch/arm64/mm/ktext.c | 37 +++++++++++++++++++++++++++++++++++++
#  1 file changed, 37 insertions(+)
#
# Author: Russell King (Oracle) (Fri 17 Jun 13:25:15 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:47 BST 2023)
#
# arm64: text replication: add test module
#
# Add a module to allow kernel text replication to be tested; this
# exposes some data in procfs which can be used to verify that:
# (a) we're using different page tables in TTBR1 on CPUs in different
#     NUMA nodes
# (b) CPUs in different NUMA nodes are indeed accessing different
#     copies of the kernel
#
# Signed-off-by: Russell King (Oracle)
#
# 15a21053b29510f45765a58378510d3e30462004
#  arch/arm64/Kconfig             |  8 ++++
#  arch/arm64/include/asm/ktext.h |  2 +
#  arch/arm64/mm/Makefile         |  1 +
#  arch/arm64/mm/ktext-test.c     | 93 ++++++++++++++++++++++++++++++++++++++++++
#  arch/arm64/mm/ktext.c          | 15 +++++++
#  5 files changed, 119 insertions(+)
#  create mode 100644 arch/arm64/mm/ktext-test.c
#
# Author: Russell King (Oracle) (Wed 5 Apr 11:41:41 BST 2023)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:47 BST 2023)
#
# arm64: text replication: add Kconfig for default state
#
# Add a kernel configuration option to determine whether kernel text
# replication should default to being enabled or disabled at boot when
# no "ktext=" option is given on the command line.
#
# Signed-off-by: Russell King (Oracle)
#
# 8792c75aa0f8f31e2cca286214304f1a711c9738
#  arch/arm64/Kconfig    |  7 +++++++
#  arch/arm64/mm/ktext.c | 11 ++---------
#  2 files changed, 9 insertions(+), 9 deletions(-)
#
# Author: Russell King (Oracle) (Thu 21 Apr 16:12:12 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:47 BST 2023)
#
# arm64: text replication: add Kconfig
#
# Add the Kconfig symbol for kernel text replication. This unfortunately
# requires the KASAN and kernel text randomisation options to be disabled
# at the moment.
#
# Signed-off-by: Russell King (Oracle)
#
# 251f6fb45a6db97181205448129b3ed328dfce6e
#  arch/arm64/Kconfig | 10 +++++++++-
#  1 file changed, 9 insertions(+), 1 deletion(-)
#
# Author: Russell King (Oracle) (Thu 7 Jul 15:29:34 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:46 BST 2023)
#
# arm64: text replication: early kernel option to enable replication
#
# Provide an early kernel option "ktext=" which allows kernel text
# replication to be enabled. It takes a boolean argument.
#
# The way this has been implemented means that we take all the same paths
# through the kernel at runtime whether kernel text replication has been
# enabled or not; this allows the performance effects of the code changes
# to be evaluated separately from the act of running with replicated
# kernel text.
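#
# A sketch of how that works (illustrative only; the real hook is
# ktext_replication_write() in the arch/arm64/mm/ktext.c hunk below):
# the replication hooks are always compiled in and always called, but the
# per-node text pointers stay NULL unless "ktext=1" allocated a replica,
# so each hook degenerates to a quick scan when replication is off:
#
#	for_each_node(nid) {
#		if (!kernel_texts[nid])	/* NULL unless replication is on */
#			continue;
#		memcpy(kernel_texts[nid] + offset, data, size);
#	}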
#
# Signed-off-by: Russell King (Oracle)
#
# d2e2204756b93da4c4843f584118c42d87eb4009
#  Documentation/admin-guide/kernel-parameters.txt |  5 +++++
#  arch/arm64/mm/ktext.c                           | 18 ++++++++++++++++++
#  2 files changed, 23 insertions(+)
#
# Author: Russell King (Oracle) (Fri 17 Jun 13:23:39 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:46 BST 2023)
#
# arm64: text replication: include most of read-only data as well
#
# Include as much of the read-only data in the replication as we can
# without needing to move away from the generic RO_DATA() macro in
# the linker script.
#
# Unfortunately, the read-only data section is immediately followed
# by the read-only-after-init data with no page alignment, which
# means we can't have separate mappings for the read-only data
# section and everything else. Changing that would mean replacing
# the generic RO_DATA() macro, which increases the maintenance burden.
#
# However, this is likely not worth the effort, as the majority of the
# read-only data will be covered.
#
# Signed-off-by: Russell King (Oracle)
#
# 5e16e46d053571905ab638b4149eb40bce2ffb28
#  arch/arm64/mm/ktext.c |  2 +-
#  arch/arm64/mm/mmu.c   | 21 ++++++++++++++++++---
#  2 files changed, 19 insertions(+), 4 deletions(-)
#
# Author: Russell King (Oracle) (Mon 16 May 12:15:29 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:46 BST 2023)
#
# arm64: text replication: setup page tables for copied kernel
#
# Set up page table entries in each non-boot NUMA node page table to
# point at each node's own copy of the kernel text. This switches
# each node to use its own unique copy of the kernel text.
#
# Signed-off-by: Russell King (Oracle)
#
# 1648029d44a53bc9325f263d750161e53442d5a6
#  arch/arm64/include/asm/ktext.h |  1 +
#  arch/arm64/mm/ktext.c          |  8 +++++++
#  arch/arm64/mm/mmu.c            | 53 +++++++++++++++++++++++++++++++++++-------
#  3 files changed, 53 insertions(+), 9 deletions(-)
#
# Author: Russell King (Oracle) (Mon 16 May 15:57:35 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:46 BST 2023)
#
# arm64: text replication: update cnp support
#
# Add changes for CNP (Common Not Private) support of kernel text
# replication. Although text replication has only been tested on
# dual-socket Ampere A1 systems, provided the different NUMA nodes
# are not part of the same inner shareable domain, CNP should not
# be a problem.
#
# Signed-off-by: Russell King (Oracle)
#
# c25aed59780b6d84d034c56386fa7ead9c805f91
#  arch/arm64/include/asm/mmu_context.h | 2 +-
#  arch/arm64/kernel/cpufeature.c       | 2 +-
#  arch/arm64/kernel/suspend.c          | 3 ++-
#  3 files changed, 4 insertions(+), 3 deletions(-)
#
# Author: Russell King (Oracle) (Mon 16 May 10:22:31 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:45 BST 2023)
#
# arm64: text replication: boot secondary CPUs with appropriate TTBR1
#
# Arrange for secondary CPUs to boot with TTBR1 pointing at the
# appropriate per-node copy of the kernel page tables for the CPU's
# NUMA node.
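#
# In outline (condensed from the smp.c and head.S hunks later in this
# file): the boot CPU publishes the target node's swapper_pg_dir physical
# address through secondary_data, and the secondary loads TTBR1 from
# there before enabling the MMU:
#
#	/* boot CPU, __cpu_up() */
#	secondary_data.ttbr1 = __swapper_pg_dir_node_phys(cpu_to_node(cpu));
#	dcache_clean_poc(...);	/* the secondary reads this with the MMU off */
#
#	/* secondary CPU, secondary_startup in head.S */
#	adr_l	x1, secondary_data
#	ldr	x1, [x1, #CPU_BOOT_TTBR1]	// was: adrp x1, swapper_pg_dir
#	bl	__enable_mmu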
#
# Signed-off-by: Russell King (Oracle)
#
# 2e62e2dcac1fcf62b679de10b6b3fad9ed2ef58b
#  arch/arm64/include/asm/smp.h    | 1 +
#  arch/arm64/kernel/asm-offsets.c | 1 +
#  arch/arm64/kernel/head.S        | 3 ++-
#  arch/arm64/kernel/smp.c         | 3 +++
#  4 files changed, 7 insertions(+), 1 deletion(-)
#
# Author: Russell King (Oracle) (Wed 20 Apr 15:58:27 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:45 BST 2023)
#
# arm64: text replication: create per-node kernel page tables
#
# Allocate the level 0 page tables for the per-node kernel text
# replication, but copy all level 0 table entries from the NUMA node 0
# table. Therefore, for the time being, each node's level 0 page tables
# will contain identical entries, and thus other nodes will continue
# to use the node 0 kernel text.
#
# Since the level 0 page tables can be updated at runtime to add entries
# for vmalloc and module space, propagate these updates to the other
# swapper page tables. The exception is if we see an update for the
# level 0 entry which points to the kernel mapping.
#
# We also need to set up a copy of the trampoline page tables, as the
# assembly code relies on the two page tables being a fixed offset
# apart.
#
# Signed-off-by: Russell King (Oracle)
#
# 535af21956d829052244fe4fb513b556e6f4fe47
#  arch/arm64/include/asm/ktext.h | 12 ++++++++++++
#  arch/arm64/mm/ktext.c          | 42 +++++++++++++++++++++++++++++++++++++++++-
#  arch/arm64/mm/mmu.c            |  5 +++++
#  3 files changed, 58 insertions(+), 1 deletion(-)
#
# Author: Russell King (Oracle) (Wed 20 Apr 17:34:03 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:45 BST 2023)
#
# arm64: text replication: add swapper page directory helpers
#
# Add a series of helpers for the swapper page directories - a set which
# return those for the calling CPU, and those which take the NUMA node
# number.
#
# Signed-off-by: Russell King (Oracle)
#
# 6dd1ffa6be039751a8ba1a0cc2b89241e33201da
#  arch/arm64/include/asm/pgtable.h | 19 +++++++++++++++++++
#  arch/arm64/kernel/hibernate.c    |  2 +-
#  arch/arm64/mm/ktext.c            | 20 ++++++++++++++++++++
#  3 files changed, 40 insertions(+), 1 deletion(-)
#
# Author: Russell King (Oracle) (Thu 21 Apr 16:46:13 BST 2022)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:44 BST 2023)
#
# arm64: text replication: add node 0 page table definitions
#
# Add a struct definition for the level zero page table group (the
# optional trampoline page tables, reserved page tables, and swapper page
# tables).
#
# Add a symbol and extern declaration for the node 0 page table group.
#
# Add an array of pointers to per-node page tables, which will default to
# using the node 0 page table group.
#
# Signed-off-by: Russell King (Oracle)
#
# 5b05b079df42df655c3dae352c93d69b01337243
#  arch/arm64/include/asm/pgtable.h | 14 ++++++++++++++
#  arch/arm64/kernel/vmlinux.lds.S  |  3 +++
#  arch/arm64/mm/ktext.c            |  4 ++++
#  3 files changed, 21 insertions(+)
#
# Author: Russell King (Oracle) (Wed 11 Oct 18:12:53 BST 2023)
# Committer: Russell King (Oracle) (Sat 14 Oct 16:22:43 BST 2023)
#
# arm64: text replication: handle aarch64_insn_write_literal_u64()
#
# aarch64_insn_write_literal_u64() was introduced in v6.3-rc1 for
# updating ftrace ops pointers in the kernel text. This needs to be
# fixed up for kernel text replication, so provide a version that
# will update the replicated mappings as well.
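#
# The shape of the fix, as a sketch (the real hunks are in the patching.c
# and ktext.c diffs below): once the existing FIX_TEXT_POKE0 write has
# patched the node 0 image, mirror the literal into every replica at the
# same offset from _stext:
#
#	/* ...existing node 0 write via the text-poke fixmap... */
#	ktext_replication_write(addr, &val, sizeof(val));
#	return ret;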
#
# Signed-off-by: Russell King (Oracle)
#
# fa8b039f930726e48394cb72d4b5fa312184794f
#  arch/arm64/include/asm/ktext.h |  5 +++++
#  arch/arm64/kernel/patching.c   |  2 ++
#  arch/arm64/mm/ktext.c          | 21 +++++++++++++++++++++
#  3 files changed, 28 insertions(+)
#
# Author: Russell King (Oracle) (Tue 24 May 21:11:13 BST 2022)
# Committer: Russell King (Oracle) (Thu 12 Oct 10:52:17 BST 2023)
#
# arm64: text replication: add node text patching
#
# Add support for text patching on our replicated texts.
#
# Signed-off-by: Russell King (Oracle)
#
# ba8108d18b5484df278fef0f2287e93347f53f8e
#  arch/arm64/include/asm/ktext.h  | 12 +++++++++
#  arch/arm64/kernel/alternative.c |  2 ++
#  arch/arm64/kernel/patching.c    |  7 ++++-
#  arch/arm64/mm/ktext.c           | 58 +++++++++++++++++++++++++++++++++++++++++
#  4 files changed, 78 insertions(+), 1 deletion(-)
#
# Author: Russell King (Oracle) (Mon 16 May 10:24:35 BST 2022)
# Committer: Russell King (Oracle) (Thu 12 Oct 10:52:15 BST 2023)
#
# arm64: text replication: copy initial kernel text
#
# Allocate memory on the appropriate node for the per-node copies of the
# kernel text, and copy the kernel text to that memory. Clean and
# invalidate the caches to the point of unification so that the copied
# text is correctly visible to the target node.
#
# Signed-off-by: Russell King (Oracle)
#
# 808c5d1aa91b9988671bd1fbb1b4c2b9c741871e
#  arch/arm64/mm/ktext.c | 21 +++++++++++++++++++++
#  1 file changed, 21 insertions(+)
#
# Author: Russell King (Oracle) (Thu 21 Apr 16:09:57 BST 2022)
# Committer: Russell King (Oracle) (Thu 12 Oct 10:52:14 BST 2023)
#
# arm64: text replication: add sanity checks
#
# The kernel text and modules must be in separate L0 page table entries.
#
# Signed-off-by: Russell King (Oracle)
#
# 9c39f8df28f65c151fd3c0a4c9ad880950c7ebc6
#  arch/arm64/mm/ktext.c | 21 +++++++++++++++++++++
#  1 file changed, 21 insertions(+)
#
# Author: Russell King (Oracle) (Thu 21 Apr 16:40:02 BST 2022)
# Committer: Russell King (Oracle) (Thu 12 Oct 10:52:13 BST 2023)
#
# arm64: text replication: add init function
#
# A simple patch that adds an empty function for kernel text replication
# initialisation and hooks it into the initialisation path.
#
# Signed-off-by: Russell King (Oracle)
#
# 1bbbd0034ceb0ddd05a5a344bb148193252ba535
#  arch/arm64/include/asm/ktext.h | 20 ++++++++++++++++++++
#  arch/arm64/mm/Makefile         |  2 ++
#  arch/arm64/mm/init.c           |  3 +++
#  arch/arm64/mm/ktext.c          |  8 ++++++++
#  4 files changed, 33 insertions(+)
#  create mode 100644 arch/arm64/include/asm/ktext.h
#  create mode 100644 arch/arm64/mm/ktext.c
#
# Author: Russell King (Oracle) (Thu 21 Apr 13:51:35 BST 2022)
# Committer: Russell King (Oracle) (Thu 12 Oct 10:52:12 BST 2023)
#
# arm64: place kernel in its own L0 page table entry
#
# Kernel text replication needs to maintain separate per-node page
# tables for the kernel text. In order to do this without affecting
# other kernel memory mappings, placing the kernel such that it does
# not share an L0 page table entry with any other mapping is desirable.
#
# Prior to this commit, the layout without KASLR was:
#
#  +----------+
#  |  vmalloc |
#  +----------+
#  |  Kernel  |
#  +----------+ MODULES_END, VMALLOC_START, KIMAGE_VADDR =
#  |  Modules |                        MODULES_VADDR + MODULES_VSIZE
#  +----------+ MODULES_VADDR = _PAGE_END(VA_BITS_MIN)
#  | VA space |
#  +----------+ 0
#
# This becomes:
#
#  +----------+
#  |  vmalloc |
#  +----------+ VMALLOC_START = MODULES_END + PGDIR_SIZE
#  |  Kernel  |
#  +----------+ MODULES_END, KIMAGE_VADDR = _PAGE_END(VA_BITS_MIN) +
#  |  Modules |                        max(PGDIR_SIZE, MODULES_VSIZE)
#  +----------+ MODULES_VADDR = MODULES_END - MODULES_VSIZE
#  | VA space |
#  +----------+ 0
#
# This assumes MODULES_VSIZE (128M) <= PGDIR_SIZE.
#
# One side effect of this change is that KIMAGE_VADDR's definition now
# includes PGDIR_SIZE (to leave room for the modules) but this is not
# defined when asm/memory.h is included. This means KIMAGE_VADDR cannot
# be used in inline functions within this file, so we convert
# kaslr_offset() and kaslr_enabled() to be macros instead.
#
# Signed-off-by: Russell King (Oracle)
#
# 2a8a27a3150afc7fb123441c919f8d7ae997628c
#  arch/arm64/include/asm/memory.h  | 28 +++++++++++++++++++++-------
#  arch/arm64/include/asm/pgtable.h |  2 +-
#  arch/arm64/kernel/kaslr.c        |  1 +
#  3 files changed, 23 insertions(+), 8 deletions(-)
#
# Author: Russell King (Oracle) (Tue 16 Aug 12:19:40 BST 2022)
# Committer: Russell King (Oracle) (Thu 12 Oct 10:52:10 BST 2023)
#
# arm64: make clean_dcache_range_nopatch() visible
#
# When we hook into the kernel text patching code, we will need to call
# clean_dcache_range_nopatch() to ensure that the patching of the
# replicated kernel text is properly visible to other CPUs. Make this
# function available to the replication code.
#
# Signed-off-by: Russell King (Oracle)
#
# e04b02dc9d5a7bd08f90cfaa788f308a7ef063f2
#  arch/arm64/include/asm/cacheflush.h | 2 ++
#  arch/arm64/kernel/alternative.c     | 2 +-
#  2 files changed, 3 insertions(+), 1 deletion(-)
#
# Author: Russell King (Oracle) (Tue 9 Aug 11:38:45 BST 2022)
# Committer: Russell King (Oracle) (Thu 12 Oct 10:52:09 BST 2023)
#
# arm64: provide cpu_replace_ttbr1_phys()
#
# Provide a version of cpu_replace_ttbr1() which operates using a
# physical address rather than the virtual address of the page tables.
#
# Signed-off-by: Russell King (Oracle)
#
# 0486d7fff1cc0672cf375387d8a9a01174f610fe
#  arch/arm64/include/asm/mmu_context.h | 12 +++++++++---
#  1 file changed, 9 insertions(+), 3 deletions(-)
#
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0a1731a0f0ef..d74fca365bed 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2526,6 +2526,11 @@
 			0: force disabled
 			1: force enabled
 
+	ktext=		[ARM64] Control kernel text replication on NUMA
+			machines. Default: disabled.
+			0: disable kernel text replication
+			1: enable kernel text replication
+
 	kunit.enable=	[KUNIT] Enable executing KUnit tests. Requires
 			CONFIG_KUNIT to be set to be fully enabled.
 			The default value can be overridden via
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 78f20e632712..636cdc7bbf13 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -162,7 +162,7 @@ config ARM64
 	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
-	select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
+	select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48 && !REPLICATE_KTEXT)
 	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
@@ -1440,6 +1440,28 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
 
+config REPLICATE_KTEXT
+	bool "Replicate kernel text across numa nodes"
+	depends on NUMA
+	help
+	  Say Y here to enable replicating the kernel text across multiple
+	  nodes in a NUMA cluster. This trades memory for speed.
+
+config REPLICATE_KTEXT_DEFAULT
+	bool "Enable kernel text replication by default"
+	depends on REPLICATE_KTEXT
+	help
+	  Determine whether kernel text replication is enabled at boot by
+	  default.
+
+config TEST_KTEXT_REPLICATE
+	tristate "Kernel text replication testing module"
+	depends on REPLICATE_KTEXT
+	help
+	  Enable building of a test module for kernel text replication.
+
+	  If unsure, say N.
+
 source "kernel/Kconfig.hz"
 
 config ARCH_SPARSEMEM_ENABLE
@@ -2155,6 +2177,7 @@ config RELOCATABLE
 
 config RANDOMIZE_BASE
 	bool "Randomize the address of the kernel image"
+	depends on !REPLICATE_KTEXT
 	select RELOCATABLE
 	help
 	  Randomizes the virtual address at which the kernel image is
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index d115451ed263..e1f5047d0a06 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -104,6 +104,8 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
 }
 #define flush_icache_range flush_icache_range
 
+void clean_dcache_range_nopatch(u64 start, u64 end);
+
 /*
  * Copy user data from/to a page which is mapped into a different
  * processes address space. Really, we want to allow our "user
diff --git a/arch/arm64/include/asm/ktext.h b/arch/arm64/include/asm/ktext.h
new file mode 100644
index 000000000000..741b9f75c63e
--- /dev/null
+++ b/arch/arm64/include/asm/ktext.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022, Oracle and/or its affiliates.
+ */
+#ifndef ASM_KTEXT_H
+#define ASM_KTEXT_H
+
+#include
+
+#include
+
+#ifdef CONFIG_REPLICATE_KTEXT
+
+void ktext_replication_init(void);
+void ktext_replication_write(void *addr, void *data, size_t size);
+void __kprobes ktext_replication_patch(u32 *tp, __le32 insn);
+void ktext_replication_patch_alternative(__le32 *src, int nr_inst);
+void ktext_replication_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
+void ktext_replication_init_tramp(void);
+void create_kernel_nid_map(pgd_t *pgdp, void *ktext);
+
+extern const char ktext_nid[32];
+
+#else
+
+static inline void ktext_replication_init(void)
+{
+}
+
+static inline void ktext_replication_write(void *addr, void *data, size_t size)
+{
+}
+
+static inline void __kprobes ktext_replication_patch(u32 *tp, __le32 insn)
+{
+}
+
+static inline void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
+{
+}
+
+static inline void ktext_replication_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+}
+
+static inline void ktext_replication_init_tramp(void)
+{
+}
+
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index fde4186cc387..9410ec4e4207 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -43,9 +43,26 @@
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
 #define _PAGE_OFFSET(va)	(-(UL(1) << (va)))
 #define PAGE_OFFSET		(_PAGE_OFFSET(VA_BITS))
-#define KIMAGE_VADDR		(MODULES_END)
-#define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
-#define MODULES_VADDR		(_PAGE_END(VA_BITS_MIN))
+
+/*
+ * Setting KIMAGE_VADDR has got a lot harder; ideally we'd like to use
+ * max(PGDIR_SIZE, MODULES_VSIZE), but this can't work because it is used
+ * in both assembly and C, where it causes problems. max_t() solves the
+ * C problems but can't be used in assembly.
+ * CONFIG_ARM64_4K_PAGES, PGDIR_SIZE is 2M, 1G, 512G
+ * CONFIG_ARM64_16K_PAGES, PGDIR_SIZE is 32M, 64G or 128T
+ * CONFIG_ARM64_64K_PAGES, PGDIR_SIZE is 512M or 4T
+ */
+#if (CONFIG_ARM64_4K_PAGES && CONFIG_PGTABLE_LEVELS < 4) || \
+    (CONFIG_ARM64_16K_PAGES && CONFIG_PGTABLE_LEVELS < 3) || \
+    (CONFIG_ARM64_64K_PAGES && CONFIG_PGTABLE_LEVELS < 2)
+#define KIMAGE_OFFSET		MODULES_VSIZE
+#else
+#define KIMAGE_OFFSET		PGDIR_SIZE
+#endif
+#define KIMAGE_VADDR		(_PAGE_END(VA_BITS_MIN) + KIMAGE_OFFSET)
+#define MODULES_END		(KIMAGE_VADDR)
+#define MODULES_VADDR		(MODULES_END - MODULES_VSIZE)
 #define MODULES_VSIZE		(SZ_2G)
 #define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
@@ -199,10 +216,7 @@ extern u64 kimage_vaddr;
 /* the offset between the kernel virtual and physical mappings */
 extern u64 kimage_voffset;
 
-static inline unsigned long kaslr_offset(void)
-{
-	return kimage_vaddr - KIMAGE_VADDR;
-}
+#define kaslr_offset()	((unsigned long)(kimage_vaddr - KIMAGE_VADDR))
 
 #ifdef CONFIG_RANDOMIZE_BASE
 void kaslr_init(void);
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a6fb325424e7..36f6b1b65ae5 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -152,7 +152,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
+static inline void cpu_replace_ttbr1_phys(phys_addr_t pgd_phys, pgd_t *idmap)
 {
 	typedef void (ttbr_replace_func)(phys_addr_t);
 	extern ttbr_replace_func idmap_cpu_replace_ttbr1;
@@ -160,9 +160,10 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
 	unsigned long daif;
 
 	/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
-	phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
+	phys_addr_t ttbr1 = phys_to_ttbr(pgd_phys);
 
-	if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) {
+	if (system_supports_cnp() &&
+	    !WARN_ON(pgd_phys != swapper_pg_dir_node_phys())) {
 		/*
 		 * cpu_replace_ttbr1() is used when there's a boot CPU
 		 * up (i.e. cpufeature framework is not up yet) and
@@ -189,6 +190,11 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
 	cpu_uninstall_idmap();
 }
 
+static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
+{
+	cpu_replace_ttbr1_phys(virt_to_phys(pgdp), idmap);
+}
+
 /*
  * It would be nice to return ASIDs back to the allocator, but unfortunately
  * that introduces a race with a generation rollover where we could erroneously
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7f7d9b1df4e5..4eb5de62dbf9 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -21,7 +21,7 @@
  * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
  * and fixed mappings
  */
-#define VMALLOC_START		(MODULES_END)
+#define VMALLOC_START		(MODULES_END + PGDIR_SIZE)
 #define VMALLOC_END		(VMEMMAP_START - SZ_256M)
 
 #define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
 
@@ -615,6 +615,39 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
 extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
 
+struct pgtables {
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+	pgd_t	tramp_pg_dir[PTRS_PER_PGD];
+#endif
+	pgd_t	reserved_pg_dir[PTRS_PER_PGD];
+	pgd_t	swapper_pg_dir[PTRS_PER_PGD];
+};
+
+extern struct pgtables pgtable_node0;
+
+#ifdef CONFIG_REPLICATE_KTEXT
+extern struct pgtables *pgtables[MAX_NUMNODES];
+
+pgd_t *swapper_pg_dir_node(void);
+phys_addr_t __swapper_pg_dir_node_phys(int nid);
+phys_addr_t swapper_pg_dir_node_phys(void);
+#else
+static inline pgd_t *swapper_pg_dir_node(void)
+{
+	return swapper_pg_dir;
+}
+
+static inline phys_addr_t __swapper_pg_dir_node_phys(int nid)
+{
+	return __pa_symbol(swapper_pg_dir);
+}
+
+static inline phys_addr_t swapper_pg_dir_node_phys(void)
+{
+	return __pa_symbol(swapper_pg_dir);
+}
+#endif
+
 extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
 
 static inline bool in_swapper_pgdir(void *addr)
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 9b31e6d0da17..9635e473969b 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -79,6 +79,7 @@ asmlinkage void secondary_start_kernel(void);
 struct secondary_data {
 	struct task_struct *task;
 	long status;
+	phys_addr_t ttbr1;
 };
 
 extern struct secondary_data secondary_data;
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 8ff6610af496..6f17e2b4e1c3 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -121,7 +122,7 @@ static noinstr void patch_alternative(struct alt_instr *alt,
  * accidentally call into the cache.S code, which is patched by us at
  * runtime.
  */
-static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
+noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
 {
 	u64 cur, d_size, ctr_el0;
 
@@ -174,6 +175,7 @@ static void __apply_alternatives(const struct alt_region *region,
 			alt_cb(alt, origptr, updptr, nr_inst);
 
 		if (!is_module) {
+			ktext_replication_patch_alternative(updptr, nr_inst);
 			clean_dcache_range_nopatch((u64)origptr,
 						   (u64)(origptr + nr_inst));
 		}
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5ff1942b04fc..ce9d265bc099 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -121,6 +121,7 @@ int main(void)
   DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
   BLANK();
   DEFINE(CPU_BOOT_TASK,		offsetof(struct secondary_data, task));
+  DEFINE(CPU_BOOT_TTBR1,	offsetof(struct secondary_data, ttbr1));
   BLANK();
   DEFINE(FTR_OVR_VAL_OFFSET,	offsetof(struct arm64_ftr_override, val));
   DEFINE(FTR_OVR_MASK_OFFSET,	offsetof(struct arm64_ftr_override, mask));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 444a73c2e638..fbc736fe8650 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3422,7 +3422,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
 
 static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
 {
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
+	cpu_replace_ttbr1_phys(swapper_pg_dir_node_phys(), idmap_pg_dir);
 }
 
 /*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7b236994f0e1..109fadc8713d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -648,7 +648,8 @@ SYM_FUNC_START_LOCAL(secondary_startup)
 	ldr_l	x0, vabits_actual
 #endif
 	bl	__cpu_setup			// initialise processor
-	adrp	x1, swapper_pg_dir
+	adr_l	x1, secondary_data
+	ldr	x1, [x1, #CPU_BOOT_TTBR1]
 	adrp	x2, idmap_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =__secondary_switched
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 02870beb271e..be69515da802 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -113,7 +113,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 		return -EOVERFLOW;
 
 	arch_hdr_invariants(&hdr->invariants);
-	hdr->ttbr1_el1		= __pa_symbol(swapper_pg_dir);
+	hdr->ttbr1_el1		= swapper_pg_dir_node_phys();
 	hdr->reenter_kernel	= _cpu_resume;
 
 	/* We can't use __hyp_get_vectors() because kvm may still be loaded */
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 94a269cd1f07..6ffea2ce1a11 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -9,6 +9,7 @@
 #include
 
 #include
+#include
 
 u16 __initdata memstart_offset_seed;
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index b4835f6d594b..b670e159a766 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -102,6 +103,8 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
 	patch_unmap(FIX_TEXT_POKE0);
 	raw_spin_unlock_irqrestore(&patch_lock, flags);
 
+	ktext_replication_write(addr, &val, sizeof(val));
+
 	return ret;
 }
 
@@ -115,9 +118,13 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
 		return -EINVAL;
 
 	ret = aarch64_insn_write(tp, insn);
-	if (ret == 0)
+	if (ret == 0) {
+		/* Also patch the other nodes */
+		ktext_replication_patch(tp, cpu_to_le32(insn));
+
 		caches_clean_inval_pou((uintptr_t)tp, (uintptr_t)tp +
 				       AARCH64_INSN_SIZE);
+	}
 
 	return ret;
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 960b98b43506..b8493022814b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -119,6 +119,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * page tables.
 	 */
 	secondary_data.task = idle;
+	secondary_data.ttbr1 = __swapper_pg_dir_node_phys(cpu_to_node(cpu));
+	dcache_clean_poc((uintptr_t)&secondary_data,
+			 (uintptr_t)&secondary_data + sizeof(secondary_data));
 	update_cpu_boot_status(CPU_MMU_OFF);
 
 	/* Now bring the CPU into our world */
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 0fbdf5fe64d8..49fa80bafd6d 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -55,7 +55,8 @@ void notrace __cpu_suspend_exit(void)
 
 	/* Restore CnP bit in TTBR1_EL1 */
 	if (system_supports_cnp())
-		cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
+		cpu_replace_ttbr1_phys(swapper_pg_dir_node_phys(),
+				       idmap_pg_dir);
 
 	/*
 	 * PSTATE was not saved over suspend/resume, re-enable any detected
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3cd7e76cc562..d3c7ed76adbf 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -212,6 +212,9 @@ SECTIONS
 	idmap_pg_dir = .;
 	. += PAGE_SIZE;
 
+	/* pgtable struct - covers the tramp, reserved and swapper pgdirs */
+	pgtable_node0 = .;
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	tramp_pg_dir = .;
 	. += PAGE_SIZE;
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index dbd1bc95967d..482ba013c390 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -14,3 +14,6 @@ KASAN_SANITIZE_physaddr.o	+= n
 
 obj-$(CONFIG_KASAN)		+= kasan_init.o
 KASAN_SANITIZE_kasan_init.o	:= n
+
+obj-$(CONFIG_REPLICATE_KTEXT)	+= ktext.o
+obj-$(CONFIG_TEST_KTEXT_REPLICATE) += ktext-test.o
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 8a0f8604348b..4110e9396d86 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -452,6 +453,8 @@ void __init bootmem_init(void)
 
 	arch_numa_init();
 
+	ktext_replication_init();
+
 	/*
 	 * must be done after arch_numa_init() which calls numa_init() to
 	 * initialize node_online_map that gets used in hugetlb_cma_reserve()
diff --git a/arch/arm64/mm/ktext-test.c b/arch/arm64/mm/ktext-test.c
new file mode 100644
index 000000000000..f397b2fe2e0b
--- /dev/null
+++ b/arch/arm64/mm/ktext-test.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+static int ttbr1_show(struct seq_file *m, void *v)
+{
+	unsigned long ttbr;
+	int cpu;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+	ttbr = read_sysreg(ttbr1_el1);
+	preempt_enable();
+
+	seq_printf(m, "CPU%u: TTBR1 0x%08lx\n", cpu, ttbr);
+
+	return 0;
+}
+
+static int ttbr1_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ttbr1_show, NULL);
+}
+
+static const struct proc_ops ttbr1_fops = {
+	.proc_open	= ttbr1_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+extern const char ktext_nid[32];
+
+static int nid_show(struct seq_file *m, void *v)
+{
+	int cpu;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+	seq_printf(m, "CPU%u: nid %s\n",
+		   cpu, ktext_nid);
+	preempt_enable();
+
+	return 0;
+}
+
+static int nid_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nid_show, NULL);
+}
+
+static const struct proc_ops nid_fops = {
+	.proc_open	= nid_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+static int ttbr1_init(void)
+{
+	struct proc_dir_entry *dir;
+
+	dir = proc_mkdir("ktext", NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	if (!proc_create("ttbr1", S_IRUSR, dir, &ttbr1_fops) ||
+	    !proc_create("text_nid", S_IRUSR, dir, &nid_fops)) {
+		remove_proc_subtree("ktext", NULL);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+module_init(ttbr1_init);
+
+static void ttbr1_fin(void)
+{
+	remove_proc_subtree("ktext", NULL);
+}
+module_exit(ttbr1_fin);
+
+MODULE_AUTHOR("Russell King");
+MODULE_LICENSE("GPL");
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
new file mode 100644
index 000000000000..d9d7c0bb064d
--- /dev/null
+++ b/arch/arm64/mm/ktext.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Oracle and/or its affiliates.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+struct pgtables *pgtables[MAX_NUMNODES] = {
+	[0 ... MAX_NUMNODES - 1] = &pgtable_node0,
+};
+
+static void *kernel_texts[MAX_NUMNODES];
+
+#if IS_ENABLED(CONFIG_TEST_KTEXT_REPLICATE)
+const char ktext_nid[32] = "0";
+EXPORT_SYMBOL_GPL(ktext_nid);
+#endif
+
+static pgd_t *__swapper_pg_dir_node(int nid)
+{
+	return pgtables[nid]->swapper_pg_dir;
+}
+
+pgd_t *swapper_pg_dir_node(void)
+{
+	return __swapper_pg_dir_node(numa_node_id());
+}
+
+phys_addr_t __swapper_pg_dir_node_phys(int nid)
+{
+	return __pa(__swapper_pg_dir_node(nid));
+}
+
+phys_addr_t swapper_pg_dir_node_phys(void)
+{
+	return __swapper_pg_dir_node_phys(numa_node_id());
+}
+
+noinstr void ktext_replication_write(void *addr, void *data, size_t size)
+{
+	unsigned long offset;
+	void *ptr;
+	int nid;
+
+	if (!is_kernel_text((unsigned long)addr))
+		return;
+
+	offset = (unsigned long)addr - (unsigned long)_stext;
+
+	for_each_node(nid) {
+		if (!kernel_texts[nid] || !nid)
+			continue;
+
+		ptr = kernel_texts[nid] + offset;
+
+		memcpy(ptr, data, size);
+	}
+}
+
+void __kprobes ktext_replication_patch(u32 *tp, __le32 insn)
+{
+	unsigned long offset;
+	int nid, this_nid;
+	__le32 *p;
+
+	if (!is_kernel_text((unsigned long)tp))
+		return;
+
+	offset = (unsigned long)tp - (unsigned long)_stext;
+
+	this_nid = numa_node_id();
+	if (this_nid) {
+		/* The cache maintenance by aarch64_insn_patch_text_nosync()
+		 * will occur on this node. We need it to occur on node 0.
+		 */
+		p = (void *)lm_alias(_stext) + offset;
+		caches_clean_inval_pou((u64)p, (u64)p + AARCH64_INSN_SIZE);
+	}
+
+	for_each_node(nid) {
+		if (!kernel_texts[nid])
+			continue;
+
+		p = kernel_texts[nid] + offset;
+		WRITE_ONCE(*p, insn);
+		caches_clean_inval_pou((u64)p, (u64)p + AARCH64_INSN_SIZE);
+	}
+}
+
+/* Copy the patched alternative from the node0 image to the other
+ * nodes.  src is the node 0 linear-mapping address.
+ */
+void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
+{
+	unsigned long offset;
+	size_t size;
+	int nid;
+	__le32 *p;
+
+	offset = (unsigned long)src - (unsigned long)lm_alias(_stext);
+	if (offset >= _etext - _stext)
+		return;
+
+	size = AARCH64_INSN_SIZE * nr_inst;
+
+	for_each_node(nid) {
+		if (!kernel_texts[nid])
+			continue;
+
+		p = kernel_texts[nid] + offset;
+		memcpy(p, src, size);
+		clean_dcache_range_nopatch((u64)p, (u64)p + size);
+	}
+}
+
+static int __init ktext_replication_check(void)
+{
+	size_t size = _etext - _stext;
+	int nid;
+
+	preempt_disable();
+	pr_info("CPU%u: Checking ktext replication\n", smp_processor_id());
+
+	for_each_node(nid) {
+		if (!kernel_texts[nid])
+			continue;
+
+		if (memcmp(_stext, kernel_texts[nid], size)) {
+			u32 *st, *kt;
+			size_t i, n;
+
+			pr_err("NID%u: kernel text disagreement\n", nid);
+
+			st = (u32 *)_stext;
+			kt = kernel_texts[nid];
+			for (i = n = 0; i < size / 4; i++) {
+				if (st[i] != kt[i]) {
+					pr_err("Offset 0x%zx: 0x%08x != 0x%08x\n",
+					       i * 4, st[i], kt[i]);
+					if (n++ > 8)
+						break;
+				}
+			}
+		}
+	}
+	preempt_enable();
+
+	return 0;
+}
+late_initcall(ktext_replication_check);
+
+static bool ktext_enabled = IS_ENABLED(CONFIG_REPLICATE_KTEXT_DEFAULT);
+
+static int __init parse_ktext(char *str)
+{
+	return kstrtobool(str, &ktext_enabled);
+}
+early_param("ktext", parse_ktext);
+
+/* Allocate page tables and memory for the replicated kernel texts. */
+void __init ktext_replication_init(void)
+{
+	size_t size = __end_rodata - _stext;
+#if IS_ENABLED(CONFIG_TEST_KTEXT_REPLICATE)
+	size_t kt_nid = ktext_nid - _stext;
+#endif
+	int kidx = pgd_index((phys_addr_t)KERNEL_START);
+	int nid;
+
+	/*
+	 * If we've messed up and the kernel shares an L0 entry with the
+	 * module or vmalloc area, then don't even attempt to use text
+	 * replication.
+	 */
+	if (pgd_index(MODULES_VADDR) == kidx) {
+		pr_warn("Kernel is located in the same L0 index as modules - text replication disabled\n");
+		return;
+	}
+	if (pgd_index(VMALLOC_START) == kidx) {
+		pr_warn("Kernel is located in the same L0 index as vmalloc - text replication disabled\n");
+		return;
+	}
+
+	if (!ktext_enabled)
+		return;
+
+	for_each_node(nid) {
+		/* Nothing to do for node 0 */
+		if (!nid)
+			continue;
+
+		/* Allocate and copy initial kernel text for this node */
+		kernel_texts[nid] = memblock_alloc_node(size, PAGE_SIZE, nid);
+		memcpy(kernel_texts[nid], _stext, size);
+
+#if IS_ENABLED(CONFIG_TEST_KTEXT_REPLICATE)
+		/* Update the node ID in each copy of the kernel text/rodata */
+		snprintf(kernel_texts[nid] + kt_nid, sizeof(ktext_nid),
+			 "%u", nid);
+#endif
+
+		caches_clean_inval_pou((u64)kernel_texts[nid],
+				       (u64)kernel_texts[nid] + size);
+
+		/* Allocate the pagetables for this node */
+		pgtables[nid] = memblock_alloc_node(sizeof(*pgtables[0]),
+						    PGD_SIZE, nid);
+
+		/* Copy initial swapper page directory */
+		memcpy(pgtables[nid]->swapper_pg_dir, swapper_pg_dir, PGD_SIZE);
+
+		/* Clear the kernel mapping */
+		memset(&pgtables[nid]->swapper_pg_dir[kidx], 0,
+		       sizeof(pgtables[nid]->swapper_pg_dir[kidx]));
+
+		/* Create kernel mapping pointing at our local copy */
+		create_kernel_nid_map(pgtables[nid]->swapper_pg_dir,
+				      kernel_texts[nid]);
+	}
+}
+
+void ktext_replication_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	unsigned long idx = pgdp - swapper_pg_dir;
+	int nid;
+
+	if (WARN_ON_ONCE(idx >= PTRS_PER_PGD) ||
+	    WARN_ON_ONCE(idx == pgd_index((phys_addr_t)KERNEL_START)))
+		return;
+
+	for_each_node(nid) {
+		if (pgtables[nid]->swapper_pg_dir == swapper_pg_dir)
+			continue;
+
+		WRITE_ONCE(pgtables[nid]->swapper_pg_dir[idx], pgd);
+	}
+}
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+void __init ktext_replication_init_tramp(void)
+{
+	int nid;
+
+	for_each_node(nid) {
+		/* Nothing to do for node 0 */
+		if (pgtables[nid]->tramp_pg_dir == tramp_pg_dir)
+			continue;
+
+		memcpy(pgtables[nid]->tramp_pg_dir, tramp_pg_dir, PGD_SIZE);
+	}
+}
+#endif
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 47781bec6171..1e829ddd5781 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -81,6 +82,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
 	pgd_t *fixmap_pgdp;
 
 	spin_lock(&swapper_pgdir_lock);
+	ktext_replication_set_swapper_pgd(pgdp, pgd);
 	fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp));
 	WRITE_ONCE(*fixmap_pgdp, pgd);
 	/*
@@ -639,6 +641,16 @@ void mark_rodata_ro(void)
 	debug_checkwx();
 }
 
+static void __init create_kernel_mapping(pgd_t *pgdp, phys_addr_t pa_start,
+					 void *va_start, void *va_end,
+					 pgprot_t prot, int flags)
+{
+	size_t size = va_end - va_start;
+
+	__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size,
+			     prot, early_pgtable_alloc, flags);
+}
+
 static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
 				      pgprot_t prot, struct vm_struct *vma,
 				      int flags, unsigned long vm_flags)
@@ -649,8 +661,7 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
 	BUG_ON(!PAGE_ALIGNED(pa_start));
 	BUG_ON(!PAGE_ALIGNED(size));
 
-	__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
-			     early_pgtable_alloc, flags);
+	create_kernel_mapping(pgdp, pa_start, va_start, va_end, prot, flags);
 
 	if (!(vm_flags & VM_NO_GUARD))
 		size += PAGE_SIZE;
 
@@ -695,6 +706,9 @@ static int __init map_entry_trampoline(void)
 			__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
 				     pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO);
 
+	/* Copy trampoline page tables to other numa nodes */
+	ktext_replication_init_tramp();
+
 	return 0;
 }
 core_initcall(map_entry_trampoline);
 
@@ -716,14 +730,8 @@ static bool arm64_early_this_cpu_has_bti(void)
 				 ID_AA64PFR1_EL1_BT_SHIFT);
 }
 
-/*
- * Create fine-grained mappings for the kernel.
- */
-static void __init map_kernel(pgd_t *pgdp)
+static pgprot_t __init kernel_text_pgprot(void)
 {
-	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
-				vmlinux_initdata, vmlinux_data;
-
 	/*
 	 * External debuggers may need to write directly to the text
 	 * mapping to install SW breakpoints. Allow this (only) when
@@ -739,6 +747,53 @@ static void __init map_kernel(pgd_t *pgdp)
 	if (arm64_early_this_cpu_has_bti())
 		text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
 
+	return text_prot;
+}
+
+#ifdef CONFIG_REPLICATE_KTEXT
+void __init create_kernel_nid_map(pgd_t *pgdp, void *ktext)
+{
+	phys_addr_t pa_ktext;
+	size_t ro_offset;
+	void *ro_end;
+	pgprot_t text_prot = kernel_text_pgprot();
+
+	pa_ktext = __pa(ktext);
+	ro_offset = __pa_symbol(__start_rodata) - __pa_symbol(_stext);
+	/*
+	 * We must not cover the read-only data after init, since this
+	 * is written to during boot, and thus must be shared between
+	 * the NUMA nodes.
+	 */
+	ro_end = PTR_ALIGN_DOWN((void *)__start_ro_after_init, PAGE_SIZE);
+
+	create_kernel_mapping(pgdp, pa_ktext, _stext, _etext, text_prot, 0);
+	create_kernel_mapping(pgdp, pa_ktext + ro_offset,
+			      __start_rodata, ro_end,
+			      PAGE_KERNEL, NO_CONT_MAPPINGS);
+	create_kernel_mapping(pgdp, __pa_symbol(ro_end),
+			      ro_end, __inittext_begin,
+			      PAGE_KERNEL, NO_CONT_MAPPINGS);
+	create_kernel_mapping(pgdp, __pa_symbol(__inittext_begin),
+			      __inittext_begin, __inittext_end,
+			      text_prot, 0);
+	create_kernel_mapping(pgdp, __pa_symbol(__initdata_begin),
+			      __initdata_begin, __initdata_end,
+			      PAGE_KERNEL, 0);
+	create_kernel_mapping(pgdp, __pa_symbol(_data), _data, _end,
+			      PAGE_KERNEL, 0);
+}
+#endif
+
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgdp)
+{
+	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
+				vmlinux_initdata, vmlinux_data;
+	pgprot_t text_prot = kernel_text_pgprot();
+
 	/*
 	 * Only rodata will be remapped with different permissions later on,
 	 * all other segments are allowed to use contiguous mappings.
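
A usage sketch (not part of the series): with the CONFIG_TEST_KTEXT_REPLICATE
module from ktext-test.c above loaded, replication can be spot-checked from
userspace by pinning a task to each CPU in turn and reading the proc file the
module creates. On a replicating machine, CPUs on different NUMA nodes report
different node IDs. The file path and output format come from the module
source above; everything else is ordinary POSIX C.

	/* ktext-check.c: print which kernel-text replica each CPU sees */
	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		long ncpus = sysconf(_SC_NPROCESSORS_ONLN);

		for (long cpu = 0; cpu < ncpus; cpu++) {
			cpu_set_t set;
			char buf[64];
			FILE *f;

			CPU_ZERO(&set);
			CPU_SET(cpu, &set);
			/* run the next read on this CPU only */
			if (sched_setaffinity(0, sizeof(set), &set))
				continue;	/* CPU offline or not allowed */

			f = fopen("/proc/ktext/text_nid", "r");
			if (!f)
				return 1;	/* module not loaded? */
			if (fgets(buf, sizeof(buf), f))
				fputs(buf, stdout);	/* "CPU<n>: nid <id>" */
			fclose(f);
		}
		return 0;
	}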