Web lists-archives.com

[PATCH v4 5/6] x86/mm/KASLR: Calculate the actual size of vmemmap region




The vmemmap region has a different maximum size depending on the paging
mode. Currently its size is hardcoded as 1 TB in memory KASLR, which is
not right for 5-level paging mode. It will cause an overflow if the
vmemmap region is randomized to be adjacent to the cpu_entry_area region
and its actual size is bigger than 1 TB.

So calculate how many TB the vmemmap region actually needs, and align
that up to a 1 TB boundary. In 4-level paging mode the size will always
be 1 TB, since the maximum is 1 TB. In 5-level paging mode it is
variable, so that space can be saved for randomization.

Signed-off-by: Baoquan He <bhe@xxxxxxxxxx>
---
 arch/x86/mm/kaslr.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index ebf6d1d92385..615a79f6b701 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -69,19 +69,22 @@ static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
  *
  *	{ &page_offset_base, 0 },
  *	{ &vmalloc_base,     0 },
- *	{ &vmemmap_base,     1 },
+ *	{ &vmemmap_base,     0 },
  *
  * @size_tb: size in TB of each memory region. E.g, the sizes in 4-level
  * pageing mode are:
  *
  *	- Physical memory mapping: (actual RAM size + 10 TB padding)
  *	- Vmalloc: 32 TB
- *	- Vmemmap: 1 TB
+ *	- Vmemmap: (needed size aligned to 1TB boundary)
  *
- * As seen, the size of the physical memory mapping region is variable,
- * calculated according to the actual size of system RAM in order to
- * save more space for randomization. The rest are fixed values related
- * to paging mode.
+ * As seen, only the size of the vmalloc region is a fixed value related
+ * to paging mode, while the sizes of the physical memory mapping region
+ * and the vmemmap region are variable. The size of the physical memory
+ * mapping region is calculated according to the actual size of system
+ * RAM plus a padding value. The size of vmemmap is calculated as needed
+ * and aligned up to a 1 TB boundary. The calculations done here are to
+ * save more space for randomization.
  */
 static __initdata struct kaslr_memory_region {
 	unsigned long *base;
@@ -89,7 +92,7 @@ static __initdata struct kaslr_memory_region {
 } kaslr_regions[] = {
 	{ &page_offset_base, 0 },
 	{ &vmalloc_base, 0 },
-	{ &vmemmap_base, 1 },
+	{ &vmemmap_base, 0 },
 };
 
 /*
@@ -115,6 +118,7 @@ void __init kernel_randomize_memory(void)
 	unsigned long rand, memory_tb;
 	struct rnd_state rand_state;
 	unsigned long remain_entropy;
+	unsigned long vmemmap_size;
 
 	vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
 	vaddr = vaddr_start;
@@ -146,6 +150,15 @@ void __init kernel_randomize_memory(void)
 	if (memory_tb < kaslr_regions[0].size_tb)
 		kaslr_regions[0].size_tb = memory_tb;
 
+	/*
+	 * Calculate how many TB vmemmap region needs, and align to 1 TB
+	 * boundary. It's 1 TB in 4-level since the max is 1 TB, while
+	 * variable in 5-level.
+	 */
+	vmemmap_size = (kaslr_regions[0].size_tb << (TB_SHIFT - PAGE_SHIFT)) *
+		sizeof(struct page);
+	kaslr_regions[2].size_tb = DIV_ROUND_UP(vmemmap_size, 1UL << TB_SHIFT);
+
 	/* Calculate entropy available between regions */
 	remain_entropy = vaddr_end - vaddr_start;
 	for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
-- 
2.17.2