about summary refs log tree commit diff
diff options
context:
space:
mode:
author Stephen Rothwell <sfr@canb.auug.org.au> 2019-11-25 17:10:31 +1100
committer Stephen Rothwell <sfr@canb.auug.org.au> 2019-11-25 17:10:31 +1100
commit 16acdeaca058fc64efbb36ec10179d13b0225f4d (patch)
tree f6b3e4bd0ef80c8d283793047c6158c7b245f566
parent b4f518fc38c5c96ada2043e7b5d87c33090aa99c (diff)
parent 2a3cf7b6ac179edcb706a54fbb9ee5a87d082fa7 (diff)
download 96b-common-16acdeaca058fc64efbb36ec10179d13b0225f4d.tar.gz
Merge branch 'akpm/master'
-rw-r--r--Documentation/core-api/genalloc.rst2
-rw-r--r--arch/alpha/include/asm/mmzone.h1
-rw-r--r--arch/alpha/include/asm/pgalloc.h4
-rw-r--r--arch/alpha/include/asm/pgtable.h24
-rw-r--r--arch/alpha/mm/init.c12
-rw-r--r--arch/arc/include/asm/pgtable.h1
-rw-r--r--arch/arm/include/asm/pgtable-2level.h1
-rw-r--r--arch/arm/include/asm/pgtable-3level.h1
-rw-r--r--arch/arm/include/asm/pgtable.h2
-rw-r--r--arch/arm/mm/dma-mapping.c2
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/Kconfig.debug19
-rw-r--r--arch/arm64/include/asm/pgtable.h2
-rw-r--r--arch/arm64/include/asm/ptdump.h8
-rw-r--r--arch/arm64/mm/Makefile4
-rw-r--r--arch/arm64/mm/dump.c148
-rw-r--r--arch/arm64/mm/mmu.c4
-rw-r--r--arch/arm64/mm/ptdump_debugfs.c2
-rw-r--r--arch/c6x/include/asm/pgtable.h2
-rw-r--r--arch/m68k/include/asm/mcf_pgalloc.h7
-rw-r--r--arch/m68k/include/asm/mcf_pgtable.h28
-rw-r--r--arch/m68k/include/asm/mmu_context.h12
-rw-r--r--arch/m68k/include/asm/motorola_pgalloc.h4
-rw-r--r--arch/m68k/include/asm/motorola_pgtable.h32
-rw-r--r--arch/m68k/include/asm/page.h9
-rw-r--r--arch/m68k/include/asm/pgtable_mm.h11
-rw-r--r--arch/m68k/include/asm/pgtable_no.h2
-rw-r--r--arch/m68k/include/asm/sun3_pgalloc.h5
-rw-r--r--arch/m68k/include/asm/sun3_pgtable.h18
-rw-r--r--arch/m68k/kernel/sys_m68k.c10
-rw-r--r--arch/m68k/mm/init.c6
-rw-r--r--arch/m68k/mm/kmap.c24
-rw-r--r--arch/m68k/mm/mcfmmu.c16
-rw-r--r--arch/m68k/mm/motorola.c17
-rw-r--r--arch/m68k/sun3x/dvma.c7
-rw-r--r--arch/microblaze/include/asm/page.h3
-rw-r--r--arch/microblaze/include/asm/pgalloc.h16
-rw-r--r--arch/microblaze/include/asm/pgtable.h32
-rw-r--r--arch/microblaze/kernel/signal.c10
-rw-r--r--arch/microblaze/mm/init.c7
-rw-r--r--arch/microblaze/mm/pgtable.c13
-rw-r--r--arch/mips/include/asm/pgtable.h5
-rw-r--r--arch/nds32/include/asm/page.h3
-rw-r--r--arch/nds32/include/asm/pgalloc.h3
-rw-r--r--arch/nds32/include/asm/pgtable.h12
-rw-r--r--arch/nds32/include/asm/tlb.h1
-rw-r--r--arch/nds32/kernel/pm.c4
-rw-r--r--arch/nds32/mm/fault.c16
-rw-r--r--arch/nds32/mm/init.c11
-rw-r--r--arch/nds32/mm/mm-nds32.c6
-rw-r--r--arch/nds32/mm/proc.c26
-rw-r--r--arch/parisc/include/asm/page.h30
-rw-r--r--arch/parisc/include/asm/pgalloc.h41
-rw-r--r--arch/parisc/include/asm/pgtable.h52
-rw-r--r--arch/parisc/include/asm/tlb.h2
-rw-r--r--arch/parisc/kernel/cache.c13
-rw-r--r--arch/parisc/kernel/pci-dma.c9
-rw-r--r--arch/parisc/mm/fixmap.c10
-rw-r--r--arch/parisc/mm/hugetlbpage.c18
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h30
-rw-r--r--arch/riscv/include/asm/pgtable-64.h7
-rw-r--r--arch/riscv/include/asm/pgtable.h7
-rw-r--r--arch/s390/include/asm/pgtable.h2
-rw-r--r--arch/sparc/include/asm/pgalloc_32.h6
-rw-r--r--arch/sparc/include/asm/pgtable_32.h28
-rw-r--r--arch/sparc/include/asm/pgtable_64.h2
-rw-r--r--arch/sparc/mm/fault_32.c11
-rw-r--r--arch/sparc/mm/highmem.c6
-rw-r--r--arch/sparc/mm/io-unit.c6
-rw-r--r--arch/sparc/mm/iommu.c6
-rw-r--r--arch/sparc/mm/srmmu.c51
-rw-r--r--arch/um/include/asm/pgtable-2level.h1
-rw-r--r--arch/um/include/asm/pgtable-3level.h1
-rw-r--r--arch/um/include/asm/pgtable.h3
-rw-r--r--arch/um/kernel/mem.c8
-rw-r--r--arch/um/kernel/skas/mmu.c12
-rw-r--r--arch/um/kernel/skas/uaccess.c7
-rw-r--r--arch/um/kernel/tlb.c85
-rw-r--r--arch/um/kernel/trap.c4
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Kconfig.debug20
-rw-r--r--arch/x86/include/asm/pgtable.h10
-rw-r--r--arch/x86/mm/Makefile4
-rw-r--r--arch/x86/mm/debug_pagetables.c8
-rw-r--r--arch/x86/mm/dump_pagetables.c343
-rw-r--r--arch/x86/platform/efi/efi_32.c2
-rw-r--r--arch/x86/platform/efi/efi_64.c4
-rw-r--r--drivers/auxdisplay/charlcd.c34
-rw-r--r--drivers/block/null_blk_main.c56
-rw-r--r--drivers/firmware/efi/arm-runtime.c2
-rw-r--r--drivers/gpio/gpio-104-dio-48e.c73
-rw-r--r--drivers/gpio/gpio-104-idi-48.c36
-rw-r--r--drivers/gpio/gpio-74x164.c19
-rw-r--r--drivers/gpio/gpio-gpio-mm.c73
-rw-r--r--drivers/gpio/gpio-max3191x.c19
-rw-r--r--drivers/gpio/gpio-pca953x.c195
-rw-r--r--drivers/gpio/gpio-pci-idio-16.c75
-rw-r--r--drivers/gpio/gpio-pcie-idio-24.c109
-rw-r--r--drivers/gpio/gpio-pisosr.c12
-rw-r--r--drivers/gpio/gpio-uniphier.c13
-rw-r--r--drivers/gpio/gpio-ws16c48.c73
-rw-r--r--drivers/media/platform/sti/delta/delta-ipc.c4
-rw-r--r--drivers/misc/sram-exec.c2
-rw-r--r--drivers/pinctrl/pxa/pinctrl-pxa2xx.c1
-rw-r--r--drivers/thermal/intel/intel_soc_dts_iosf.c31
-rw-r--r--drivers/thermal/intel/intel_soc_dts_iosf.h2
-rw-r--r--drivers/tty/serial/sh-sci.c2
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--include/asm-generic/4level-fixup.h39
-rw-r--r--include/asm-generic/bitops/find.h17
-rw-r--r--include/asm-generic/pgtable.h20
-rw-r--r--include/linux/bitmap.h51
-rw-r--r--include/linux/bitops.h12
-rw-r--r--include/linux/genalloc.h2
-rw-r--r--include/linux/kernel.h19
-rw-r--r--include/linux/mm.h10
-rw-r--r--include/linux/pagewalk.h42
-rw-r--r--include/linux/ptdump.h22
-rw-r--r--kernel/dma/remap.c2
-rw-r--r--lib/Kconfig.debug645
-rw-r--r--lib/bitmap.c12
-rw-r--r--lib/find_bit.c14
-rw-r--r--lib/genalloc.c5
-rw-r--r--lib/test_bitmap.c202
-rw-r--r--mm/Kconfig.debug21
-rw-r--r--mm/Makefile1
-rw-r--r--mm/hmm.c8
-rw-r--r--mm/memory.c8
-rw-r--r--mm/migrate.c5
-rw-r--r--mm/mincore.c1
-rw-r--r--mm/pagewalk.c126
-rw-r--r--mm/ptdump.c151
132 files changed, 1977 insertions, 1759 deletions
diff --git a/Documentation/core-api/genalloc.rst b/Documentation/core-api/genalloc.rst
index 098a46f55798..a5af2cbf58a5 100644
--- a/Documentation/core-api/genalloc.rst
+++ b/Documentation/core-api/genalloc.rst
@@ -129,7 +129,7 @@ writing of special-purpose memory allocators in the future.
:functions: gen_pool_for_each_chunk
.. kernel-doc:: lib/genalloc.c
- :functions: addr_in_gen_pool
+ :functions: gen_pool_has_addr
.. kernel-doc:: lib/genalloc.c
:functions: gen_pool_avail
diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
index 889b5d3ad825..7ee144f484f1 100644
--- a/arch/alpha/include/asm/mmzone.h
+++ b/arch/alpha/include/asm/mmzone.h
@@ -73,7 +73,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32))
-#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32))
#define pte_pfn(pte) (pte_val(pte) >> 32)
#define mk_pte(page, pgprot) \
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index eb91f1e85629..a1a29f60934c 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -27,9 +27,9 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
}
static inline void
-pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- pgd_set(pgd, pmd);
+ pud_set(pud, pmd);
}
extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 065b57f408c3..299791ce14b6 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -2,7 +2,7 @@
#ifndef _ALPHA_PGTABLE_H
#define _ALPHA_PGTABLE_H
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
/*
* This file contains the functions and defines necessary to modify and use
@@ -226,8 +226,8 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
extern inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
{ pmd_val(*pmdp) = _PAGE_TABLE | ((((unsigned long) ptep) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
-extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
-{ pgd_val(*pgdp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
+extern inline void pud_set(pud_t * pudp, pmd_t * pmdp)
+{ pud_val(*pudp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
extern inline unsigned long
@@ -238,11 +238,11 @@ pmd_page_vaddr(pmd_t pmd)
#ifndef CONFIG_DISCONTIGMEM
#define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32))
-#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32))
+#define pud_page(pud) (mem_map + ((pud_val(pud) & _PFN_MASK) >> 32))
#endif
-extern inline unsigned long pgd_page_vaddr(pgd_t pgd)
-{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
+extern inline unsigned long pud_page_vaddr(pud_t pgd)
+{ return PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
extern inline int pte_none(pte_t pte) { return !pte_val(pte); }
extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; }
@@ -256,10 +256,10 @@ extern inline int pmd_bad(pmd_t pmd) { return (pmd_val(pmd) & ~_PFN_MASK) != _P
extern inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) & _PAGE_VALID; }
extern inline void pmd_clear(pmd_t * pmdp) { pmd_val(*pmdp) = 0; }
-extern inline int pgd_none(pgd_t pgd) { return !pgd_val(pgd); }
-extern inline int pgd_bad(pgd_t pgd) { return (pgd_val(pgd) & ~_PFN_MASK) != _PAGE_TABLE; }
-extern inline int pgd_present(pgd_t pgd) { return pgd_val(pgd) & _PAGE_VALID; }
-extern inline void pgd_clear(pgd_t * pgdp) { pgd_val(*pgdp) = 0; }
+extern inline int pud_none(pud_t pud) { return !pud_val(pud); }
+extern inline int pud_bad(pud_t pud) { return (pud_val(pud) & ~_PFN_MASK) != _PAGE_TABLE; }
+extern inline int pud_present(pud_t pud) { return pud_val(pud) & _PAGE_VALID; }
+extern inline void pud_clear(pud_t * pudp) { pud_val(*pudp) = 0; }
/*
* The following only work if pte_present() is true.
@@ -301,9 +301,9 @@ extern inline pte_t pte_mkspecial(pte_t pte) { return pte; }
*/
/* Find an entry in the second-level page table.. */
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address)
{
- pmd_t *ret = (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
+ pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
smp_read_barrier_depends(); /* see above */
return ret;
}
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index e2cbec3789e8..12e218d3792a 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -146,6 +146,8 @@ callback_init(void * kernel_end)
{
struct crb_struct * crb;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
void *two_pages;
@@ -184,8 +186,10 @@ callback_init(void * kernel_end)
memset(two_pages, 0, 2*PAGE_SIZE);
pgd = pgd_offset_k(VMALLOC_START);
- pgd_set(pgd, (pmd_t *)two_pages);
- pmd = pmd_offset(pgd, VMALLOC_START);
+ p4d = p4d_offset(pgd, VMALLOC_START);
+ pud = pud_offset(p4d, VMALLOC_START);
+ pud_set(pud, (pmd_t *)two_pages);
+ pmd = pmd_offset(pud, VMALLOC_START);
pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE));
if (alpha_using_srm) {
@@ -214,9 +218,9 @@ callback_init(void * kernel_end)
/* Newer consoles (especially on larger
systems) may require more pages of
PTEs. Grab additional pages as needed. */
- if (pmd != pmd_offset(pgd, vaddr)) {
+ if (pmd != pmd_offset(pud, vaddr)) {
memset(kernel_end, 0, PAGE_SIZE);
- pmd = pmd_offset(pgd, vaddr);
+ pmd = pmd_offset(pud, vaddr);
pmd_set(pmd, (pte_t *)kernel_end);
kernel_end += PAGE_SIZE;
}
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 9019ed9f9c94..12be7e1b7cc0 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -273,6 +273,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
#define pmd_none(x) (!pmd_val(x))
#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
#define pmd_present(x) (pmd_val(x))
+#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 51beec41d48c..0d3ea35c97fe 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -189,6 +189,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
}
#define pmd_large(pmd) (pmd_val(pmd) & 2)
+#define pmd_leaf(pmd) (pmd_val(pmd) & 2)
#define pmd_bad(pmd) (pmd_val(pmd) & 2)
#define pmd_present(pmd) (pmd_val(pmd))
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 5b18295021a0..ad55ab068dbf 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -134,6 +134,7 @@
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
#define pmd_large(pmd) pmd_sect(pmd)
+#define pmd_leaf(pmd) pmd_sect(pmd)
#define pud_clear(pudp) \
do { \
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 3ae120cd1715..eabcb48a7840 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -12,7 +12,7 @@
#ifndef CONFIG_MMU
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
#include <asm/pgtable-nommu.h>
#else
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1df6eb42f22e..e822af0d9219 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -529,7 +529,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
static bool __in_atomic_pool(void *start, size_t size)
{
- return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+ return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
}
static int __free_from_pool(void *start, size_t size)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d33058b9966b..c4d6d8d6b6c4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -103,6 +103,7 @@ config ARM64
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_PCI_IOMAP
+ select GENERIC_PTDUMP
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index cf09010d825f..1c906d932d6b 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -1,22 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-config ARM64_PTDUMP_CORE
- def_bool n
-
-config ARM64_PTDUMP_DEBUGFS
- bool "Export kernel pagetable layout to userspace via debugfs"
- depends on DEBUG_KERNEL
- select ARM64_PTDUMP_CORE
- select DEBUG_FS
- help
- Say Y here if you want to show the kernel pagetable layout in a
- debugfs file. This information is only useful for kernel developers
- who are working in architecture specific areas of the kernel.
- It is probably not a good idea to enable this feature in a production
- kernel.
-
- If in doubt, say N.
-
config PID_IN_CONTEXTIDR
bool "Write the current PID to the CONTEXTIDR register"
help
@@ -42,7 +25,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET
config DEBUG_WX
bool "Warn on W+X mappings at boot"
- select ARM64_PTDUMP_CORE
+ select PTDUMP_CORE
---help---
Generate a warning if any W+X mappings are found at boot.
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5d15b4735a0e..40df7e16d397 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -445,6 +445,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
PMD_TYPE_TABLE)
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
+#define pmd_leaf(pmd) pmd_sect(pmd)
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
@@ -529,6 +530,7 @@ static inline void pte_unmap(pte_t *pte) { }
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
#define pud_present(pud) pte_present(pud_pte(pud))
+#define pud_leaf(pud) pud_sect(pud)
#define pud_valid(pud) pte_valid(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 0b8e7269ec82..38187f74e089 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -5,7 +5,7 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
-#ifdef CONFIG_ARM64_PTDUMP_CORE
+#ifdef CONFIG_PTDUMP_CORE
#include <linux/mm_types.h>
#include <linux/seq_file.h>
@@ -21,15 +21,15 @@ struct ptdump_info {
unsigned long base_addr;
};
-void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info);
-#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS
+void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+#ifdef CONFIG_PTDUMP_DEBUGFS
void ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
#endif
void ptdump_check_wx(void);
-#endif /* CONFIG_ARM64_PTDUMP_CORE */
+#endif /* CONFIG_PTDUMP_CORE */
#ifdef CONFIG_DEBUG_WX
#define debug_checkwx() ptdump_check_wx()
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 849c1df3d214..d91030f0ffee 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,8 +4,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
ioremap.o mmap.o pgd.o mmu.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o
-obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o
+obj-$(CONFIG_PTDUMP_CORE) += dump.o
+obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
KASAN_SANITIZE_physaddr.o += n
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 93f9f77582ae..4997ce244172 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -15,6 +15,7 @@
#include <linux/io.h>
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/ptdump.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
@@ -75,10 +76,11 @@ static struct addr_marker address_markers[] = {
* dumps out a description of the range.
*/
struct pg_state {
+ struct ptdump_state ptdump;
struct seq_file *seq;
const struct addr_marker *marker;
unsigned long start_address;
- unsigned level;
+ int level;
u64 current_prot;
bool check_wx;
unsigned long wx_pages;
@@ -173,11 +175,14 @@ struct pg_level {
};
static struct pg_level pg_level[] = {
- {
- }, { /* pgd */
+ { /* pgd */
.name = "PGD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
+ }, { /* p4d */
+ .name = "P4D",
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
}, { /* pud */
.name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
.bits = pte_bits,
@@ -240,13 +245,17 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
- u64 val)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ unsigned long val)
{
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
static const char units[] = "KMGTPE";
- u64 prot = val & pg_level[level].mask;
+ u64 prot = 0;
+
+ if (level >= 0)
+ prot = val & pg_level[level].mask;
- if (!st->level) {
+ if (st->level == -1) {
st->level = level;
st->current_prot = prot;
st->start_address = addr;
@@ -259,21 +268,22 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
if (st->current_prot) {
note_prot_uxn(st, addr);
note_prot_wx(st, addr);
- pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ",
+ }
+
+ pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ",
st->start_address, addr);
- delta = (addr - st->start_address) >> 10;
- while (!(delta & 1023) && unit[1]) {
- delta >>= 10;
- unit++;
- }
- pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
- pg_level[st->level].name);
- if (pg_level[st->level].bits)
- dump_prot(st, pg_level[st->level].bits,
- pg_level[st->level].num);
- pt_dump_seq_puts(st->seq, "\n");
+ delta = (addr - st->start_address) >> 10;
+ while (!(delta & 1023) && unit[1]) {
+ delta >>= 10;
+ unit++;
}
+ pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
+ pg_level[st->level].name);
+ if (st->current_prot && pg_level[st->level].bits)
+ dump_prot(st, pg_level[st->level].bits,
+ pg_level[st->level].num);
+ pt_dump_seq_puts(st->seq, "\n");
if (addr >= st->marker[1].start_address) {
st->marker++;
@@ -292,85 +302,27 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
}
-static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start,
- unsigned long end)
-{
- unsigned long addr = start;
- pte_t *ptep = pte_offset_kernel(pmdp, start);
-
- do {
- note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-}
-
-static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start,
- unsigned long end)
-{
- unsigned long next, addr = start;
- pmd_t *pmdp = pmd_offset(pudp, start);
-
- do {
- pmd_t pmd = READ_ONCE(*pmdp);
- next = pmd_addr_end(addr, end);
-
- if (pmd_none(pmd) || pmd_sect(pmd)) {
- note_page(st, addr, 3, pmd_val(pmd));
- } else {
- BUG_ON(pmd_bad(pmd));
- walk_pte(st, pmdp, addr, next);
- }
- } while (pmdp++, addr = next, addr != end);
-}
-
-static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start,
- unsigned long end)
+void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
- unsigned long next, addr = start;
- pud_t *pudp = pud_offset(pgdp, start);
-
- do {
- pud_t pud = READ_ONCE(*pudp);
- next = pud_addr_end(addr, end);
-
- if (pud_none(pud) || pud_sect(pud)) {
- note_page(st, addr, 2, pud_val(pud));
- } else {
- BUG_ON(pud_bad(pud));
- walk_pmd(st, pudp, addr, next);
- }
- } while (pudp++, addr = next, addr != end);
-}
+ unsigned long end = ~0UL;
+ struct pg_state st;
-static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
- unsigned long start)
-{
- unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0;
- unsigned long next, addr = start;
- pgd_t *pgdp = pgd_offset(mm, start);
-
- do {
- pgd_t pgd = READ_ONCE(*pgdp);
- next = pgd_addr_end(addr, end);
-
- if (pgd_none(pgd)) {
- note_page(st, addr, 1, pgd_val(pgd));
- } else {
- BUG_ON(pgd_bad(pgd));
- walk_pud(st, pgdp, addr, next);
- }
- } while (pgdp++, addr = next, addr != end);
-}
+ if (info->base_addr < TASK_SIZE_64)
+ end = TASK_SIZE_64;
-void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
-{
- struct pg_state st = {
- .seq = m,
+ st = (struct pg_state){
+ .seq = s,
.marker = info->markers,
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]){
+ {info->base_addr, end},
+ {0, 0}
+ }
+ }
};
- walk_pgd(&st, info->mm, info->base_addr);
-
- note_page(&st, 0, 0, 0);
+ ptdump_walk_pgd(&st.ptdump, info->mm);
}
static void ptdump_initialize(void)
@@ -397,11 +349,19 @@ void ptdump_check_wx(void)
{ 0, NULL},
{ -1, NULL},
},
+ .level = -1,
.check_wx = true,
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {PAGE_OFFSET, ~0UL},
+ {0, 0}
+ }
+ }
};
- walk_pgd(&st, &init_mm, PAGE_OFFSET);
- note_page(&st, 0, 0, 0);
+ ptdump_walk_pgd(&st.ptdump, &init_mm);
+
if (st.wx_pages || st.uxn_pages)
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
st.wx_pages, st.uxn_pages);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 40797cbfba2d..128f70852bf3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -943,13 +943,13 @@ int __init arch_ioremap_pud_supported(void)
* SW table walks can't handle removal of intermediate entries.
*/
return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
- !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS);
+ !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
int __init arch_ioremap_pmd_supported(void)
{
/* See arch_ioremap_pud_supported() */
- return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS);
+ return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 064163f25592..1f2eae3e988b 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -7,7 +7,7 @@
static int ptdump_show(struct seq_file *m, void *v)
{
struct ptdump_info *info = m->private;
- ptdump_walk_pgd(m, info);
+ ptdump_walk(m, info);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 0b6919c00413..197c473b796a 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -8,7 +8,7 @@
#ifndef _ASM_C6X_PGTABLE_H
#define _ASM_C6X_PGTABLE_H
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
#include <asm/setup.h>
#include <asm/page.h>
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index b34d44d666a4..82ec54c2eaa4 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -28,9 +28,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
return (pmd_t *) pgd;
}
-#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); })
-
#define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
(unsigned long)(page_address(page)))
@@ -45,8 +42,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
__free_page(page);
}
-#define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
-
static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *page = alloc_pages(GFP_DMA, 0);
@@ -100,6 +95,4 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return new_pgd;
}
-#define pgd_populate(mm, pmd, pte) BUG()
-
#endif /* M68K_MCF_PGALLOC_H */
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 5d5502cb2b2d..b9f45aeded25 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -198,17 +198,9 @@ static inline int pmd_bad2(pmd_t *pmd) { return 0; }
#define pmd_present(pmd) (!pmd_none2(&(pmd)))
static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = 0; }
-static inline int pgd_none(pgd_t pgd) { return 0; }
-static inline int pgd_bad(pgd_t pgd) { return 0; }
-static inline int pgd_present(pgd_t pgd) { return 1; }
-static inline void pgd_clear(pgd_t *pgdp) {}
-
#define pte_ERROR(e) \
printk(KERN_ERR "%s:%d: bad pte %08lx.\n", \
__FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
- printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \
- __FILE__, __LINE__, pmd_val(e))
#define pgd_ERROR(e) \
printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
__FILE__, __LINE__, pgd_val(e))
@@ -340,14 +332,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
/*
- * Find an entry in the second-level pagetable.
- */
-static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address)
-{
- return (pmd_t *) pgd;
-}
-
-/*
* Find an entry in the third-level pagetable.
*/
#define __pte_offset(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
@@ -360,12 +344,16 @@ static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address)
static inline void nocache_page(void *vaddr)
{
pgd_t *dir;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long addr = (unsigned long) vaddr;
dir = pgd_offset_k(addr);
- pmdp = pmd_offset(dir, addr);
+ p4dp = p4d_offset(dir, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_kernel(pmdp, addr);
*ptep = pte_mknocache(*ptep);
}
@@ -376,12 +364,16 @@ static inline void nocache_page(void *vaddr)
static inline void cache_page(void *vaddr)
{
pgd_t *dir;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long addr = (unsigned long) vaddr;
dir = pgd_offset_k(addr);
- pmdp = pmd_offset(dir, addr);
+ p4dp = p4d_offset(dir, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_kernel(pmdp, addr);
*ptep = pte_mkcache(*ptep);
}
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index f5b1852b4663..cac9f289d1f6 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -100,6 +100,8 @@ static inline void load_ksp_mmu(struct task_struct *task)
struct mm_struct *mm;
int asid;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long mmuar;
@@ -127,7 +129,15 @@ static inline void load_ksp_mmu(struct task_struct *task)
if (pgd_none(*pgd))
goto bug;
- pmd = pmd_offset(pgd, mmuar);
+ p4d = p4d_offset(pgd, mmuar);
+ if (p4d_none(*p4d))
+ goto bug;
+
+ pud = pud_offset(p4d, mmuar);
+ if (pud_none(*pud))
+ goto bug;
+
+ pmd = pmd_offset(pud, mmuar);
if (pmd_none(*pmd))
goto bug;
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index acab315c851f..ff9cc401ffd1 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -106,9 +106,9 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
}
#define pmd_pgtable(pmd) pmd_page(pmd)
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- pgd_set(pgd, pmd);
+ pud_set(pud, pmd);
}
#endif /* _MOTOROLA_PGALLOC_H */
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 7f66a7bad7a5..62bedc61f110 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -117,14 +117,14 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
}
}
-static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
+static inline void pud_set(pud_t *pudp, pmd_t *pmdp)
{
- pgd_val(*pgdp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp);
+ pud_val(*pudp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp);
}
#define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK))
#define __pmd_page(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK))
-#define __pgd_page(pgd) ((unsigned long)__va(pgd_val(pgd) & _TABLE_MASK))
+#define pud_page_vaddr(pud) ((unsigned long)__va(pud_val(pud) & _TABLE_MASK))
#define pte_none(pte) (!pte_val(pte))
@@ -147,11 +147,11 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
#define pmd_page(pmd) virt_to_page(__va(pmd_val(pmd)))
-#define pgd_none(pgd) (!pgd_val(pgd))
-#define pgd_bad(pgd) ((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE)
-#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_TABLE)
-#define pgd_clear(pgdp) ({ pgd_val(*pgdp) = 0; })
-#define pgd_page(pgd) (mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT))
+#define pud_none(pud) (!pud_val(pud))
+#define pud_bad(pud) ((pud_val(pud) & _DESCTYPE_MASK) != _PAGE_TABLE)
+#define pud_present(pud) (pud_val(pud) & _PAGE_TABLE)
+#define pud_clear(pudp) ({ pud_val(*pudp) = 0; })
+#define pud_page(pud) (mem_map + ((unsigned long)(__va(pud_val(pud)) - PAGE_OFFSET) >> PAGE_SHIFT))
#define pte_ERROR(e) \
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
@@ -209,9 +209,9 @@ static inline pgd_t *pgd_offset_k(unsigned long address)
/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address)
{
- return (pmd_t *)__pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1));
+ return (pmd_t *)pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1));
}
/* Find an entry in the third-level page table.. */
@@ -239,11 +239,15 @@ static inline void nocache_page(void *vaddr)
if (CPU_IS_040_OR_060) {
pgd_t *dir;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
dir = pgd_offset_k(addr);
- pmdp = pmd_offset(dir, addr);
+ p4dp = p4d_offset(dir, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_kernel(pmdp, addr);
*ptep = pte_mknocache(*ptep);
}
@@ -255,11 +259,15 @@ static inline void cache_page(void *vaddr)
if (CPU_IS_040_OR_060) {
pgd_t *dir;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
dir = pgd_offset_k(addr);
- pmdp = pmd_offset(dir, addr);
+ p4dp = p4d_offset(dir, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_kernel(pmdp, addr);
*ptep = pte_mkcache(*ptep);
}
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 700d8195880c..05e1e1e77a9a 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -21,19 +21,22 @@
/*
* These are used to make use of C type-checking..
*/
-typedef struct { unsigned long pte; } pte_t;
+#if !defined(CONFIG_MMU) || CONFIG_PGTABLE_LEVELS == 3
typedef struct { unsigned long pmd[16]; } pmd_t;
+#define pmd_val(x) ((&x)->pmd[0])
+#define __pmd(x) ((pmd_t) { { (x) }, })
+#endif
+
+typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct page *pgtable_t;
#define pte_val(x) ((x).pte)
-#define pmd_val(x) ((&x)->pmd[0])
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)
#define __pte(x) ((pte_t) { (x) } )
-#define __pmd(x) ((pmd_t) { { (x) }, })
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index 646c174fff99..2bf5c3501e78 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -2,7 +2,12 @@
#ifndef _M68K_PGTABLE_H
#define _M68K_PGTABLE_H
-#include <asm-generic/4level-fixup.h>
+
+#if defined(CONFIG_SUN3) || defined(CONFIG_COLDFIRE)
+#include <asm-generic/pgtable-nopmd.h>
+#else
+#include <asm-generic/pgtable-nopud.h>
+#endif
#include <asm/setup.h>
@@ -30,9 +35,7 @@
/* PMD_SHIFT determines the size of the area a second-level page table can map */
-#ifdef CONFIG_SUN3
-#define PMD_SHIFT 17
-#else
+#if CONFIG_PGTABLE_LEVELS == 3
#define PMD_SHIFT 22
#endif
#define PMD_SIZE (1UL << PMD_SHIFT)
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index c18165b0d904..ccc4568299e5 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -2,7 +2,7 @@
#ifndef _M68KNOMMU_PGTABLE_H
#define _M68KNOMMU_PGTABLE_H
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
/*
* (C) Copyright 2000-2002, Greg Ungerer <gerg@snapgear.com>
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 856121122b91..11b95dadf7c0 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -17,8 +17,6 @@
extern const char bad_pmd_string[];
-#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
-
#define __pte_free_tlb(tlb,pte,addr) \
do { \
pgtable_pte_page_dtor(pte); \
@@ -41,7 +39,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
* inside the pgd, so has no extra memory associated with it.
*/
#define pmd_free(mm, x) do { } while (0)
-#define __pmd_free_tlb(tlb, x, addr) do { } while (0)
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
@@ -58,6 +55,4 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm)
return new_pgd;
}
-#define pgd_populate(mm, pmd, pte) BUG()
-
#endif /* SUN3_PGALLOC_H */
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index c987d50866b4..bc4155264810 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -110,11 +110,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pmd_set(pmdp,ptep) do {} while (0)
-static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
-{
- pgd_val(*pgdp) = virt_to_phys(pmdp);
-}
-
#define __pte_page(pte) \
((unsigned long) __va ((pte_val (pte) & SUN3_PAGE_PGNUM_MASK) << PAGE_SHIFT))
#define __pmd_page(pmd) \
@@ -145,16 +140,9 @@ static inline int pmd_present2 (pmd_t *pmd) { return pmd_val (*pmd) & SUN3_PMD_V
#define pmd_present(pmd) (!pmd_none2(&(pmd)))
static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; }
-static inline int pgd_none (pgd_t pgd) { return 0; }
-static inline int pgd_bad (pgd_t pgd) { return 0; }
-static inline int pgd_present (pgd_t pgd) { return 1; }
-static inline void pgd_clear (pgd_t *pgdp) {}
-
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
- pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
@@ -194,12 +182,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
/* Find an entry in a kernel pagetable directory. */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-/* Find an entry in the second-level pagetable. */
-static inline pmd_t *pmd_offset (pgd_t *pgd, unsigned long address)
-{
- return (pmd_t *) pgd;
-}
-
/* Find an entry in the third-level pagetable. */
#define pte_index(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
#define pte_offset_kernel(pmd, address) ((pte_t *) __pmd_page(*pmd) + pte_index(address))
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 6363ec83a290..18a4de7d5934 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -465,6 +465,8 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
for (;;) {
struct mm_struct *mm = current->mm;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
spinlock_t *ptl;
@@ -474,7 +476,13 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
pgd = pgd_offset(mm, (unsigned long)mem);
if (!pgd_present(*pgd))
goto bad_access;
- pmd = pmd_offset(pgd, (unsigned long)mem);
+ p4d = p4d_offset(pgd, (unsigned long)mem);
+ if (!p4d_present(*p4d))
+ goto bad_access;
+ pud = pud_offset(p4d, (unsigned long)mem);
+ if (!pud_present(*pud))
+ goto bad_access;
+ pmd = pmd_offset(pud, (unsigned long)mem);
if (!pmd_present(*pmd))
goto bad_access;
pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl);
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 778cacb7d57b..27c453f4fffe 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -130,8 +130,10 @@ static inline void init_pointer_tables(void)
/* insert pointer tables allocated so far into the tablelist */
init_pointer_table((unsigned long)kernel_pg_dir);
for (i = 0; i < PTRS_PER_PGD; i++) {
- if (pgd_present(kernel_pg_dir[i]))
- init_pointer_table(__pgd_page(kernel_pg_dir[i]));
+ pud_t *pud = (pud_t *)(&kernel_pg_dir[i]);
+
+ if (pud_present(*pud))
+ init_pointer_table(pgd_page_vaddr(kernel_pg_dir[i]));
}
/* insert also pointer table that we used to unmap the zero page */
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 23f9466aabb5..607296f09d94 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -159,6 +159,8 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
unsigned long virtaddr, retaddr;
long offset;
pgd_t *pgd_dir;
+ p4d_t *p4d_dir;
+ pud_t *pud_dir;
pmd_t *pmd_dir;
pte_t *pte_dir;
@@ -245,18 +247,23 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
printk ("\npa=%#lx va=%#lx ", physaddr, virtaddr);
#endif
pgd_dir = pgd_offset_k(virtaddr);
- pmd_dir = pmd_alloc(&init_mm, pgd_dir, virtaddr);
+ p4d_dir = p4d_offset(pgd_dir, virtaddr);
+ pud_dir = pud_offset(p4d_dir, virtaddr);
+ pmd_dir = pmd_alloc(&init_mm, pud_dir, virtaddr);
if (!pmd_dir) {
printk("ioremap: no mem for pmd_dir\n");
return NULL;
}
+#if CONFIG_PGTABLE_LEVELS == 3
if (CPU_IS_020_OR_030) {
pmd_dir->pmd[(virtaddr/PTRTREESIZE) & 15] = physaddr;
physaddr += PTRTREESIZE;
virtaddr += PTRTREESIZE;
size -= PTRTREESIZE;
- } else {
+ } else
+#endif
+ {
pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
if (!pte_dir) {
printk("ioremap: no mem for pte_dir\n");
@@ -338,16 +345,20 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
cmode = 0;
}
}
+#endif
while ((long)size > 0) {
pgd_dir = pgd_offset_k(virtaddr);
- if (pgd_bad(*pgd_dir)) {
- printk("iocachemode: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
- pgd_clear(pgd_dir);
+ p4d_dir = p4d_offset(pgd_dir, virtaddr);
+ pud_dir = pud_offset(p4d_dir, virtaddr);
+ if (pud_bad(*pud_dir)) {
+ printk("iocachemode: bad pud(%08lx)\n", pud_val(*pud_dir));
+ pud_clear(pud_dir);
return;
}
- pmd_dir = pmd_offset(pgd_dir, virtaddr);
+ pmd_dir = pmd_offset(pud_dir, virtaddr);
+#if CONFIG_PGTABLE_LEVELS == 3
if (CPU_IS_020_OR_030) {
int pmd_off = (virtaddr/PTRTREESIZE) & 15;
@@ -359,6 +370,7 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
continue;
}
}
+#endif
if (pmd_bad(*pmd_dir)) {
printk("iocachemode: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 6cb1e41d58d0..0ea375607767 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -92,6 +92,8 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
unsigned long flags, mmuar, mmutr;
struct mm_struct *mm;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int asid;
@@ -113,7 +115,19 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
return -1;
}
- pmd = pmd_offset(pgd, mmuar);
+ p4d = p4d_offset(pgd, mmuar);
+ if (p4d_none(*p4d)) {
+ local_irq_restore(flags);
+ return -1;
+ }
+
+ pud = pud_offset(p4d, mmuar);
+ if (pud_none(*pud)) {
+ local_irq_restore(flags);
+ return -1;
+ }
+
+ pmd = pmd_offset(pud, mmuar);
if (pmd_none(*pmd)) {
local_irq_restore(flags);
return -1;
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 356601bf96d9..4857985b8080 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -82,9 +82,11 @@ static pmd_t * __init kernel_ptr_table(void)
*/
last = (unsigned long)kernel_pg_dir;
for (i = 0; i < PTRS_PER_PGD; i++) {
- if (!pgd_present(kernel_pg_dir[i]))
+ pud_t *pud = (pud_t *)(&kernel_pg_dir[i]);
+
+ if (!pud_present(*pud))
continue;
- pmd = __pgd_page(kernel_pg_dir[i]);
+ pmd = pgd_page_vaddr(kernel_pg_dir[i]);
if (pmd > last)
last = pmd;
}
@@ -118,6 +120,8 @@ static void __init map_node(int node)
#define ROOTTREESIZE (32*1024*1024)
unsigned long physaddr, virtaddr, size;
pgd_t *pgd_dir;
+ p4d_t *p4d_dir;
+ pud_t *pud_dir;
pmd_t *pmd_dir;
pte_t *pte_dir;
@@ -149,14 +153,16 @@ static void __init map_node(int node)
continue;
}
}
- if (!pgd_present(*pgd_dir)) {
+ p4d_dir = p4d_offset(pgd_dir, virtaddr);
+ pud_dir = pud_offset(p4d_dir, virtaddr);
+ if (!pud_present(*pud_dir)) {
pmd_dir = kernel_ptr_table();
#ifdef DEBUG
printk ("[new pointer %p]", pmd_dir);
#endif
- pgd_set(pgd_dir, pmd_dir);
+ pud_set(pud_dir, pmd_dir);
} else
- pmd_dir = pmd_offset(pgd_dir, virtaddr);
+ pmd_dir = pmd_offset(pud_dir, virtaddr);
if (CPU_IS_020_OR_030) {
if (virtaddr) {
@@ -304,4 +310,3 @@ void __init paging_init(void)
node_set_state(i, N_NORMAL_MEMORY);
}
}
-
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index 89e630e66555..c4b8aa1d80f4 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -80,6 +80,8 @@ inline int dvma_map_cpu(unsigned long kaddr,
unsigned long vaddr, int len)
{
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
unsigned long end;
int ret = 0;
@@ -90,12 +92,14 @@ inline int dvma_map_cpu(unsigned long kaddr,
pr_debug("dvma: mapping kern %08lx to virt %08lx\n", kaddr, vaddr);
pgd = pgd_offset_k(vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
do {
pmd_t *pmd;
unsigned long end2;
- if((pmd = pmd_alloc(&init_mm, pgd, vaddr)) == NULL) {
+ if((pmd = pmd_alloc(&init_mm, pud, vaddr)) == NULL) {
ret = -ENOMEM;
goto out;
}
@@ -196,4 +200,3 @@ void dvma_unmap_iommu(unsigned long baddr, int len)
}
}
-
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index d506bb0893f9..f4b44b24b02e 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -90,7 +90,6 @@ typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pgprot; } pgprot_t;
/* FIXME this can depend on linux kernel version */
# ifdef CONFIG_MMU
-typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
# else /* CONFIG_MMU */
typedef struct { unsigned long ste[64]; } pmd_t;
@@ -103,7 +102,6 @@ typedef struct { p4d_t pge[1]; } pgd_t;
# define pgprot_val(x) ((x).pgprot)
# ifdef CONFIG_MMU
-# define pmd_val(x) ((x).pmd)
# define pgd_val(x) ((x).pgd)
# else /* CONFIG_MMU */
# define pmd_val(x) ((x).ste[0])
@@ -112,7 +110,6 @@ typedef struct { p4d_t pge[1]; } pgd_t;
# endif /* CONFIG_MMU */
# define __pte(x) ((pte_t) { (x) })
-# define __pmd(x) ((pmd_t) { (x) })
# define __pgd(x) ((pgd_t) { (x) })
# define __pgprot(x) ((pgprot_t) { (x) })
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7ecb05baa601..fcf1e23f2e0a 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -41,13 +41,6 @@ static inline void free_pgd(pgd_t *pgd)
#define pmd_pgtable(pmd) pmd_page(pmd)
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); })
-
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte))
@@ -58,15 +51,6 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
#define pmd_populate_kernel(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long) (pte))
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x) do { } while (0)
-#define __pmd_free_tlb(tlb, x, addr) pmd_free((tlb)->mm, x)
-#define pgd_populate(mm, pmd, pte) BUG()
-
#endif /* CONFIG_MMU */
#endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 954b69af451f..2def331f9e2c 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -59,9 +59,7 @@ extern int mem_init_done;
#else /* CONFIG_MMU */
-#include <asm-generic/4level-fixup.h>
-
-#define __PAGETABLE_PMD_FOLDED 1
+#include <asm-generic/pgtable-nopmd.h>
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
@@ -138,13 +136,8 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
*
*/
-/* PMD_SHIFT determines the size of the area mapped by the PTE pages */
-#define PMD_SHIFT (PAGE_SHIFT + PTE_SHIFT)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-
/* PGDIR_SHIFT determines what a top-level page table entry can map */
-#define PGDIR_SHIFT PMD_SHIFT
+#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT)
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
@@ -165,9 +158,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
#define pte_ERROR(e) \
printk(KERN_ERR "%s:%d: bad pte "PTE_FMT".\n", \
__FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
- printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \
- __FILE__, __LINE__, pmd_val(e))
#define pgd_ERROR(e) \
printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
__FILE__, __LINE__, pgd_val(e))
@@ -314,18 +304,6 @@ extern unsigned long empty_zero_page[1024];
#ifndef __ASSEMBLY__
/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd) { return 0; }
-static inline int pgd_bad(pgd_t pgd) { return 0; }
-static inline int pgd_present(pgd_t pgd) { return 1; }
-#define pgd_clear(xp) do { } while (0)
-#define pgd_page(pgd) \
- ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
-
-/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
@@ -479,12 +457,6 @@ static inline void ptep_mkdirty(struct mm_struct *mm,
#define pgd_index(address) ((address) >> PGDIR_SHIFT)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
-{
- return (pmd_t *) dir;
-}
-
/* Find an entry in the third-level page table.. */
#define pte_index(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index cdd4feb279c5..c9125c328949 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -160,6 +160,9 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
int err = 0, sig = ksig->sig;
unsigned long address = 0;
#ifdef CONFIG_MMU
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
#endif
@@ -195,9 +198,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
address = ((unsigned long)frame->tramp);
#ifdef CONFIG_MMU
- pmdp = pmd_offset(pud_offset(
- pgd_offset(current->mm, address),
- address), address);
+ pgdp = pgd_offset(current->mm, address);
+ p4dp = p4d_offset(pgdp, address);
+ pudp = pud_offset(p4dp, address);
+ pmdp = pmd_offset(pudp, address);
preempt_disable();
ptep = pte_offset_map(pmdp, address);
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index a015a951c8b7..050fc621c920 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -53,8 +53,11 @@ EXPORT_SYMBOL(kmap_prot);
static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
- return pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr),
- vaddr), vaddr);
+ pgd_t *pgd = pgd_offset_k(vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+
+ return pte_offset_kernel(pmd_offset(pud, vaddr), vaddr);
}
static void __init highmem_init(void)
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 010bb9cee2e4..68c26cacd930 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -134,11 +134,16 @@ EXPORT_SYMBOL(iounmap);
int map_page(unsigned long va, phys_addr_t pa, int flags)
{
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pd;
pte_t *pg;
int err = -ENOMEM;
+
/* Use upper 10 bits of VA to index the first level map */
- pd = pmd_offset(pgd_offset_k(va), va);
+ p4d = p4d_offset(pgd_offset_k(va), va);
+ pud = pud_offset(p4d, va);
+ pd = pmd_offset(pud, va);
/* Use middle 10 bits of VA to index the second-level map */
pg = pte_alloc_kernel(pd, va); /* from powerpc - pgtable.c */
/* pg = pte_alloc_kernel(&init_mm, pd, va); */
@@ -188,13 +193,17 @@ void __init mapin_ram(void)
static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
{
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int retval = 0;
pgd = pgd_offset(mm, addr & PAGE_MASK);
if (pgd) {
- pmd = pmd_offset(pgd, addr & PAGE_MASK);
+ p4d = p4d_offset(pgd, addr & PAGE_MASK);
+ pud = pud_offset(p4d, addr & PAGE_MASK);
+ pmd = pmd_offset(pud, addr & PAGE_MASK);
if (pmd_present(*pmd)) {
pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
if (pte) {
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 91b89aab1787..aef5378f909c 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -639,6 +639,11 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#ifdef _PAGE_HUGE
+#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0)
+#endif
+
#define gup_fast_permitted(start, end) (!cpu_has_dc_aliases)
#include <asm-generic/pgtable.h>
diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h
index 8feb1fa12f01..86b32014c5f9 100644
--- a/arch/nds32/include/asm/page.h
+++ b/arch/nds32/include/asm/page.h
@@ -41,17 +41,14 @@ void clear_page(void *page);
void copy_page(void *to, void *from);
typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;
#define pte_val(x) (x)
-#define pmd_val(x) (x)
#define pgd_val(x) (x)
#define pgprot_val(x) (x)
#define __pte(x) (x)
-#define __pmd(x) (x)
#define __pgd(x) (x)
#define __pgprot(x) (x)
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 37125e6884d7..85c117347c86 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -15,9 +15,6 @@
/*
* Since we have only two-level page tables, these are trivial
*/
-#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, pmd) do { } while (0)
-#define pgd_populate(mm, pmd, pte) BUG()
#define pmd_pgtable(pmd) pmd_page(pmd)
extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index 6fbf251cfc26..0214e4150539 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -4,8 +4,7 @@
#ifndef _ASMNDS32_PGTABLE_H
#define _ASMNDS32_PGTABLE_H
-#define __PAGETABLE_PMD_FOLDED 1
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopmd.h>
#include <linux/sizes.h>
#include <asm/memory.h>
@@ -18,26 +17,20 @@
#ifdef CONFIG_ANDES_PAGE_SIZE_4KB
#define PGDIR_SHIFT 22
#define PTRS_PER_PGD 1024
-#define PMD_SHIFT 22
-#define PTRS_PER_PMD 1
#define PTRS_PER_PTE 1024
#endif
#ifdef CONFIG_ANDES_PAGE_SIZE_8KB
#define PGDIR_SHIFT 24
#define PTRS_PER_PGD 256
-#define PMD_SHIFT 24
-#define PTRS_PER_PMD 1
#define PTRS_PER_PTE 2048
#endif
#ifndef __ASSEMBLY__
extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
-#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd))
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
#endif /* !__ASSEMBLY__ */
@@ -368,9 +361,6 @@ static inline pmd_t __mk_pmd(pte_t * ptep, unsigned long prot)
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(dir, addr) ((pmd_t *)(dir))
-
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const unsigned long mask = 0xfff;
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index a8aff1c8b4f4..672603804a3b 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -7,6 +7,5 @@
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
-#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tln)->mm, pmd)
#endif
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
index ffa8040d8be7..e25700e125d8 100644
--- a/arch/nds32/kernel/pm.c
+++ b/arch/nds32/kernel/pm.c
@@ -14,6 +14,7 @@ unsigned int *phy_addr_sp_tmp;
static void nds32_suspend2ram(void)
{
pgd_t *pgdv;
+ p4d_t *p4dv;
pud_t *pudv;
pmd_t *pmdv;
pte_t *ptev;
@@ -21,7 +22,8 @@ static void nds32_suspend2ram(void)
pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
- pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+ p4dv = p4d_offset(pgdv, (unsigned int)cpu_resume);
+ pudv = pud_offset(p4dv, (unsigned int)cpu_resume);
pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 064ae5d2159d..906dfb25353c 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -31,6 +31,8 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
do {
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
if (pgd_none(*pgd))
@@ -41,7 +43,9 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
break;
}
- pmd = pmd_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
+ pmd = pmd_offset(pud, addr);
#if PTRS_PER_PMD != 1
pr_alert(", *pmd=%08lx", pmd_val(*pmd));
#endif
@@ -359,6 +363,7 @@ vmalloc_fault:
unsigned int index = pgd_index(addr);
pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
@@ -369,8 +374,13 @@ vmalloc_fault:
if (!pgd_present(*pgd_k))
goto no_context;
- pud = pud_offset(pgd, addr);
- pud_k = pud_offset(pgd_k, addr);
+ p4d = p4d_offset(pgd, addr);
+ p4d_k = p4d_offset(pgd_k, addr);
+ if (!p4d_present(*p4d_k))
+ goto no_context;
+
+ pud = pud_offset(p4d, addr);
+ pud_k = pud_offset(p4d_k, addr);
if (!pud_present(*pud_k))
goto no_context;
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 55703b03d172..0be3833f6814 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -54,6 +54,7 @@ static void __init map_ram(void)
{
unsigned long v, p, e;
pgd_t *pge;
+ p4d_t *p4e;
pud_t *pue;
pmd_t *pme;
pte_t *pte;
@@ -69,7 +70,8 @@ static void __init map_ram(void)
while (p < e) {
int j;
- pue = pud_offset(pge, v);
+ p4e = p4d_offset(pge, v);
+ pue = pud_offset(p4e, v);
pme = pmd_offset(pue, v);
if ((u32) pue != (u32) pge || (u32) pme != (u32) pge) {
@@ -100,6 +102,7 @@ static void __init fixedrange_init(void)
{
unsigned long vaddr;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
#ifdef CONFIG_HIGHMEM
@@ -111,7 +114,8 @@ static void __init fixedrange_init(void)
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!fixmap_pmd_p)
@@ -126,7 +130,8 @@ static void __init fixedrange_init(void)
vaddr = PKMAP_BASE;
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!pte)
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
index 3b43798d754f..8503bee882d1 100644
--- a/arch/nds32/mm/mm-nds32.c
+++ b/arch/nds32/mm/mm-nds32.c
@@ -74,6 +74,8 @@ void setup_mm_for_reboot(char mode)
{
unsigned long pmdval;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
int i;
@@ -84,7 +86,9 @@ void setup_mm_for_reboot(char mode)
for (i = 0; i < USER_PTRS_PER_PGD; i++) {
pmdval = (i << PGDIR_SHIFT);
- pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT);
+ p4d = p4d_offset(pgd, i << PGDIR_SHIFT);
+ pud = pud_offset(p4d, i << PGDIR_SHIFT);
+ pmd = pmd_offset(pud + i, i << PGDIR_SHIFT);
set_pmd(pmd, __pmd(pmdval));
}
}
diff --git a/arch/nds32/mm/proc.c b/arch/nds32/mm/proc.c
index ba80992d13a2..837ae7728830 100644
--- a/arch/nds32/mm/proc.c
+++ b/arch/nds32/mm/proc.c
@@ -16,10 +16,14 @@ extern struct cache_info L1_cache_info[2];
int va_kernel_present(unsigned long addr)
{
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *ptep, pte;
- pmd = pmd_offset(pgd_offset_k(addr), addr);
+ p4d = p4d_offset(pgd_offset_k(addr), addr);
+ pud = pud_offset(p4d, addr);
+ pmd = pmd_offset(pud, addr);
if (!pmd_none(*pmd)) {
ptep = pte_offset_map(pmd, addr);
pte = *ptep;
@@ -32,20 +36,24 @@ int va_kernel_present(unsigned long addr)
pte_t va_present(struct mm_struct * mm, unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *ptep, pte;
pgd = pgd_offset(mm, addr);
if (!pgd_none(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd)) {
- ptep = pte_offset_map(pmd, addr);
- pte = *ptep;
- if (pte_present(pte))
- return pte;
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d)) {
+ pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd)) {
+ ptep = pte_offset_map(pmd, addr);
+ pte = *ptep;
+ if (pte_present(pte))
+ return pte;
+ }
}
}
}
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 93caf17ac5e2..796ae29e9b9a 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -42,48 +42,54 @@ typedef struct { unsigned long pte; } pte_t; /* either 32 or 64bit */
/* NOTE: even on 64 bits, these entries are __u32 because we allocate
* the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
-typedef struct { __u32 pmd; } pmd_t;
typedef struct { __u32 pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
-#define pte_val(x) ((x).pte)
-/* These do not work lvalues, so make sure we don't use them as such. */
+#if CONFIG_PGTABLE_LEVELS == 3
+typedef struct { __u32 pmd; } pmd_t;
+#define __pmd(x) ((pmd_t) { (x) } )
+/* pXd_val() do not work as lvalues, so make sure we don't use them as such. */
#define pmd_val(x) ((x).pmd + 0)
+#endif
+
+#define pte_val(x) ((x).pte)
#define pgd_val(x) ((x).pgd + 0)
#define pgprot_val(x) ((x).pgprot)
#define __pte(x) ((pte_t) { (x) } )
-#define __pmd(x) ((pmd_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
-#define __pmd_val_set(x,n) (x).pmd = (n)
-#define __pgd_val_set(x,n) (x).pgd = (n)
-
#else
/*
* .. while these make it easier on the compiler
*/
typedef unsigned long pte_t;
+
+#if CONFIG_PGTABLE_LEVELS == 3
typedef __u32 pmd_t;
+#define pmd_val(x) (x)
+#define __pmd(x) (x)
+#endif
+
typedef __u32 pgd_t;
typedef unsigned long pgprot_t;
#define pte_val(x) (x)
-#define pmd_val(x) (x)
#define pgd_val(x) (x)
#define pgprot_val(x) (x)
#define __pte(x) (x)
-#define __pmd(x) (x)
#define __pgd(x) (x)
#define __pgprot(x) (x)
-#define __pmd_val_set(x,n) (x) = (n)
-#define __pgd_val_set(x,n) (x) = (n)
-
#endif /* STRICT_MM_TYPECHECKS */
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#if CONFIG_PGTABLE_LEVELS == 3
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+#endif
+
typedef struct page *pgtable_t;
typedef struct __physmem_range {
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index d98647c29b74..9ac74da256b8 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -34,13 +34,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
/* Populate first pmd with allocated memory. We mark it
* with PxD_FLAG_ATTACHED as a signal to the system that this
* pmd entry may not be cleared. */
- __pgd_val_set(*actual_pgd, (PxD_FLAG_PRESENT |
- PxD_FLAG_VALID |
- PxD_FLAG_ATTACHED)
- + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
+ set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT |
+ PxD_FLAG_VALID |
+ PxD_FLAG_ATTACHED)
+ + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)));
/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
* a signal that this pmd may not be freed */
- __pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
+ set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED));
#endif
}
spin_lock_init(pgd_spinlock(actual_pgd));
@@ -59,10 +59,10 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
/* Three Level Page Table Support for pmd's */
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- __pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
- (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
+ set_pud(pud, __pud((PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
+ (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)));
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -88,19 +88,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
free_pages((unsigned long)pmd, PMD_ORDER);
}
-#else
-
-/* Two Level Page Table Support for pmd's */
-
-/*
- * allocating and freeing a pmd is trivial: the 1-entry pmd is
- * inside the pgd, so has no extra memory associated with it.
- */
-
-#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x) do { } while (0)
-#define pgd_populate(mm, pmd, pte) BUG()
-
#endif
static inline void
@@ -110,14 +97,14 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
/* preserve the gateway marker if this is the beginning of
* the permanent pmd */
if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
- __pmd_val_set(*pmd, (PxD_FLAG_PRESENT |
- PxD_FLAG_VALID |
- PxD_FLAG_ATTACHED)
- + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+ set_pmd(pmd, __pmd((PxD_FLAG_PRESENT |
+ PxD_FLAG_VALID |
+ PxD_FLAG_ATTACHED)
+ + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
else
#endif
- __pmd_val_set(*pmd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID)
- + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+ set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
+ + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
}
#define pmd_populate(mm, pmd, pte_page) \
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 4ac374b3a99f..f0a365950536 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -3,7 +3,12 @@
#define _PARISC_PGTABLE_H
#include <asm/page.h>
-#include <asm-generic/4level-fixup.h>
+
+#if CONFIG_PGTABLE_LEVELS == 3
+#include <asm-generic/pgtable-nopud.h>
+#elif CONFIG_PGTABLE_LEVELS == 2
+#include <asm-generic/pgtable-nopmd.h>
+#endif
#include <asm/fixmap.h>
@@ -101,8 +106,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define pte_ERROR(e) \
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#if CONFIG_PGTABLE_LEVELS == 3
#define pmd_ERROR(e) \
printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, (unsigned long)pmd_val(e))
+#endif
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
@@ -132,19 +139,18 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define PTRS_PER_PTE (1UL << BITS_PER_PTE)
/* Definitions for 2nd level */
+#if CONFIG_PGTABLE_LEVELS == 3
#define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE)
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#if CONFIG_PGTABLE_LEVELS == 3
#define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
+#define PTRS_PER_PMD (1UL << BITS_PER_PMD)
#else
-#define __PAGETABLE_PMD_FOLDED 1
#define BITS_PER_PMD 0
#endif
-#define PTRS_PER_PMD (1UL << BITS_PER_PMD)
/* Definitions for 1st level */
-#define PGDIR_SHIFT (PMD_SHIFT + BITS_PER_PMD)
+#define PGDIR_SHIFT (PLD_SHIFT + BITS_PER_PTE + BITS_PER_PMD)
#if (PGDIR_SHIFT + PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) > BITS_PER_LONG
#define BITS_PER_PGD (BITS_PER_LONG - PGDIR_SHIFT)
#else
@@ -317,6 +323,8 @@ extern unsigned long *empty_zero_page;
#define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK)
#define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
+#define pud_flag(x) (pud_val(x) & PxD_FLAG_MASK)
+#define pud_address(x) ((unsigned long)(pud_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
#define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK)
#define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
@@ -334,42 +342,32 @@ static inline void pmd_clear(pmd_t *pmd) {
if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
/* This is the entry pointing to the permanent pmd
* attached to the pgd; cannot clear it */
- __pmd_val_set(*pmd, PxD_FLAG_ATTACHED);
+ set_pmd(pmd, __pmd(PxD_FLAG_ATTACHED));
else
#endif
- __pmd_val_set(*pmd, 0);
+ set_pmd(pmd, __pmd(0));
}
#if CONFIG_PGTABLE_LEVELS == 3
-#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
-#define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd))
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_address(pud)))
+#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
/* For 64 bit we have three level tables */
-#define pgd_none(x) (!pgd_val(x))
-#define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID))
-#define pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT)
-static inline void pgd_clear(pgd_t *pgd) {
+#define pud_none(x) (!pud_val(x))
+#define pud_bad(x) (!(pud_flag(x) & PxD_FLAG_VALID))
+#define pud_present(x) (pud_flag(x) & PxD_FLAG_PRESENT)
+static inline void pud_clear(pud_t *pud) {
#if CONFIG_PGTABLE_LEVELS == 3
- if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED)
- /* This is the permanent pmd attached to the pgd; cannot
+ if(pud_flag(*pud) & PxD_FLAG_ATTACHED)
+ /* This is the permanent pmd attached to the pud; cannot
* free it */
return;
#endif
- __pgd_val_set(*pgd, 0);
+ set_pud(pud, __pud(0));
}
-#else
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd) { return 0; }
-static inline int pgd_bad(pgd_t pgd) { return 0; }
-static inline int pgd_present(pgd_t pgd) { return 1; }
-static inline void pgd_clear(pgd_t * pgdp) { }
#endif
/*
@@ -452,7 +450,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#if CONFIG_PGTABLE_LEVELS == 3
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
#define pmd_offset(dir,address) \
-((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(address))
+((pmd_t *) pud_page_vaddr(*(dir)) + pmd_index(address))
#else
#define pmd_offset(dir,addr) ((pmd_t *) dir)
#endif
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
index 8c0446b04c9e..44235f367674 100644
--- a/arch/parisc/include/asm/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
@@ -4,7 +4,9 @@
#include <asm-generic/tlb.h>
+#if CONFIG_PGTABLE_LEVELS == 3
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
+#endif
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
#endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 2407b0b789d3..1eedfecc5137 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -534,11 +534,14 @@ static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
pte_t *ptep = NULL;
if (!pgd_none(*pgd)) {
- pud_t *pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd_t *pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd))
- ptep = pte_offset_map(pmd, addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d)) {
+ pud_t *pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud)) {
+ pmd_t *pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd))
+ ptep = pte_offset_map(pmd, addr);
+ }
}
}
return ptep;
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index a60d47fd4d55..0f1b460ee715 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -133,9 +133,14 @@ static inline int map_uncached_pages(unsigned long vaddr, unsigned long size,
dir = pgd_offset_k(vaddr);
do {
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
-
- pmd = pmd_alloc(NULL, dir, vaddr);
+
+ p4d = p4d_offset(dir, vaddr);
+ pud = pud_offset(p4d, vaddr);
+ pmd = pmd_alloc(NULL, pud, vaddr);
+
if (!pmd)
return -ENOMEM;
if (map_pmd_uncached(pmd, vaddr, end - vaddr, &paddr))
diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c
index 474cd241c150..e2d8b0a857ee 100644
--- a/arch/parisc/mm/fixmap.c
+++ b/arch/parisc/mm/fixmap.c
@@ -14,11 +14,13 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
{
unsigned long vaddr = __fix_to_virt(idx);
pgd_t *pgd = pgd_offset_k(vaddr);
- pmd_t *pmd = pmd_offset(pgd, vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
pte_t *pte;
if (pmd_none(*pmd))
- pmd = pmd_alloc(NULL, pgd, vaddr);
+ pmd = pmd_alloc(NULL, pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
if (pte_none(*pte))
@@ -32,7 +34,9 @@ void notrace clear_fixmap(enum fixed_addresses idx)
{
unsigned long vaddr = __fix_to_virt(idx);
pgd_t *pgd = pgd_offset_k(vaddr);
- pmd_t *pmd = pmd_offset(pgd, vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
pte_t *pte = pte_offset_kernel(pmd, vaddr);
if (WARN_ON(pte_none(*pte)))
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index d578809e55cf..0e1e212f1c96 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -49,6 +49,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
@@ -61,7 +62,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
addr &= HPAGE_MASK;
pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = pud_alloc(mm, p4d, addr);
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
@@ -74,6 +76,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
@@ -82,11 +85,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
pgd = pgd_offset(mm, addr);
if (!pgd_none(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd))
- pte = pte_offset_map(pmd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d)) {
+ pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd))
+ pte = pte_offset_map(pmd, addr);
+ }
}
}
return pte;
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index b01624e5c467..3dd7b6f5edd0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -923,6 +923,12 @@ static inline int pud_present(pud_t pud)
return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
}
+#define pud_leaf pud_large
+static inline int pud_large(pud_t pud)
+{
+ return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
+
extern struct page *pud_page(pud_t pud);
extern struct page *pmd_page(pmd_t pmd);
static inline pte_t pud_pte(pud_t pud)
@@ -966,6 +972,12 @@ static inline int pgd_present(pgd_t pgd)
return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT));
}
+#define pgd_leaf pgd_large
+static inline int pgd_large(pgd_t pgd)
+{
+ return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE));
+}
+
static inline pte_t pgd_pte(pgd_t pgd)
{
return __pte_raw(pgd_raw(pgd));
@@ -1133,6 +1145,15 @@ static inline bool pmd_access_permitted(pmd_t pmd, bool write)
return pte_access_permitted(pmd_pte(pmd), write);
}
+#define pmd_leaf pmd_large
+/*
+ * returns true for pmd migration entries, THP, devmap, hugetlb
+ */
+static inline int pmd_large(pmd_t pmd)
+{
+ return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
@@ -1159,15 +1180,6 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
}
-/*
- * returns true for pmd migration entries, THP, devmap, hugetlb
- * But compile time dependent on THP config
- */
-static inline int pmd_large(pmd_t pmd)
-{
- return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
-}
-
static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 74630989006d..4c4d2c65ba6c 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -43,6 +43,13 @@ static inline int pud_bad(pud_t pud)
return !pud_present(pud);
}
+#define pud_leaf pud_leaf
+static inline int pud_leaf(pud_t pud)
+{
+ return pud_present(pud) &&
+ (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+}
+
static inline void set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7ff0ed4f292e..5cf96b2b4d5a 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -105,6 +105,13 @@ static inline int pmd_bad(pmd_t pmd)
return !pmd_present(pmd);
}
+#define pmd_leaf pmd_leaf
+static inline int pmd_leaf(pmd_t pmd)
+{
+ return pmd_present(pmd) &&
+ (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
+}
+
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 7b03037a8475..137a3920ca36 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -673,6 +673,7 @@ static inline int pud_none(pud_t pud)
return pud_val(pud) == _REGION3_ENTRY_EMPTY;
}
+#define pud_leaf pud_large
static inline int pud_large(pud_t pud)
{
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
@@ -690,6 +691,7 @@ static inline unsigned long pud_pfn(pud_t pud)
return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
}
+#define pmd_leaf pmd_large
static inline int pmd_large(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 10538a4d1a1e..eae0c92ec422 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -26,14 +26,14 @@ static inline void free_pgd_fast(pgd_t *pgd)
#define pgd_free(mm, pgd) free_pgd_fast(pgd)
#define pgd_alloc(mm) get_pgd_fast()
-static inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
+static inline void pud_set(pud_t * pudp, pmd_t * pmdp)
{
unsigned long pa = __nocache_pa(pmdp);
- set_pte((pte_t *)pgdp, __pte((SRMMU_ET_PTD | (pa >> 4))));
+ set_pte((pte_t *)pudp, __pte((SRMMU_ET_PTD | (pa >> 4))));
}
-#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, PMD)
+#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm,
unsigned long address)
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 31da44826645..6d6f44c0cad9 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -12,7 +12,7 @@
#include <linux/const.h>
#ifndef __ASSEMBLY__
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
@@ -132,12 +132,12 @@ static inline struct page *pmd_page(pmd_t pmd)
return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
}
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+static inline unsigned long pud_page_vaddr(pud_t pud)
{
- if (srmmu_device_memory(pgd_val(pgd))) {
+ if (srmmu_device_memory(pud_val(pud))) {
return ~0;
} else {
- unsigned long v = pgd_val(pgd) & SRMMU_PTD_PMASK;
+ unsigned long v = pud_val(pud) & SRMMU_PTD_PMASK;
return (unsigned long)__nocache_va(v << 4);
}
}
@@ -184,24 +184,24 @@ static inline void pmd_clear(pmd_t *pmdp)
set_pte((pte_t *)&pmdp->pmdv[i], __pte(0));
}
-static inline int pgd_none(pgd_t pgd)
+static inline int pud_none(pud_t pud)
{
- return !(pgd_val(pgd) & 0xFFFFFFF);
+ return !(pud_val(pud) & 0xFFFFFFF);
}
-static inline int pgd_bad(pgd_t pgd)
+static inline int pud_bad(pud_t pud)
{
- return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
+ return (pud_val(pud) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
}
-static inline int pgd_present(pgd_t pgd)
+static inline int pud_present(pud_t pud)
{
- return ((pgd_val(pgd) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
+ return ((pud_val(pud) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
}
-static inline void pgd_clear(pgd_t *pgdp)
+static inline void pud_clear(pud_t *pudp)
{
- set_pte((pte_t *)pgdp, __pte(0));
+ set_pte((pte_t *)pudp, __pte(0));
}
/*
@@ -319,9 +319,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t * dir, unsigned long address)
{
- return (pmd_t *) pgd_page_vaddr(*dir) +
+ return (pmd_t *) pud_page_vaddr(*dir) +
((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
}
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 6ae8016ef4ec..43206652eaf5 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -683,6 +683,7 @@ static inline unsigned long pte_special(pte_t pte)
return pte_val(pte) & _PAGE_SPECIAL;
}
+#define pmd_leaf pmd_large
static inline unsigned long pmd_large(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
@@ -867,6 +868,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
/* only used by the stubbed out hugetlb gup code, should never be called */
#define pgd_page(pgd) NULL
+#define pud_leaf pud_large
static inline unsigned long pud_large(pud_t pud)
{
pte_t pte = __pte(pud_val(pud));
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 8d69de111470..89976c9b936c 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -351,6 +351,8 @@ vmalloc_fault:
*/
int offset = pgd_index(address);
pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
+ pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pgd = tsk->active_mm->pgd + offset;
@@ -363,8 +365,13 @@ vmalloc_fault:
return;
}
- pmd = pmd_offset(pgd, address);
- pmd_k = pmd_offset(pgd_k, address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
+ pmd = pmd_offset(pud, address);
+
+ p4d_k = p4d_offset(pgd_k, address);
+ pud_k = pud_offset(p4d_k, address);
+ pmd_k = pmd_offset(pud_k, address);
if (pmd_present(*pmd) || !pmd_present(*pmd_k))
goto bad_area_nosemaphore;
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
index 86bc2a58d26c..d4a80adea7e5 100644
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -39,10 +39,14 @@ static pte_t *kmap_pte;
void __init kmap_init(void)
{
unsigned long address;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *dir;
address = __fix_to_virt(FIX_KMAP_BEGIN);
- dir = pmd_offset(pgd_offset_k(address), address);
+ p4d = p4d_offset(pgd_offset_k(address), address);
+ pud = pud_offset(p4d, address);
+ dir = pmd_offset(pud, address);
/* cache the first kmap pte */
kmap_pte = pte_offset_kernel(dir, address);
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index f770ee7229d8..33a0facd9eb5 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -239,12 +239,16 @@ static void *iounit_alloc(struct device *dev, size_t len,
page = va;
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
long i;
pgdp = pgd_offset(&init_mm, addr);
- pmdp = pmd_offset(pgdp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_map(pmdp, addr);
set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 71ac353032b6..4d3c6991f0ae 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -343,6 +343,8 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
page = va;
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -354,7 +356,9 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
__flush_page_to_ram(page);
pgdp = pgd_offset(&init_mm, addr);
- pmdp = pmd_offset(pgdp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ pudp = pud_offset(p4dp, addr);
+ pmdp = pmd_offset(pudp, addr);
ptep = pte_offset_map(pmdp, addr);
set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index cc3ad64479ac..f56c3c9a9793 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -296,6 +296,8 @@ static void __init srmmu_nocache_init(void)
void *srmmu_nocache_bitmap;
unsigned int bitmap_bits;
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long paddr, vaddr;
@@ -329,6 +331,8 @@ static void __init srmmu_nocache_init(void)
while (vaddr < srmmu_nocache_end) {
pgd = pgd_offset_k(vaddr);
- pmd = pmd_offset(__nocache_fix(pgd), vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
+ pmd = pmd_offset(__nocache_fix(pud), vaddr);
pte = pte_offset_kernel(__nocache_fix(pmd), vaddr);
@@ -516,13 +520,17 @@ static inline void srmmu_mapioaddr(unsigned long physaddr,
unsigned long virt_addr, int bus_type)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
unsigned long tmp;
physaddr &= PAGE_MASK;
pgdp = pgd_offset_k(virt_addr);
- pmdp = pmd_offset(pgdp, virt_addr);
+ p4dp = p4d_offset(pgdp, virt_addr);
+ pudp = pud_offset(p4dp, virt_addr);
+ pmdp = pmd_offset(pudp, virt_addr);
ptep = pte_offset_kernel(pmdp, virt_addr);
tmp = (physaddr >> 4) | SRMMU_ET_PTE;
@@ -551,11 +559,16 @@ void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
static inline void srmmu_unmapioaddr(unsigned long virt_addr)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
+
pgdp = pgd_offset_k(virt_addr);
- pmdp = pmd_offset(pgdp, virt_addr);
+ p4dp = p4d_offset(pgdp, virt_addr);
+ pudp = pud_offset(p4dp, virt_addr);
+ pmdp = pmd_offset(pudp, virt_addr);
ptep = pte_offset_kernel(pmdp, virt_addr);
/* No need to flush uncacheable page. */
@@ -693,20 +706,24 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
unsigned long end)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
while (start < end) {
pgdp = pgd_offset_k(start);
- if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+ p4dp = p4d_offset(pgdp, start);
+ pudp = pud_offset(p4dp, start);
+ if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
pmdp = __srmmu_get_nocache(
SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
if (pmdp == NULL)
early_pgtable_allocfail("pmd");
memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
- pgd_set(__nocache_fix(pgdp), pmdp);
+ pud_set(__nocache_fix(pudp), pmdp);
}
- pmdp = pmd_offset(__nocache_fix(pgdp), start);
+ pmdp = pmd_offset(__nocache_fix(pudp), start);
if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
if (ptep == NULL)
@@ -724,19 +741,23 @@ static void __init srmmu_allocate_ptable_skeleton(unsigned long start,
unsigned long end)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
while (start < end) {
pgdp = pgd_offset_k(start);
- if (pgd_none(*pgdp)) {
+ p4dp = p4d_offset(pgdp, start);
+ pudp = pud_offset(p4dp, start);
+ if (pud_none(*pudp)) {
pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
if (pmdp == NULL)
early_pgtable_allocfail("pmd");
memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE);
- pgd_set(pgdp, pmdp);
+ pud_set(pudp, pmdp);
}
- pmdp = pmd_offset(pgdp, start);
+ pmdp = pmd_offset(pudp, start);
if (srmmu_pmd_none(*pmdp)) {
ptep = __srmmu_get_nocache(PTE_SIZE,
PTE_SIZE);
@@ -779,6 +800,8 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
unsigned long probed;
unsigned long addr;
pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */
@@ -810,18 +833,20 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
}
pgdp = pgd_offset_k(start);
+ p4dp = p4d_offset(pgdp, start);
+ pudp = pud_offset(p4dp, start);
if (what == 2) {
*(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
start += SRMMU_PGDIR_SIZE;
continue;
}
- if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+ if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE,
SRMMU_PMD_TABLE_SIZE);
if (pmdp == NULL)
early_pgtable_allocfail("pmd");
memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
- pgd_set(__nocache_fix(pgdp), pmdp);
+ pud_set(__nocache_fix(pudp), pmdp);
}
- pmdp = pmd_offset(__nocache_fix(pgdp), start);
+ pmdp = pmd_offset(__nocache_fix(pudp), start);
if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
@@ -906,6 +931,8 @@ void __init srmmu_paging_init(void)
phandle cpunode;
char node_str[128];
pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long pages_avail;
@@ -967,7 +994,9 @@ void __init srmmu_paging_init(void)
srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END);
pgd = pgd_offset_k(PKMAP_BASE);
- pmd = pmd_offset(pgd, PKMAP_BASE);
+ p4d = p4d_offset(pgd, PKMAP_BASE);
+ pud = pud_offset(p4d, PKMAP_BASE);
+ pmd = pmd_offset(pud, PKMAP_BASE);
pte = pte_offset_kernel(pmd, PKMAP_BASE);
pkmap_page_table = pte;
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index 32b3d26a7109..32106d31e4ab 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -8,7 +8,6 @@
#ifndef __UM_PGTABLE_2LEVEL_H
#define __UM_PGTABLE_2LEVEL_H
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
/* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 9812269fefc9..8a3b689e0f86 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -7,7 +7,6 @@
#ifndef __UM_PGTABLE_3LEVEL_H
#define __UM_PGTABLE_3LEVEL_H
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
/* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 36a44d58f373..2daa58df2190 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -106,6 +106,9 @@ extern unsigned long end_iomem;
#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE)
#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE)
+#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE)
+#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE)
+
#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
#define pte_page(x) pfn_to_page(pte_pfn(x))
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 417ff647fb37..30885d0b94ac 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -96,6 +96,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
pgd_t *pgd_base)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
int i, j;
@@ -107,7 +108,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
pgd = pgd_base + i;
for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
if (pud_none(*pud))
one_md_table_init(pud);
pmd = pmd_offset(pud, vaddr);
@@ -124,6 +126,7 @@ static void __init fixaddr_user_init( void)
#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
long size = FIXADDR_USER_END - FIXADDR_USER_START;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -144,7 +147,8 @@ static void __init fixaddr_user_init( void)
for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
p += PAGE_SIZE) {
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
pte_set_val(*pte, p, PAGE_READONLY);
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index b5e3d91fc9c2..3f0d9a573fd6 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -19,15 +19,21 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
unsigned long kernel)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, proc);
- pud = pud_alloc(mm, pgd, proc);
- if (!pud)
+
+ p4d = p4d_alloc(mm, pgd, proc);
+ if (!p4d)
goto out;
+ pud = pud_alloc(mm, p4d, proc);
+ if (!pud)
+ goto out_pud;
+
pmd = pmd_alloc(mm, pud, proc);
if (!pmd)
goto out_pmd;
@@ -44,6 +50,8 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
pmd_free(mm, pmd);
out_pmd:
pud_free(mm, pud);
+ out_pud:
+ p4d_free(mm, p4d);
out:
return -ENOMEM;
}
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 3236052f20e6..d617f8dc9c19 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -17,6 +17,7 @@
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -27,7 +28,11 @@ pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
if (!pgd_present(*pgd))
return NULL;
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d))
+ return NULL;
+
+ pud = pud_offset(p4d, addr);
if (!pud_present(*pud))
return NULL;
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index b7eaf655635c..80a358c6d652 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -277,7 +277,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
return ret;
}
-static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end,
struct host_vm_change *hvc)
{
@@ -285,7 +285,7 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
unsigned long next;
int ret = 0;
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
@@ -299,6 +299,28 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
return ret;
}
+static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end,
+ struct host_vm_change *hvc)
+{
+ p4d_t *p4d;
+ unsigned long next;
+ int ret = 0;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (!p4d_present(*p4d)) {
+ if (hvc->force || p4d_newpage(*p4d)) {
+ ret = add_munmap(addr, next - addr, hvc);
+ p4d_mkuptodate(*p4d);
+ }
+ } else
+ ret = update_pud_range(p4d, addr, next, hvc);
+ } while (p4d++, addr = next, ((addr < end) && !ret));
+ return ret;
+}
+
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
unsigned long end_addr, int force)
{
@@ -316,8 +338,8 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
ret = add_munmap(addr, next - addr, &hvc);
pgd_mkuptodate(*pgd);
}
- }
- else ret = update_pud_range(pgd, addr, next, &hvc);
+ } else
+ ret = update_p4d_range(pgd, addr, next, &hvc);
} while (pgd++, addr = next, ((addr < end_addr) && !ret));
if (!ret)
@@ -338,6 +360,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
struct mm_struct *mm;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -364,7 +387,23 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
continue;
}
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d)) {
+ last = ADD_ROUND(addr, P4D_SIZE);
+ if (last > end)
+ last = end;
+ if (p4d_newpage(*p4d)) {
+ updated = 1;
+ err = add_munmap(addr, last - addr, &hvc);
+ if (err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pud = pud_offset(p4d, addr);
if (!pud_present(*pud)) {
last = ADD_ROUND(addr, PUD_SIZE);
if (last > end)
@@ -424,6 +463,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -437,7 +477,11 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
if (!pgd_present(*pgd))
goto kill;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(*p4d))
+ goto kill;
+
+ pud = pud_offset(p4d, address);
if (!pud_present(*pud))
goto kill;
@@ -490,35 +534,6 @@ kill:
force_sig(SIGKILL);
}
-pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
-{
- return pgd_offset(mm, address);
-}
-
-pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
-{
- return pud_offset(pgd, address);
-}
-
-pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
-{
- return pmd_offset(pud, address);
-}
-
-pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
-{
- return pte_offset_kernel(pmd, address);
-}
-
-pte_t *addr_pte(struct task_struct *task, unsigned long addr)
-{
- pgd_t *pgd = pgd_offset(task->mm, addr);
- pud_t *pud = pud_offset(pgd, addr);
- pmd_t *pmd = pmd_offset(pud, addr);
-
- return pte_offset_map(pmd, addr);
-}
-
void flush_tlb_all(void)
{
/*
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index e62296c66c95..818553064f04 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -28,6 +28,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -104,7 +105,8 @@ good_area:
}
pgd = pgd_offset(mm, address);
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
} while (!pte_present(*pte));
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f3aa4fc9ac59..3fc6daff2109 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -120,6 +120,7 @@ config X86
select GENERIC_IRQ_RESERVATION_MODE
select GENERIC_IRQ_SHOW
select GENERIC_PENDING_IRQ if SMP
+ select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 409c00f74e60..442cf6f08fb6 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,26 +62,10 @@ config EARLY_PRINTK_USB_XDBC
config MCSAFE_TEST
def_bool n
-config X86_PTDUMP_CORE
- def_bool n
-
-config X86_PTDUMP
- tristate "Export kernel pagetable layout to userspace via debugfs"
- depends on DEBUG_KERNEL
- select DEBUG_FS
- select X86_PTDUMP_CORE
- ---help---
- Say Y here if you want to show the kernel pagetable layout in a
- debugfs file. This information is only useful for kernel developers
- who are working in architecture specific areas of the kernel.
- It is probably not a good idea to enable this feature in a production
- kernel.
- If in doubt, say "N"
-
config EFI_PGT_DUMP
bool "Dump the EFI pagetable"
depends on EFI
- select X86_PTDUMP_CORE
+ select PTDUMP_CORE
---help---
Enable this if you want to dump the EFI page table before
enabling virtual mode. This can be used to debug miscellaneous
@@ -90,7 +74,7 @@ config EFI_PGT_DUMP
config DEBUG_WX
bool "Warn on W+X mappings at boot"
- select X86_PTDUMP_CORE
+ select PTDUMP_CORE
---help---
Generate a warning if any W+X mappings are found at boot.
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index ad97dc155195..7e118660bbd9 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -29,8 +29,9 @@
extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
-void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
+void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
+ bool user);
void ptdump_walk_pgd_level_checkwx(void);
void ptdump_walk_user_pgd_level_checkwx(void);
@@ -239,6 +240,7 @@ static inline unsigned long pgd_pfn(pgd_t pgd)
return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
+#define p4d_leaf p4d_large
static inline int p4d_large(p4d_t p4d)
{
/* No 512 GiB pages yet */
@@ -247,6 +249,7 @@ static inline int p4d_large(p4d_t p4d)
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
+#define pmd_leaf pmd_large
static inline int pmd_large(pmd_t pte)
{
return pmd_flags(pte) & _PAGE_PSE;
@@ -874,6 +877,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}
+#define pud_leaf pud_large
static inline int pud_large(pud_t pud)
{
return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -885,6 +889,7 @@ static inline int pud_bad(pud_t pud)
return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
}
#else
+#define pud_leaf pud_large
static inline int pud_large(pud_t pud)
{
return 0;
@@ -1233,6 +1238,7 @@ static inline bool pgdp_maps_userspace(void *__ptr)
return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
}
+#define pgd_leaf pgd_large
static inline int pgd_large(pgd_t pgd) { return 0; }
#ifdef CONFIG_PAGE_TABLE_ISOLATION
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 74279ce2dc3a..4adebe8fbe62 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -32,8 +32,8 @@ obj-$(CONFIG_X86_PAT) += pat_interval.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
-obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
+obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o
+obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index 39001a401eff..d0efec713c6c 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -7,7 +7,7 @@
static int ptdump_show(struct seq_file *m, void *v)
{
- ptdump_walk_pgd_level_debugfs(m, NULL, false);
+ ptdump_walk_pgd_level_debugfs(m, &init_mm, false);
return 0;
}
@@ -17,7 +17,7 @@ static int ptdump_curknl_show(struct seq_file *m, void *v)
{
if (current->mm->pgd) {
down_read(&current->mm->mmap_sem);
- ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
+ ptdump_walk_pgd_level_debugfs(m, current->mm, false);
up_read(&current->mm->mmap_sem);
}
return 0;
@@ -30,7 +30,7 @@ static int ptdump_curusr_show(struct seq_file *m, void *v)
{
if (current->mm->pgd) {
down_read(&current->mm->mmap_sem);
- ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
+ ptdump_walk_pgd_level_debugfs(m, current->mm, true);
up_read(&current->mm->mmap_sem);
}
return 0;
@@ -43,7 +43,7 @@ DEFINE_SHOW_ATTRIBUTE(ptdump_curusr);
static int ptdump_efi_show(struct seq_file *m, void *v)
{
if (efi_mm.pgd)
- ptdump_walk_pgd_level_debugfs(m, efi_mm.pgd, false);
+ ptdump_walk_pgd_level_debugfs(m, &efi_mm, false);
return 0;
}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ab67822fd2f4..d3c28b3765fc 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -16,6 +16,7 @@
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/pci.h>
+#include <linux/ptdump.h>
#include <asm/e820/types.h>
#include <asm/pgtable.h>
@@ -26,16 +27,18 @@
* when a "break" in the continuity is found.
*/
struct pg_state {
+ struct ptdump_state ptdump;
int level;
- pgprot_t current_prot;
+ pgprotval_t current_prot;
pgprotval_t effective_prot;
+ pgprotval_t prot_levels[5];
unsigned long start_address;
- unsigned long current_address;
const struct addr_marker *marker;
unsigned long lines;
bool to_dmesg;
bool check_wx;
unsigned long wx_pages;
+ struct seq_file *seq;
};
struct addr_marker {
@@ -106,8 +109,6 @@ static struct addr_marker address_markers[] = {
[END_OF_SPACE_NR] = { -1, NULL }
};
-#define INIT_PGD ((pgd_t *) &init_top_pgt)
-
#else /* CONFIG_X86_64 */
enum address_markers_idx {
@@ -142,8 +143,6 @@ static struct addr_marker address_markers[] = {
[END_OF_SPACE_NR] = { -1, NULL }
};
-#define INIT_PGD (swapper_pg_dir)
-
#endif /* !CONFIG_X86_64 */
/* Multipliers for offsets within the PTEs */
@@ -174,11 +173,10 @@ static struct addr_marker address_markers[] = {
/*
* Print a readable form of a pgprot_t to the seq_file
*/
-static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
+static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
{
- pgprotval_t pr = pgprot_val(prot);
static const char * const level_name[] =
- { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
+ { "pgd", "p4d", "pud", "pmd", "pte" };
if (!(pr & _PAGE_PRESENT)) {
/* Not present */
@@ -202,12 +200,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
pt_dump_cont_printf(m, dmsg, " ");
/* Bit 7 has a different meaning on level 3 vs 4 */
- if (level <= 4 && pr & _PAGE_PSE)
+ if (level <= 3 && pr & _PAGE_PSE)
pt_dump_cont_printf(m, dmsg, "PSE ");
else
pt_dump_cont_printf(m, dmsg, " ");
- if ((level == 5 && pr & _PAGE_PAT) ||
- ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE))
+ if ((level == 4 && pr & _PAGE_PAT) ||
+ ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
pt_dump_cont_printf(m, dmsg, "PAT ");
else
pt_dump_cont_printf(m, dmsg, " ");
@@ -223,24 +221,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
}
-/*
- * On 64 bits, sign-extend the 48 bit address to 64 bit
- */
-static unsigned long normalize_addr(unsigned long u)
-{
- int shift;
- if (!IS_ENABLED(CONFIG_X86_64))
- return u;
-
- shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
- return (signed long)(u << shift) >> shift;
-}
-
-static void note_wx(struct pg_state *st)
+static void note_wx(struct pg_state *st, unsigned long addr)
{
unsigned long npages;
- npages = (st->current_address - st->start_address) / PAGE_SIZE;
+ npages = (addr - st->start_address) / PAGE_SIZE;
#ifdef CONFIG_PCI_BIOS
/*
@@ -248,7 +233,7 @@ static void note_wx(struct pg_state *st)
* Inform about it, but avoid the warning.
*/
if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
- st->current_address <= PAGE_OFFSET + BIOS_END) {
+ addr <= PAGE_OFFSET + BIOS_END) {
pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
return;
}
@@ -260,27 +245,47 @@ static void note_wx(struct pg_state *st)
(void *)st->start_address);
}
+static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+{
+ return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
+ ((prot1 | prot2) & _PAGE_NX);
+}
+
/*
* This function gets called on a break in a continuous series
* of PTE entries; the next one is different so we need to
* print what we collected so far.
*/
-static void note_page(struct seq_file *m, struct pg_state *st,
- pgprot_t new_prot, pgprotval_t new_eff, int level)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ unsigned long val)
{
- pgprotval_t prot, cur, eff;
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ pgprotval_t new_prot, new_eff;
+ pgprotval_t cur, eff;
static const char units[] = "BKMGTPE";
+ struct seq_file *m = st->seq;
+
+ new_prot = val & PTE_FLAGS_MASK;
+
+ if (level > 0) {
+ new_eff = effective_prot(st->prot_levels[level - 1],
+ new_prot);
+ } else {
+ new_eff = new_prot;
+ }
+
+ if (level >= 0)
+ st->prot_levels[level] = new_eff;
/*
* If we have a "break" in the series, we need to flush the state that
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
- prot = pgprot_val(new_prot);
- cur = pgprot_val(st->current_prot);
+ cur = st->current_prot;
eff = st->effective_prot;
- if (!st->level) {
+ if (st->level == -1) {
/* First entry */
st->current_prot = new_prot;
st->effective_prot = new_eff;
@@ -289,14 +294,14 @@ static void note_page(struct seq_file *m, struct pg_state *st,
st->lines = 0;
pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
st->marker->name);
- } else if (prot != cur || new_eff != eff || level != st->level ||
- st->current_address >= st->marker[1].start_address) {
+ } else if (new_prot != cur || new_eff != eff || level != st->level ||
+ addr >= st->marker[1].start_address) {
const char *unit = units;
unsigned long delta;
int width = sizeof(unsigned long) * 2;
if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
- note_wx(st);
+ note_wx(st, addr);
/*
* Now print the actual finished series
@@ -306,9 +311,9 @@ static void note_page(struct seq_file *m, struct pg_state *st,
pt_dump_seq_printf(m, st->to_dmesg,
"0x%0*lx-0x%0*lx ",
width, st->start_address,
- width, st->current_address);
+ width, addr);
- delta = st->current_address - st->start_address;
+ delta = addr - st->start_address;
while (!(delta & 1023) && unit[1]) {
delta >>= 10;
unit++;
@@ -325,7 +330,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* such as the start of vmalloc space etc.
* This helps in the interpretation.
*/
- if (st->current_address >= st->marker[1].start_address) {
+ if (addr >= st->marker[1].start_address) {
if (st->marker->max_lines &&
st->lines > st->marker->max_lines) {
unsigned long nskip =
@@ -341,222 +346,44 @@ static void note_page(struct seq_file *m, struct pg_state *st,
st->marker->name);
}
- st->start_address = st->current_address;
+ st->start_address = addr;
st->current_prot = new_prot;
st->effective_prot = new_eff;
st->level = level;
}
}
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
-{
- return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
- ((prot1 | prot2) & _PAGE_NX);
-}
-
-static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
- pgprotval_t eff_in, unsigned long P)
-{
- int i;
- pte_t *pte;
- pgprotval_t prot, eff;
-
- for (i = 0; i < PTRS_PER_PTE; i++) {
- st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
- pte = pte_offset_map(&addr, st->current_address);
- prot = pte_flags(*pte);
- eff = effective_prot(eff_in, prot);
- note_page(m, st, __pgprot(prot), eff, 5);
- pte_unmap(pte);
- }
-}
-#ifdef CONFIG_KASAN
-
-/*
- * This is an optimization for KASAN=y case. Since all kasan page tables
- * eventually point to the kasan_early_shadow_page we could call note_page()
- * right away without walking through lower level page tables. This saves
- * us dozens of seconds (minutes for 5-level config) while checking for
- * W+X mapping or reading kernel_page_tables debugfs file.
- */
-static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
- void *pt)
-{
- if (__pa(pt) == __pa(kasan_early_shadow_pmd) ||
- (pgtable_l5_enabled() &&
- __pa(pt) == __pa(kasan_early_shadow_p4d)) ||
- __pa(pt) == __pa(kasan_early_shadow_pud)) {
- pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]);
- note_page(m, st, __pgprot(prot), 0, 5);
- return true;
- }
- return false;
-}
-#else
-static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
- void *pt)
-{
- return false;
-}
-#endif
-
-#if PTRS_PER_PMD > 1
-
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
- pgprotval_t eff_in, unsigned long P)
-{
- int i;
- pmd_t *start, *pmd_start;
- pgprotval_t prot, eff;
-
- pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
- for (i = 0; i < PTRS_PER_PMD; i++) {
- st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
- if (!pmd_none(*start)) {
- prot = pmd_flags(*start);
- eff = effective_prot(eff_in, prot);
- if (pmd_large(*start) || !pmd_present(*start)) {
- note_page(m, st, __pgprot(prot), eff, 4);
- } else if (!kasan_page_table(m, st, pmd_start)) {
- walk_pte_level(m, st, *start, eff,
- P + i * PMD_LEVEL_MULT);
- }
- } else
- note_page(m, st, __pgprot(0), 0, 4);
- start++;
- }
-}
-
-#else
-#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p)
-#define pud_large(a) pmd_large(__pmd(pud_val(a)))
-#define pud_none(a) pmd_none(__pmd(pud_val(a)))
-#endif
-
-#if PTRS_PER_PUD > 1
-
-static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
- pgprotval_t eff_in, unsigned long P)
-{
- int i;
- pud_t *start, *pud_start;
- pgprotval_t prot, eff;
-
- pud_start = start = (pud_t *)p4d_page_vaddr(addr);
-
- for (i = 0; i < PTRS_PER_PUD; i++) {
- st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
- if (!pud_none(*start)) {
- prot = pud_flags(*start);
- eff = effective_prot(eff_in, prot);
- if (pud_large(*start) || !pud_present(*start)) {
- note_page(m, st, __pgprot(prot), eff, 3);
- } else if (!kasan_page_table(m, st, pud_start)) {
- walk_pmd_level(m, st, *start, eff,
- P + i * PUD_LEVEL_MULT);
- }
- } else
- note_page(m, st, __pgprot(0), 0, 3);
-
- start++;
- }
-}
-
-#else
-#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p)
-#define p4d_large(a) pud_large(__pud(p4d_val(a)))
-#define p4d_none(a) pud_none(__pud(p4d_val(a)))
-#endif
-
-static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
- pgprotval_t eff_in, unsigned long P)
-{
- int i;
- p4d_t *start, *p4d_start;
- pgprotval_t prot, eff;
-
- if (PTRS_PER_P4D == 1)
- return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P);
-
- p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
-
- for (i = 0; i < PTRS_PER_P4D; i++) {
- st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
- if (!p4d_none(*start)) {
- prot = p4d_flags(*start);
- eff = effective_prot(eff_in, prot);
- if (p4d_large(*start) || !p4d_present(*start)) {
- note_page(m, st, __pgprot(prot), eff, 2);
- } else if (!kasan_page_table(m, st, p4d_start)) {
- walk_pud_level(m, st, *start, eff,
- P + i * P4D_LEVEL_MULT);
- }
- } else
- note_page(m, st, __pgprot(0), 0, 2);
-
- start++;
- }
-}
-
-#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
-#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
-
-static inline bool is_hypervisor_range(int idx)
-{
-#ifdef CONFIG_X86_64
- /*
- * A hole in the beginning of kernel address space reserved
- * for a hypervisor.
- */
- return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) &&
- (idx < pgd_index(GUARD_HOLE_END_ADDR));
-#else
- return false;
-#endif
-}
-
-static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
+static void ptdump_walk_pgd_level_core(struct seq_file *m, struct mm_struct *mm,
bool checkwx, bool dmesg)
{
- pgd_t *start = INIT_PGD;
- pgprotval_t prot, eff;
- int i;
- struct pg_state st = {};
-
- if (pgd) {
- start = pgd;
- st.to_dmesg = dmesg;
- }
+ const struct ptdump_range ptdump_ranges[] = {
+#ifdef CONFIG_X86_64
- st.check_wx = checkwx;
- if (checkwx)
- st.wx_pages = 0;
+#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
+#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \
+ normalize_addr_shift)
- for (i = 0; i < PTRS_PER_PGD; i++) {
- st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
- if (!pgd_none(*start) && !is_hypervisor_range(i)) {
- prot = pgd_flags(*start);
-#ifdef CONFIG_X86_PAE
- eff = _PAGE_USER | _PAGE_RW;
+ {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
+ {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
#else
- eff = prot;
+ {0, ~0UL},
#endif
- if (pgd_large(*start) || !pgd_present(*start)) {
- note_page(m, &st, __pgprot(prot), eff, 1);
- } else {
- walk_p4d_level(m, &st, *start, eff,
- i * PGD_LEVEL_MULT);
- }
- } else
- note_page(m, &st, __pgprot(0), 0, 1);
+ {0, 0}
+};
- cond_resched();
- start++;
- }
+ struct pg_state st = {
+ .ptdump = {
+ .note_page = note_page,
+ .range = ptdump_ranges
+ },
+ .level = -1,
+ .to_dmesg = dmesg,
+ .check_wx = checkwx,
+ .seq = m
+ };
+
+ ptdump_walk_pgd(&st.ptdump, mm);
- /* Flush out the last page */
- st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
- note_page(m, &st, __pgprot(0), 0, 0);
if (!checkwx)
return;
if (st.wx_pages)
@@ -566,39 +393,51 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
}
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm)
+{
+ ptdump_walk_pgd_level_core(m, mm, false, true);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+static void ptdump_walk_pgd_level_user_core(struct seq_file *m,
+ struct mm_struct *mm,
+ bool checkwx, bool dmesg)
{
- ptdump_walk_pgd_level_core(m, pgd, false, true);
+ struct mm_struct fake_mm = {
+ .pgd = kernel_to_user_pgdp(mm->pgd)
+ };
+ init_rwsem(&fake_mm.mmap_sem);
+ ptdump_walk_pgd_level_core(m, &fake_mm, checkwx, dmesg);
}
+#endif
-void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
+ bool user)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
if (user && boot_cpu_has(X86_FEATURE_PTI))
- pgd = kernel_to_user_pgdp(pgd);
+ ptdump_walk_pgd_level_user_core(m, mm, false, false);
+ else
#endif
- ptdump_walk_pgd_level_core(m, pgd, false, false);
+ ptdump_walk_pgd_level_core(m, mm, false, false);
}
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
void ptdump_walk_user_pgd_level_checkwx(void)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pgd_t *pgd = INIT_PGD;
-
if (!(__supported_pte_mask & _PAGE_NX) ||
!boot_cpu_has(X86_FEATURE_PTI))
return;
pr_info("x86/mm: Checking user space page tables\n");
- pgd = kernel_to_user_pgdp(pgd);
- ptdump_walk_pgd_level_core(NULL, pgd, true, false);
+ ptdump_walk_pgd_level_user_core(NULL, &init_mm, true, false);
#endif
}
void ptdump_walk_pgd_level_checkwx(void)
{
- ptdump_walk_pgd_level_core(NULL, NULL, true, false);
+ ptdump_walk_pgd_level_core(NULL, &init_mm, true, false);
}
static int __init pt_dump_init(void)
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 9959657127f4..1616074075c3 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -49,7 +49,7 @@ void efi_sync_low_kernel_mappings(void) {}
void __init efi_dump_pagetable(void)
{
#ifdef CONFIG_EFI_PGT_DUMP
- ptdump_walk_pgd_level(NULL, swapper_pg_dir);
+ ptdump_walk_pgd_level(NULL, &init_mm);
#endif
}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 08ce8177c3af..3cb63cd369d6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -614,9 +614,9 @@ void __init efi_dump_pagetable(void)
{
#ifdef CONFIG_EFI_PGT_DUMP
if (efi_enabled(EFI_OLD_MEMMAP))
- ptdump_walk_pgd_level(NULL, swapper_pg_dir);
+ ptdump_walk_pgd_level(NULL, &init_mm);
else
- ptdump_walk_pgd_level(NULL, efi_mm.pgd);
+ ptdump_walk_pgd_level(NULL, &efi_mm);
#endif
}
diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index bef6b85778b6..874c259a8829 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -288,31 +288,6 @@ static int charlcd_init_display(struct charlcd *lcd)
}
/*
- * Parses an unsigned integer from a string, until a non-digit character
- * is found. The empty string is not accepted. No overflow checks are done.
- *
- * Returns whether the parsing was successful. Only in that case
- * the output parameters are written to.
- *
- * TODO: If the kernel adds an inplace version of kstrtoul(), this function
- * could be easily replaced by that.
- */
-static bool parse_n(const char *s, unsigned long *res, const char **next_s)
-{
- if (!isdigit(*s))
- return false;
-
- *res = 0;
- while (isdigit(*s)) {
- *res = *res * 10 + (*s - '0');
- ++s;
- }
-
- *next_s = s;
- return true;
-}
-
-/*
* Parses a movement command of the form "(.*);", where the group can be
* any number of subcommands of the form "(x|y)[0-9]+".
*
@@ -336,6 +311,7 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
{
unsigned long new_x = *x;
unsigned long new_y = *y;
+ char *p;
for (;;) {
if (!*s)
@@ -345,11 +321,15 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
break;
if (*s == 'x') {
- if (!parse_n(s + 1, &new_x, &s))
+ new_x = simple_strtoul(s + 1, &p, 10);
+ if (p == s + 1)
return false;
+ s = p;
} else if (*s == 'y') {
- if (!parse_n(s + 1, &new_y, &s))
+ new_y = simple_strtoul(s + 1, &p, 10);
+ if (p == s + 1)
return false;
+ s = p;
} else {
return false;
}
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 795fda576824..7869dbd2dbad 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -263,34 +263,34 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
}
/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
-#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
-static ssize_t \
-nullb_device_##NAME##_show(struct config_item *item, char *page) \
-{ \
- return nullb_device_##TYPE##_attr_show( \
- to_nullb_device(item)->NAME, page); \
-} \
-static ssize_t \
-nullb_device_##NAME##_store(struct config_item *item, const char *page, \
- size_t count) \
-{ \
- int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY; \
- struct nullb_device *dev = to_nullb_device(item); \
- TYPE new_value; \
- int ret; \
- \
- ret = nullb_device_##TYPE##_attr_store(&new_value, page, count); \
- if (ret < 0) \
- return ret; \
- if (apply_fn) \
- ret = apply_fn(dev, new_value); \
- else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
- ret = -EBUSY; \
- if (ret < 0) \
- return ret; \
- dev->NAME = new_value; \
- return count; \
-} \
+#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
+static ssize_t \
+nullb_device_##NAME##_show(struct config_item *item, char *page) \
+{ \
+ return nullb_device_##TYPE##_attr_show( \
+ to_nullb_device(item)->NAME, page); \
+} \
+static ssize_t \
+nullb_device_##NAME##_store(struct config_item *item, const char *page, \
+ size_t count) \
+{ \
+ int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
+ struct nullb_device *dev = to_nullb_device(item); \
+ TYPE uninitialized_var(new_value); \
+ int ret; \
+ \
+ ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
+ if (ret < 0) \
+ return ret; \
+ if (apply_fn) \
+ ret = apply_fn(dev, new_value); \
+ else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
+ ret = -EBUSY; \
+ if (ret < 0) \
+ return ret; \
+ dev->NAME = new_value; \
+ return count; \
+} \
CONFIGFS_ATTR(nullb_device_, NAME);
static int nullb_apply_submit_queues(struct nullb_device *dev,
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 899b803842bb..9dda2602c862 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -27,7 +27,7 @@
extern u64 efi_system_table;
-#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS
+#if defined(CONFIG_PTDUMP_DEBUGFS) && defined(CONFIG_ARM64)
#include <asm/ptdump.h>
static struct ptdump_info efi_ptdump_info = {
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 400c09b905f8..1f7d9bbec0fc 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -178,46 +178,25 @@ static int dio48e_gpio_get(struct gpio_chip *chip, unsigned offset)
return !!(port_state & mask);
}
+static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
+
static int dio48e_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
unsigned long *bits)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- size_t i;
- static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+ unsigned long offset;
+ unsigned long gpio_mask;
+ unsigned int port_addr;
unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
-
- /* read bits from current gpio port */
- port_state = inb(dio48egpio->base + ports[i]);
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ port_addr = dio48egpio->base + ports[offset / 8];
+ port_state = inb(port_addr) & gpio_mask;
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -247,37 +226,27 @@ static void dio48e_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- unsigned int i;
- const unsigned int gpio_reg_size = 8;
- unsigned int port;
- unsigned int out_port;
- unsigned int bitmask;
+ unsigned long offset;
+ unsigned long gpio_mask;
+ size_t index;
+ unsigned int port_addr;
+ unsigned long bitmask;
unsigned long flags;
- /* set bits are evaluated a gpio register size at a time */
- for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
- /* no more set bits in this mask word; skip to the next word */
- if (!mask[BIT_WORD(i)]) {
- i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
- continue;
- }
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
+ port_addr = dio48egpio->base + ports[index];
- port = i / gpio_reg_size;
- out_port = (port > 2) ? port + 1 : port;
- bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)];
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
raw_spin_lock_irqsave(&dio48egpio->lock, flags);
/* update output state data and set device gpio register */
- dio48egpio->out_state[port] &= ~mask[BIT_WORD(i)];
- dio48egpio->out_state[port] |= bitmask;
- outb(dio48egpio->out_state[port], dio48egpio->base + out_port);
+ dio48egpio->out_state[index] &= ~gpio_mask;
+ dio48egpio->out_state[index] |= bitmask;
+ outb(dio48egpio->out_state[index], port_addr);
raw_spin_unlock_irqrestore(&dio48egpio->lock, flags);
-
- /* prepare for next gpio register set */
- mask[BIT_WORD(i)] >>= gpio_reg_size;
- bits[BIT_WORD(i)] >>= gpio_reg_size;
}
}
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index c50329ab493a..d350ac0de06b 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -85,42 +85,20 @@ static int idi_48_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
unsigned long *bits)
{
struct idi_48_gpio *const idi48gpio = gpiochip_get_data(chip);
- size_t i;
+ unsigned long offset;
+ unsigned long gpio_mask;
static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+ unsigned int port_addr;
unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ port_addr = idi48gpio->base + ports[offset / 8];
+ port_state = inb(port_addr) & gpio_mask;
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
-
- /* read bits from current gpio port */
- port_state = inb(idi48gpio->base + ports[i]);
-
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index e81307f9754e..05637d585152 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -6,6 +6,7 @@
* Copyright (C) 2010 Miguel Gaio <miguel.gaio@efixo.com>
*/
+#include <linux/bitops.h>
#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
#include <linux/module.h>
@@ -72,20 +73,18 @@ static void gen_74x164_set_multiple(struct gpio_chip *gc, unsigned long *mask,
unsigned long *bits)
{
struct gen_74x164_chip *chip = gpiochip_get_data(gc);
- unsigned int i, idx, shift;
- u8 bank, bankmask;
+ unsigned long offset;
+ unsigned long bankmask;
+ size_t bank;
+ unsigned long bitmask;
mutex_lock(&chip->lock);
- for (i = 0, bank = chip->registers - 1; i < chip->registers;
- i++, bank--) {
- idx = i / sizeof(*mask);
- shift = i % sizeof(*mask) * BITS_PER_BYTE;
- bankmask = mask[idx] >> shift;
- if (!bankmask)
- continue;
+ for_each_set_clump8(offset, bankmask, mask, chip->registers * 8) {
+ bank = chip->registers - 1 - offset / 8;
+ bitmask = bitmap_get_value8(bits, offset) & bankmask;
chip->buffer[bank] &= ~bankmask;
- chip->buffer[bank] |= bankmask & (bits[idx] >> shift);
+ chip->buffer[bank] |= bitmask;
}
__gen_74x164_write_config(chip);
mutex_unlock(&chip->lock);
diff --git a/drivers/gpio/gpio-gpio-mm.c b/drivers/gpio/gpio-gpio-mm.c
index c22d6f94129c..b89b8c5ff1f5 100644
--- a/drivers/gpio/gpio-gpio-mm.c
+++ b/drivers/gpio/gpio-gpio-mm.c
@@ -167,46 +167,25 @@ static int gpiomm_gpio_get(struct gpio_chip *chip, unsigned int offset)
return !!(port_state & mask);
}
+static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
+
static int gpiomm_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
unsigned long *bits)
{
struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip);
- size_t i;
- static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+ unsigned long offset;
+ unsigned long gpio_mask;
+ unsigned int port_addr;
unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
-
- /* read bits from current gpio port */
- port_state = inb(gpiommgpio->base + ports[i]);
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ port_addr = gpiommgpio->base + ports[offset / 8];
+ port_state = inb(port_addr) & gpio_mask;
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -237,37 +216,27 @@ static void gpiomm_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip);
- unsigned int i;
- const unsigned int gpio_reg_size = 8;
- unsigned int port;
- unsigned int out_port;
- unsigned int bitmask;
+ unsigned long offset;
+ unsigned long gpio_mask;
+ size_t index;
+ unsigned int port_addr;
+ unsigned long bitmask;
unsigned long flags;
- /* set bits are evaluated a gpio register size at a time */
- for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
- /* no more set bits in this mask word; skip to the next word */
- if (!mask[BIT_WORD(i)]) {
- i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
- continue;
- }
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
+ port_addr = gpiommgpio->base + ports[index];
- port = i / gpio_reg_size;
- out_port = (port > 2) ? port + 1 : port;
- bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)];
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
spin_lock_irqsave(&gpiommgpio->lock, flags);
/* update output state data and set device gpio register */
- gpiommgpio->out_state[port] &= ~mask[BIT_WORD(i)];
- gpiommgpio->out_state[port] |= bitmask;
- outb(gpiommgpio->out_state[port], gpiommgpio->base + out_port);
+ gpiommgpio->out_state[index] &= ~gpio_mask;
+ gpiommgpio->out_state[index] |= bitmask;
+ outb(gpiommgpio->out_state[index], port_addr);
spin_unlock_irqrestore(&gpiommgpio->lock, flags);
-
- /* prepare for next gpio register set */
- mask[BIT_WORD(i)] >>= gpio_reg_size;
- bits[BIT_WORD(i)] >>= gpio_reg_size;
}
}
diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index 0696d5a21431..310d1a248cae 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -31,6 +31,7 @@
*/
#include <linux/bitmap.h>
+#include <linux/bitops.h>
#include <linux/crc8.h>
#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
@@ -232,16 +233,20 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask,
unsigned long *bits)
{
struct max3191x_chip *max3191x = gpiochip_get_data(gpio);
- int ret, bit = 0, wordlen = max3191x_wordlen(max3191x);
+ const unsigned int wordlen = max3191x_wordlen(max3191x);
+ int ret;
+ unsigned long bit;
+ unsigned long gpio_mask;
+ unsigned long in;
mutex_lock(&max3191x->lock);
ret = max3191x_readout_locked(max3191x);
if (ret)
goto out_unlock;
- while ((bit = find_next_bit(mask, gpio->ngpio, bit)) != gpio->ngpio) {
+ bitmap_zero(bits, gpio->ngpio);
+ for_each_set_clump8(bit, gpio_mask, mask, gpio->ngpio) {
unsigned int chipnum = bit / MAX3191X_NGPIO;
- unsigned long in, shift, index;
if (max3191x_chip_is_faulting(max3191x, chipnum)) {
ret = -EIO;
@@ -249,12 +254,8 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask,
}
in = ((u8 *)max3191x->xfer.rx_buf)[chipnum * wordlen];
- shift = round_down(bit % BITS_PER_LONG, MAX3191X_NGPIO);
- index = bit / BITS_PER_LONG;
- bits[index] &= ~(mask[index] & (0xff << shift));
- bits[index] |= mask[index] & (in << shift); /* copy bits */
-
- bit = (chipnum + 1) * MAX3191X_NGPIO; /* go to next chip */
+ in &= gpio_mask;
+ bitmap_set_value8(bits, in, bit);
}
out_unlock:
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 82122c3c688a..6652bee01966 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -9,7 +9,7 @@
*/
#include <linux/acpi.h>
-#include <linux/bits.h>
+#include <linux/bitmap.h>
#include <linux/gpio/driver.h>
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
@@ -115,6 +115,7 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids);
#define MAX_BANK 5
#define BANK_SZ 8
+#define MAX_LINE (MAX_BANK * BANK_SZ)
#define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ)
@@ -146,10 +147,10 @@ struct pca953x_chip {
#ifdef CONFIG_GPIO_PCA953X_IRQ
struct mutex irq_lock;
- u8 irq_mask[MAX_BANK];
- u8 irq_stat[MAX_BANK];
- u8 irq_trig_raise[MAX_BANK];
- u8 irq_trig_fall[MAX_BANK];
+ DECLARE_BITMAP(irq_mask, MAX_LINE);
+ DECLARE_BITMAP(irq_stat, MAX_LINE);
+ DECLARE_BITMAP(irq_trig_raise, MAX_LINE);
+ DECLARE_BITMAP(irq_trig_fall, MAX_LINE);
struct irq_chip irq_chip;
#endif
atomic_t wakeup_path;
@@ -333,12 +334,16 @@ static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off,
return regaddr;
}
-static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val)
+static int pca953x_write_regs(struct pca953x_chip *chip, int reg, unsigned long *val)
{
u8 regaddr = pca953x_recalc_addr(chip, reg, 0, true, true);
- int ret;
+ u8 value[MAX_BANK];
+ int i, ret;
+
+ for (i = 0; i < NBANK(chip); i++)
+ value[i] = bitmap_get_value8(val, i * BANK_SZ);
- ret = regmap_bulk_write(chip->regmap, regaddr, val, NBANK(chip));
+ ret = regmap_bulk_write(chip->regmap, regaddr, value, NBANK(chip));
if (ret < 0) {
dev_err(&chip->client->dev, "failed writing register\n");
return ret;
@@ -347,17 +352,21 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val)
return 0;
}
-static int pca953x_read_regs(struct pca953x_chip *chip, int reg, u8 *val)
+static int pca953x_read_regs(struct pca953x_chip *chip, int reg, unsigned long *val)
{
u8 regaddr = pca953x_recalc_addr(chip, reg, 0, false, true);
- int ret;
+ u8 value[MAX_BANK];
+ int i, ret;
- ret = regmap_bulk_read(chip->regmap, regaddr, val, NBANK(chip));
+ ret = regmap_bulk_read(chip->regmap, regaddr, value, NBANK(chip));
if (ret < 0) {
dev_err(&chip->client->dev, "failed reading register\n");
return ret;
}
+ for (i = 0; i < NBANK(chip); i++)
+ bitmap_set_value8(val, value[i], i * BANK_SZ);
+
return 0;
}
@@ -412,7 +421,9 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off)
ret = regmap_read(chip->regmap, inreg, &reg_val);
mutex_unlock(&chip->i2c_lock);
if (ret < 0) {
- /* NOTE: diagnostic already emitted; that's all we should
+ /*
+ * NOTE:
+ * diagnostic already emitted; that's all we should
* do unless gpio_*_value_cansleep() calls become different
* from their nonsleeping siblings (and report faults).
*/
@@ -459,9 +470,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
unsigned long *mask, unsigned long *bits)
{
struct pca953x_chip *chip = gpiochip_get_data(gc);
- unsigned int bank_mask, bank_val;
- int bank;
- u8 reg_val[MAX_BANK];
+ DECLARE_BITMAP(reg_val, MAX_LINE);
int ret;
mutex_lock(&chip->i2c_lock);
@@ -469,16 +478,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
if (ret)
goto exit;
- for (bank = 0; bank < NBANK(chip); bank++) {
- bank_mask = mask[bank / sizeof(*mask)] >>
- ((bank % sizeof(*mask)) * 8);
- if (bank_mask) {
- bank_val = bits[bank / sizeof(*bits)] >>
- ((bank % sizeof(*bits)) * 8);
- bank_val &= bank_mask;
- reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val;
- }
- }
+ bitmap_replace(reg_val, reg_val, bits, mask, gc->ngpio);
pca953x_write_regs(chip, chip->regs->output, reg_val);
exit:
@@ -605,10 +605,9 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct pca953x_chip *chip = gpiochip_get_data(gc);
- u8 new_irqs;
- int level, i;
- u8 invert_irq_mask[MAX_BANK];
- u8 reg_direction[MAX_BANK];
+ DECLARE_BITMAP(irq_mask, MAX_LINE);
+ DECLARE_BITMAP(reg_direction, MAX_LINE);
+ int level;
pca953x_read_regs(chip, chip->regs->direction, reg_direction);
@@ -616,25 +615,18 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
/* Enable latch on interrupt-enabled inputs */
pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask);
- for (i = 0; i < NBANK(chip); i++)
- invert_irq_mask[i] = ~chip->irq_mask[i];
+ bitmap_complement(irq_mask, chip->irq_mask, gc->ngpio);
/* Unmask enabled interrupts */
- pca953x_write_regs(chip, PCAL953X_INT_MASK, invert_irq_mask);
+ pca953x_write_regs(chip, PCAL953X_INT_MASK, irq_mask);
}
+ bitmap_or(irq_mask, chip->irq_trig_fall, chip->irq_trig_raise, gc->ngpio);
+ bitmap_and(irq_mask, irq_mask, reg_direction, gc->ngpio);
+
/* Look for any newly setup interrupt */
- for (i = 0; i < NBANK(chip); i++) {
- new_irqs = chip->irq_trig_fall[i] | chip->irq_trig_raise[i];
- new_irqs &= reg_direction[i];
-
- while (new_irqs) {
- level = __ffs(new_irqs);
- pca953x_gpio_direction_input(&chip->gpio_chip,
- level + (BANK_SZ * i));
- new_irqs &= ~(1 << level);
- }
- }
+ for_each_set_bit(level, irq_mask, gc->ngpio)
+ pca953x_gpio_direction_input(&chip->gpio_chip, level);
mutex_unlock(&chip->irq_lock);
}
@@ -675,15 +667,15 @@ static void pca953x_irq_shutdown(struct irq_data *d)
chip->irq_trig_fall[d->hwirq / BANK_SZ] &= ~mask;
}
-static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
+static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pending)
{
- u8 cur_stat[MAX_BANK];
- u8 old_stat[MAX_BANK];
- bool pending_seen = false;
- bool trigger_seen = false;
- u8 trigger[MAX_BANK];
- u8 reg_direction[MAX_BANK];
- int ret, i;
+ struct gpio_chip *gc = &chip->gpio_chip;
+ DECLARE_BITMAP(reg_direction, MAX_LINE);
+ DECLARE_BITMAP(old_stat, MAX_LINE);
+ DECLARE_BITMAP(cur_stat, MAX_LINE);
+ DECLARE_BITMAP(new_stat, MAX_LINE);
+ DECLARE_BITMAP(trigger, MAX_LINE);
+ int ret;
if (chip->driver_data & PCA_PCAL) {
/* Read the current interrupt status from the device */
@@ -692,20 +684,16 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
return false;
/* Check latched inputs and clear interrupt status */
- ret = pca953x_read_regs(chip, PCA953X_INPUT, cur_stat);
+ ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
if (ret)
return false;
- for (i = 0; i < NBANK(chip); i++) {
- /* Apply filter for rising/falling edge selection */
- pending[i] = (~cur_stat[i] & chip->irq_trig_fall[i]) |
- (cur_stat[i] & chip->irq_trig_raise[i]);
- pending[i] &= trigger[i];
- if (pending[i])
- pending_seen = true;
- }
+ /* Apply filter for rising/falling edge selection */
+ bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio);
+
+ bitmap_and(pending, new_stat, trigger, gc->ngpio);
- return pending_seen;
+ return !bitmap_empty(pending, gc->ngpio);
}
ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
@@ -714,64 +702,49 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
/* Remove output pins from the equation */
pca953x_read_regs(chip, chip->regs->direction, reg_direction);
- for (i = 0; i < NBANK(chip); i++)
- cur_stat[i] &= reg_direction[i];
- memcpy(old_stat, chip->irq_stat, NBANK(chip));
+ bitmap_copy(old_stat, chip->irq_stat, gc->ngpio);
- for (i = 0; i < NBANK(chip); i++) {
- trigger[i] = (cur_stat[i] ^ old_stat[i]) & chip->irq_mask[i];
- if (trigger[i])
- trigger_seen = true;
- }
+ bitmap_and(new_stat, cur_stat, reg_direction, gc->ngpio);
+ bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio);
+ bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio);
- if (!trigger_seen)
+ if (bitmap_empty(trigger, gc->ngpio))
return false;
- memcpy(chip->irq_stat, cur_stat, NBANK(chip));
+ bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
- for (i = 0; i < NBANK(chip); i++) {
- pending[i] = (old_stat[i] & chip->irq_trig_fall[i]) |
- (cur_stat[i] & chip->irq_trig_raise[i]);
- pending[i] &= trigger[i];
- if (pending[i])
- pending_seen = true;
- }
+ bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio);
+ bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio);
+ bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio);
+ bitmap_and(pending, new_stat, trigger, gc->ngpio);
- return pending_seen;
+ return !bitmap_empty(pending, gc->ngpio);
}
static irqreturn_t pca953x_irq_handler(int irq, void *devid)
{
struct pca953x_chip *chip = devid;
- u8 pending[MAX_BANK];
- u8 level;
- unsigned nhandled = 0;
- int i;
+ struct gpio_chip *gc = &chip->gpio_chip;
+ DECLARE_BITMAP(pending, MAX_LINE);
+ int level;
if (!pca953x_irq_pending(chip, pending))
return IRQ_NONE;
- for (i = 0; i < NBANK(chip); i++) {
- while (pending[i]) {
- level = __ffs(pending[i]);
- handle_nested_irq(irq_find_mapping(chip->gpio_chip.irq.domain,
- level + (BANK_SZ * i)));
- pending[i] &= ~(1 << level);
- nhandled++;
- }
- }
+ for_each_set_bit(level, pending, gc->ngpio)
+ handle_nested_irq(irq_find_mapping(gc->irq.domain, level));
- return (nhandled > 0) ? IRQ_HANDLED : IRQ_NONE;
+ return IRQ_HANDLED;
}
-static int pca953x_irq_setup(struct pca953x_chip *chip,
- int irq_base)
+static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base)
{
struct i2c_client *client = chip->client;
struct irq_chip *irq_chip = &chip->irq_chip;
- u8 reg_direction[MAX_BANK];
- int ret, i;
+ DECLARE_BITMAP(reg_direction, MAX_LINE);
+ DECLARE_BITMAP(irq_stat, MAX_LINE);
+ int ret;
if (!client->irq)
return 0;
@@ -782,7 +755,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
if (!(chip->driver_data & PCA_INT))
return 0;
- ret = pca953x_read_regs(chip, chip->regs->input, chip->irq_stat);
+ ret = pca953x_read_regs(chip, chip->regs->input, irq_stat);
if (ret)
return ret;
@@ -792,8 +765,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
* this purpose.
*/
pca953x_read_regs(chip, chip->regs->direction, reg_direction);
- for (i = 0; i < NBANK(chip); i++)
- chip->irq_stat[i] &= reg_direction[i];
+ bitmap_and(chip->irq_stat, irq_stat, reg_direction, chip->gpio_chip.ngpio);
mutex_init(&chip->irq_lock);
ret = devm_request_threaded_irq(&client->dev, client->irq,
@@ -816,9 +788,9 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
irq_chip->irq_set_type = pca953x_irq_set_type;
irq_chip->irq_shutdown = pca953x_irq_shutdown;
- ret = gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip,
- irq_base, handle_simple_irq,
- IRQ_TYPE_NONE);
+ ret = gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip,
+ irq_base, handle_simple_irq,
+ IRQ_TYPE_NONE);
if (ret) {
dev_err(&client->dev,
"could not connect irqchip to gpiochip\n");
@@ -845,8 +817,8 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
{
+ DECLARE_BITMAP(val, MAX_LINE);
int ret;
- u8 val[MAX_BANK];
ret = regcache_sync_region(chip->regmap, chip->regs->output,
chip->regs->output + NBANK(chip));
@@ -860,9 +832,9 @@ static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
/* set platform specific polarity inversion */
if (invert)
- memset(val, 0xFF, NBANK(chip));
+ bitmap_fill(val, MAX_LINE);
else
- memset(val, 0, NBANK(chip));
+ bitmap_zero(val, MAX_LINE);
ret = pca953x_write_regs(chip, chip->regs->invert, val);
out:
@@ -871,8 +843,8 @@ out:
static int device_pca957x_init(struct pca953x_chip *chip, u32 invert)
{
+ DECLARE_BITMAP(val, MAX_LINE);
int ret;
- u8 val[MAX_BANK];
ret = device_pca95xx_init(chip, invert);
if (ret)
@@ -892,7 +864,7 @@ out:
static const struct of_device_id pca953x_dt_ids[];
static int pca953x_probe(struct i2c_client *client,
- const struct i2c_device_id *i2c_id)
+ const struct i2c_device_id *i2c_id)
{
struct pca953x_platform_data *pdata;
struct pca953x_chip *chip;
@@ -901,8 +873,7 @@ static int pca953x_probe(struct i2c_client *client,
u32 invert = 0;
struct regulator *reg;
- chip = devm_kzalloc(&client->dev,
- sizeof(struct pca953x_chip), GFP_KERNEL);
+ chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
if (chip == NULL)
return -ENOMEM;
@@ -1016,7 +987,7 @@ static int pca953x_probe(struct i2c_client *client,
if (pdata && pdata->setup) {
ret = pdata->setup(client, chip->gpio_chip.base,
- chip->gpio_chip.ngpio, pdata->context);
+ chip->gpio_chip.ngpio, pdata->context);
if (ret < 0)
dev_warn(&client->dev, "setup failed, %d\n", ret);
}
@@ -1036,7 +1007,7 @@ static int pca953x_remove(struct i2c_client *client)
if (pdata && pdata->teardown) {
ret = pdata->teardown(client, chip->gpio_chip.base,
- chip->gpio_chip.ngpio, pdata->context);
+ chip->gpio_chip.ngpio, pdata->context);
if (ret < 0)
dev_err(&client->dev, "teardown failed, %d\n", ret);
} else {
diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c
index df51dd08bdfe..638d6656ce73 100644
--- a/drivers/gpio/gpio-pci-idio-16.c
+++ b/drivers/gpio/gpio-pci-idio-16.c
@@ -100,45 +100,23 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
- size_t i;
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
- unsigned long port_state;
+ unsigned long offset;
+ unsigned long gpio_mask;
void __iomem *ports[] = {
&idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
&idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15,
};
+ void __iomem *port_addr;
+ unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ port_addr = ports[offset / 8];
+ port_state = ioread8(port_addr) & gpio_mask;
- /* read bits from current gpio port */
- port_state = ioread8(ports[i]);
-
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -178,30 +156,31 @@ static void idio_16_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
+ unsigned long offset;
+ unsigned long gpio_mask;
+ void __iomem *ports[] = {
+ &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
+ };
+ size_t index;
+ void __iomem *port_addr;
+ unsigned long bitmask;
unsigned long flags;
- unsigned int out_state;
+ unsigned long out_state;
- raw_spin_lock_irqsave(&idio16gpio->lock, flags);
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
+ port_addr = ports[index];
- /* process output lines 0-7 */
- if (*mask & 0xFF) {
- out_state = ioread8(&idio16gpio->reg->out0_7) & ~*mask;
- out_state |= *mask & *bits;
- iowrite8(out_state, &idio16gpio->reg->out0_7);
- }
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
+
+ raw_spin_lock_irqsave(&idio16gpio->lock, flags);
- /* shift to next output line word */
- *mask >>= 8;
+ out_state = ioread8(port_addr) & ~gpio_mask;
+ out_state |= bitmask;
+ iowrite8(out_state, port_addr);
- /* process output lines 8-15 */
- if (*mask & 0xFF) {
- *bits >>= 8;
- out_state = ioread8(&idio16gpio->reg->out8_15) & ~*mask;
- out_state |= *mask & *bits;
- iowrite8(out_state, &idio16gpio->reg->out8_15);
+ raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
}
-
- raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
}
static void idio_16_irq_ack(struct irq_data *data)
diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c
index 44c1e4fc489f..1d475794a50f 100644
--- a/drivers/gpio/gpio-pcie-idio-24.c
+++ b/drivers/gpio/gpio-pcie-idio-24.c
@@ -201,52 +201,34 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
- size_t i;
- const unsigned int gpio_reg_size = 8;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
- unsigned long port_state;
+ unsigned long offset;
+ unsigned long gpio_mask;
void __iomem *ports[] = {
&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
&idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7,
&idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23,
};
+ size_t index;
+ unsigned long port_state;
const unsigned long out_mode_mask = BIT(1);
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
/* read bits from current gpio port (port 6 is TTL GPIO) */
- if (i < 6)
- port_state = ioread8(ports[i]);
+ if (index < 6)
+ port_state = ioread8(ports[index]);
else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
port_state = ioread8(&idio24gpio->reg->ttl_out0_7);
else
port_state = ioread8(&idio24gpio->reg->ttl_in0_7);
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ port_state &= gpio_mask;
+
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -297,59 +279,48 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
- size_t i;
- unsigned long bits_offset;
+ unsigned long offset;
unsigned long gpio_mask;
- const unsigned int gpio_reg_size = 8;
- const unsigned long port_mask = GENMASK(gpio_reg_size, 0);
- unsigned long flags;
- unsigned int out_state;
void __iomem *ports[] = {
&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
&idio24gpio->reg->out16_23
};
+ size_t index;
+ unsigned long bitmask;
+ unsigned long flags;
+ unsigned long out_state;
const unsigned long out_mode_mask = BIT(1);
- const unsigned int ttl_offset = 48;
- const size_t ttl_i = BIT_WORD(ttl_offset);
- const unsigned int word_offset = ttl_offset % BITS_PER_LONG;
- const unsigned long ttl_mask = (mask[ttl_i] >> word_offset) & port_mask;
- const unsigned long ttl_bits = (bits[ttl_i] >> word_offset) & ttl_mask;
-
- /* set bits are processed a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
-
- /* check if any set bits for current port */
- gpio_mask = (*mask >> bits_offset) & port_mask;
- if (!gpio_mask) {
- /* no set bits for this port so move on to next port */
- continue;
- }
- raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+ for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+ index = offset / 8;
- /* process output lines */
- out_state = ioread8(ports[i]) & ~gpio_mask;
- out_state |= (*bits >> bits_offset) & gpio_mask;
- iowrite8(out_state, ports[i]);
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
- raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
- }
+ raw_spin_lock_irqsave(&idio24gpio->lock, flags);
- /* check if setting TTL lines and if they are in output mode */
- if (!ttl_mask || !(ioread8(&idio24gpio->reg->ctl) & out_mode_mask))
- return;
+ /* read bits from current gpio port (port 6 is TTL GPIO) */
+ if (index < 6) {
+ out_state = ioread8(ports[index]);
+ } else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) {
+ out_state = ioread8(&idio24gpio->reg->ttl_out0_7);
+ } else {
+ /* skip TTL GPIO if set for input */
+ raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+ continue;
+ }
- /* handle TTL output */
- raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+ /* set requested bit states */
+ out_state &= ~gpio_mask;
+ out_state |= bitmask;
- /* process output lines */
- out_state = ioread8(&idio24gpio->reg->ttl_out0_7) & ~ttl_mask;
- out_state |= ttl_bits;
- iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
+ /* write bits for current gpio port (port 6 is TTL GPIO) */
+ if (index < 6)
+ iowrite8(out_state, ports[index]);
+ else
+ iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
- raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+ raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+ }
}
static void idio_24_irq_ack(struct irq_data *data)
diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c
index 1331b2a94679..6698feabaced 100644
--- a/drivers/gpio/gpio-pisosr.c
+++ b/drivers/gpio/gpio-pisosr.c
@@ -96,16 +96,16 @@ static int pisosr_gpio_get_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct pisosr_gpio *gpio = gpiochip_get_data(chip);
- unsigned int nbytes = DIV_ROUND_UP(chip->ngpio, 8);
- unsigned int i, j;
+ unsigned long offset;
+ unsigned long gpio_mask;
+ unsigned long buffer_state;
pisosr_gpio_refresh(gpio);
bitmap_zero(bits, chip->ngpio);
- for (i = 0; i < nbytes; i++) {
- j = i / sizeof(unsigned long);
- bits[j] |= ((unsigned long) gpio->buffer[i])
- << (8 * (i % sizeof(unsigned long)));
+ for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+ buffer_state = gpio->buffer[offset / 8] & gpio_mask;
+ bitmap_set_value8(bits, buffer_state, offset);
}
return 0;
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
index bd203e8fa58e..7ec97499b7f7 100644
--- a/drivers/gpio/gpio-uniphier.c
+++ b/drivers/gpio/gpio-uniphier.c
@@ -15,9 +15,6 @@
#include <linux/spinlock.h>
#include <dt-bindings/gpio/uniphier-gpio.h>
-#define UNIPHIER_GPIO_BANK_MASK \
- GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0)
-
#define UNIPHIER_GPIO_IRQ_MAX_NUM 24
#define UNIPHIER_GPIO_PORT_DATA 0x0 /* data */
@@ -150,15 +147,11 @@ static void uniphier_gpio_set(struct gpio_chip *chip,
static void uniphier_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
- unsigned int bank, shift, bank_mask, bank_bits;
- int i;
+ unsigned long i, bank, bank_mask, bank_bits;
- for (i = 0; i < chip->ngpio; i += UNIPHIER_GPIO_LINES_PER_BANK) {
+ for_each_set_clump8(i, bank_mask, mask, chip->ngpio) {
bank = i / UNIPHIER_GPIO_LINES_PER_BANK;
- shift = i % BITS_PER_LONG;
- bank_mask = (mask[BIT_WORD(i)] >> shift) &
- UNIPHIER_GPIO_BANK_MASK;
- bank_bits = bits[BIT_WORD(i)] >> shift;
+ bank_bits = bitmap_get_value8(bits, i);
uniphier_gpio_bank_write(chip, bank, UNIPHIER_GPIO_PORT_DATA,
bank_mask, bank_bits);
diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index fe456bea81f6..cb510df2b014 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -129,42 +129,19 @@ static int ws16c48_gpio_get_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
- const unsigned int gpio_reg_size = 8;
- size_t i;
- const size_t num_ports = chip->ngpio / gpio_reg_size;
- unsigned int bits_offset;
- size_t word_index;
- unsigned int word_offset;
- unsigned long word_mask;
- const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+ unsigned long offset;
+ unsigned long gpio_mask;
+ unsigned int port_addr;
unsigned long port_state;
/* clear bits array to a clean slate */
bitmap_zero(bits, chip->ngpio);
- /* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < num_ports; i++) {
- /* gpio offset in bits array */
- bits_offset = i * gpio_reg_size;
+ for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+ port_addr = ws16c48gpio->base + offset / 8;
+ port_state = inb(port_addr) & gpio_mask;
- /* word index for bits array */
- word_index = BIT_WORD(bits_offset);
-
- /* gpio offset within current word of bits array */
- word_offset = bits_offset % BITS_PER_LONG;
-
- /* mask of get bits for current gpio within current word */
- word_mask = mask[word_index] & (port_mask << word_offset);
- if (!word_mask) {
- /* no get bits in this port so skip to next one */
- continue;
- }
-
- /* read bits from current gpio port */
- port_state = inb(ws16c48gpio->base + i);
-
- /* store acquired bits at respective bits array offset */
- bits[word_index] |= (port_state << word_offset) & word_mask;
+ bitmap_set_value8(bits, port_state, offset);
}
return 0;
@@ -198,39 +175,29 @@ static void ws16c48_gpio_set_multiple(struct gpio_chip *chip,
unsigned long *mask, unsigned long *bits)
{
struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
- unsigned int i;
- const unsigned int gpio_reg_size = 8;
- unsigned int port;
- unsigned int iomask;
- unsigned int bitmask;
+ unsigned long offset;
+ unsigned long gpio_mask;
+ size_t index;
+ unsigned int port_addr;
+ unsigned long bitmask;
unsigned long flags;
- /* set bits are evaluated a gpio register size at a time */
- for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
- /* no more set bits in this mask word; skip to the next word */
- if (!mask[BIT_WORD(i)]) {
- i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
- continue;
- }
-
- port = i / gpio_reg_size;
+ for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+ index = offset / 8;
+ port_addr = ws16c48gpio->base + index;
/* mask out GPIO configured for input */
- iomask = mask[BIT_WORD(i)] & ~ws16c48gpio->io_state[port];
- bitmask = iomask & bits[BIT_WORD(i)];
+ gpio_mask &= ~ws16c48gpio->io_state[index];
+ bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
/* update output state data and set device gpio register */
- ws16c48gpio->out_state[port] &= ~iomask;
- ws16c48gpio->out_state[port] |= bitmask;
- outb(ws16c48gpio->out_state[port], ws16c48gpio->base + port);
+ ws16c48gpio->out_state[index] &= ~gpio_mask;
+ ws16c48gpio->out_state[index] |= bitmask;
+ outb(ws16c48gpio->out_state[index], port_addr);
raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-
- /* prepare for next gpio register set */
- mask[BIT_WORD(i)] >>= gpio_reg_size;
- bits[BIT_WORD(i)] >>= gpio_reg_size;
}
}
diff --git a/drivers/media/platform/sti/delta/delta-ipc.c b/drivers/media/platform/sti/delta/delta-ipc.c
index 186d88f02ecd..371429d81ea1 100644
--- a/drivers/media/platform/sti/delta/delta-ipc.c
+++ b/drivers/media/platform/sti/delta/delta-ipc.c
@@ -175,8 +175,8 @@ int delta_ipc_open(struct delta_ctx *pctx, const char *name,
msg.ipc_buf_size = ipc_buf_size;
msg.ipc_buf_paddr = ctx->ipc_buf->paddr;
- memcpy(msg.name, name, sizeof(msg.name));
- msg.name[sizeof(msg.name) - 1] = 0;
+ memset(msg.name, 0, sizeof(msg.name));
+ strcpy(msg.name, name);
msg.param_size = param->size;
memcpy(ctx->ipc_buf->vaddr, param->data, msg.param_size);
diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c
index 426ad912b441..d054e2842a5f 100644
--- a/drivers/misc/sram-exec.c
+++ b/drivers/misc/sram-exec.c
@@ -96,7 +96,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
if (!part)
return NULL;
- if (!addr_in_gen_pool(pool, (unsigned long)dst, size))
+ if (!gen_pool_has_addr(pool, (unsigned long)dst, size))
return NULL;
base = (unsigned long)part->base;
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
index 21c370dbbfba..bddf2c5dd3bf 100644
--- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
+++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
@@ -10,6 +10,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/module.h>
+#include <linux/pinctrl/machine.h>
#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/pinmux.h>
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 5716b62e0f73..f75271b669c6 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
@@ -103,6 +104,7 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
int status;
u32 temp_out;
u32 out;
+ unsigned long update_ptps;
u32 store_ptps;
u32 store_ptmc;
u32 store_te_out;
@@ -120,8 +122,10 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
if (status)
return status;
- out = (store_ptps & ~(0xFF << (thres_index * 8)));
- out |= (temp_out & 0xFF) << (thres_index * 8);
+ update_ptps = store_ptps;
+ bitmap_set_value8(&update_ptps, temp_out & 0xFF, thres_index * 8);
+ out = update_ptps;
+
status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
SOC_DTS_OFFSET_PTPS, out);
if (status)
@@ -223,6 +227,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd,
u32 out;
struct intel_soc_dts_sensor_entry *dts;
struct intel_soc_dts_sensors *sensors;
+ unsigned long raw;
dts = tzd->devdata;
sensors = dts->sensors;
@@ -231,8 +236,8 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd,
if (status)
return status;
- out = (out & dts->temp_mask) >> dts->temp_shift;
- out -= SOC_DTS_TJMAX_ENCODING;
+ raw = out;
+ out = bitmap_get_value8(&raw, dts->id * 8) - SOC_DTS_TJMAX_ENCODING;
*temp = sensors->tj_max - out * 1000;
return 0;
@@ -280,11 +285,14 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
int read_only_trip_cnt)
{
char name[10];
+ unsigned long trip;
int trip_count = 0;
int trip_mask = 0;
+ int writable_trip_cnt = 0;
+ unsigned long ptps;
u32 store_ptps;
+ unsigned long i;
int ret;
- int i;
/* Store status to restor on exit */
ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
@@ -293,11 +301,10 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
goto err_ret;
dts->id = id;
- dts->temp_mask = 0x00FF << (id * 8);
- dts->temp_shift = id * 8;
if (notification_support) {
trip_count = min(SOC_MAX_DTS_TRIPS, trip_cnt);
- trip_mask = BIT(trip_count - read_only_trip_cnt) - 1;
+ writable_trip_cnt = trip_count - read_only_trip_cnt;
+ trip_mask = GENMASK(writable_trip_cnt - 1, 0);
}
/* Check if the writable trip we provide is not used by BIOS */
@@ -306,11 +313,9 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
if (ret)
trip_mask = 0;
else {
- for (i = 0; i < trip_count; ++i) {
- if (trip_mask & BIT(i))
- if (store_ptps & (0xff << (i * 8)))
- trip_mask &= ~BIT(i);
- }
+ ptps = store_ptps;
+ for_each_set_clump8(i, trip, &ptps, writable_trip_cnt * 8)
+ trip_mask &= ~BIT(i / 8);
}
dts->trip_mask = trip_mask;
dts->trip_count = trip_count;
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.h b/drivers/thermal/intel/intel_soc_dts_iosf.h
index adfb09af33fc..c54945748200 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.h
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.h
@@ -24,8 +24,6 @@ struct intel_soc_dts_sensors;
struct intel_soc_dts_sensor_entry {
int id;
- u32 temp_mask;
- u32 temp_shift;
u32 store_status;
u32 trip_mask;
u32 trip_count;
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 58bf9d496ba5..14a702feaa8d 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1349,7 +1349,7 @@ static int sci_dma_rx_submit(struct sci_port *s, bool port_lock_held)
{
struct dma_chan *chan = s->chan_rx;
struct uart_port *port = &s->port;
- unsigned long flags;
+ unsigned long uninitialized_var(flags);
int i;
for (i = 0; i < 2; i++) {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9442631fd4af..3ba9ae83bff5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -505,7 +505,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
#ifdef CONFIG_SHMEM
static int smaps_pte_hole(unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+ __always_unused int depth, struct mm_walk *walk)
{
struct mem_size_stats *mss = walk->private;
@@ -1282,7 +1282,7 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
}
static int pagemap_pte_hole(unsigned long start, unsigned long end,
- struct mm_walk *walk)
+ __always_unused int depth, struct mm_walk *walk)
{
struct pagemapread *pm = walk->private;
unsigned long addr = start;
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
deleted file mode 100644
index c86cf7cb4bba..000000000000
--- a/include/asm-generic/4level-fixup.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _4LEVEL_FIXUP_H
-#define _4LEVEL_FIXUP_H
-
-#define __ARCH_HAS_4LEVEL_HACK
-#define __PAGETABLE_PUD_FOLDED 1
-
-#define PUD_SHIFT PGDIR_SHIFT
-#define PUD_SIZE PGDIR_SIZE
-#define PUD_MASK PGDIR_MASK
-#define PTRS_PER_PUD 1
-
-#define pud_t pgd_t
-
-#define pmd_alloc(mm, pud, address) \
- ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
- NULL: pmd_offset(pud, address))
-
-#define pud_offset(pgd, start) (pgd)
-#define pud_none(pud) 0
-#define pud_bad(pud) 0
-#define pud_present(pud) 1
-#define pud_ERROR(pud) do { } while (0)
-#define pud_clear(pud) pgd_clear(pud)
-#define pud_val(pud) pgd_val(pud)
-#define pud_populate(mm, pud, pmd) pgd_populate(mm, pud, pmd)
-#define pud_page(pud) pgd_page(pud)
-#define pud_page_vaddr(pud) pgd_page_vaddr(pud)
-
-#undef pud_free_tlb
-#define pud_free_tlb(tlb, x, addr) do { } while (0)
-#define pud_free(mm, x) do { } while (0)
-
-#undef pud_addr_end
-#define pud_addr_end(addr, end) (end)
-
-#include <asm-generic/5level-fixup.h>
-
-#endif
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 8a1ee10014de..9fdf21302fdf 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -80,4 +80,21 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr,
#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
+/**
+ * find_next_clump8 - find next 8-bit clump with set bits in a memory region
+ * @clump: location to store copy of found clump
+ * @addr: address to base the search on
+ * @size: bitmap size in number of bits
+ * @offset: bit offset at which to start searching
+ *
+ * Returns the bit offset for the next set clump; the found clump value is
+ * copied to the location @clump points to. If no bits are set, returns @size.
+ */
+extern unsigned long find_next_clump8(unsigned long *clump,
+ const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+
+#define find_first_clump8(clump, bits, size) \
+ find_next_clump8((clump), (bits), (size), 0)
+
#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 798ea36a0549..e2e2bef07dd2 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1238,4 +1238,24 @@ static inline bool arch_has_pfn_modify_check(void)
#define mm_pmd_folded(mm) __is_defined(__PAGETABLE_PMD_FOLDED)
#endif
+/*
+ * p?d_leaf() - true if this entry is a final mapping to a physical address.
+ * This differs from p?d_huge() by the fact that they are always available (if
+ * the architecture supports large pages at the appropriate level) even
+ * if CONFIG_HUGETLB_PAGE is not defined.
+ * Only meaningful when called on a valid entry.
+ */
+#ifndef pgd_leaf
+#define pgd_leaf(x) 0
+#endif
+#ifndef p4d_leaf
+#define p4d_leaf(x) 0
+#endif
+#ifndef pud_leaf
+#define pud_leaf(x) 0
+#endif
+#ifndef pmd_leaf
+#define pmd_leaf(x) 0
+#endif
+
#endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 29fc933df3bf..ff335b22f23c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -53,6 +53,7 @@
* bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
+ * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask)
* bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
* bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
* bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap
@@ -66,6 +67,8 @@
* bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
* bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst
* bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst
+ * bitmap_get_value8(map, start) Get 8bit value from map at start
+ * bitmap_set_value8(map, value, start) Set 8bit value to map at start
*
* Note, bitmap_zero() and bitmap_fill() operate over the region of
* unsigned longs, that is, bits behind bitmap till the unsigned long
@@ -138,6 +141,9 @@ extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
+extern void __bitmap_replace(unsigned long *dst,
+ const unsigned long *old, const unsigned long *new,
+ const unsigned long *mask, unsigned int nbits);
extern int __bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_subset(const unsigned long *bitmap1,
@@ -432,6 +438,18 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr
__bitmap_shift_left(dst, src, shift, nbits);
}
+static inline void bitmap_replace(unsigned long *dst,
+ const unsigned long *old,
+ const unsigned long *new,
+ const unsigned long *mask,
+ unsigned int nbits)
+{
+ if (small_const_nbits(nbits))
+ *dst = (*old & ~(*mask)) | (*new & *mask);
+ else
+ __bitmap_replace(dst, old, new, mask, nbits);
+}
+
static inline int bitmap_parse(const char *buf, unsigned int buflen,
unsigned long *maskp, int nmaskbits)
{
@@ -489,6 +507,39 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
dst[1] = mask >> 32;
}
+/**
+ * bitmap_get_value8 - get an 8-bit value within a memory region
+ * @map: address to the bitmap memory region
+ * @start: bit offset of the 8-bit value; must be a multiple of 8
+ *
+ * Returns the 8-bit value located at the @start bit offset within the @map
+ * memory region.
+ */
+static inline unsigned long bitmap_get_value8(const unsigned long *map,
+ unsigned long start)
+{
+ const size_t index = BIT_WORD(start);
+ const unsigned long offset = start % BITS_PER_LONG;
+
+ return (map[index] >> offset) & 0xFF;
+}
+
+/**
+ * bitmap_set_value8 - set an 8-bit value within a memory region
+ * @map: address to the bitmap memory region
+ * @value: the 8-bit value; values wider than 8 bits may clobber bitmap
+ * @start: bit offset of the 8-bit value; must be a multiple of 8
+ */
+static inline void bitmap_set_value8(unsigned long *map, unsigned long value,
+ unsigned long start)
+{
+ const size_t index = BIT_WORD(start);
+ const unsigned long offset = start % BITS_PER_LONG;
+
+ map[index] &= ~(0xFFUL << offset);
+ map[index] |= value << offset;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __LINUX_BITMAP_H */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c94a9ff9f082..e479067c202c 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -47,6 +47,18 @@ extern unsigned long __sw_hweight64(__u64 w);
(bit) < (size); \
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+/**
+ * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
+ * @start: bit offset to start search and to store the current iteration offset
+ * @clump: location to store copy of current 8-bit clump
+ * @bits: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_clump8(start, clump, bits, size) \
+ for ((start) = find_first_clump8(&(clump), (bits), (size)); \
+ (start) < (size); \
+ (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
+
static inline int get_bitmask_order(unsigned int count)
{
int order;
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 4bd583bd6934..5b14a0f38124 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -206,7 +206,7 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev,
int min_alloc_order, int nid, const char *name);
extern struct gen_pool *gen_pool_get(struct device *dev, const char *name);
-bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+extern bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start,
size_t size);
#ifdef CONFIG_OF
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d83d403dac2e..351d731954fe 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -355,8 +355,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
* @res: Where to write the result of the conversion on success.
*
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
- * Used as a replacement for the obsolete simple_strtoull. Return code must
- * be checked.
+ * Used as a replacement for simple_strtoull(). Return code must be checked.
*/
static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
{
@@ -384,8 +383,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign
* @res: Where to write the result of the conversion on success.
*
* Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
- * Used as a replacement for the obsolete simple_strtoull. Return code must
- * be checked.
+ * Used as a replacement for simple_strtoull(). Return code must be checked.
*/
static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
{
@@ -461,7 +459,18 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t
return kstrtoint_from_user(s, count, base, res);
}
-/* Obsolete, do not use. Use kstrto<foo> instead */
+/*
+ * Use kstrto<foo> instead.
+ *
+ * NOTE: simple_strto<foo> does not check for range overflow and,
+ * depending on the input, may give interesting results.
+ *
+ * Use these functions if and only if you cannot use kstrto<foo>, because
+ * the conversion ends on the first non-digit character, which may be far
+ * beyond the supported range. It might be useful to parse strings like
+ * 10x50 or 12:21 without altering the original string or using a temporary
+ * buffer. Keep the above caveat in mind.
+ */
extern unsigned long simple_strtoul(const char *,char **,unsigned int);
extern long simple_strtol(const char *,char **,unsigned int);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60d947cdf65f..f0688fca3039 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1838,12 +1838,12 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
int __pte_alloc_kernel(pmd_t *pmd);
+#if defined(CONFIG_MMU)
+
/*
- * The following ifdef needed to get the 4level-fixup.h header to work.
- * Remove it when 4level-fixup.h has been removed.
+ * The following ifdef needed to get the 5level-fixup.h header to work.
+ * Remove it when 5level-fixup.h has been removed.
*/
-#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
-
#ifndef __ARCH_HAS_5LEVEL_HACK
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
unsigned long address)
@@ -1865,7 +1865,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
}
-#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+#endif /* CONFIG_MMU */
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index bddd9759bab9..94c9ad171f1c 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -8,24 +8,39 @@ struct mm_walk;
/**
* mm_walk_ops - callbacks for walk_page_range
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
- * this handler should only handle pud_trans_huge() puds.
- * the pmd_entry or pte_entry callbacks will be used for
- * regular PUDs.
- * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
+ * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
+ * @p4d_entry: if set, called for each non-empty P4D entry
+ * @pud_entry: if set, called for each non-empty PUD entry
+ * @pmd_entry: if set, called for each non-empty PMD entry
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
* split_huge_page() instead of handling it explicitly.
- * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
- * @pte_hole: if set, called for each hole at all levels
+ * @pte_entry: if set, called for each non-empty PTE (lowest-level)
+ * entry
+ * @pte_hole: if set, called for each hole at all levels,
+ * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD,
+ * 4:PTE. Any folded depths (where PTRS_PER_P?D is equal
+ * to 1) are skipped.
* @hugetlb_entry: if set, called for each hugetlb entry
* @test_walk: caller specific callback function to determine whether
* we walk over the current vma or not. Returning 0 means
* "do page table walk over the current vma", returning
* a negative value means "abort current page table walk
* right now" and returning 1 means "skip the current vma"
+ * @test_pmd: similar to test_walk(), but called for every pmd.
+ * @test_pud: similar to test_walk(), but called for every pud.
+ * @test_p4d: similar to test_walk(), but called for every p4d.
+ * Returning 0 means walk this part of the page tables,
+ * returning 1 means to skip this range.
+ *
+ * p?d_entry callbacks are called even if those levels are folded on a
+ * particular architecture/configuration.
*/
struct mm_walk_ops {
+ int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
+ int (*p4d_entry)(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
int (*pud_entry)(pud_t *pud, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
@@ -33,12 +48,18 @@ struct mm_walk_ops {
int (*pte_entry)(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pte_hole)(unsigned long addr, unsigned long next,
- struct mm_walk *walk);
+ int depth, struct mm_walk *walk);
int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long next,
struct mm_walk *walk);
int (*test_walk)(unsigned long addr, unsigned long next,
struct mm_walk *walk);
+ int (*test_pmd)(unsigned long addr, unsigned long next,
+ pmd_t *pmd_start, struct mm_walk *walk);
+ int (*test_pud)(unsigned long addr, unsigned long next,
+ pud_t *pud_start, struct mm_walk *walk);
+ int (*test_p4d)(unsigned long addr, unsigned long next,
+ p4d_t *p4d_start, struct mm_walk *walk);
};
/**
@@ -46,6 +67,7 @@ struct mm_walk_ops {
* @ops: operation to call during the walk
* @mm: mm_struct representing the target process of page table walk
* @vma: vma currently walked (NULL if walking outside vmas)
+ * @no_vma: walk ignoring vmas (vma will always be NULL)
* @private: private data for callbacks' usage
*
* (see the comment on walk_page_range() for more details)
@@ -54,12 +76,16 @@ struct mm_walk {
const struct mm_walk_ops *ops;
struct mm_struct *mm;
struct vm_area_struct *vma;
+ bool no_vma;
void *private;
};
int walk_page_range(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private);
+int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
+ unsigned long end, const struct mm_walk_ops *ops,
+ void *private);
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
void *private);
diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
new file mode 100644
index 000000000000..b28f3f2acf90
--- /dev/null
+++ b/include/linux/ptdump.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_PTDUMP_H
+#define _LINUX_PTDUMP_H
+
+#include <linux/mm_types.h>
+
+struct ptdump_range {
+ unsigned long start;
+ unsigned long end;
+};
+
+struct ptdump_state {
+ /* level is 0:PGD to 4:PTE, or -1 if unknown */
+ void (*note_page)(struct ptdump_state *st, unsigned long addr,
+ int level, unsigned long val);
+ const struct ptdump_range *range;
+};
+
+void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm);
+
+#endif /* _LINUX_PTDUMP_H */
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index d47bd40fc0f5..d14cbc83986a 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -178,7 +178,7 @@ bool dma_in_atomic_pool(void *start, size_t size)
if (unlikely(!atomic_pool))
return false;
- return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+ return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
}
void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 72361341bbee..4217bd26e220 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -173,6 +173,15 @@ config SYMBOLIC_ERRNAME
of the number 28. It makes the kernel image slightly larger
(about 3KB), but can make the kernel logs easier to read.
+config DEBUG_BUGVERBOSE
+ bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT
+ depends on BUG && (GENERIC_BUG || HAVE_DEBUG_BUGVERBOSE)
+ default y
+ help
+ Say Y here to make BUG() panics output the file name and line number
+ of the BUG call as well as the EIP and oops trace. This aids
+ debugging but costs about 70-100K of memory.
+
endmenu # "printk and dmesg options"
menu "Compile-time checks and compiler options"
@@ -286,18 +295,6 @@ config READABLE_ASM
to keep kernel developers who have to stare a lot at assembler listings
sane.
-config DEBUG_FS
- bool "Debug Filesystem"
- help
- debugfs is a virtual file system that kernel developers use to put
- debugging files into. Enable this option to be able to read and
- write to these files.
-
- For detailed documentation on the debugfs API, see
- Documentation/filesystems/.
-
- If unsure, say N.
-
config HEADERS_INSTALL
bool "Install uapi headers to usr/include"
depends on !UML
@@ -399,6 +396,8 @@ config DEBUG_FORCE_WEAK_PER_CPU
endmenu # "Compiler options"
+menu "Generic Kernel Debugging Instruments"
+
config MAGIC_SYSRQ
bool "Magic SysRq key"
depends on !UML
@@ -432,6 +431,26 @@ config MAGIC_SYSRQ_SERIAL
This option allows you to decide whether you want to enable the
magic SysRq key.
+config DEBUG_FS
+ bool "Debug Filesystem"
+ help
+ debugfs is a virtual file system that kernel developers use to put
+ debugging files into. Enable this option to be able to read and
+ write to these files.
+
+ For detailed documentation on the debugfs API, see
+ Documentation/filesystems/.
+
+ If unsure, say N.
+
+source "lib/Kconfig.kgdb"
+
+source "lib/Kconfig.ubsan"
+
+source "lib/Kconfig.kcsan"
+
+endmenu
+
config DEBUG_KERNEL
bool "Kernel debugging"
help
@@ -624,6 +643,18 @@ config DEBUG_STACK_USAGE
This option will slow down process creation somewhat.
+config SCHED_STACK_END_CHECK
+ bool "Detect stack corruption on calls to schedule()"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option checks for a stack overrun on calls to schedule().
+ If the stack end location is found to be overwritten, always panic as
+ the content of the corrupted region can no longer be trusted.
+ This is to ensure no erroneous behaviour occurs which could result in
+ data corruption or a sporadic crash at a later stage once the region
+ is examined. The runtime overhead introduced is minimal.
+
config DEBUG_VM
bool "Debug VM"
depends on DEBUG_KERNEL
@@ -756,53 +787,6 @@ source "lib/Kconfig.kasan"
endmenu # "Memory Debugging"
-config ARCH_HAS_KCOV
- bool
- help
- An architecture should select this when it can successfully
- build and run with CONFIG_KCOV. This typically requires
- disabling instrumentation for some early boot code.
-
-config CC_HAS_SANCOV_TRACE_PC
- def_bool $(cc-option,-fsanitize-coverage=trace-pc)
-
-config KCOV
- bool "Code coverage for fuzzing"
- depends on ARCH_HAS_KCOV
- depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
- select DEBUG_FS
- select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
- help
- KCOV exposes kernel code coverage information in a form suitable
- for coverage-guided fuzzing (randomized testing).
-
- If RANDOMIZE_BASE is enabled, PC values will not be stable across
- different machines and across reboots. If you need stable PC values,
- disable RANDOMIZE_BASE.
-
- For more details, see Documentation/dev-tools/kcov.rst.
-
-config KCOV_ENABLE_COMPARISONS
- bool "Enable comparison operands collection by KCOV"
- depends on KCOV
- depends on $(cc-option,-fsanitize-coverage=trace-cmp)
- help
- KCOV also exposes operands of every comparison in the instrumented
- code along with operand sizes and PCs of the comparison instructions.
- These operands can be used by fuzzing engines to improve the quality
- of fuzzing coverage.
-
-config KCOV_INSTRUMENT_ALL
- bool "Instrument all code by default"
- depends on KCOV
- default y
- help
- If you are doing generic system call fuzzing (like e.g. syzkaller),
- then you will want to instrument the whole kernel and you should
- say y here. If you are doing more targeted fuzzing (like e.g.
- filesystem fuzzing with AFL) then you will want to enable coverage
- for more specific subsets of files, and should say n here.
-
config DEBUG_SHIRQ
bool "Debug shared IRQ handlers"
depends on DEBUG_KERNEL
@@ -812,7 +796,35 @@ config DEBUG_SHIRQ
Drivers ought to be able to handle interrupts coming in at those
points; some don't and need to be caught.
-menu "Debug Lockups and Hangs"
+menu "Debug Oops, Lockups and Hangs"
+
+config PANIC_ON_OOPS
+ bool "Panic on Oops"
+ help
+ Say Y here to enable the kernel to panic when it oopses. This
+ has the same effect as setting oops=panic on the kernel command
+ line.
+
+ This feature is useful to ensure that the kernel does not do
+ anything erroneous after an oops which could result in data
+ corruption or other issues.
+
+ Say N if unsure.
+
+config PANIC_ON_OOPS_VALUE
+ int
+ range 0 1
+ default 0 if !PANIC_ON_OOPS
+ default 1 if PANIC_ON_OOPS
+
+config PANIC_TIMEOUT
+ int "panic timeout"
+ default 0
+ help
+ Set the timeout value (in seconds) until a reboot occurs when the
+ kernel panics. If n = 0, then we wait forever. A timeout
+ value n > 0 will wait n seconds before rebooting, while a timeout
+ value n < 0 will reboot immediately.
config LOCKUP_DETECTOR
bool
@@ -970,33 +982,7 @@ config WQ_WATCHDOG
endmenu # "Debug lockups and hangs"
-config PANIC_ON_OOPS
- bool "Panic on Oops"
- help
- Say Y here to enable the kernel to panic when it oopses. This
- has the same effect as setting oops=panic on the kernel command
- line.
-
- This feature is useful to ensure that the kernel does not do
- anything erroneous after an oops which could result in data
- corruption or other issues.
-
- Say N if unsure.
-
-config PANIC_ON_OOPS_VALUE
- int
- range 0 1
- default 0 if !PANIC_ON_OOPS
- default 1 if PANIC_ON_OOPS
-
-config PANIC_TIMEOUT
- int "panic timeout"
- default 0
- help
- Set the timeout value (in seconds) until a reboot occurs when the
- the kernel panics. If n = 0, then we wait forever. A timeout
- value n > 0 will wait n seconds before rebooting, while a timeout
- value n < 0 will reboot immediately.
+menu "Scheduler Debugging"
config SCHED_DEBUG
bool "Collect scheduler debugging info"
@@ -1024,17 +1010,7 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
-config SCHED_STACK_END_CHECK
- bool "Detect stack corruption on calls to schedule()"
- depends on DEBUG_KERNEL
- default n
- help
- This option checks for a stack overrun on calls to schedule().
- If the stack end location is found to be over written always panic as
- the content of the corrupted region can no longer be trusted.
- This is to ensure no erroneous behaviour occurs which could result in
- data corruption or a sporadic crash at a later stage once the region
- is examined. The runtime overhead introduced is minimal.
+endmenu
config DEBUG_TIMEKEEPING
bool "Enable extra timekeeping sanity checking"
@@ -1338,14 +1314,7 @@ config DEBUG_KOBJECT_RELEASE
config HAVE_DEBUG_BUGVERBOSE
bool
-config DEBUG_BUGVERBOSE
- bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT
- depends on BUG && (GENERIC_BUG || HAVE_DEBUG_BUGVERBOSE)
- default y
- help
- Say Y here to make BUG() panics output the file name and line number
- of the BUG call as well as the EIP and oops trace. This aids
- debugging but costs about 70-100K of memory.
+menu "Debug kernel data structures"
config DEBUG_LIST
bool "Debug linked list manipulation"
@@ -1386,6 +1355,18 @@ config DEBUG_NOTIFIERS
This is a relatively cheap check but if you care about maximum
performance, say N.
+config BUG_ON_DATA_CORRUPTION
+ bool "Trigger a BUG when data corruption is detected"
+ select DEBUG_LIST
+ help
+ Select this option if the kernel should BUG when it encounters
+ data corruption in kernel memory structures when they get checked
+ for validity.
+
+ If unsure, say N.
+
+endmenu
+
config DEBUG_CREDENTIALS
bool "Debug credential management"
depends on DEBUG_KERNEL
@@ -1458,164 +1439,6 @@ config CPU_HOTPLUG_STATE_CONTROL
Say N if your are unsure.
-config NOTIFIER_ERROR_INJECTION
- tristate "Notifier error injection"
- depends on DEBUG_KERNEL
- select DEBUG_FS
- help
- This option provides the ability to inject artificial errors to
- specified notifier chain callbacks. It is useful to test the error
- handling of notifier call chain failures.
-
- Say N if unsure.
-
-config PM_NOTIFIER_ERROR_INJECT
- tristate "PM notifier error injection module"
- depends on PM && NOTIFIER_ERROR_INJECTION
- default m if PM_DEBUG
- help
- This option provides the ability to inject artificial errors to
- PM notifier chain callbacks. It is controlled through debugfs
- interface /sys/kernel/debug/notifier-error-inject/pm
-
- If the notifier call chain should be failed with some events
- notified, write the error code to "actions/<notifier event>/error".
-
- Example: Inject PM suspend error (-12 = -ENOMEM)
-
- # cd /sys/kernel/debug/notifier-error-inject/pm/
- # echo -12 > actions/PM_SUSPEND_PREPARE/error
- # echo mem > /sys/power/state
- bash: echo: write error: Cannot allocate memory
-
- To compile this code as a module, choose M here: the module will
- be called pm-notifier-error-inject.
-
- If unsure, say N.
-
-config OF_RECONFIG_NOTIFIER_ERROR_INJECT
- tristate "OF reconfig notifier error injection module"
- depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION
- help
- This option provides the ability to inject artificial errors to
- OF reconfig notifier chain callbacks. It is controlled
- through debugfs interface under
- /sys/kernel/debug/notifier-error-inject/OF-reconfig/
-
- If the notifier call chain should be failed with some events
- notified, write the error code to "actions/<notifier event>/error".
-
- To compile this code as a module, choose M here: the module will
- be called of-reconfig-notifier-error-inject.
-
- If unsure, say N.
-
-config NETDEV_NOTIFIER_ERROR_INJECT
- tristate "Netdev notifier error injection module"
- depends on NET && NOTIFIER_ERROR_INJECTION
- help
- This option provides the ability to inject artificial errors to
- netdevice notifier chain callbacks. It is controlled through debugfs
- interface /sys/kernel/debug/notifier-error-inject/netdev
-
- If the notifier call chain should be failed with some events
- notified, write the error code to "actions/<notifier event>/error".
-
- Example: Inject netdevice mtu change error (-22 = -EINVAL)
-
- # cd /sys/kernel/debug/notifier-error-inject/netdev
- # echo -22 > actions/NETDEV_CHANGEMTU/error
- # ip link set eth0 mtu 1024
- RTNETLINK answers: Invalid argument
-
- To compile this code as a module, choose M here: the module will
- be called netdev-notifier-error-inject.
-
- If unsure, say N.
-
-config FUNCTION_ERROR_INJECTION
- def_bool y
- depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
-
-config FAULT_INJECTION
- bool "Fault-injection framework"
- depends on DEBUG_KERNEL
- help
- Provide fault-injection framework.
- For more details, see Documentation/fault-injection/.
-
-config FAILSLAB
- bool "Fault-injection capability for kmalloc"
- depends on FAULT_INJECTION
- depends on SLAB || SLUB
- help
- Provide fault-injection capability for kmalloc.
-
-config FAIL_PAGE_ALLOC
- bool "Fault-injection capabilitiy for alloc_pages()"
- depends on FAULT_INJECTION
- help
- Provide fault-injection capability for alloc_pages().
-
-config FAIL_MAKE_REQUEST
- bool "Fault-injection capability for disk IO"
- depends on FAULT_INJECTION && BLOCK
- help
- Provide fault-injection capability for disk IO.
-
-config FAIL_IO_TIMEOUT
- bool "Fault-injection capability for faking disk interrupts"
- depends on FAULT_INJECTION && BLOCK
- help
- Provide fault-injection capability on end IO handling. This
- will make the block layer "forget" an interrupt as configured,
- thus exercising the error handling.
-
- Only works with drivers that use the generic timeout handling,
- for others it wont do anything.
-
-config FAIL_FUTEX
- bool "Fault-injection capability for futexes"
- select DEBUG_FS
- depends on FAULT_INJECTION && FUTEX
- help
- Provide fault-injection capability for futexes.
-
-config FAULT_INJECTION_DEBUG_FS
- bool "Debugfs entries for fault-injection capabilities"
- depends on FAULT_INJECTION && SYSFS && DEBUG_FS
- help
- Enable configuration of fault-injection capabilities via debugfs.
-
-config FAIL_FUNCTION
- bool "Fault-injection capability for functions"
- depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
- help
- Provide function-based fault-injection capability.
- This will allow you to override a specific function with a return
- with given return value. As a result, function caller will see
- an error value and have to handle it. This is useful to test the
- error handling in various subsystems.
-
-config FAIL_MMC_REQUEST
- bool "Fault-injection capability for MMC IO"
- depends on FAULT_INJECTION_DEBUG_FS && MMC
- help
- Provide fault-injection capability for MMC IO.
- This will make the mmc core return data errors. This is
- useful to test the error handling in the mmc block device
- and to test how the mmc host driver handles retries from
- the block device.
-
-config FAULT_INJECTION_STACKTRACE_FILTER
- bool "stacktrace filter for fault-injection capabilities"
- depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
- depends on !X86_64
- select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86
- help
- Provide stacktrace filter for fault-injection capabilities
-
config LATENCYTOP
bool "Latency measuring infrastructure"
depends on DEBUG_KERNEL
@@ -1664,6 +1487,109 @@ config PROVIDE_OHCI1394_DMA_INIT
source "lib/kunit/Kconfig"
+source "samples/Kconfig"
+
+config ARCH_HAS_DEVMEM_IS_ALLOWED
+ bool
+
+config STRICT_DEVMEM
+ bool "Filter access to /dev/mem"
+ depends on MMU && DEVMEM
+ depends on ARCH_HAS_DEVMEM_IS_ALLOWED
+ default y if PPC || X86 || ARM64
+ ---help---
+ If this option is disabled, you allow userspace (root) access to all
+ of memory, including kernel and userspace memory. Accidental
+ access to this is obviously disastrous, but specific access can
+ be used by people debugging the kernel. Note that with PAT support
+ enabled, even in this case there are restrictions on /dev/mem
+ use due to the cache aliasing requirements.
+
+ If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
+ file only allows userspace access to PCI space and the BIOS code and
+ data regions. This is sufficient for dosemu and X and all common
+ users of /dev/mem.
+
+ If in doubt, say Y.
+
+config IO_STRICT_DEVMEM
+ bool "Filter I/O access to /dev/mem"
+ depends on STRICT_DEVMEM
+ ---help---
+ If this option is disabled, you allow userspace (root) access to all
+ io-memory regardless of whether a driver is actively using that
+ range. Accidental access to this is obviously disastrous, but
+ specific access can be used by people debugging kernel drivers.
+
+ If this option is switched on, the /dev/mem file only allows
+ userspace access to *idle* io-memory ranges (see /proc/iomem). This
+ may break traditional users of /dev/mem (dosemu, legacy X, etc...)
+ if the driver using a given range cannot be disabled.
+
+ If in doubt, say Y.
+
+config DEBUG_AID_FOR_SYZBOT
+ bool "Additional debug code for syzbot"
+ default n
+ help
+ This option is intended for testing by syzbot.
+
+menu "$(SRCARCH) Debugging"
+
+source "arch/$(SRCARCH)/Kconfig.debug"
+
+endmenu
+
+menu "Kernel Testing and Coverage"
+
+config ARCH_HAS_KCOV
+ bool
+ help
+ An architecture should select this when it can successfully
+ build and run with CONFIG_KCOV. This typically requires
+ disabling instrumentation for some early boot code.
+
+config CC_HAS_SANCOV_TRACE_PC
+ def_bool $(cc-option,-fsanitize-coverage=trace-pc)
+
+
+config KCOV
+ bool "Code coverage for fuzzing"
+ depends on ARCH_HAS_KCOV
+ depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
+ select DEBUG_FS
+ select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
+ help
+ KCOV exposes kernel code coverage information in a form suitable
+ for coverage-guided fuzzing (randomized testing).
+
+ If RANDOMIZE_BASE is enabled, PC values will not be stable across
+ different machines and across reboots. If you need stable PC values,
+ disable RANDOMIZE_BASE.
+
+ For more details, see Documentation/dev-tools/kcov.rst.
+
+config KCOV_ENABLE_COMPARISONS
+ bool "Enable comparison operands collection by KCOV"
+ depends on KCOV
+ depends on $(cc-option,-fsanitize-coverage=trace-cmp)
+ help
+ KCOV also exposes operands of every comparison in the instrumented
+ code along with operand sizes and PCs of the comparison instructions.
+ These operands can be used by fuzzing engines to improve the quality
+ of fuzzing coverage.
+
+config KCOV_INSTRUMENT_ALL
+ bool "Instrument all code by default"
+ depends on KCOV
+ default y
+ help
+ If you are doing generic system call fuzzing (like e.g. syzkaller),
+ then you will want to instrument the whole kernel and you should
+ say y here. If you are doing more targeted fuzzing (like e.g.
+ filesystem fuzzing with AFL) then you will want to enable coverage
+ for more specific subsets of files, and should say n here.
+
menuconfig RUNTIME_TESTING_MENU
bool "Runtime Testing"
def_bool y
@@ -2099,70 +2025,165 @@ config MEMTEST
memtest=17, mean do 17 test patterns.
If you are unsure how to answer this question, answer N.
-config BUG_ON_DATA_CORRUPTION
- bool "Trigger a BUG when data corruption is detected"
- select DEBUG_LIST
+config NOTIFIER_ERROR_INJECTION
+ tristate "Notifier error injection"
+ depends on DEBUG_KERNEL
+ select DEBUG_FS
help
- Select this option if the kernel should BUG when it encounters
- data corruption in kernel memory structures when they get checked
- for validity.
+ This option provides the ability to inject artificial errors to
+ specified notifier chain callbacks. It is useful to test the error
+ handling of notifier call chain failures.
+
+ Say N if unsure.
+
+config PM_NOTIFIER_ERROR_INJECT
+ tristate "PM notifier error injection module"
+ depends on PM && NOTIFIER_ERROR_INJECTION
+ default m if PM_DEBUG
+ help
+ This option provides the ability to inject artificial errors to
+ PM notifier chain callbacks. It is controlled through debugfs
+ interface /sys/kernel/debug/notifier-error-inject/pm
+
+ If the notifier call chain should be failed with some events
+ notified, write the error code to "actions/<notifier event>/error".
+
+ Example: Inject PM suspend error (-12 = -ENOMEM)
+
+ # cd /sys/kernel/debug/notifier-error-inject/pm/
+ # echo -12 > actions/PM_SUSPEND_PREPARE/error
+ # echo mem > /sys/power/state
+ bash: echo: write error: Cannot allocate memory
+
+ To compile this code as a module, choose M here: the module will
+ be called pm-notifier-error-inject.
If unsure, say N.
-source "samples/Kconfig"
+config OF_RECONFIG_NOTIFIER_ERROR_INJECT
+ tristate "OF reconfig notifier error injection module"
+ depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION
+ help
+ This option provides the ability to inject artificial errors to
+ OF reconfig notifier chain callbacks. It is controlled
+ through debugfs interface under
+ /sys/kernel/debug/notifier-error-inject/OF-reconfig/
-source "lib/Kconfig.kgdb"
+ If the notifier call chain should be failed with some events
+ notified, write the error code to "actions/<notifier event>/error".
-source "lib/Kconfig.ubsan"
+ To compile this code as a module, choose M here: the module will
+ be called of-reconfig-notifier-error-inject.
-source "lib/Kconfig.kcsan"
+ If unsure, say N.
-config ARCH_HAS_DEVMEM_IS_ALLOWED
- bool
+config NETDEV_NOTIFIER_ERROR_INJECT
+ tristate "Netdev notifier error injection module"
+ depends on NET && NOTIFIER_ERROR_INJECTION
+ help
+ This option provides the ability to inject artificial errors to
+ netdevice notifier chain callbacks. It is controlled through debugfs
+ interface /sys/kernel/debug/notifier-error-inject/netdev
-config STRICT_DEVMEM
- bool "Filter access to /dev/mem"
- depends on MMU && DEVMEM
- depends on ARCH_HAS_DEVMEM_IS_ALLOWED
- default y if PPC || X86 || ARM64
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- of memory, including kernel and userspace memory. Accidental
- access to this is obviously disastrous, but specific access can
- be used by people debugging the kernel. Note that with PAT support
- enabled, even in this case there are restrictions on /dev/mem
- use due to the cache aliasing requirements.
+ If the notifier call chain should be failed with some events
+ notified, write the error code to "actions/<notifier event>/error".
- If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
- file only allows userspace access to PCI space and the BIOS code and
- data regions. This is sufficient for dosemu and X and all common
- users of /dev/mem.
+ Example: Inject netdevice mtu change error (-22 = -EINVAL)
- If in doubt, say Y.
+ # cd /sys/kernel/debug/notifier-error-inject/netdev
+ # echo -22 > actions/NETDEV_CHANGEMTU/error
+ # ip link set eth0 mtu 1024
+ RTNETLINK answers: Invalid argument
-config IO_STRICT_DEVMEM
- bool "Filter I/O access to /dev/mem"
- depends on STRICT_DEVMEM
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- io-memory regardless of whether a driver is actively using that
- range. Accidental access to this is obviously disastrous, but
- specific access can be used by people debugging kernel drivers.
+ To compile this code as a module, choose M here: the module will
+ be called netdev-notifier-error-inject.
- If this option is switched on, the /dev/mem file only allows
- userspace access to *idle* io-memory ranges (see /proc/iomem) This
- may break traditional users of /dev/mem (dosemu, legacy X, etc...)
- if the driver using a given range cannot be disabled.
+ If unsure, say N.
- If in doubt, say Y.
+config FUNCTION_ERROR_INJECTION
+ def_bool y
+ depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
-config DEBUG_AID_FOR_SYZBOT
- bool "Additional debug code for syzbot"
- default n
- help
- This option is intended for testing by syzbot.
+config FAULT_INJECTION
+ bool "Fault-injection framework"
+ depends on DEBUG_KERNEL
+ help
+ Provide fault-injection framework.
+ For more details, see Documentation/fault-injection/.
-source "arch/$(SRCARCH)/Kconfig.debug"
+config FAILSLAB
+ bool "Fault-injection capability for kmalloc"
+ depends on FAULT_INJECTION
+ depends on SLAB || SLUB
+ help
+ Provide fault-injection capability for kmalloc.
+
+config FAIL_PAGE_ALLOC
+ bool "Fault-injection capability for alloc_pages()"
+ depends on FAULT_INJECTION
+ help
+ Provide fault-injection capability for alloc_pages().
+
+config FAIL_MAKE_REQUEST
+ bool "Fault-injection capability for disk IO"
+ depends on FAULT_INJECTION && BLOCK
+ help
+ Provide fault-injection capability for disk IO.
+
+config FAIL_IO_TIMEOUT
+ bool "Fault-injection capability for faking disk interrupts"
+ depends on FAULT_INJECTION && BLOCK
+ help
+ Provide fault-injection capability on end IO handling. This
+ will make the block layer "forget" an interrupt as configured,
+ thus exercising the error handling.
+
+ Only works with drivers that use the generic timeout handling,
+ for others it won't do anything.
+
+config FAIL_FUTEX
+ bool "Fault-injection capability for futexes"
+ select DEBUG_FS
+ depends on FAULT_INJECTION && FUTEX
+ help
+ Provide fault-injection capability for futexes.
+
+config FAULT_INJECTION_DEBUG_FS
+ bool "Debugfs entries for fault-injection capabilities"
+ depends on FAULT_INJECTION && SYSFS && DEBUG_FS
+ help
+ Enable configuration of fault-injection capabilities via debugfs.
+
+config FAIL_FUNCTION
+ bool "Fault-injection capability for functions"
+ depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
+ help
+ Provide function-based fault-injection capability.
+ This will allow you to override a specific function with a return
+ with given return value. As a result, function caller will see
+ an error value and have to handle it. This is useful to test the
+ error handling in various subsystems.
+
+config FAIL_MMC_REQUEST
+ bool "Fault-injection capability for MMC IO"
+ depends on FAULT_INJECTION_DEBUG_FS && MMC
+ help
+ Provide fault-injection capability for MMC IO.
+ This will make the mmc core return data errors. This is
+ useful to test the error handling in the mmc block device
+ and to test how the mmc host driver handles retries from
+ the block device.
+
+config FAULT_INJECTION_STACKTRACE_FILTER
+ bool "stacktrace filter for fault-injection capabilities"
+ depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
+ depends on !X86_64
+ select STACKTRACE
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86
+ help
+ Provide stacktrace filter for fault-injection capabilities
+
+endmenu # "Kernel Testing and Coverage"
config HYPERV_TESTING
bool "Microsoft Hyper-V driver testing"
diff --git a/lib/bitmap.c b/lib/bitmap.c
index f9e834841e94..4250519d7d1c 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -222,6 +222,18 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
}
EXPORT_SYMBOL(__bitmap_andnot);
+void __bitmap_replace(unsigned long *dst,
+ const unsigned long *old, const unsigned long *new,
+ const unsigned long *mask, unsigned int nbits)
+{ /* Per bit: dst takes the bit from new where mask is 1, from old where mask is 0 */
+ unsigned int k;
+ unsigned int nr = BITS_TO_LONGS(nbits); /* loop bound: count of unsigned longs derived from nbits */
+
+ for (k = 0; k < nr; k++) /* whole words are written; nothing masks off bits past nbits in the last word */
+ dst[k] = (old[k] & ~mask[k]) | (new[k] & mask[k]);
+}
+EXPORT_SYMBOL(__bitmap_replace);
+
int __bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits)
{
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 5c51eb45178a..e35a76b291e6 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -214,3 +214,17 @@ EXPORT_SYMBOL(find_next_bit_le);
#endif
#endif /* __BIG_ENDIAN */
+
+unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{ /* Find the next 8-bit-aligned group in addr that holds a set bit, searching from offset */
+ offset = find_next_bit(addr, size, offset);
+ if (offset == size) /* a find_next_bit() result equal to size is treated as "no set bit left" */
+ return size; /* *clump is not written on this path */
+
+ offset = round_down(offset, 8); /* move back to the 8-bit boundary at or below the set bit */
+ *clump = bitmap_get_value8(addr, offset); /* hand back the value read at that aligned offset */
+
+ return offset; /* aligned bit offset of the group just stored in *clump */
+}
+EXPORT_SYMBOL(find_next_clump8);
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 24d20ca7e91b..7f1244b5294a 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -540,7 +540,7 @@ void gen_pool_for_each_chunk(struct gen_pool *pool,
EXPORT_SYMBOL(gen_pool_for_each_chunk);
/**
- * addr_in_gen_pool - checks if an address falls within the range of a pool
+ * gen_pool_has_addr - checks if an address falls within the range of a pool
* @pool: the generic memory pool
* @start: start address
* @size: size of the region
@@ -548,7 +548,7 @@ EXPORT_SYMBOL(gen_pool_for_each_chunk);
* Check if the range of addresses falls within the specified pool. Returns
* true if the entire range is contained in the pool and false otherwise.
*/
-bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start,
size_t size)
{
bool found = false;
@@ -567,6 +567,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
rcu_read_unlock();
return found;
}
+EXPORT_SYMBOL(gen_pool_has_addr);
/**
* gen_pool_avail - get available free space of the pool
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 51a98f7ee79e..e14a15ac250b 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Test cases for printf facility.
+ * Test cases for bitmap API.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -21,6 +21,39 @@ static unsigned failed_tests __initdata;
static char pbl_buffer[PAGE_SIZE] __initdata;
+static const unsigned long exp1[] __initconst = {
+ BITMAP_FROM_U64(1),
+ BITMAP_FROM_U64(2),
+ BITMAP_FROM_U64(0x0000ffff),
+ BITMAP_FROM_U64(0xffff0000),
+ BITMAP_FROM_U64(0x55555555),
+ BITMAP_FROM_U64(0xaaaaaaaa),
+ BITMAP_FROM_U64(0x11111111),
+ BITMAP_FROM_U64(0x22222222),
+ BITMAP_FROM_U64(0xffffffff),
+ BITMAP_FROM_U64(0xfffffffe),
+ BITMAP_FROM_U64(0x3333333311111111ULL),
+ BITMAP_FROM_U64(0xffffffff77777777ULL),
+ BITMAP_FROM_U64(0),
+};
+
+static const unsigned long exp2[] __initconst = {
+ BITMAP_FROM_U64(0x3333333311111111ULL),
+ BITMAP_FROM_U64(0xffffffff77777777ULL),
+};
+
+/* Mask with bits 1, 2, 3, 5, 8, 13, 21, 33 and 55 set (roughly the Fibonacci sequence) */
+static const unsigned long exp2_to_exp3_mask[] __initconst = {
+ BITMAP_FROM_U64(0x008000020020212eULL),
+};
+/* exp3_0_1 = (exp2[0] & ~exp2_to_exp3_mask) | (exp2[1] & exp2_to_exp3_mask) */
+static const unsigned long exp3_0_1[] __initconst = {
+ BITMAP_FROM_U64(0x33b3333311313137ULL),
+};
+/* exp3_1_0 = (exp2[1] & ~exp2_to_exp3_mask) | (exp2[0] & exp2_to_exp3_mask) */
+static const unsigned long exp3_1_0[] __initconst = {
+ BITMAP_FROM_U64(0xff7fffff77575751ULL),
+};
static bool __init
__check_eq_uint(const char *srcfile, unsigned int line,
@@ -92,6 +125,36 @@ __check_eq_u32_array(const char *srcfile, unsigned int line,
return true;
}
+/*
+ * Check one (offset, clump) pair, as produced by for_each_set_clump8(),
+ * against a table of expected clump values.
+ *
+ * @srcfile, @line: call site, printed as the "[file:line]" warning prefix
+ * @offset: bit offset reported for the clump
+ * @size: size of the bitmap in bits; @offset must be below it
+ * @clump_exp: expected values, one byte per 8 bits of the bitmap
+ * @clump: the clump value that was actually read
+ *
+ * Returns true when @offset is in range, the expected byte at @offset / 8 is
+ * nonzero and equals *@clump; otherwise logs a warning and returns false.
+ */
+static bool __init __check_eq_clump8(const char *srcfile, unsigned int line,
+				     const unsigned int offset,
+				     const unsigned int size,
+				     const unsigned char *const clump_exp,
+				     const unsigned long *const clump)
+{
+	unsigned long exp;
+
+	if (offset >= size) {
+		pr_warn("[%s:%u] bit offset for clump out-of-bounds: expected less than %u, got %u\n",
+			srcfile, line, size, offset);
+		return false;
+	}
+
+	/* An all-zero expected byte means no clump should be reported here. */
+	exp = clump_exp[offset / 8];
+	if (!exp) {
+		pr_warn("[%s:%u] bit offset for zero clump: expected nonzero clump, got bit offset %u with clump value 0\n",
+			srcfile, line, offset);
+		return false;
+	}
+
+	if (*clump != exp) {
+		pr_warn("[%s:%u] expected clump value of 0x%lX, got clump value of 0x%lX\n",
+			srcfile, line, exp, *clump);
+		return false;
+	}
+
+	return true;
+}
+
#define __expect_eq(suffix, ...) \
({ \
int result = 0; \
@@ -108,6 +171,7 @@ __check_eq_u32_array(const char *srcfile, unsigned int line,
#define expect_eq_bitmap(...) __expect_eq(bitmap, ##__VA_ARGS__)
#define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__)
#define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__)
+#define expect_eq_clump8(...) __expect_eq(clump8, ##__VA_ARGS__)
static void __init test_zero_clear(void)
{
@@ -206,6 +270,30 @@ static void __init test_copy(void)
expect_eq_pbl("0-108,128-1023", bmap2, 1024);
}
+#define EXP2_IN_BITS (sizeof(exp2) * 8)
+
+/*
+ * Exercise bitmap_replace() on 64-bit operands: merge the two exp2 entries
+ * under exp2_to_exp3_mask in both directions and compare the result with
+ * exp3_0_1 and exp3_1_0. Each direction is run once on a zeroed and once on
+ * a filled destination.
+ */
+static void __init test_replace(void)
+{
+	/*
+	 * exp2[] is built with BITMAP_FROM_U64(), so it is indexed in units of
+	 * sizeof(u64) / sizeof(unsigned long) elements, the same way
+	 * parselist_tests indexes exp1[] with its "step". A plain &exp2[1]
+	 * only addresses the second entry when the two types have equal size.
+	 */
+	const unsigned int stride = sizeof(u64) / sizeof(unsigned long);
+	unsigned int nbits = 64;
+	DECLARE_BITMAP(bmap, 1024);
+
+	bitmap_zero(bmap, 1024);
+	bitmap_replace(bmap, &exp2[0 * stride], &exp2[1 * stride], exp2_to_exp3_mask, nbits);
+	expect_eq_bitmap(bmap, exp3_0_1, nbits);
+
+	bitmap_zero(bmap, 1024);
+	bitmap_replace(bmap, &exp2[1 * stride], &exp2[0 * stride], exp2_to_exp3_mask, nbits);
+	expect_eq_bitmap(bmap, exp3_1_0, nbits);
+
+	bitmap_fill(bmap, 1024);
+	bitmap_replace(bmap, &exp2[0 * stride], &exp2[1 * stride], exp2_to_exp3_mask, nbits);
+	expect_eq_bitmap(bmap, exp3_0_1, nbits);
+
+	bitmap_fill(bmap, 1024);
+	bitmap_replace(bmap, &exp2[1 * stride], &exp2[0 * stride], exp2_to_exp3_mask, nbits);
+	expect_eq_bitmap(bmap, exp3_1_0, nbits);
+}
+
#define PARSE_TIME 0x1
struct test_bitmap_parselist{
@@ -216,53 +304,32 @@ struct test_bitmap_parselist{
const int flags;
};
-static const unsigned long exp[] __initconst = {
- BITMAP_FROM_U64(1),
- BITMAP_FROM_U64(2),
- BITMAP_FROM_U64(0x0000ffff),
- BITMAP_FROM_U64(0xffff0000),
- BITMAP_FROM_U64(0x55555555),
- BITMAP_FROM_U64(0xaaaaaaaa),
- BITMAP_FROM_U64(0x11111111),
- BITMAP_FROM_U64(0x22222222),
- BITMAP_FROM_U64(0xffffffff),
- BITMAP_FROM_U64(0xfffffffe),
- BITMAP_FROM_U64(0x3333333311111111ULL),
- BITMAP_FROM_U64(0xffffffff77777777ULL),
- BITMAP_FROM_U64(0),
-};
-
-static const unsigned long exp2[] __initconst = {
- BITMAP_FROM_U64(0x3333333311111111ULL),
- BITMAP_FROM_U64(0xffffffff77777777ULL)
-};
-
static const struct test_bitmap_parselist parselist_tests[] __initconst = {
#define step (sizeof(u64) / sizeof(unsigned long))
- {0, "0", &exp[0], 8, 0},
- {0, "1", &exp[1 * step], 8, 0},
- {0, "0-15", &exp[2 * step], 32, 0},
- {0, "16-31", &exp[3 * step], 32, 0},
- {0, "0-31:1/2", &exp[4 * step], 32, 0},
- {0, "1-31:1/2", &exp[5 * step], 32, 0},
- {0, "0-31:1/4", &exp[6 * step], 32, 0},
- {0, "1-31:1/4", &exp[7 * step], 32, 0},
- {0, "0-31:4/4", &exp[8 * step], 32, 0},
- {0, "1-31:4/4", &exp[9 * step], 32, 0},
- {0, "0-31:1/4,32-63:2/4", &exp[10 * step], 64, 0},
- {0, "0-31:3/4,32-63:4/4", &exp[11 * step], 64, 0},
- {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp[11 * step], 64, 0},
+ {0, "0", &exp1[0], 8, 0},
+ {0, "1", &exp1[1 * step], 8, 0},
+ {0, "0-15", &exp1[2 * step], 32, 0},
+ {0, "16-31", &exp1[3 * step], 32, 0},
+ {0, "0-31:1/2", &exp1[4 * step], 32, 0},
+ {0, "1-31:1/2", &exp1[5 * step], 32, 0},
+ {0, "0-31:1/4", &exp1[6 * step], 32, 0},
+ {0, "1-31:1/4", &exp1[7 * step], 32, 0},
+ {0, "0-31:4/4", &exp1[8 * step], 32, 0},
+ {0, "1-31:4/4", &exp1[9 * step], 32, 0},
+ {0, "0-31:1/4,32-63:2/4", &exp1[10 * step], 64, 0},
+ {0, "0-31:3/4,32-63:4/4", &exp1[11 * step], 64, 0},
+ {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp1[11 * step], 64, 0},
{0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4", exp2, 128, 0},
{0, "0-2047:128/256", NULL, 2048, PARSE_TIME},
- {0, "", &exp[12 * step], 8, 0},
- {0, "\n", &exp[12 * step], 8, 0},
- {0, ",, ,, , , ,", &exp[12 * step], 8, 0},
- {0, " , ,, , , ", &exp[12 * step], 8, 0},
- {0, " , ,, , , \n", &exp[12 * step], 8, 0},
+ {0, "", &exp1[12 * step], 8, 0},
+ {0, "\n", &exp1[12 * step], 8, 0},
+ {0, ",, ,, , , ,", &exp1[12 * step], 8, 0},
+ {0, " , ,, , , ", &exp1[12 * step], 8, 0},
+ {0, " , ,, , , \n", &exp1[12 * step], 8, 0},
{-EINVAL, "-1", NULL, 8, 0},
{-EINVAL, "-0", NULL, 8, 0},
@@ -280,6 +347,8 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
{-EINVAL, "a-31:10/1", NULL, 8, 0},
{-EINVAL, "0-31:a/1", NULL, 8, 0},
{-EINVAL, "0-\n", NULL, 8, 0},
+
+#undef step
};
static void __init __test_bitmap_parselist(int is_user)
@@ -299,7 +368,7 @@ static void __init __test_bitmap_parselist(int is_user)
set_fs(KERNEL_DS);
time = ktime_get();
- err = bitmap_parselist_user(ptest.in, len,
+ err = bitmap_parselist_user((__force const char __user *)ptest.in, len,
bmap, ptest.nbits);
time = ktime_get() - time;
set_fs(orig_fs);
@@ -326,6 +395,8 @@ static void __init __test_bitmap_parselist(int is_user)
if (ptest.flags & PARSE_TIME)
pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n",
mode, i, ptest.in, time);
+
+#undef ptest
}
}
@@ -339,20 +410,20 @@ static void __init test_bitmap_parselist_user(void)
__test_bitmap_parselist(1);
}
-#define EXP_BYTES (sizeof(exp) * 8)
+#define EXP1_IN_BITS (sizeof(exp1) * 8)
static void __init test_bitmap_arr32(void)
{
unsigned int nbits, next_bit;
- u32 arr[sizeof(exp) / 4];
- DECLARE_BITMAP(bmap2, EXP_BYTES);
+ u32 arr[EXP1_IN_BITS / 32];
+ DECLARE_BITMAP(bmap2, EXP1_IN_BITS);
memset(arr, 0xa5, sizeof(arr));
- for (nbits = 0; nbits < EXP_BYTES; ++nbits) {
- bitmap_to_arr32(arr, exp, nbits);
+ for (nbits = 0; nbits < EXP1_IN_BITS; ++nbits) {
+ bitmap_to_arr32(arr, exp1, nbits);
bitmap_from_arr32(bmap2, arr, nbits);
- expect_eq_bitmap(bmap2, exp, nbits);
+ expect_eq_bitmap(bmap2, exp1, nbits);
next_bit = find_next_bit(bmap2,
round_up(nbits, BITS_PER_LONG), nbits);
@@ -361,7 +432,7 @@ static void __init test_bitmap_arr32(void)
" tail is not safely cleared: %d\n",
nbits, next_bit);
- if (nbits < EXP_BYTES - 32)
+ if (nbits < EXP1_IN_BITS - 32)
expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)],
0xa5a5a5a5);
}
@@ -404,15 +475,50 @@ static void noinline __init test_mem_optimisations(void)
}
}
+static const unsigned char clump_exp[] __initconst = {
+ 0x01, /* 1 bit set */
+ 0x02, /* non-edge 1 bit set */
+ 0x00, /* zero bits set */
+ 0x38, /* 3 bits set across 4-bit boundary */
+ 0x38, /* Repeated clump */
+ 0x0F, /* 4 bits set */
+ 0xFF, /* all bits set */
+ 0x05, /* non-adjacent 2 bits set */
+};
+
+static void __init test_for_each_set_clump8(void)
+{
+#define CLUMP_EXP_NUMBITS 64
+ DECLARE_BITMAP(bits, CLUMP_EXP_NUMBITS);
+ unsigned int start;
+ unsigned long clump;
+
+ /* set bitmap to test case: each comment below names the clump_exp[] byte being built; bits 16-23 stay clear (0x00) */
+ bitmap_zero(bits, CLUMP_EXP_NUMBITS);
+ bitmap_set(bits, 0, 1); /* 0x01 */
+ bitmap_set(bits, 9, 1); /* 0x02 */
+ bitmap_set(bits, 27, 3); /* 0x38 */
+ bitmap_set(bits, 35, 3); /* 0x38 */
+ bitmap_set(bits, 40, 4); /* 0x0F */
+ bitmap_set(bits, 48, 8); /* 0xFF */
+ bitmap_set(bits, 56, 1); /* 0x05 - part 1 */
+ bitmap_set(bits, 58, 1); /* 0x05 - part 2 */
+
+ for_each_set_clump8(start, clump, bits, CLUMP_EXP_NUMBITS) /* check each (start, clump) pair the iterator yields */
+ expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump);
+}
+
static void __init selftest(void)
{
test_zero_clear();
test_fill_set();
test_copy();
+ test_replace();
test_bitmap_arr32();
test_bitmap_parselist();
test_bitmap_parselist_user();
test_mem_optimisations();
+ test_for_each_set_clump8();
}
KSTM_MODULE_LOADERS(test_bitmap);
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 327b3ebf23bf..0271b22e063f 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -117,3 +117,24 @@ config DEBUG_RODATA_TEST
depends on STRICT_KERNEL_RWX
---help---
This option enables a testcase for the setting rodata read-only.
+
+config GENERIC_PTDUMP
+ bool
+
+config PTDUMP_CORE
+ bool
+
+config PTDUMP_DEBUGFS
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ depends on DEBUG_FS
+ depends on GENERIC_PTDUMP
+ select PTDUMP_CORE
+ help
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
diff --git a/mm/Makefile b/mm/Makefile
index 56c1964bb3a1..a4fa4985c1d7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_CMA) += cma.o
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
+obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
diff --git a/mm/hmm.c b/mm/hmm.c
index d379cb6496ae..981f9f8614f2 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -186,7 +186,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
}
static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+ __always_unused int depth, struct mm_walk *walk)
{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
@@ -380,7 +380,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
again:
pmd = READ_ONCE(*pmdp);
if (pmd_none(pmd))
- return hmm_vma_walk_hole(start, end, walk);
+ return hmm_vma_walk_hole(start, end, -1, walk);
if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
bool fault, write_fault;
@@ -482,7 +482,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
again:
pud = READ_ONCE(*pudp);
if (pud_none(pud))
- return hmm_vma_walk_hole(start, end, walk);
+ return hmm_vma_walk_hole(start, end, -1, walk);
if (pud_huge(pud) && pud_devmap(pud)) {
unsigned long i, npages, pfn;
@@ -490,7 +490,7 @@ again:
bool fault, write_fault;
if (!pud_present(pud))
- return hmm_vma_walk_hole(start, end, walk);
+ return hmm_vma_walk_hole(start, end, -1, walk);
i = (addr - range->start) >> PAGE_SHIFT;
npages = (end - addr) >> PAGE_SHIFT;
diff --git a/mm/memory.c b/mm/memory.c
index e455160e0f75..606da187d1de 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4197,19 +4197,11 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
smp_wmb(); /* See comment in __pte_alloc */
ptl = pud_lock(mm, pud);
-#ifndef __ARCH_HAS_4LEVEL_HACK
if (!pud_present(*pud)) {
mm_inc_nr_pmds(mm);
pud_populate(mm, pud, new);
} else /* Another has populated it */
pmd_free(mm, new);
-#else
- if (!pgd_present(*pud)) {
- mm_inc_nr_pmds(mm);
- pgd_populate(mm, pud, new);
- } else /* Another has populated it */
- pmd_free(mm, new);
-#endif /* __ARCH_HAS_4LEVEL_HACK */
spin_unlock(ptl);
return 0;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index eae1565285e3..616cd1ed04dd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2119,6 +2119,7 @@ out_unlock:
#ifdef CONFIG_DEVICE_PRIVATE
static int migrate_vma_collect_hole(unsigned long start,
unsigned long end,
+ __always_unused int depth,
struct mm_walk *walk)
{
struct migrate_vma *migrate = walk->private;
@@ -2163,7 +2164,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
again:
if (pmd_none(*pmdp))
- return migrate_vma_collect_hole(start, end, walk);
+ return migrate_vma_collect_hole(start, end, -1, walk);
if (pmd_trans_huge(*pmdp)) {
struct page *page;
@@ -2196,7 +2197,7 @@ again:
return migrate_vma_collect_skip(start, end,
walk);
if (pmd_none(*pmdp))
- return migrate_vma_collect_hole(start, end,
+ return migrate_vma_collect_hole(start, end, -1,
walk);
}
}
diff --git a/mm/mincore.c b/mm/mincore.c
index 49b6fa2f6aa1..0e6dd9948f1a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -112,6 +112,7 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
}
static int mincore_unmapped_range(unsigned long addr, unsigned long end,
+ __always_unused int depth,
struct mm_walk *walk)
{
walk->private += __mincore_unmapped_range(addr, end,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index d48c2a986ea3..70dcaa23598f 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -4,6 +4,22 @@
#include <linux/sched.h>
#include <linux/hugetlb.h>
+/*
+ * Map a nominal depth to the real level where an entry is located, ignoring
+ * any folding of levels which may be happening. For example, if p4d is folded
+ * then a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
+ */
+static int real_depth(int depth)
+{
+ if (depth == 3 && PTRS_PER_PMD == 1) /* pmd level folded away */
+ depth = 2;
+ if (depth == 2 && PTRS_PER_PUD == 1) /* pud level folded away */
+ depth = 1;
+ if (depth == 1 && PTRS_PER_P4D == 1) /* p4d level folded away */
+ depth = 0;
+ return depth; /* checks cascade, so 3 can collapse all the way to 0 */
+}
+
static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
@@ -33,14 +49,23 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
unsigned long next;
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
+ int depth = real_depth(3);
+
+ if (ops->test_pmd) {
+ err = ops->test_pmd(addr, end, pmd_offset(pud, 0UL), walk);
+ if (err < 0)
+ return err;
+ if (err > 0)
+ return 0;
+ }
pmd = pmd_offset(pud, addr);
do {
again:
next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd) || !walk->vma) {
+ if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, depth, walk);
if (err)
break;
continue;
@@ -61,9 +86,14 @@ again:
if (!ops->pte_entry)
continue;
- split_huge_pmd(walk->vma, pmd, addr);
- if (pmd_trans_unstable(pmd))
- goto again;
+ if (walk->vma) {
+ split_huge_pmd(walk->vma, pmd, addr);
+ if (pmd_trans_unstable(pmd))
+ goto again;
+ } else if (pmd_leaf(*pmd) || !pmd_present(*pmd)) {
+ continue;
+ }
+
err = walk_pte_range(pmd, addr, next, walk);
if (err)
break;
@@ -79,34 +109,41 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long next;
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
+ int depth = real_depth(2);
+
+ if (ops->test_pud) {
+ err = ops->test_pud(addr, end, pud_offset(p4d, 0UL), walk);
+ if (err < 0)
+ return err;
+ if (err > 0)
+ return 0;
+ }
pud = pud_offset(p4d, addr);
do {
again:
next = pud_addr_end(addr, end);
- if (pud_none(*pud) || !walk->vma) {
+ if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, depth, walk);
if (err)
break;
continue;
}
if (ops->pud_entry) {
- spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
-
- if (ptl) {
- err = ops->pud_entry(pud, addr, next, walk);
- spin_unlock(ptl);
- if (err)
- break;
- continue;
- }
+ err = ops->pud_entry(pud, addr, next, walk);
+ if (err)
+ break;
}
- split_huge_pud(walk->vma, pud, addr);
- if (pud_none(*pud))
- goto again;
+ if (walk->vma) {
+ split_huge_pud(walk->vma, pud, addr);
+ if (pud_none(*pud))
+ goto again;
+ } else if (pud_leaf(*pud) || !pud_present(*pud)) {
+ continue;
+ }
if (ops->pmd_entry || ops->pte_entry)
err = walk_pmd_range(pud, addr, next, walk);
@@ -124,18 +161,32 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
unsigned long next;
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
+ int depth = real_depth(1);
+
+ if (ops->test_p4d) {
+ err = ops->test_p4d(addr, end, p4d_offset(pgd, 0UL), walk);
+ if (err < 0)
+ return err;
+ if (err > 0)
+ return 0;
+ }
p4d = p4d_offset(pgd, addr);
do {
next = p4d_addr_end(addr, end);
if (p4d_none_or_clear_bad(p4d)) {
if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, depth, walk);
if (err)
break;
continue;
}
- if (ops->pmd_entry || ops->pte_entry)
+ if (ops->p4d_entry) {
+ err = ops->p4d_entry(p4d, addr, next, walk);
+ if (err)
+ break;
+ }
+ if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
err = walk_pud_range(p4d, addr, next, walk);
if (err)
break;
@@ -157,12 +208,18 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) {
if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, 0, walk);
if (err)
break;
continue;
}
- if (ops->pmd_entry || ops->pte_entry)
+ if (ops->pgd_entry) {
+ err = ops->pgd_entry(pgd, addr, next, walk);
+ if (err)
+ break;
+ }
+ if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
+ ops->pte_entry)
err = walk_p4d_range(pgd, addr, next, walk);
if (err)
break;
@@ -198,7 +255,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
if (pte)
err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
else if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, -1, walk);
if (err)
break;
@@ -242,7 +299,7 @@ static int walk_page_test(unsigned long start, unsigned long end,
if (vma->vm_flags & VM_PFNMAP) {
int err = 1;
if (ops->pte_hole)
- err = ops->pte_hole(start, end, walk);
+ err = ops->pte_hole(start, end, -1, walk);
return err ? err : 1;
}
return 0;
@@ -353,6 +410,25 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
return err;
}
+int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
+ unsigned long end, const struct mm_walk_ops *ops,
+ void *private)
+{
+ struct mm_walk walk = {
+ .ops = ops,
+ .mm = mm,
+ .private = private, /* opaque cookie handed to the callbacks */
+ .no_vma = true /* walk the tables even where walk->vma is NULL */
+ };
+
+ if (start >= end || !walk.mm) /* empty/inverted range or no mm */
+ return -EINVAL;
+
+ lockdep_assert_held(&walk.mm->mmap_sem); /* caller must hold mmap_sem */
+
+ return __walk_page_range(start, end, &walk);
+}
+
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
void *private)
{
diff --git a/mm/ptdump.c b/mm/ptdump.c
new file mode 100644
index 000000000000..39b0773b6172
--- /dev/null
+++ b/mm/ptdump.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pagewalk.h>
+#include <linux/ptdump.h>
+#include <linux/kasan.h>
+
+static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pgd_t val = READ_ONCE(*pgd); /* snapshot the entry once */
+
+ if (pgd_leaf(val)) /* only leaf entries are reported here */
+ st->note_page(st, addr, 0, pgd_val(val)); /* level 0: pgd */
+
+ return 0; /* never aborts the walk */
+}
+
+static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ p4d_t val = READ_ONCE(*p4d); /* snapshot the entry once */
+
+ if (p4d_leaf(val)) /* only leaf entries are reported here */
+ st->note_page(st, addr, 1, p4d_val(val)); /* level 1: p4d */
+
+ return 0; /* never aborts the walk */
+}
+
+static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pud_t val = READ_ONCE(*pud); /* snapshot the entry once */
+
+ if (pud_leaf(val)) /* only leaf entries are reported here */
+ st->note_page(st, addr, 2, pud_val(val)); /* level 2: pud */
+
+ return 0; /* never aborts the walk */
+}
+
+static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pmd_t val = READ_ONCE(*pmd); /* snapshot the entry once */
+
+ if (pmd_leaf(val)) /* only leaf entries are reported here */
+ st->note_page(st, addr, 3, pmd_val(val)); /* level 3: pmd */
+
+ return 0; /* never aborts the walk */
+}
+
+static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, 4, pte_val(READ_ONCE(*pte))); /* level 4: pte */
+
+ return 0; /* never aborts the walk */
+}
+
+#ifdef CONFIG_KASAN
+/*
+ * This is an optimization for the KASAN=y case. Since all kasan page tables
+ * eventually point to the kasan_early_shadow_page, we can call note_page()
+ * right away without walking through lower-level page tables. This saves
+ * us dozens of seconds (minutes for a 5-level config) while checking for
+ * W+X mappings or reading the kernel_page_tables debugfs file.
+ */
+static inline int note_kasan_page_table(struct mm_walk *walk,
+ unsigned long addr)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, 4, pte_val(kasan_early_shadow_pte[0]));
+ return 1; /* > 0 makes the test_p?d caller skip walking this table */
+}
+
+static int ptdump_test_p4d(unsigned long addr, unsigned long next,
+ p4d_t *p4d, struct mm_walk *walk)
+{
+#if CONFIG_PGTABLE_LEVELS > 4
+ if (p4d == lm_alias(kasan_early_shadow_p4d)) /* shared KASAN table */
+ return note_kasan_page_table(walk, addr);
+#endif
+ return 0; /* not the KASAN early shadow table: walk it normally */
+}
+
+static int ptdump_test_pud(unsigned long addr, unsigned long next,
+ pud_t *pud, struct mm_walk *walk)
+{
+#if CONFIG_PGTABLE_LEVELS > 3
+ if (pud == lm_alias(kasan_early_shadow_pud)) /* shared KASAN table */
+ return note_kasan_page_table(walk, addr);
+#endif
+ return 0; /* not the KASAN early shadow table: walk it normally */
+}
+
+static int ptdump_test_pmd(unsigned long addr, unsigned long next,
+ pmd_t *pmd, struct mm_walk *walk)
+{
+#if CONFIG_PGTABLE_LEVELS > 2
+ if (pmd == lm_alias(kasan_early_shadow_pmd)) /* shared KASAN table */
+ return note_kasan_page_table(walk, addr);
+#endif
+ return 0; /* not the KASAN early shadow table: walk it normally */
+}
+#endif /* CONFIG_KASAN */
+
+static int ptdump_hole(unsigned long addr, unsigned long next,
+ int depth, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, depth, 0); /* a hole is reported with value 0 */
+
+ return 0; /* never aborts the walk */
+}
+
+static const struct mm_walk_ops ptdump_ops = {
+ .pgd_entry = ptdump_pgd_entry,
+ .p4d_entry = ptdump_p4d_entry,
+ .pud_entry = ptdump_pud_entry,
+ .pmd_entry = ptdump_pmd_entry,
+ .pte_entry = ptdump_pte_entry,
+#ifdef CONFIG_KASAN
+ .test_p4d = ptdump_test_p4d, /* test_* hooks exist only with KASAN */
+ .test_pud = ptdump_test_pud,
+ .test_pmd = ptdump_test_pmd,
+#endif
+ .pte_hole = ptdump_hole, /* receives the depth of the missing entry */
+};
+
+void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm)
+{
+ const struct ptdump_range *range = st->range;
+
+ down_read(&mm->mmap_sem); /* walk_page_range_novma() asserts this lock */
+ while (range->start != range->end) { /* start == end ends the list */
+ walk_page_range_novma(mm, range->start, range->end,
+ &ptdump_ops, st); /* NOTE(review): result is ignored */
+ range++;
+ }
+ up_read(&mm->mmap_sem);
+
+ /* Flush out the last page */
+ st->note_page(st, 0, -1, 0);
+}