aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Capper <steve.capper@linaro.org>2014-02-05 09:06:30 +0000
committerSteve Capper <steve.capper@linaro.org>2014-02-05 15:16:16 +0000
commit57b13264e194c8544a834d5abc89e0a0fe0489e6 (patch)
treef224cb112f07516785a9ee24af651c8fe0f7f9bf
parent0f1cb8ed1319413ed58ca14a8625727ab5c7382b (diff)
downloadlinux-57b13264e194c8544a834d5abc89e0a0fe0489e6.tar.gz
arm64: mm: implement get_user_pages_fastfast_gup/combined
An implementation of get_user_pages_fast for arm64. It is based on the arm implementation (it has the added ability to walk huge puds) which is loosely on the PowerPC implementation. We disable interrupts in the walker to prevent the call_rcu_sched pagetable freeing code from running under us. We also explicitly fire an IPI in the Transparent HugePage splitting case to prevent splits from interfering with the fast_gup walker. As THP splits are relatively rare, this should not have a noticable overhead. Signed-off-by: Steve Capper <steve.capper@linaro.org>
-rw-r--r--arch/arm64/include/asm/pgtable.h4
-rw-r--r--arch/arm64/mm/Makefile2
-rw-r--r--arch/arm64/mm/gup.c297
3 files changed, 302 insertions, 1 deletions
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7f2b60affbb..8cfa1aae3d1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -212,6 +212,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+struct vm_area_struct;
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp);
#endif
#define PMD_BIT_FUNC(fn,op) \
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index b51d36401d8..44b21485161 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -1,5 +1,5 @@
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
- context.o tlb.o proc.o
+ context.o tlb.o proc.o gup.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/arm64/mm/gup.c b/arch/arm64/mm/gup.c
new file mode 100644
index 00000000000..45dd908e4d0
--- /dev/null
+++ b/arch/arm64/mm/gup.c
@@ -0,0 +1,297 @@
+/*
+ * arch/arm64/mm/gup.c
+ *
+ * Copyright (C) 2014 Linaro Ltd.
+ *
+ * Based on arch/powerpc/mm/gup.c which is:
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <linux/hugetlb.h>
+#include <asm/pgtable.h>
+
+static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ pte_t *ptep, *ptem;
+ int ret = 0;
+
+ ptem = ptep = pte_offset_map(&pmd, addr);
+ do {
+ pte_t pte = ACCESS_ONCE(*ptep);
+ struct page *page;
+
+ if (!pte_valid_user(pte) || (write && !pte_write(pte)))
+ goto pte_unmap;
+
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+
+ if (!page_cache_get_speculative(page))
+ goto pte_unmap;
+
+ if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+ put_page(page);
+ goto pte_unmap;
+ }
+
+ pages[*nr] = page;
+ (*nr)++;
+
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
+
+ ret = 1;
+
+pte_unmap:
+ pte_unmap(ptem);
+ return ret;
+}
+
+static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ struct page *head, *page, *tail;
+ int refs;
+
+ if (!pmd_present(orig) || (write && !pmd_write(orig)))
+ return 0;
+
+ refs = 0;
+ head = pmd_page(orig);
+ page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ /*
+ * Any tail pages need their mapcount reference taken before we
+ * return. (This allows the THP code to bump their ref count when
+ * they are split into base pages).
+ */
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
+ }
+
+ return 1;
+}
+
+static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ struct page *head, *page, *tail;
+ pmd_t origpmd = __pmd(pud_val(orig));
+ int refs;
+
+ if (!pmd_present(origpmd) || (write && !pmd_write(origpmd)))
+ return 0;
+
+ refs = 0;
+ head = pmd_page(origpmd);
+ page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pud_val(orig) != pud_val(*pudp))) {
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
+ }
+
+ return 1;
+}
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pmd_t *pmdp;
+
+ pmdp = pmd_offset(&pud, addr);
+ do {
+ pmd_t pmd = ACCESS_ONCE(*pmdp);
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+ return 0;
+
+ if (unlikely(pmd_huge(pmd) || pmd_trans_huge(pmd))) {
+ if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
+ pages, nr))
+ return 0;
+ } else {
+ if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+ return 0;
+ }
+ } while (pmdp++, addr = next, addr != end);
+
+ return 1;
+}
+
+static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pud_t *pudp;
+
+ pudp = pud_offset(pgdp, addr);
+ do {
+ pud_t pud = ACCESS_ONCE(*pudp);
+ next = pud_addr_end(addr, end);
+ if (pud_none(pud))
+ return 0;
+ if (pud_huge(pud)) {
+ if (!gup_huge_pud(pud, pudp, addr, next, write,
+ pages, nr))
+ return 0;
+ }
+ else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+ return 0;
+ } while (pudp++, addr = next, addr != end);
+
+ return 1;
+}
+
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next, flags;
+ pgd_t *pgdp;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ start, len)))
+ return 0;
+
+ /*
+ * Disable interrupts, we use the nested form as we can already
+ * have interrupts disabled by get_futex_key.
+ *
+ * With interrupts disabled, we block page table pages from being
+ * freed from under us. See mmu_gather_tlb in asm-generic/tlb.h
+ * for more details.
+ */
+
+ local_irq_save(flags);
+ pgdp = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(*pgdp))
+ break;
+ else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr))
+ break;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_restore(flags);
+
+ return nr;
+}
+
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ int nr, ret;
+
+ start &= PAGE_MASK;
+ nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ ret = nr;
+
+ if (nr < nr_pages) {
+ /* Try to get the remaining pages with get_user_pages */
+ start += nr << PAGE_SHIFT;
+ pages += nr;
+
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm, start,
+ nr_pages - nr, write, 0, pages, NULL);
+ up_read(&mm->mmap_sem);
+
+ /* Have to be a bit careful with return values */
+ if (nr > 0) {
+ if (ret < 0)
+ ret = nr;
+ else
+ ret += nr;
+ }
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void thp_splitting_flush_sync(void *arg)
+{
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ pmd_t pmd = pmd_mksplitting(*pmdp);
+ VM_BUG_ON(address & ~PMD_MASK);
+ set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+
+ /* dummy IPI to serialise against fast_gup */
+ smp_call_function(thp_splitting_flush_sync, NULL, 1);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */