aboutsummaryrefslogtreecommitdiff
path: root/arch/arm/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/mm')
-rw-r--r--arch/arm/mm/context.c35
-rw-r--r--arch/arm/mm/dma-mapping.c579
-rw-r--r--arch/arm/mm/init.c4
-rw-r--r--arch/arm/mm/ioremap.c2
-rw-r--r--arch/arm/mm/mm.h7
-rw-r--r--arch/arm/mm/mmu.c82
-rw-r--r--arch/arm/mm/proc-v6.S6
-rw-r--r--arch/arm/mm/proc-v7-2level.S5
-rw-r--r--arch/arm/mm/tlb-v7.S12
9 files changed, 377 insertions, 355 deletions
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 806cc4f6351..119bc52ab93 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <asm/mmu_context.h>
+#include <asm/thread_notify.h>
#include <asm/tlbflush.h>
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
@@ -48,6 +49,40 @@ void cpu_set_reserved_ttbr0(void)
}
#endif
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd,
+ void *t)
+{
+ u32 contextidr;
+ pid_t pid;
+ struct thread_info *thread = t;
+
+ if (cmd != THREAD_NOTIFY_SWITCH)
+ return NOTIFY_DONE;
+
+ pid = task_pid_nr(thread->task) << ASID_BITS;
+ asm volatile(
+ " mrc p15, 0, %0, c13, c0, 1\n"
+ " bfi %1, %0, #0, %2\n"
+ " mcr p15, 0, %1, c13, c0, 1\n"
+ : "=r" (contextidr), "+r" (pid)
+ : "I" (ASID_BITS));
+ isb();
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block contextidr_notifier_block = {
+ .notifier_call = contextidr_notifier,
+};
+
+static int __init contextidr_notifier_init(void)
+{
+ return thread_register_notifier(&contextidr_notifier_block);
+}
+arch_initcall(contextidr_notifier_init);
+#endif
+
/*
* We fork()ed a process, and we need a new context for the child
* to run in.
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 106c4c0ebcc..4e7d1182e8a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -22,13 +22,14 @@
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/iommu.h>
+#include <linux/io.h>
#include <linux/vmalloc.h>
+#include <linux/sizes.h>
#include <asm/memory.h>
#include <asm/highmem.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-#include <asm/sizes.h>
#include <asm/mach/arch.h>
#include <asm/dma-iommu.h>
#include <asm/mach/map.h>
@@ -72,7 +73,7 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- if (!arch_is_coherent())
+ if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_cpu_to_dev(page, offset, size, dir);
return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}
@@ -95,7 +96,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- if (!arch_is_coherent())
+ if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
handle & ~PAGE_MASK, size, dir);
}
@@ -124,6 +125,7 @@ struct dma_map_ops arm_dma_ops = {
.alloc = arm_dma_alloc,
.free = arm_dma_free,
.mmap = arm_dma_mmap,
+ .get_sgtable = arm_dma_get_sgtable,
.map_page = arm_dma_map_page,
.unmap_page = arm_dma_unmap_page,
.map_sg = arm_dma_map_sg,
@@ -217,115 +219,70 @@ static void __dma_free_buffer(struct page *page, size_t size)
}
#ifdef CONFIG_MMU
+#ifdef CONFIG_HUGETLB_PAGE
+#error ARM Coherent DMA allocator does not (yet) support huge TLB
+#endif
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT)
-#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT)
-
-/*
- * These are the page tables (2MB each) covering uncached, DMA consistent allocations
- */
-static pte_t **consistent_pte;
-
-#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+ pgprot_t prot, struct page **ret_page);
-unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE;
+static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+ pgprot_t prot, struct page **ret_page,
+ const void *caller);
-void __init init_consistent_dma_size(unsigned long size)
+static void *
+__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
+ const void *caller)
{
- unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M);
+ struct vm_struct *area;
+ unsigned long addr;
- BUG_ON(consistent_pte); /* Check we're called before DMA region init */
- BUG_ON(base < VMALLOC_END);
+ /*
+ * DMA allocation can be mapped to user space, so lets
+ * set VM_USERMAP flags too.
+ */
+ area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+ caller);
+ if (!area)
+ return NULL;
+ addr = (unsigned long)area->addr;
+ area->phys_addr = __pfn_to_phys(page_to_pfn(page));
- /* Grow region to accommodate specified size */
- if (base < consistent_base)
- consistent_base = base;
+ if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) {
+ vunmap((void *)addr);
+ return NULL;
+ }
+ return (void *)addr;
}
-#include "vmregion.h"
-
-static struct arm_vmregion_head consistent_head = {
- .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_end = CONSISTENT_END,
-};
-
-#ifdef CONFIG_HUGETLB_PAGE
-#error ARM Coherent DMA allocator does not (yet) support huge TLB
-#endif
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void)
+static void __dma_free_remap(void *cpu_addr, size_t size)
{
- int ret = 0;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- int i = 0;
- unsigned long base = consistent_base;
- unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
-
- if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
- return 0;
-
- consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
- if (!consistent_pte) {
- pr_err("%s: no memory\n", __func__);
- return -ENOMEM;
+ unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP;
+ struct vm_struct *area = find_vm_area(cpu_addr);
+ if (!area || (area->flags & flags) != flags) {
+ WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+ return;
}
-
- pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END);
- consistent_head.vm_start = base;
-
- do {
- pgd = pgd_offset(&init_mm, base);
-
- pud = pud_alloc(&init_mm, pgd, base);
- if (!pud) {
- pr_err("%s: no pud tables\n", __func__);
- ret = -ENOMEM;
- break;
- }
-
- pmd = pmd_alloc(&init_mm, pud, base);
- if (!pmd) {
- pr_err("%s: no pmd tables\n", __func__);
- ret = -ENOMEM;
- break;
- }
- WARN_ON(!pmd_none(*pmd));
-
- pte = pte_alloc_kernel(pmd, base);
- if (!pte) {
- pr_err("%s: no pte tables\n", __func__);
- ret = -ENOMEM;
- break;
- }
-
- consistent_pte[i++] = pte;
- base += PMD_SIZE;
- } while (base < CONSISTENT_END);
-
- return ret;
+ unmap_kernel_range((unsigned long)cpu_addr, size);
+ vunmap(cpu_addr);
}
-core_initcall(consistent_init);
-static void *__alloc_from_contiguous(struct device *dev, size_t size,
- pgprot_t prot, struct page **ret_page);
-
-static struct arm_vmregion_head coherent_head = {
- .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
- .vm_list = LIST_HEAD_INIT(coherent_head.vm_list),
+struct dma_pool {
+ size_t size;
+ spinlock_t lock;
+ unsigned long *bitmap;
+ unsigned long nr_pages;
+ void *vaddr;
+ struct page *page;
};
-size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
+static struct dma_pool atomic_pool = {
+ .size = SZ_256K,
+};
static int __init early_coherent_pool(char *p)
{
- coherent_pool_size = memparse(p, &p);
+ atomic_pool.size = memparse(p, &p);
return 0;
}
early_param("coherent_pool", early_coherent_pool);
@@ -333,32 +290,45 @@ early_param("coherent_pool", early_coherent_pool);
/*
* Initialise the coherent pool for atomic allocations.
*/
-static int __init coherent_init(void)
+static int __init atomic_pool_init(void)
{
+ struct dma_pool *pool = &atomic_pool;
pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
- size_t size = coherent_pool_size;
+ unsigned long nr_pages = pool->size >> PAGE_SHIFT;
+ unsigned long *bitmap;
struct page *page;
void *ptr;
+ int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
- if (!IS_ENABLED(CONFIG_CMA))
- return 0;
+ bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+ if (!bitmap)
+ goto no_bitmap;
- ptr = __alloc_from_contiguous(NULL, size, prot, &page);
+ if (IS_ENABLED(CONFIG_CMA))
+ ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page);
+ else
+ ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot,
+ &page, NULL);
if (ptr) {
- coherent_head.vm_start = (unsigned long) ptr;
- coherent_head.vm_end = (unsigned long) ptr + size;
- printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
- (unsigned)size / 1024);
+ spin_lock_init(&pool->lock);
+ pool->vaddr = ptr;
+ pool->page = page;
+ pool->bitmap = bitmap;
+ pool->nr_pages = nr_pages;
+ pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
+ (unsigned)pool->size / 1024);
return 0;
}
- printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
- (unsigned)size / 1024);
+ kfree(bitmap);
+no_bitmap:
+ pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
+ (unsigned)pool->size / 1024);
return -ENOMEM;
}
/*
* CMA is activated by core_initcall, so we must be called after it.
*/
-postcore_initcall(coherent_init);
+postcore_initcall(atomic_pool_init);
struct dma_contig_early_reserve {
phys_addr_t base;
@@ -388,7 +358,7 @@ void __init dma_contiguous_remap(void)
if (end > arm_lowmem_limit)
end = arm_lowmem_limit;
if (start >= end)
- return;
+ continue;
map.pfn = __phys_to_pfn(start);
map.virtual = __phys_to_virt(start);
@@ -406,112 +376,6 @@ void __init dma_contiguous_remap(void)
}
}
-static void *
-__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
- const void *caller)
-{
- struct arm_vmregion *c;
- size_t align;
- int bit;
-
- if (!consistent_pte) {
- pr_err("%s: not initialised\n", __func__);
- dump_stack();
- return NULL;
- }
-
- /*
- * Align the virtual region allocation - maximum alignment is
- * a section size, minimum is a page size. This helps reduce
- * fragmentation of the DMA space, and also prevents allocations
- * smaller than a section from crossing a section boundary.
- */
- bit = fls(size - 1);
- if (bit > SECTION_SHIFT)
- bit = SECTION_SHIFT;
- align = 1 << bit;
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = arm_vmregion_alloc(&consistent_head, align, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
- if (c) {
- pte_t *pte;
- int idx = CONSISTENT_PTE_INDEX(c->vm_start);
- u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
- pte = consistent_pte[idx] + off;
- c->priv = page;
-
- do {
- BUG_ON(!pte_none(*pte));
-
- set_pte_ext(pte, mk_pte(page, prot), 0);
- page++;
- pte++;
- off++;
- if (off >= PTRS_PER_PTE) {
- off = 0;
- pte = consistent_pte[++idx];
- }
- } while (size -= PAGE_SIZE);
-
- dsb();
-
- return (void *)c->vm_start;
- }
- return NULL;
-}
-
-static void __dma_free_remap(void *cpu_addr, size_t size)
-{
- struct arm_vmregion *c;
- unsigned long addr;
- pte_t *ptep;
- int idx;
- u32 off;
-
- c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
- if (!c) {
- pr_err("%s: trying to free invalid coherent area: %p\n",
- __func__, cpu_addr);
- dump_stack();
- return;
- }
-
- if ((c->vm_end - c->vm_start) != size) {
- pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
- }
-
- idx = CONSISTENT_PTE_INDEX(c->vm_start);
- off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
- ptep = consistent_pte[idx] + off;
- addr = c->vm_start;
- do {
- pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
-
- ptep++;
- addr += PAGE_SIZE;
- off++;
- if (off >= PTRS_PER_PTE) {
- off = 0;
- ptep = consistent_pte[++idx];
- }
-
- if (pte_none(pte) || !pte_present(pte))
- pr_crit("%s: bad page in kernel page table\n",
- __func__);
- } while (size -= PAGE_SIZE);
-
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
- arm_vmregion_free(&consistent_head, c);
-}
-
static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
void *data)
{
@@ -552,16 +416,17 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
return ptr;
}
-static void *__alloc_from_pool(struct device *dev, size_t size,
- struct page **ret_page, const void *caller)
+static void *__alloc_from_pool(size_t size, struct page **ret_page)
{
- struct arm_vmregion *c;
- size_t align;
+ struct dma_pool *pool = &atomic_pool;
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned int pageno;
+ unsigned long flags;
+ void *ptr = NULL;
+ unsigned long align_mask;
- if (!coherent_head.vm_start) {
- printk(KERN_ERR "%s: coherent pool not initialised!\n",
- __func__);
- dump_stack();
+ if (!pool->vaddr) {
+ WARN(1, "coherent pool not initialised!\n");
return NULL;
}
@@ -570,36 +435,42 @@ static void *__alloc_from_pool(struct device *dev, size_t size,
* small, so align them to their order in pages, minimum is a page
* size. This helps reduce fragmentation of the DMA space.
*/
- align = PAGE_SIZE << get_order(size);
- c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
- if (c) {
- void *ptr = (void *)c->vm_start;
- struct page *page = virt_to_page(ptr);
- *ret_page = page;
- return ptr;
+ align_mask = (1 << get_order(size)) - 1;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages,
+ 0, count, align_mask);
+ if (pageno < pool->nr_pages) {
+ bitmap_set(pool->bitmap, pageno, count);
+ ptr = pool->vaddr + PAGE_SIZE * pageno;
+ *ret_page = pool->page + pageno;
}
- return NULL;
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ return ptr;
}
-static int __free_from_pool(void *cpu_addr, size_t size)
+static int __free_from_pool(void *start, size_t size)
{
- unsigned long start = (unsigned long)cpu_addr;
- unsigned long end = start + size;
- struct arm_vmregion *c;
+ struct dma_pool *pool = &atomic_pool;
+ unsigned long pageno, count;
+ unsigned long flags;
- if (start < coherent_head.vm_start || end > coherent_head.vm_end)
+ if (start < pool->vaddr || start > pool->vaddr + pool->size)
return 0;
- c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);
-
- if ((c->vm_end - c->vm_start) != size) {
- printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
+ if (start + size > pool->vaddr + pool->size) {
+ WARN(1, "freeing wrong coherent size from pool\n");
+ return 0;
}
- arm_vmregion_free(&coherent_head, c);
+ pageno = (start - pool->vaddr) >> PAGE_SHIFT;
+ count = size >> PAGE_SHIFT;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ bitmap_clear(pool->bitmap, pageno, count);
+ spin_unlock_irqrestore(&pool->lock, flags);
+
return 1;
}
@@ -644,7 +515,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
#define __get_dma_pgprot(attrs, prot) __pgprot(0)
#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL
-#define __alloc_from_pool(dev, size, ret_page, c) NULL
+#define __alloc_from_pool(size, ret_page) NULL
#define __alloc_from_contiguous(dev, size, prot, ret) NULL
#define __free_from_pool(cpu_addr, size) 0
#define __free_from_contiguous(dev, page, size) do { } while (0)
@@ -702,10 +573,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (arch_is_coherent() || nommu())
addr = __alloc_simple_buffer(dev, size, gfp, &page);
+ else if (gfp & GFP_ATOMIC)
+ addr = __alloc_from_pool(size, &page);
else if (!IS_ENABLED(CONFIG_CMA))
addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
- else if (gfp & GFP_ATOMIC)
- addr = __alloc_from_pool(dev, size, &page, caller);
else
addr = __alloc_from_contiguous(dev, size, prot, &page);
@@ -741,16 +612,22 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
{
int ret = -ENXIO;
#ifdef CONFIG_MMU
+ unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long pfn = dma_to_pfn(dev, dma_addr);
+ unsigned long off = vma->vm_pgoff;
+
vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
- ret = remap_pfn_range(vma, vma->vm_start,
- pfn + vma->vm_pgoff,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
+ if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ pfn + off,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ }
#endif /* CONFIG_MMU */
return ret;
@@ -771,12 +648,12 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
if (arch_is_coherent() || nommu()) {
__dma_free_buffer(page, size);
+ } else if (__free_from_pool(cpu_addr, size)) {
+ return;
} else if (!IS_ENABLED(CONFIG_CMA)) {
__dma_free_remap(cpu_addr, size);
__dma_free_buffer(page, size);
} else {
- if (__free_from_pool(cpu_addr, size))
- return;
/*
* Non-atomic allocations cannot be freed with IRQs disabled
*/
@@ -785,6 +662,21 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
}
}
+int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t handle, size_t size,
+ struct dma_attrs *attrs)
+{
+ struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+ int ret;
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (unlikely(ret))
+ return ret;
+
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ return 0;
+}
+
static void dma_cache_maint_page(struct page *page, unsigned long offset,
size_t size, enum dma_data_direction dir,
void (*op)(const void *, size_t, int))
@@ -998,9 +890,6 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
static int __init dma_debug_do_init(void)
{
-#ifdef CONFIG_MMU
- arm_vmregion_create_proc("dma-mappings", &consistent_head);
-#endif
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
return 0;
}
@@ -1067,7 +956,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t
return NULL;
while (count) {
- int j, order = __ffs(count);
+ int j, order = __fls(count);
pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
while (!pages[i] && order)
@@ -1088,10 +977,10 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t
return pages;
error:
- while (--i)
+ while (i--)
if (pages[i])
__free_pages(pages[i], 0);
- if (array_size < PAGE_SIZE)
+ if (array_size <= PAGE_SIZE)
kfree(pages);
else
vfree(pages);
@@ -1106,7 +995,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
for (i = 0; i < count; i++)
if (pages[i])
__free_pages(pages[i], 0);
- if (array_size < PAGE_SIZE)
+ if (array_size <= PAGE_SIZE)
kfree(pages);
else
vfree(pages);
@@ -1117,61 +1006,32 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
* Create a CPU mapping for a specified pages
*/
static void *
-__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
+__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
+ const void *caller)
{
- struct arm_vmregion *c;
- size_t align;
- size_t count = size >> PAGE_SHIFT;
- int bit;
+ unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct vm_struct *area;
+ unsigned long p;
- if (!consistent_pte[0]) {
- pr_err("%s: not initialised\n", __func__);
- dump_stack();
+ area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+ caller);
+ if (!area)
return NULL;
- }
-
- /*
- * Align the virtual region allocation - maximum alignment is
- * a section size, minimum is a page size. This helps reduce
- * fragmentation of the DMA space, and also prevents allocations
- * smaller than a section from crossing a section boundary.
- */
- bit = fls(size - 1);
- if (bit > SECTION_SHIFT)
- bit = SECTION_SHIFT;
- align = 1 << bit;
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = arm_vmregion_alloc(&consistent_head, align, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
- if (c) {
- pte_t *pte;
- int idx = CONSISTENT_PTE_INDEX(c->vm_start);
- int i = 0;
- u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
- pte = consistent_pte[idx] + off;
- c->priv = pages;
-
- do {
- BUG_ON(!pte_none(*pte));
-
- set_pte_ext(pte, mk_pte(pages[i], prot), 0);
- pte++;
- off++;
- i++;
- if (off >= PTRS_PER_PTE) {
- off = 0;
- pte = consistent_pte[++idx];
- }
- } while (i < count);
- dsb();
+ area->pages = pages;
+ area->nr_pages = nr_pages;
+ p = (unsigned long)area->addr;
- return (void *)c->vm_start;
+ for (i = 0; i < nr_pages; i++) {
+ phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i]));
+ if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot))
+ goto err;
+ p += PAGE_SIZE;
}
+ return area->addr;
+err:
+ unmap_kernel_range((unsigned long)area->addr, size);
+ vunmap(area->addr);
return NULL;
}
@@ -1230,6 +1090,19 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
return 0;
}
+static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
+{
+ struct vm_struct *area;
+
+ if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+ return cpu_addr;
+
+ area = find_vm_area(cpu_addr);
+ if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
+ return area->pages;
+ return NULL;
+}
+
static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
{
@@ -1248,7 +1121,11 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
if (*handle == DMA_ERROR_CODE)
goto err_buffer;
- addr = __iommu_alloc_remap(pages, size, gfp, prot);
+ if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+ return pages;
+
+ addr = __iommu_alloc_remap(pages, size, gfp, prot,
+ __builtin_return_address(0));
if (!addr)
goto err_mapping;
@@ -1265,31 +1142,25 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
struct dma_attrs *attrs)
{
- struct arm_vmregion *c;
+ unsigned long uaddr = vma->vm_start;
+ unsigned long usize = vma->vm_end - vma->vm_start;
+ struct page **pages = __iommu_get_pages(cpu_addr, attrs);
vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
- c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-
- if (c) {
- struct page **pages = c->priv;
-
- unsigned long uaddr = vma->vm_start;
- unsigned long usize = vma->vm_end - vma->vm_start;
- int i = 0;
- do {
- int ret;
+ if (!pages)
+ return -ENXIO;
- ret = vm_insert_page(vma, uaddr, pages[i++]);
- if (ret) {
- pr_err("Remapping memory, error: %d\n", ret);
- return ret;
- }
+ do {
+ int ret = vm_insert_page(vma, uaddr, *pages++);
+ if (ret) {
+ pr_err("Remapping memory failed: %d\n", ret);
+ return ret;
+ }
+ uaddr += PAGE_SIZE;
+ usize -= PAGE_SIZE;
+ } while (usize > 0);
- uaddr += PAGE_SIZE;
- usize -= PAGE_SIZE;
- } while (usize > 0);
- }
return 0;
}
@@ -1300,16 +1171,35 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t handle, struct dma_attrs *attrs)
{
- struct arm_vmregion *c;
+ struct page **pages = __iommu_get_pages(cpu_addr, attrs);
size = PAGE_ALIGN(size);
- c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
- if (c) {
- struct page **pages = c->priv;
- __dma_free_remap(cpu_addr, size);
- __iommu_remove_mapping(dev, handle, size);
- __iommu_free_buffer(dev, pages, size);
+ if (!pages) {
+ WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+ return;
}
+
+ if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
+ unmap_kernel_range((unsigned long)cpu_addr, size);
+ vunmap(cpu_addr);
+ }
+
+ __iommu_remove_mapping(dev, handle, size);
+ __iommu_free_buffer(dev, pages, size);
+}
+
+static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr,
+ size_t size, struct dma_attrs *attrs)
+{
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+
+ if (!pages)
+ return -ENXIO;
+
+ return sg_alloc_table_from_pages(sgt, pages, count, 0, size,
+ GFP_KERNEL);
}
/*
@@ -1317,7 +1207,7 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
*/
static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
size_t size, dma_addr_t *handle,
- enum dma_data_direction dir)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
struct dma_iommu_mapping *mapping = dev->archdata.mapping;
dma_addr_t iova, iova_base;
@@ -1336,7 +1226,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
phys_addr_t phys = page_to_phys(sg_page(s));
unsigned int len = PAGE_ALIGN(s->offset + s->length);
- if (!arch_is_coherent())
+ if (!arch_is_coherent() &&
+ !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
ret = iommu_map(mapping->domain, iova, phys, len, 0);
@@ -1383,7 +1274,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
if (__map_sg_chunk(dev, start, size, &dma->dma_address,
- dir) < 0)
+ dir, attrs) < 0)
goto bad_mapping;
dma->dma_address += offset;
@@ -1396,7 +1287,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
}
size += s->length;
}
- if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
+ if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs) < 0)
goto bad_mapping;
dma->dma_address += offset;
@@ -1430,7 +1321,8 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
if (sg_dma_len(s))
__iommu_remove_mapping(dev, sg_dma_address(s),
sg_dma_len(s));
- if (!arch_is_coherent())
+ if (!arch_is_coherent() &&
+ !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_dev_to_cpu(sg_page(s), s->offset,
s->length, dir);
}
@@ -1492,7 +1384,7 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
dma_addr_t dma_addr;
int ret, len = PAGE_ALIGN(size + offset);
- if (!arch_is_coherent())
+ if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_cpu_to_dev(page, offset, size, dir);
dma_addr = __alloc_iova(mapping, len);
@@ -1531,7 +1423,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
if (!iova)
return;
- if (!arch_is_coherent())
+ if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_dev_to_cpu(page, offset, size, dir);
iommu_unmap(mapping->domain, iova, len);
@@ -1571,6 +1463,7 @@ struct dma_map_ops iommu_ops = {
.alloc = arm_iommu_alloc_attrs,
.free = arm_iommu_free_attrs,
.mmap = arm_iommu_mmap_attrs,
+ .get_sgtable = arm_iommu_get_sgtable,
.map_page = arm_iommu_map_page,
.unmap_page = arm_iommu_unmap_page,
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index c21d06c7dd7..9aec41fa80a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -21,13 +21,13 @@
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
+#include <linux/sizes.h>
#include <asm/mach-types.h>
#include <asm/memblock.h>
#include <asm/prom.h>
#include <asm/sections.h>
#include <asm/setup.h>
-#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
@@ -212,7 +212,7 @@ EXPORT_SYMBOL(arm_dma_zone_size);
* allocations. This must be the smallest DMA mask in the system,
* so a successful GFP_DMA allocation will always satisfy this.
*/
-u32 arm_dma_limit;
+phys_addr_t arm_dma_limit;
static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole,
unsigned long dma_size)
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 4f55f5062ab..566750fa57d 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -25,6 +25,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
+#include <linux/sizes.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
@@ -32,7 +33,6 @@
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-#include <asm/sizes.h>
#include <asm/system_info.h>
#include <asm/mach/map.h>
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 93dc0c17cdc..6776160618e 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -59,12 +59,15 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
#define VM_ARM_MTYPE(mt) ((mt) << 20)
#define VM_ARM_MTYPE_MASK (0x1f << 20)
+/* consistent regions used by dma_alloc_attrs() */
+#define VM_ARM_DMA_CONSISTENT 0x20000000
+
#endif
#ifdef CONFIG_ZONE_DMA
-extern u32 arm_dma_limit;
+extern phys_addr_t arm_dma_limit;
#else
-#define arm_dma_limit ((u32)~0)
+#define arm_dma_limit ((phys_addr_t)~0)
#endif
extern phys_addr_t arm_lowmem_limit;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5dad60b558..4c2d0451e84 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -16,13 +16,13 @@
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
+#include <linux/sizes.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
#include <asm/sections.h>
#include <asm/cachetype.h>
#include <asm/setup.h>
-#include <asm/sizes.h>
#include <asm/smp_plat.h>
#include <asm/tlb.h>
#include <asm/highmem.h>
@@ -422,12 +422,6 @@ static void __init build_mem_type_table(void)
vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
/*
- * Only use write-through for non-SMP systems
- */
- if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
- vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
-
- /*
* Enable CPU-specific coherency if supported.
* (Only available on XSC3 at the moment.)
*/
@@ -791,6 +785,79 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
}
}
+#ifndef CONFIG_ARM_LPAE
+
+/*
+ * The Linux PMD is made of two consecutive section entries covering 2MB
+ * (see definition in include/asm/pgtable-2level.h). However a call to
+ * create_mapping() may optimize static mappings by using individual
+ * 1MB section mappings. This leaves the actual PMD potentially half
+ * initialized if the top or bottom section entry isn't used, leaving it
+ * open to problems if a subsequent ioremap() or vmalloc() tries to use
+ * the virtual space left free by that unused section entry.
+ *
+ * Let's avoid the issue by inserting dummy vm entries covering the unused
+ * PMD halves once the static mappings are in place.
+ */
+
+static void __init pmd_empty_section_gap(unsigned long addr)
+{
+ struct vm_struct *vm;
+
+ vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm));
+ vm->addr = (void *)addr;
+ vm->size = SECTION_SIZE;
+ vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
+ vm->caller = pmd_empty_section_gap;
+ vm_area_add_early(vm);
+}
+
+static void __init fill_pmd_gaps(void)
+{
+ struct vm_struct *vm;
+ unsigned long addr, next = 0;
+ pmd_t *pmd;
+
+ /* we're still single threaded hence no lock needed here */
+ for (vm = vmlist; vm; vm = vm->next) {
+ if (!(vm->flags & VM_ARM_STATIC_MAPPING))
+ continue;
+ addr = (unsigned long)vm->addr;
+ if (addr < next)
+ continue;
+
+ /*
+ * Check if this vm starts on an odd section boundary.
+ * If so and the first section entry for this PMD is free
+ * then we block the corresponding virtual address.
+ */
+ if ((addr & ~PMD_MASK) == SECTION_SIZE) {
+ pmd = pmd_off_k(addr);
+ if (pmd_none(*pmd))
+ pmd_empty_section_gap(addr & PMD_MASK);
+ }
+
+ /*
+ * Then check if this vm ends on an odd section boundary.
+ * If so and the second section entry for this PMD is empty
+ * then we block the corresponding virtual address.
+ */
+ addr += vm->size;
+ if ((addr & ~PMD_MASK) == SECTION_SIZE) {
+ pmd = pmd_off_k(addr) + 1;
+ if (pmd_none(*pmd))
+ pmd_empty_section_gap(addr);
+ }
+
+ /* no need to look at any vm entry until we hit the next PMD */
+ next = (addr + PMD_SIZE - 1) & PMD_MASK;
+ }
+}
+
+#else
+#define fill_pmd_gaps() do { } while (0)
+#endif
+
static void * __initdata vmalloc_min =
(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
@@ -1072,6 +1139,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
*/
if (mdesc->map_io)
mdesc->map_io();
+ fill_pmd_gaps();
/*
* Finally flush the caches and tlb to ensure that we're in a
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 5900cd520e8..86b8b480634 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -107,6 +107,12 @@ ENTRY(cpu_v6_switch_mm)
mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
mcr p15, 0, r2, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c2, c0, 0 @ set TTB 0
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+ mrc p15, 0, r2, c13, c0, 1 @ read current context ID
+ bic r2, r2, #0xff @ extract the PID
+ and r1, r1, #0xff
+ orr r1, r1, r2 @ insert into new context ID
+#endif
mcr p15, 0, r1, c13, c0, 1 @ set context ID
#endif
mov pc, lr
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 42ac069c801..fd045e70639 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -46,6 +46,11 @@ ENTRY(cpu_v7_switch_mm)
#ifdef CONFIG_ARM_ERRATA_430973
mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
#endif
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+ mrc p15, 0, r2, c13, c0, 1 @ read current context ID
+ lsr r2, r2, #8 @ extract the PID
+ bfi r1, r2, #8, #24 @ insert into new context ID
+#endif
#ifdef CONFIG_ARM_ERRATA_754322
dsb
#endif
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 845f461f8ec..c2021139cb5 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -38,11 +38,19 @@ ENTRY(v7wbi_flush_user_tlb_range)
dsb
mov r0, r0, lsr #PAGE_SHIFT @ align address
mov r1, r1, lsr #PAGE_SHIFT
+#ifdef CONFIG_ARM_ERRATA_720789
+ mov r3, #0
+#else
asid r3, r3 @ mask ASID
+#endif
orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA
mov r1, r1, lsl #PAGE_SHIFT
1:
+#ifdef CONFIG_ARM_ERRATA_720789
+ ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable)
+#else
ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable)
+#endif
ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA
add r0, r0, #PAGE_SZ
@@ -67,7 +75,11 @@ ENTRY(v7wbi_flush_kern_tlb_range)
mov r0, r0, lsl #PAGE_SHIFT
mov r1, r1, lsl #PAGE_SHIFT
1:
+#ifdef CONFIG_ARM_ERRATA_720789
+ ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable)
+#else
ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable)
+#endif
ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA
add r0, r0, #PAGE_SZ
cmp r0, r1