Commit 671df189 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - add dma-mapping and block layer helpers to take care of IOMMU merging
   for mmc plus subsequent fixups (Yoshihiro Shimoda)

 - rework handling of the pgprot bits for remapping (me)

 - take care of the dma direct infrastructure for swiotlb-xen (me)

 - improve the dma noncoherent remapping infrastructure (me)

 - better defaults for ->mmap, ->get_sgtable and ->get_required_mask
   (me)

 - cleanup mmaping of coherent DMA allocations (me)

 - various misc cleanups (Andy Shevchenko, me)

* tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping: (41 commits)
  mmc: renesas_sdhi_internal_dmac: Add MMC_CAP2_MERGE_CAPABLE
  mmc: queue: Fix bigger segments usage
  arm64: use asm-generic/dma-mapping.h
  swiotlb-xen: merge xen_unmap_single into xen_swiotlb_unmap_page
  swiotlb-xen: simplify cache maintainance
  swiotlb-xen: use the same foreign page check everywhere
  swiotlb-xen: remove xen_swiotlb_dma_mmap and xen_swiotlb_dma_get_sgtable
  xen: remove the exports for xen_{create,destroy}_contiguous_region
  xen/arm: remove xen_dma_ops
  xen/arm: simplify dma_cache_maint
  xen/arm: use dev_is_dma_coherent
  xen/arm: consolidate page-coherent.h
  xen/arm: use dma-noncoherent.h calls for xen-swiotlb cache maintainance
  arm: remove wrappers for the generic dma remap helpers
  dma-mapping: introduce a dma_common_find_pages helper
  dma-mapping: always use VM_DMA_COHERENT for generic DMA remap
  vmalloc: lift the arm flag for coherent mappings to common code
  dma-mapping: provide a better default ->get_required_mask
  dma-mapping: remove the dma_declare_coherent_memory export
  remoteproc: don't allow modular build
  ...
parents c9fe5630 c7d9eccb
......@@ -204,6 +204,14 @@ Returns the maximum size of a mapping for the device. The size parameter
of the mapping functions like dma_map_single(), dma_map_page() and
others should not be larger than the returned value.
::
unsigned long
dma_get_merge_boundary(struct device *dev);
Returns the DMA merge boundary. If the device cannot merge any the DMA address
segments, the function returns 0.
Part Id - Streaming DMA mappings
--------------------------------
......@@ -595,17 +603,6 @@ For reasons of efficiency, most platforms choose to track the declared
region only at the granularity of a page. For smaller allocations,
you should use the dma_pool() API.
::
void
dma_release_declared_memory(struct device *dev)
Remove the memory region previously declared from the system. This
API performs *no* in-use checking for this region and will return
unconditionally having removed all the required structures. It is the
driver's job to ensure that no parts of this memory region are
currently in use.
Part III - Debug drivers use of the DMA-API
-------------------------------------------
......
......@@ -230,7 +230,7 @@ IOMMU (input/output memory management unit)
===========================================
Multiple x86-64 PCI-DMA mapping implementations exist, for example:
1. <lib/dma-direct.c>: use no hardware/software IOMMU at all
1. <kernel/dma/direct.c>: use no hardware/software IOMMU at all
(e.g. because you have < 3 GB memory).
Kernel boot message: "PCI-DMA: Disabling IOMMU"
......
......@@ -793,9 +793,6 @@ config COMPAT_32BIT_TIME
This is relevant on all 32-bit architectures, and 64-bit architectures
as part of compat syscall handling.
config ARCH_NO_COHERENT_DMA_MMAP
bool
config ARCH_NO_PREEMPT
bool
......
......@@ -955,5 +955,7 @@ const struct dma_map_ops alpha_pci_ops = {
.map_sg = alpha_pci_map_sg,
.unmap_sg = alpha_pci_unmap_sg,
.dma_supported = alpha_pci_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
EXPORT_SYMBOL(alpha_pci_ops);
......@@ -104,9 +104,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev_info(dev, "use %scoherent DMA ops\n",
dev->dma_coherent ? "" : "non");
}
static int __init atomic_pool_init(void)
{
return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
}
postcore_initcall(atomic_pool_init);
......@@ -8,7 +8,7 @@ config ARM
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB
select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB
select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KEEPINITRD
......
......@@ -14,9 +14,6 @@ struct dev_archdata {
#endif
#ifdef CONFIG_ARM_DMA_USE_IOMMU
struct dma_iommu_mapping *mapping;
#endif
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif
unsigned int dma_coherent:1;
unsigned int dma_ops_setup:1;
......
......@@ -91,12 +91,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
}
#endif
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
{
return dev->archdata.dma_coherent;
}
/**
* arm_dma_alloc - allocate consistent memory for DMA
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
......
......@@ -62,7 +62,6 @@ typedef pte_t *pte_addr_t;
*/
#define pgprot_noncached(prot) (prot)
#define pgprot_writecombine(prot) (prot)
#define pgprot_dmacoherent(prot) (prot)
#define pgprot_device(prot) (prot)
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
#define _ASM_ARM_XEN_PAGE_COHERENT_H
#include <linux/dma-mapping.h>
#include <asm/page.h>
#include <xen/arm/page-coherent.h>
static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
{
if (dev && dev->archdata.dev_dma_ops)
return dev->archdata.dev_dma_ops;
return get_arch_dma_ops(NULL);
}
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long page_pfn = page_to_xen_pfn(page);
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
unsigned long compound_pages =
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
bool local = (page_pfn <= dev_pfn) &&
(dev_pfn - page_pfn < compound_pages);
/*
* Dom0 is mapped 1:1, while the Linux page can span across
* multiple Xen pages, it's not possible for it to contain a
* mix of local and foreign Xen pages. So if the first xen_pfn
* == mfn the page is local otherwise it's a foreign page
* grant-mapped in dom0. If the page is local we can safely
* call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (local)
xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
}
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
unsigned long pfn = PFN_DOWN(handle);
/*
* Dom0 is mapped 1:1, while the Linux page can be spanned accross
* multiple Xen page, it's not possible to have a mix of local and
* foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
* foreign mfn will always return false. If the page is local we can
* safely call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->unmap_page)
xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
} else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
}
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->sync_single_for_cpu)
xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
}
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->sync_single_for_device)
xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
}
#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */
......@@ -68,8 +68,9 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
return ret;
return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
return -ENXIO;
}
......
......@@ -14,6 +14,7 @@
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-direct.h>
#include <linux/dma-mapping.h>
#include <linux/dma-noncoherent.h>
#include <linux/dma-contiguous.h>
......@@ -35,6 +36,7 @@
#include <asm/mach/map.h>
#include <asm/system_info.h>
#include <asm/dma-contiguous.h>
#include <xen/swiotlb-xen.h>
#include "dma.h"
#include "mm.h"
......@@ -192,6 +194,7 @@ const struct dma_map_ops arm_dma_ops = {
.sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
.sync_sg_for_device = arm_dma_sync_sg_for_device,
.dma_supported = arm_dma_supported,
.get_required_mask = dma_direct_get_required_mask,
};
EXPORT_SYMBOL(arm_dma_ops);
......@@ -212,6 +215,7 @@ const struct dma_map_ops arm_coherent_dma_ops = {
.map_sg = arm_dma_map_sg,
.map_resource = dma_direct_map_resource,
.dma_supported = arm_dma_supported,
.get_required_mask = dma_direct_get_required_mask,
};
EXPORT_SYMBOL(arm_coherent_dma_ops);
......@@ -336,25 +340,6 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr);
static void *
__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
const void *caller)
{
/*
* DMA allocation can be mapped to user space, so lets
* set VM_USERMAP flags too.
*/
return dma_common_contiguous_remap(page, size,
VM_ARM_DMA_CONSISTENT | VM_USERMAP,
prot, caller);
}
static void __dma_free_remap(void *cpu_addr, size_t size)
{
dma_common_free_remap(cpu_addr, size,
VM_ARM_DMA_CONSISTENT | VM_USERMAP);
}
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
static struct gen_pool *atomic_pool __ro_after_init;
......@@ -510,7 +495,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
if (!want_vaddr)
goto out;
ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
ptr = dma_common_contiguous_remap(page, size, prot, caller);
if (!ptr) {
__dma_free_buffer(page, size);
return NULL;
......@@ -577,7 +562,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
goto out;
if (PageHighMem(page)) {
ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
ptr = dma_common_contiguous_remap(page, size, prot, caller);
if (!ptr) {
dma_release_from_contiguous(dev, page, count);
return NULL;
......@@ -597,7 +582,7 @@ static void __free_from_contiguous(struct device *dev, struct page *page,
{
if (want_vaddr) {
if (PageHighMem(page))
__dma_free_remap(cpu_addr, size);
dma_common_free_remap(cpu_addr, size);
else
__dma_remap(page, size, PAGE_KERNEL);
}
......@@ -689,7 +674,7 @@ static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
static void remap_allocator_free(struct arm_dma_free_args *args)
{
if (args->want_vaddr)
__dma_free_remap(args->cpu_addr, args->size);
dma_common_free_remap(args->cpu_addr, args->size);
__dma_free_buffer(args->page, args->size);
}
......@@ -877,17 +862,6 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add
__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
}
/*
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
* that the intention is to allow exporting memory allocated via the
* coherent DMA APIs through the dma_buf API, which only accepts a
* scattertable. This presents a couple of problems:
* 1. Not all memory allocated via the coherent DMA APIs is backed by
* a struct page
* 2. Passing coherent DMA memory into the streaming APIs is not allowed
* as we will try to flush the memory through a different alias to that
* actually being used (and the flushes are redundant.)
*/
int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t handle, size_t size,
unsigned long attrs)
......@@ -1132,10 +1106,6 @@ static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
* 32-bit DMA.
* Use the generic dma-direct / swiotlb ops code in that case, as that
* handles bounce buffering for us.
*
* Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the
* latter is also selected by the Xen code, but that code for now relies
* on non-NULL dev_dma_ops. To be cleaned up later.
*/
if (IS_ENABLED(CONFIG_ARM_LPAE))
return NULL;
......@@ -1372,17 +1342,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
return 0;
}
/*
* Create a CPU mapping for a specified pages
*/
static void *
__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
const void *caller)
{
return dma_common_pages_remap(pages, size,
VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
}
/*
* Create a mapping in device IO address space for specified pages
*/
......@@ -1455,18 +1414,13 @@ static struct page **__atomic_get_pages(void *addr)
static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
{
struct vm_struct *area;
if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
return __atomic_get_pages(cpu_addr);
if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
return cpu_addr;
area = find_vm_area(cpu_addr);
if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
return area->pages;
return NULL;
return dma_common_find_pages(cpu_addr);
}
static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
......@@ -1539,7 +1493,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
return pages;
addr = __iommu_alloc_remap(pages, size, gfp, prot,
addr = dma_common_pages_remap(pages, size, prot,
__builtin_return_address(0));
if (!addr)
goto err_mapping;
......@@ -1622,10 +1576,8 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;
}
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) {
dma_common_free_remap(cpu_addr, size,
VM_ARM_DMA_CONSISTENT | VM_USERMAP);
}
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0)
dma_common_free_remap(cpu_addr, size);
__iommu_remove_mapping(dev, handle, size);
__iommu_free_buffer(dev, pages, size, attrs);
......@@ -2363,10 +2315,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
set_dma_ops(dev, dma_ops);
#ifdef CONFIG_XEN
if (xen_initial_domain()) {
dev->archdata.dev_dma_ops = dev->dma_ops;
dev->dma_ops = xen_dma_ops;
}
if (xen_initial_domain())
dev->dma_ops = &xen_swiotlb_dma_ops;
#endif
dev->archdata.dma_ops_setup = true;
}
......@@ -2402,12 +2352,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
return dma_to_pfn(dev, dma_addr);
}
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
{
return __get_dma_pgprot(attrs, prot);
}
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
......
......@@ -70,9 +70,6 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
#define VM_ARM_MTYPE(mt) ((mt) << 20)
#define VM_ARM_MTYPE_MASK (0x1f << 20)
/* consistent regions used by dma_alloc_attrs() */
#define VM_ARM_DMA_CONSISTENT 0x20000000
struct static_vm {
struct vm_struct vm;
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/cpu.h>
#include <linux/dma-mapping.h>
#include <linux/dma-noncoherent.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/export.h>
......@@ -35,105 +35,56 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order)
return __get_free_pages(flags, order);
}
enum dma_cache_op {
DMA_UNMAP,
DMA_MAP,
};
static bool hypercall_cflush = false;
/* functions called by SWIOTLB */
static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
size_t size, enum dma_data_direction dir, enum dma_cache_op op)
/* buffers in highmem or foreign pages cannot cross page boundaries */
static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op)
{
struct gnttab_cache_flush cflush;
unsigned long xen_pfn;
size_t left = size;
xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
offset %= XEN_PAGE_SIZE;
cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK;
cflush.offset = xen_offset_in_page(handle);
cflush.op = op;
do {
size_t len = left;
/* buffers in highmem or foreign pages cannot cross page
* boundaries */
if (len + offset > XEN_PAGE_SIZE)
len = XEN_PAGE_SIZE - offset;
cflush.op = 0;
cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT;
cflush.offset = offset;
cflush.length = len;
if (op == DMA_UNMAP && dir != DMA_TO_DEVICE)
cflush.op = GNTTAB_CACHE_INVAL;
if (op == DMA_MAP) {
if (dir == DMA_FROM_DEVICE)
cflush.op = GNTTAB_CACHE_INVAL;
else
cflush.op = GNTTAB_CACHE_CLEAN;
}
if (cflush.op)
HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
if (size + cflush.offset > XEN_PAGE_SIZE)
cflush.length = XEN_PAGE_SIZE - cflush.offset;
else
cflush.length = size;
offset = 0;
xen_pfn++;
left -= len;
} while (left);
}
HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
{
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP);
cflush.offset = 0;
cflush.a.dev_bus_addr += cflush.length;
size -= cflush.length;
} while (size);
}
static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
/*
* Dom0 is mapped 1:1, and while the Linux page can span across multiple Xen
* pages, it is not possible for it to contain a mix of local and foreign Xen
* pages. Calling pfn_valid on a foreign mfn will always return false, so if
* pfn_valid returns true the pages is local and we can use the native
* dma-direct functions, otherwise we call the Xen specific version.
*/
void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
phys_addr_t paddr, size_t size, enum dma_data_direction dir)
{
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
if (pfn_valid(PFN_DOWN(handle)))
arch_sync_dma_for_cpu(dev, paddr, size, dir);
else if (dir != DMA_TO_DEVICE)
dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
}
void __xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
phys_addr_t paddr, size_t size, enum dma_data_direction dir)
{
if (is_device_dma_coherent(hwdev))
return;
if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
return;
__xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir);
}
void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
if (is_device_dma_coherent(hwdev))
return;
if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
void __xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
if (is_device_dma_coherent(hwdev))
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
void __xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
if (is_device_dma_coherent(hwdev))
return;
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
if (pfn_valid(PFN_DOWN(handle)))
arch_sync_dma_for_device(dev, paddr, size, dir);
else if (dir == DMA_FROM_DEVICE)
dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
else
dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN);
}
bool xen_arch_need_swiotlb(struct device *dev,
......@@ -159,7 +110,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
* memory and we are not able to flush the cache.
*/
return (!hypercall_cflush && (xen_pfn != bfn) &&
!is_device_dma_coherent(dev));
!dev_is_dma_coherent(dev));
}
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
......@@ -173,16 +124,11 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
*dma_handle = pstart;
return 0;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
return;
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
const struct dma_map_ops *xen_dma_ops;
EXPORT_SYMBOL(xen_dma_ops);
int __init xen_mm_init(void)
{
......@@ -190,7 +136,6 @@ int __init xen_mm_init(void)
if (!xen_initial_domain())
return 0;
xen_swiotlb_init(1, false);
xen_dma_ops = &xen_swiotlb_dma_ops;
cflush.op = 0;
cflush.a.dev_bus_addr = 0;
......
......@@ -13,7 +13,6 @@ config ARM64
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_COHERENT_TO_PFN
select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
......
......@@ -4,6 +4,7 @@ generic-y += delay.h
generic-y += div64.h
generic-y += dma.h
generic-y += dma-contiguous.h
generic-y += dma-mapping.h
generic-y += early_ioremap.h
generic-y += emergency-restart.h
generic-y += hw_irq.h
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
*/
#ifndef __ASM_DMA_MAPPING_H
#define __ASM_DMA_MAPPING_H
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
return NULL;
}
/*
* Do not use this function in a driver, it is only provided for
* arch/arm/mm/xen.c, which is used by arm64 as well.
*/
static inline bool is_device_dma_coherent(struct device *dev)
{
return dev->dma_coherent;
}
#endif /* __ASM_DMA_MAPPING_H */
......@@ -437,6 +437,18 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define pgprot_device(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
/*
* DMA allocations for non-coherent devices use what the Arm architecture calls
* "Normal non-cacheable" memory, which permits speculation, unaligned accesses
* and merging of writes. This is different from "Device-nGnR[nE]" memory which
* is intended for MMIO and thus forbids speculation, preserves access size,
* requires strict alignment and can also force write responses to come from the
* endpoint.
*/
#define pgprot_dmacoherent(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H
#define _ASM_ARM64_XEN_PAGE_COHERENT_H
#include <linux/dma-mapping.h>
#include <asm/page.h>
#include <xen/arm/page-coherent.h>
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
}
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn))
dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
}
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn))
dma_direct_sync_single_for_device(hwdev, handle, size, dir);
else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long page_pfn = page_to_xen_pfn(page);
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
unsigned long compound_pages =
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
bool local = (page_pfn <= dev_pfn) &&
(dev_pfn - page_pfn < compound_pages);
if (local)
dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
}
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
unsigned long pfn = PFN_DOWN(handle);
/*
* Dom0 is mapped 1:1, while the Linux page can be spanned accross
* multiple Xen page, it's not possible to have a mix of local and
* foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
* foreign mfn will always return false. If the page is local we can
* safely call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (pfn_valid(pfn))
dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
}
#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */
......@@ -8,15 +8,11 @@
#include <linux/cache.h>
#include <linux/dma-noncoherent.h>
#include <linux/dma-iommu.h>
#include <xen/xen.h>
#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
{
return pgprot_writecombine(prot);
}
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
{
......@@ -34,12 +30,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
__dma_flush_area(page_address(page), size);
}
static int __init arm64_dma_init(void)
{
return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
}
arch_initcall(arm64_dma_init);
#ifdef CONFIG_IOMMU_DMA
void arch_teardown_dma_ops(struct device *dev)
{
......@@ -64,6 +54,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
#ifdef CONFIG_XEN
if (xen_initial_domain())
dev->dma_ops = xen_dma_ops;
dev->dma_ops = &xen_swiotlb_dma_ops;
#endif
}
......@@ -20,7 +20,6 @@ config C6X
select OF_EARLY_FLATTREE
select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA
select ARCH_NO_COHERENT_DMA_MMAP
select MMU_GATHER_NO_RANGE if MMU
config MMU
......
......@@ -14,12 +14,6 @@
#include <linux/version.h>
#include <asm/cache.h>
static int __init atomic_pool_init(void)
{
return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
}
postcore_initcall(atomic_pool_init);
void arch_dma_prep_coherent(struct page *page, size_t size)
{
if (PageHighMem(page)) {
......
......@@ -2069,6 +2069,8 @@ static const struct dma_map_ops sba_dma_ops = {
.map_sg = sba_map_sg_attrs,
.unmap_sg = sba_unmap_sg_attrs,
.dma_supported = sba_dma_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
static int __init
......
......@@ -259,7 +259,7 @@ __initcall(register_memory);
* This function checks if the reserved crashkernel is allowed on the specific
* IA64 machine flavour. Machines without an IO TLB use swiotlb and require
* some memory below 4 GB (i.e. in 32 bit area), see the implementation of
* lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that
* kernel/dma/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that
* in kdump case. See the comment in sba_init() in sba_iommu.c.
*
* So, the only machvec that really supports loading the kdump kernel
......
......@@ -4,11 +4,9 @@ config M68K
default y
select ARCH_32BIT_OFF_T
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_NO_PREEMPT if !COLDFIRE
select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
......
......@@ -169,6 +169,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
? (__pgprot((pgprot_val(prot) & _CACHEMASK040) | _PAGE_NOCACHE_S)) \
: (prot)))
pgprot_t pgprot_dmacoherent(pgprot_t prot);
#define pgprot_dmacoherent(prot) pgprot_dmacoherent(prot)
#endif /* CONFIG_COLDFIRE */
#include <asm-generic/pgtable.h>
#endif /* !__ASSEMBLY__ */
......
......@@ -23,8 +23,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
cache_push(page_to_phys(page), size);
}
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
pgprot_t pgprot_dmacoherent(pgprot_t prot)
{
if (CPU_IS_040_OR_060) {
pgprot_val(prot) &= ~_PAGE_CACHE040;
......
......@@ -9,7 +9,6 @@ config MICROBLAZE
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select TIMER_OF
......
......@@ -1119,7 +1119,14 @@ config DMA_PERDEV_COHERENT
config DMA_NONCOHERENT
bool
select ARCH_HAS_DMA_MMAP_PGPROT
#
# MIPS allows mixing "slightly different" Cacheability and Coherency
# Attribute bits. It is believed that the uncached access through
# KSEG1 and the implementation specific "uncached accelerated" used
# by pgprot_writcombine can be mixed, and the latter sometimes provides
# significant advantages.
#
select ARCH_HAS_DMA_WRITE_COMBINE
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_UNCACHED_SEGMENT
select NEED_DMA_MAP_STATE
......
......@@ -682,5 +682,7 @@ const struct dma_map_ops jazz_dma_ops = {
.sync_sg_for_device = jazz_dma_sync_sg_for_device,
.dma_supported = dma_direct_supported,
.cache_sync = arch_dma_cache_sync,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
EXPORT_SYMBOL(jazz_dma_ops);
......@@ -65,14 +65,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr)));
}
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
{
if (attrs & DMA_ATTR_WRITE_COMBINE)
return pgprot_writecombine(prot);
return pgprot_noncached(prot);
}
static inline void dma_sync_virt(void *addr, size_t size,
enum dma_data_direction dir)
{
......
......@@ -80,9 +80,3 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
{
cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
}
static int __init atomic_pool_init(void)
{
return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
}
postcore_initcall(atomic_pool_init);
......@@ -52,7 +52,6 @@ config PARISC
select GENERIC_SCHED_CLOCK
select HAVE_UNSTABLE_SCHED_CLOCK if SMP
select GENERIC_CLOCKEVENTS
select ARCH_NO_COHERENT_DMA_MMAP
select CPU_NO_EFFICIENT_FFS
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
......
......@@ -208,4 +208,6 @@ const struct dma_map_ops dma_iommu_ops = {
.sync_single_for_device = dma_iommu_sync_for_device,
.sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
.sync_sg_for_device = dma_iommu_sync_sg_for_device,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
......@@ -686,20 +686,16 @@ static int ps3_dma_supported(struct device *_dev, u64 mask)
return mask >= DMA_BIT_MASK(32);
}
static u64 ps3_dma_get_required_mask(struct device *_dev)
{
return DMA_BIT_MASK(32);
}
static const struct dma_map_ops ps3_sb_dma_ops = {
.alloc = ps3_alloc_coherent,
.free = ps3_free_coherent,
.map_sg = ps3_sb_map_sg,
.unmap_sg = ps3_sb_unmap_sg,
.dma_supported = ps3_dma_supported,
.get_required_mask = ps3_dma_get_required_mask,
.map_page = ps3_sb_map_page,
.unmap_page = ps3_unmap_page,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
static const struct dma_map_ops ps3_ioc0_dma_ops = {
......@@ -708,9 +704,10 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = {
.map_sg = ps3_ioc0_map_sg,
.unmap_sg = ps3_ioc0_unmap_sg,
.dma_supported = ps3_dma_supported,
.get_required_mask = ps3_dma_get_required_mask,
.map_page = ps3_ioc0_map_page,
.unmap_page = ps3_unmap_page,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
/**
......
......@@ -605,6 +605,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
.unmap_page = vio_dma_iommu_unmap_page,
.dma_supported = dma_iommu_dma_supported,
.get_required_mask = dma_iommu_get_required_mask,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
/**
......
......@@ -668,6 +668,8 @@ const struct dma_map_ops s390_pci_dma_ops = {
.unmap_sg = s390_dma_unmap_sg,
.map_page = s390_dma_map_pages,
.unmap_page = s390_dma_unmap_pages,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
......
......@@ -5,7 +5,6 @@ config SUPERH
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select HAVE_PATA_PLATFORM
select CLKDEV_LOOKUP
select DMA_DECLARE_COHERENT
......
......@@ -198,8 +198,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
__pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
#define pgprot_writecombine(prot) \
__pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
#define pgprot_dmacoherent(prot) \
__pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_present(pmd) (pmd_val(pmd) & PMD_PRESENT)
......
......@@ -21,18 +21,4 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
free_pages((unsigned long) cpu_addr, get_order(size));
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs) { }
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
unsigned long attrs) { }
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
......@@ -677,7 +677,10 @@ static const struct dma_map_ops gart_dma_ops = {
.unmap_page = gart_unmap_page,
.alloc = gart_alloc_coherent,
.free = gart_free_coherent,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
.dma_supported = dma_direct_supported,
.get_required_mask = dma_direct_get_required_mask,
};
static void gart_iommu_shutdown(void)
......
......@@ -468,6 +468,8 @@ static const struct dma_map_ops calgary_dma_ops = {
.map_page = calgary_map_page,
.unmap_page = calgary_unmap_page,
.dma_supported = dma_direct_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
static inline void __iomem * busno_to_bbar(unsigned char num)
......
// SPDX-License-Identifier: GPL-2.0
/* Glue code to lib/swiotlb.c */
#include <linux/pci.h>
#include <linux/cache.h>
......
......@@ -486,7 +486,7 @@ static int __init reserve_crashkernel_low(void)
ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
if (ret) {
/*
* two parts from lib/swiotlb.c:
* two parts from kernel/dma/swiotlb.c:
* -swiotlb size: user-specified with swiotlb= or default.
*
* -swiotlb overflow buffer: now hardcoded to 32k. We round it
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* arch/x86/pci/sta2x11-fixup.c
* glue code for lib/swiotlb.c and DMA translation between STA2x11
* AMBA memory mapping and the X86 memory mapping
* DMA translation between STA2x11 AMBA memory mapping and the x86 memory mapping
*
* ST Microelectronics ConneXt (STA2X11/STA2X10)
*
......
......@@ -2625,7 +2625,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
*dma_handle = virt_to_machine(vstart).maddr;
return success ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
......@@ -2660,7 +2659,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
spin_unlock_irqrestore(&xen_reservation_lock, flags);
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
static noinline void xen_flush_tlb_all(void)
{
......
......@@ -5,7 +5,6 @@ config XTENSA
select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_FRAME_POINTERS
......
......@@ -167,7 +167,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (PageHighMem(page)) {
void *p;
p = dma_common_contiguous_remap(page, size, VM_MAP,
p = dma_common_contiguous_remap(page, size,
pgprot_noncached(PAGE_KERNEL),
__builtin_return_address(0));
if (!p) {
......@@ -192,7 +192,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
page = virt_to_page(platform_vaddr_to_cached(vaddr));
} else {
#ifdef CONFIG_MMU
dma_common_free_remap(vaddr, size, VM_MAP);
dma_common_free_remap(vaddr, size);
#endif
page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle)));
}
......
......@@ -12,6 +12,7 @@
#include <linux/lcm.h>
#include <linux/jiffies.h>
#include <linux/gfp.h>
#include <linux/dma-mapping.h>
#include "blk.h"
#include "blk-wbt.h"
......@@ -848,6 +849,28 @@ void blk_queue_required_elevator_features(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features);
/**
* blk_queue_can_use_dma_map_merging - configure queue for merging segments.
* @q: the request queue for the device
* @dev: the device pointer for dma
*
* Tell the block layer about merging the segments by dma map of @q.
*/
bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
struct device *dev)
{
unsigned long boundary = dma_get_merge_boundary(dev);
if (!boundary)
return false;
/* No need to update max_segment_size. see blk_queue_virt_boundary() */
blk_queue_virt_boundary(q, boundary);
return true;
}
EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
static int __init blk_settings_init(void)
{
blk_max_low_pfn = max_low_pfn - 1;
......
......@@ -4609,11 +4609,10 @@ static int dispc_errata_i734_wa_init(struct dispc_device *dispc)
i734_buf.size = i734.ovli.width * i734.ovli.height *
color_mode_to_bpp(i734.ovli.fourcc) / 8;
i734_buf.vaddr = dma_alloc_writecombine(&dispc->pdev->dev,
i734_buf.size, &i734_buf.paddr,
GFP_KERNEL);
i734_buf.vaddr = dma_alloc_wc(&dispc->pdev->dev, i734_buf.size,
&i734_buf.paddr, GFP_KERNEL);
if (!i734_buf.vaddr) {
dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n",
dev_err(&dispc->pdev->dev, "%s: dma_alloc_wc failed\n",
__func__);
return -ENOMEM;
}
......@@ -4626,8 +4625,8 @@ static void dispc_errata_i734_wa_fini(struct dispc_device *dispc)
if (!dispc->feat->has_gamma_i734_bug)
return;
dma_free_writecombine(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr,
i734_buf.paddr);
dma_free_wc(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr,
i734_buf.paddr);
}
static void dispc_errata_i734_wa(struct dispc_device *dispc)
......
......@@ -2754,6 +2754,8 @@ static const struct dma_map_ops amd_iommu_dma_ops = {
.map_sg = map_sg,
.unmap_sg = unmap_sg,
.dma_supported = amd_iommu_dma_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
static int init_reserved_iova_ranges(void)
......
......@@ -548,15 +548,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
return pages;
}
static struct page **__iommu_dma_get_pages(void *cpu_addr)
{
struct vm_struct *area = find_vm_area(cpu_addr);
if (!area || !area->pages)
return NULL;
return area->pages;
}
/**
* iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
* @dev: Device to allocate memory for. Must be a real device
......@@ -624,7 +615,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
< size)
goto out_free_sg;
vaddr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
vaddr = dma_common_pages_remap(pages, size, prot,
__builtin_return_address(0));
if (!vaddr)
goto out_unmap;
......@@ -945,10 +936,10 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
* If it the address is remapped, then it's either non-coherent
* or highmem CMA, or an iommu_dma_alloc_remap() construction.
*/
pages = __iommu_dma_get_pages(cpu_addr);
pages = dma_common_find_pages(cpu_addr);
if (!pages)
page = vmalloc_to_page(cpu_addr);
dma_common_free_remap(cpu_addr, alloc_size, VM_USERMAP);
dma_common_free_remap(cpu_addr, alloc_size);
} else {
/* Lowmem means a coherent atomic or CMA allocation */
page = virt_to_page(cpu_addr);
......@@ -986,7 +977,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
cpu_addr = dma_common_contiguous_remap(page, alloc_size,
VM_USERMAP, prot, __builtin_return_address(0));
prot, __builtin_return_address(0));
if (!cpu_addr)
goto out_free_pages;
......@@ -1052,7 +1043,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
return -ENXIO;
if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
struct page **pages = __iommu_dma_get_pages(cpu_addr);
struct page **pages = dma_common_find_pages(cpu_addr);
if (pages)
return __iommu_dma_mmap(pages, size, vma);
......@@ -1074,7 +1065,7 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
int ret;
if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
struct page **pages = __iommu_dma_get_pages(cpu_addr);
struct page **pages = dma_common_find_pages(cpu_addr);
if (pages) {
return sg_alloc_table_from_pages(sgt, pages,
......@@ -1093,6 +1084,13 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
return ret;
}
static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
{
struct iommu_domain *domain = iommu_get_dma_domain(dev);
return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
}
static const struct dma_map_ops iommu_dma_ops = {
.alloc = iommu_dma_alloc,
.free = iommu_dma_free,
......@@ -1108,6 +1106,7 @@ static const struct dma_map_ops iommu_dma_ops = {
.sync_sg_for_device = iommu_dma_sync_sg_for_device,
.map_resource = iommu_dma_map_resource,
.unmap_resource = iommu_dma_unmap_resource,
.get_merge_boundary = iommu_dma_get_merge_boundary,
};
/*
......
......@@ -3785,6 +3785,8 @@ static const struct dma_map_ops intel_dma_ops = {
.map_resource = intel_map_resource,
.unmap_resource = intel_unmap_resource,
.dma_supported = dma_direct_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
static void
......
......@@ -21,6 +21,8 @@
#include "card.h"
#include "host.h"
#define MMC_DMA_MAP_MERGE_SEGMENTS 512
static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq)
{
/* Allow only 1 DCMD at a time */
......@@ -193,6 +195,12 @@ static void mmc_queue_setup_discard(struct request_queue *q,
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
}
static unsigned int mmc_get_max_segments(struct mmc_host *host)
{
return host->can_dma_map_merge ? MMC_DMA_MAP_MERGE_SEGMENTS :
host->max_segs;
}
/**
* mmc_init_request() - initialize the MMC-specific per-request data
* @q: the request queue
......@@ -206,7 +214,7 @@ static int __mmc_init_request(struct mmc_queue *mq, struct request *req,
struct mmc_card *card = mq->card;
struct mmc_host *host = card->host;
mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp);
mq_rq->sg = mmc_alloc_sg(mmc_get_max_segments(host), gfp);
if (!mq_rq->sg)
return -ENOMEM;
......@@ -362,13 +370,23 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
blk_queue_max_hw_sectors(mq->queue,
min(host->max_blk_count, host->max_req_size / 512));
blk_queue_max_segments(mq->queue, host->max_segs);
if (host->can_dma_map_merge)
WARN(!blk_queue_can_use_dma_map_merging(mq->queue,
mmc_dev(host)),
"merging was advertised but not possible");
blk_queue_max_segments(mq->queue, mmc_get_max_segments(host));
if (mmc_card_mmc(card))
block_size = card->ext_csd.data_sector_size;
blk_queue_logical_block_size(mq->queue, block_size);
blk_queue_max_segment_size(mq->queue,
/*
* After blk_queue_can_use_dma_map_merging() was called with succeed,
* since it calls blk_queue_virt_boundary(), the mmc should not call
* both blk_queue_max_segment_size().
*/
if (!host->can_dma_map_merge)
blk_queue_max_segment_size(mq->queue,
round_down(host->max_seg_size, block_size));
dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
......@@ -381,6 +399,11 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
init_waitqueue_head(&mq->wait);
}
static inline bool mmc_merge_capable(struct mmc_host *host)
{
return host->caps2 & MMC_CAP2_MERGE_CAPABLE;
}
/* Set queue depth to get a reasonable value for q->nr_requests */
#define MMC_QUEUE_DEPTH 64
......@@ -418,6 +441,18 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
mq->tag_set.cmd_size = sizeof(struct mmc_queue_req);
mq->tag_set.driver_data = mq;
/*
* Since blk_mq_alloc_tag_set() calls .init_request() of mmc_mq_ops,
* the host->can_dma_map_merge should be set before to get max_segs
* from mmc_get_max_segments().
*/
if (mmc_merge_capable(host) &&
host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS &&
dma_get_merge_boundary(mmc_dev(host)))
host->can_dma_map_merge = 1;
else
host->can_dma_map_merge = 0;
ret = blk_mq_alloc_tag_set(&mq->tag_set);
if (ret)
return ret;
......
......@@ -106,7 +106,7 @@ static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = {
TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
.capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
MMC_CAP_CMD23,
.capabilities2 = MMC_CAP2_NO_WRITE_PROTECT,
.capabilities2 = MMC_CAP2_NO_WRITE_PROTECT | MMC_CAP2_MERGE_CAPABLE,
.bus_shift = 2,
.scc_offset = 0x1000,
.taps = rcar_gen3_scc_taps,
......
......@@ -1024,6 +1024,7 @@ static const struct dma_map_ops ccio_ops = {
.unmap_page = ccio_unmap_page,
.map_sg = ccio_map_sg,
.unmap_sg = ccio_unmap_sg,
.get_sgtable = dma_common_get_sgtable,
};
#ifdef CONFIG_PROC_FS
......
......@@ -1084,6 +1084,7 @@ static const struct dma_map_ops sba_ops = {
.unmap_page = sba_unmap_page,
.map_sg = sba_map_sg,
.unmap_sg = sba_unmap_sg,
.get_sgtable = dma_common_get_sgtable,
};
......
......@@ -2,7 +2,7 @@
menu "Remoteproc drivers"
config REMOTEPROC
tristate "Support for Remote Processor subsystem"
bool "Support for Remote Processor subsystem"
depends on HAS_DMA
select CRC32
select FW_LOADER
......
......@@ -28,6 +28,7 @@
#include <linux/memblock.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
......@@ -391,6 +392,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
phys = map;
dev_addr = xen_phys_to_bus(map);
/*
......@@ -402,14 +404,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
return DMA_MAPPING_ERROR;
}
page = pfn_to_page(map >> PAGE_SHIFT);
offset = map & ~PAGE_MASK;
done:
/*
* we are not interested in the dma_addr returned by xen_dma_map_page,
* only in the potential cache flushes executed by the function.
*/
xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs);
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
xen_dma_sync_for_device(dev, dev_addr, phys, size, dir);
return dev_addr;
}
......@@ -421,35 +418,29 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* After this call, reads by the cpu to the buffer are guaranteed to see
* whatever the device wrote there.
*/
static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
phys_addr_t paddr = xen_bus_to_phys(dev_addr);
BUG_ON(dir == DMA_NONE);
xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs);
if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
xen_dma_sync_for_cpu(hwdev, dev_addr, paddr, size, dir);
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(dev_addr))
swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs);
}
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
xen_unmap_single(hwdev, dev_addr, size, dir, attrs);
}
static void
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
{
phys_addr_t paddr = xen_bus_to_phys(dma_addr);
xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir);
if (!dev_is_dma_coherent(dev))
xen_dma_sync_for_cpu(dev, dma_addr, paddr, size, dir);
if (is_xen_swiotlb_buffer(dma_addr))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
......@@ -464,7 +455,8 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
if (is_xen_swiotlb_buffer(dma_addr))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
xen_dma_sync_single_for_device(dev, dma_addr, size, dir);
if (!dev_is_dma_coherent(dev))
xen_dma_sync_for_device(dev, dma_addr, paddr, size, dir);
}
/*
......@@ -481,7 +473,8 @@ xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i)
xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs);
xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
dir, attrs);
}
......@@ -547,51 +540,6 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask;
}
/*
* Create userspace mapping for the DMA-coherent memory.
* This function should be called with the pages from the current domain only,
* passing pages mapped from other domains would lead to memory corruption.
*/
static int
xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
#ifdef CONFIG_ARM
if (xen_get_dma_ops(dev)->mmap)
return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr,
dma_addr, size, attrs);
#endif
return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
}
/*
* This function should be called with the pages from the current domain only,
* passing pages mapped from other domains would lead to memory corruption.
*/
static int
xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t handle, size_t size,
unsigned long attrs)
{
#ifdef CONFIG_ARM
if (xen_get_dma_ops(dev)->get_sgtable) {
#if 0
/*
* This check verifies that the page belongs to the current domain and
* is not one mapped from another domain.
* This check is for debug only, and should not go to production build
*/
unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle));
BUG_ON (!page_is_ram(bfn));
#endif
return xen_get_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr,
handle, size, attrs);
}
#endif
return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs);
}
const struct dma_map_ops xen_swiotlb_dma_ops = {
.alloc = xen_swiotlb_alloc_coherent,
.free = xen_swiotlb_free_coherent,
......@@ -604,6 +552,6 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
.map_page = xen_swiotlb_map_page,
.unmap_page = xen_swiotlb_unmap_page,
.dma_supported = xen_swiotlb_dma_supported,
.mmap = xen_swiotlb_dma_mmap,
.get_sgtable = xen_swiotlb_get_sgtable,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
......@@ -1110,6 +1110,8 @@ extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
extern void blk_queue_required_elevator_features(struct request_queue *q,
unsigned int features);
extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
struct device *dev);
/*
* Number of physical segments as sent to the device.
......
......@@ -131,6 +131,7 @@ struct dma_map_ops {
int (*dma_supported)(struct device *dev, u64 mask);
u64 (*get_required_mask)(struct device *dev);
size_t (*max_mapping_size)(struct device *dev);
unsigned long (*get_merge_boundary)(struct device *dev);
};
#define DMA_MAPPING_ERROR (~(dma_addr_t)0)
......@@ -457,11 +458,13 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
bool dma_can_mmap(struct device *dev);
int dma_supported(struct device *dev, u64 mask);
int dma_set_mask(struct device *dev, u64 mask);
int dma_set_coherent_mask(struct device *dev, u64 mask);
u64 dma_get_required_mask(struct device *dev);
size_t dma_max_mapping_size(struct device *dev);
unsigned long dma_get_merge_boundary(struct device *dev);
#else /* CONFIG_HAS_DMA */
static inline dma_addr_t dma_map_page_attrs(struct device *dev,
struct page *page, size_t offset, size_t size,
......@@ -547,6 +550,10 @@ static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
{
return -ENXIO;
}
static inline bool dma_can_mmap(struct device *dev)
{
return false;
}
static inline int dma_supported(struct device *dev, u64 mask)
{
return 0;
......@@ -567,6 +574,10 @@ static inline size_t dma_max_mapping_size(struct device *dev)
{
return 0;
}
static inline unsigned long dma_get_merge_boundary(struct device *dev)
{
return 0;
}
#endif /* CONFIG_HAS_DMA */
static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
......@@ -610,16 +621,14 @@ extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
struct page **dma_common_find_pages(void *cpu_addr);
void *dma_common_contiguous_remap(struct page *page, size_t size,
unsigned long vm_flags,
pgprot_t prot, const void *caller);
void *dma_common_pages_remap(struct page **pages, size_t size,
unsigned long vm_flags, pgprot_t prot,
const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
pgprot_t prot, const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size);
int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
bool dma_in_atomic_pool(void *start, size_t size);
void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
bool dma_free_from_pool(void *start, size_t size);
......@@ -749,7 +758,6 @@ static inline int dma_get_cache_alignment(void)
#ifdef CONFIG_DMA_DECLARE_COHERENT
int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
dma_addr_t device_addr, size_t size);
void dma_release_declared_memory(struct device *dev);
#else
static inline int
dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
......@@ -757,11 +765,6 @@ dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
{
return -ENOSYS;
}
static inline void
dma_release_declared_memory(struct device *dev)
{
}
#endif /* CONFIG_DMA_DECLARE_COHERENT */
static inline void *dmam_alloc_coherent(struct device *dev, size_t size,
......@@ -781,9 +784,6 @@ static inline void *dma_alloc_wc(struct device *dev, size_t size,
return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs);
}
#ifndef dma_alloc_writecombine
#define dma_alloc_writecombine dma_alloc_wc
#endif
static inline void dma_free_wc(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr)
......@@ -791,9 +791,6 @@ static inline void dma_free_wc(struct device *dev, size_t size,
return dma_free_attrs(dev, size, cpu_addr, dma_addr,
DMA_ATTR_WRITE_COMBINE);
}
#ifndef dma_free_writecombine
#define dma_free_writecombine dma_free_wc
#endif
static inline int dma_mmap_wc(struct device *dev,
struct vm_area_struct *vma,
......@@ -803,9 +800,6 @@ static inline int dma_mmap_wc(struct device *dev,
return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size,
DMA_ATTR_WRITE_COMBINE);
}
#ifndef dma_mmap_writecombine
#define dma_mmap_writecombine dma_mmap_wc
#endif
#ifdef CONFIG_NEED_DMA_MAP_STATE
#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME
......
......@@ -3,6 +3,7 @@
#define _LINUX_DMA_NONCOHERENT_H 1
#include <linux/dma-mapping.h>
#include <asm/pgtable.h>
#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H
#include <asm/dma-coherence.h>
......@@ -42,10 +43,18 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs);
long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
dma_addr_t dma_addr);
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs);
#ifdef CONFIG_MMU
/*
* Page protection so that devices that can't snoop CPU caches can use the
* memory coherently. We default to pgprot_noncached which is usually used
* for ioremap as a safe bet, but architectures can override this with less
* strict semantics if possible.
*/
#ifndef pgprot_dmacoherent
#define pgprot_dmacoherent(prot) pgprot_noncached(prot)
#endif
pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs);
#else
static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot,
......
......@@ -368,6 +368,7 @@ struct mmc_host {
#define MMC_CAP2_CQE (1 << 23) /* Has eMMC command queue engine */
#define MMC_CAP2_CQE_DCMD (1 << 24) /* CQE can issue a direct command */
#define MMC_CAP2_AVOID_3_3V (1 << 25) /* Host must negotiate down from 3.3V */
#define MMC_CAP2_MERGE_CAPABLE (1 << 26) /* Host can merge a segment over the segment size */
int fixed_drv_type; /* fixed driver type for non-removable media */
......@@ -397,6 +398,7 @@ struct mmc_host {
unsigned int retune_paused:1; /* re-tuning is temporarily disabled */
unsigned int use_blk_mq:1; /* use blk-mq */
unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */
unsigned int can_dma_map_merge:1; /* merging can be used */
int rescan_disable; /* disable card detection */
int rescan_entered; /* used with nonremovable devices */
......
......@@ -18,6 +18,7 @@ struct notifier_block; /* in notifier.h */
#define VM_ALLOC 0x00000002 /* vmalloc() */
#define VM_MAP 0x00000004 /* vmap()ed pages */
#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */
#define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
......@@ -26,6 +27,7 @@ struct notifier_block; /* in notifier.h */
* vfree_atomic().
*/
#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */
/* bits [20..32] reserved for arch specific ioremap internals */
/*
......
......@@ -19,8 +19,6 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return PARAVIRT_LAZY_NONE;
}
extern const struct dma_map_ops *xen_dma_ops;
#ifdef CONFIG_XEN
void __init xen_early_init(void);
#else
......
......@@ -2,15 +2,19 @@
#ifndef _XEN_ARM_PAGE_COHERENT_H
#define _XEN_ARM_PAGE_COHERENT_H
void __xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs);
void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
unsigned long attrs);
void __xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir);
void __xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir);
#include <linux/dma-mapping.h>
#include <asm/page.h>
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
}
#endif /* _XEN_ARM_PAGE_COHERENT_H */
......@@ -4,6 +4,11 @@
#include <linux/swiotlb.h>
void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
phys_addr_t paddr, size_t size, enum dma_data_direction dir);
void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
phys_addr_t paddr, size_t size, enum dma_data_direction dir);
extern int xen_swiotlb_init(int verbose, bool early);
extern const struct dma_map_ops xen_swiotlb_dma_ops;
......
......@@ -20,6 +20,15 @@ config ARCH_HAS_DMA_COHERENCE_H
config ARCH_HAS_DMA_SET_MASK
bool
#
# Select this option if the architecture needs special handling for
# DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be what
# people thing of when saying write combine, so very few platforms should
# need to enable this.
#
config ARCH_HAS_DMA_WRITE_COMBINE
bool
config DMA_DECLARE_COHERENT
bool
......@@ -45,9 +54,6 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_DMA_COHERENT_TO_PFN
bool
config ARCH_HAS_DMA_MMAP_PGPROT
bool
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
......
......@@ -122,18 +122,6 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
dma_release_coherent_memory(mem);
return ret;
}
EXPORT_SYMBOL(dma_declare_coherent_memory);
void dma_release_declared_memory(struct device *dev)
{
struct dma_coherent_mem *mem = dev->dma_mem;
if (!mem)
return;
dma_release_coherent_memory(mem);
dev->dma_mem = NULL;
}
EXPORT_SYMBOL(dma_release_declared_memory);
static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
ssize_t size, dma_addr_t *dma_handle)
......@@ -288,7 +276,6 @@ int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret);
}
EXPORT_SYMBOL(dma_mmap_from_dev_coherent);
int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
size_t size, int *ret)
......
......@@ -136,17 +136,29 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
return ret;
}
/*
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
* that the intention is to allow exporting memory allocated via the
* coherent DMA APIs through the dma_buf API, which only accepts a
* scattertable. This presents a couple of problems:
* 1. Not all memory allocated via the coherent DMA APIs is backed by
* a struct page
* 2. Passing coherent DMA memory into the streaming APIs is not allowed
* as we will try to flush the memory through a different alias to that
* actually being used (and the flushes are redundant.)
*/
int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (!dma_is_direct(ops) && ops->get_sgtable)
return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
attrs);
return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
attrs);
if (dma_is_direct(ops))
return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr,
size, attrs);
if (!ops->get_sgtable)
return -ENXIO;
return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
}
EXPORT_SYMBOL(dma_get_sgtable_attrs);
......@@ -161,9 +173,11 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
(IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
(attrs & DMA_ATTR_NON_CONSISTENT)))
return prot;
if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT))
return arch_dma_mmap_pgprot(dev, prot, attrs);
return pgprot_noncached(prot);
#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
if (attrs & DMA_ATTR_WRITE_COMBINE)
return pgprot_writecombine(prot);
#endif
return pgprot_dmacoherent(prot);
}
#endif /* CONFIG_MMU */
......@@ -174,7 +188,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP
#ifdef CONFIG_MMU
unsigned long user_count = vma_pages(vma);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long off = vma->vm_pgoff;
......@@ -205,8 +219,29 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
user_count << PAGE_SHIFT, vma->vm_page_prot);
#else
return -ENXIO;
#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
#endif /* CONFIG_MMU */
}
/**
* dma_can_mmap - check if a given device supports dma_mmap_*
* @dev: device to check
*
* Returns %true if @dev supports dma_mmap_coherent() and dma_mmap_attrs() to
* map DMA allocations to userspace.
*/
bool dma_can_mmap(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_is_direct(ops)) {
return IS_ENABLED(CONFIG_MMU) &&
(dev_is_dma_coherent(dev) ||
IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN));
}
return ops->mmap != NULL;
}
EXPORT_SYMBOL_GPL(dma_can_mmap);
/**
* dma_mmap_attrs - map a coherent DMA allocation into user space
......@@ -227,31 +262,15 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (!dma_is_direct(ops) && ops->mmap)
return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
if (dma_is_direct(ops))
return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size,
attrs);
if (!ops->mmap)
return -ENXIO;
return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
}
EXPORT_SYMBOL(dma_mmap_attrs);
static u64 dma_default_get_required_mask(struct device *dev)
{
u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
u64 mask;
if (!high_totalram) {
/* convert to mask just covering totalram */
low_totalram = (1 << (fls(low_totalram) - 1));
low_totalram += low_totalram - 1;
mask = low_totalram;
} else {
high_totalram = (1 << (fls(high_totalram) - 1));
high_totalram += high_totalram - 1;
mask = (((u64)high_totalram) << 32) + 0xffffffff;
}
return mask;
}
u64 dma_get_required_mask(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
......@@ -260,7 +279,16 @@ u64 dma_get_required_mask(struct device *dev)
return dma_direct_get_required_mask(dev);
if (ops->get_required_mask)
return ops->get_required_mask(dev);
return dma_default_get_required_mask(dev);
/*
* We require every DMA ops implementation to at least support a 32-bit
* DMA mask (and use bounce buffering if that isn't supported in
* hardware). As the direct mapping code has its own routine to
* actually report an optimal mask we default to 32-bit here as that
* is the right thing for most IOMMUs, and at least not actively
* harmful in general.
*/
return DMA_BIT_MASK(32);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
......@@ -405,3 +433,14 @@ size_t dma_max_mapping_size(struct device *dev)
return size;
}
EXPORT_SYMBOL_GPL(dma_max_mapping_size);
unsigned long dma_get_merge_boundary(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (!ops || !ops->get_merge_boundary)
return 0; /* can't merge */
return ops->get_merge_boundary(dev);
}
EXPORT_SYMBOL_GPL(dma_get_merge_boundary);
......@@ -11,13 +11,21 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
struct page **dma_common_find_pages(void *cpu_addr)
{
struct vm_struct *area = find_vm_area(cpu_addr);
if (!area || area->flags != VM_DMA_COHERENT)
return NULL;
return area->pages;
}
static struct vm_struct *__dma_common_pages_remap(struct page **pages,
size_t size, unsigned long vm_flags, pgprot_t prot,
const void *caller)
size_t size, pgprot_t prot, const void *caller)
{
struct vm_struct *area;
area = get_vm_area_caller(size, vm_flags, caller);
area = get_vm_area_caller(size, VM_DMA_COHERENT, caller);
if (!area)
return NULL;
......@@ -34,12 +42,11 @@ static struct vm_struct *__dma_common_pages_remap(struct page **pages,
* Cannot be used in non-sleeping contexts
*/
void *dma_common_pages_remap(struct page **pages, size_t size,
unsigned long vm_flags, pgprot_t prot,
const void *caller)
pgprot_t prot, const void *caller)
{
struct vm_struct *area;
area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
area = __dma_common_pages_remap(pages, size, prot, caller);
if (!area)
return NULL;
......@@ -53,7 +60,6 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
* Cannot be used in non-sleeping contexts
*/
void *dma_common_contiguous_remap(struct page *page, size_t size,
unsigned long vm_flags,
pgprot_t prot, const void *caller)
{
int i;
......@@ -67,7 +73,7 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
for (i = 0; i < (size >> PAGE_SHIFT); i++)
pages[i] = nth_page(page, i);
area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
area = __dma_common_pages_remap(pages, size, prot, caller);
kfree(pages);
......@@ -79,11 +85,11 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
/*
* Unmaps a range previously mapped by dma_common_*_remap
*/
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
void dma_common_free_remap(void *cpu_addr, size_t size)
{
struct vm_struct *area = find_vm_area(cpu_addr);
struct page **pages = dma_common_find_pages(cpu_addr);
if (!area || (area->flags & vm_flags) != vm_flags) {
if (!pages) {
WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
return;
}
......@@ -105,7 +111,16 @@ static int __init early_coherent_pool(char *p)
}
early_param("coherent_pool", early_coherent_pool);
int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
static gfp_t dma_atomic_pool_gfp(void)
{
if (IS_ENABLED(CONFIG_ZONE_DMA))
return GFP_DMA;
if (IS_ENABLED(CONFIG_ZONE_DMA32))
return GFP_DMA32;
return GFP_KERNEL;
}
static int __init dma_atomic_pool_init(void)
{
unsigned int pool_size_order = get_order(atomic_pool_size);
unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
......@@ -117,7 +132,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
page = dma_alloc_from_contiguous(NULL, nr_pages,
pool_size_order, false);
else
page = alloc_pages(gfp, pool_size_order);
page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order);
if (!page)
goto out;
......@@ -127,8 +142,9 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
if (!atomic_pool)
goto free_page;
addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
prot, __builtin_return_address(0));
addr = dma_common_contiguous_remap(page, atomic_pool_size,
pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0));
if (!addr)
goto destroy_genpool;
......@@ -143,7 +159,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
return 0;
remove_mapping:
dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
dma_common_free_remap(addr, atomic_pool_size);
destroy_genpool:
gen_pool_destroy(atomic_pool);
atomic_pool = NULL;
......@@ -155,6 +171,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
atomic_pool_size / 1024);
return -ENOMEM;
}
postcore_initcall(dma_atomic_pool_init);
bool dma_in_atomic_pool(void *start, size_t size)
{
......@@ -217,7 +234,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
arch_dma_prep_coherent(page, size);
/* create a coherent mapping */
ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
ret = dma_common_contiguous_remap(page, size,
dma_pgprot(dev, PAGE_KERNEL, attrs),
__builtin_return_address(0));
if (!ret) {
......
......@@ -2993,7 +2993,7 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
if (!area)
return -EINVAL;
if (!(area->flags & VM_USERMAP))
if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
return -EINVAL;
if (kaddr + size > area->addr + get_vm_area_size(area))
......@@ -3496,6 +3496,9 @@ static int s_show(struct seq_file *m, void *p)
if (v->flags & VM_USERMAP)
seq_puts(m, " user");
if (v->flags & VM_DMA_COHERENT)
seq_puts(m, " dma-coherent");
if (is_vmalloc_addr(v->pages))
seq_puts(m, " vpages");
......
......@@ -220,13 +220,12 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream)
{
if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP))
return false;
/* architecture supports dma_mmap_coherent()? */
#if defined(CONFIG_ARCH_NO_COHERENT_DMA_MMAP) || !defined(CONFIG_HAS_DMA)
if (!substream->ops->mmap &&
substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV)
return false;
#endif
return true;
if (substream->ops->mmap ||
substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV)
return true;
return dma_can_mmap(substream->dma_buffer.dev.dev);
}
static int constrain_mask_params(struct snd_pcm_substream *substream,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment