Commit daa12112 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'dma-mapping-6.10-2024-05-20' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - optimize DMA sync calls when they are no-ops (Alexander Lobakin)

 - fix swiotlb padding for untrusted devices (Michael Kelley)

 - add documentation for swiotb (Michael Kelley)

* tag 'dma-mapping-6.10-2024-05-20' of git://git.infradead.org/users/hch/dma-mapping:
  dma: fix DMA sync for drivers not calling dma_set_mask*()
  xsk: use generic DMA sync shortcut instead of a custom one
  page_pool: check for DMA sync shortcut earlier
  page_pool: don't use driver-set flags field directly
  page_pool: make sure frag API fields don't span between cachelines
  iommu/dma: avoid expensive indirect calls for sync operations
  dma: avoid redundant calls for sync operations
  dma: compile-out DMA sync op calls when not used
  iommu/dma: fix zeroing of bounce buffer padding used by untrusted devices
  swiotlb: remove alloc_size argument to swiotlb_tbl_map_single()
  Documentation/core-api: add swiotlb documentation
parents 6e51b4b5 a6016aac
......@@ -102,6 +102,7 @@ more memory-management documentation in Documentation/mm/index.rst.
dma-api-howto
dma-attributes
dma-isa-lpc
swiotlb
mm-api
genalloc
pin_user_pages
......
This diff is collapsed.
......@@ -1152,9 +1152,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
*/
if (dev_use_swiotlb(dev, size, dir) &&
iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size, aligned_size;
if (!is_swiotlb_active(dev)) {
dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
return DMA_MAPPING_ERROR;
......@@ -1162,24 +1159,30 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
trace_swiotlb_bounced(dev, phys, size);
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
phys = swiotlb_tbl_map_single(dev, phys, size,
iova_mask(iovad), dir, attrs);
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
/* Cleanup the padding area. */
padding_start = phys_to_virt(phys);
padding_size = aligned_size;
/*
* Untrusted devices should not see padding areas with random
* leftover kernel data, so zero the pre- and post-padding.
* swiotlb_tbl_map_single() has initialized the bounce buffer
* proper to the contents of the original memory buffer.
*/
if (dev_is_untrusted(dev)) {
size_t start, virt = (size_t)phys_to_virt(phys);
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
padding_start += size;
padding_size -= size;
}
/* Pre-padding */
start = iova_align_down(iovad, virt);
memset((void *)start, 0, virt - start);
memset(padding_start, 0, padding_size);
/* Post-padding */
start = virt + size;
memset((void *)start, 0,
iova_align(iovad, start) - start);
}
}
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
......@@ -1718,7 +1721,8 @@ static size_t iommu_dma_max_mapping_size(struct device *dev)
}
static const struct dma_map_ops iommu_dma_ops = {
.flags = DMA_F_PCI_P2PDMA_SUPPORTED,
.flags = DMA_F_PCI_P2PDMA_SUPPORTED |
DMA_F_CAN_SKIP_SYNC,
.alloc = iommu_dma_alloc,
.free = iommu_dma_free,
.alloc_pages_op = dma_common_alloc_pages,
......
......@@ -1587,7 +1587,7 @@ static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi,
length = __le32_to_cpu(entry->desc_wb->properties) &
TSNEP_DESC_LENGTH_MASK;
xsk_buff_set_size(entry->xdp, length - ETH_FCS_LEN);
xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool);
xsk_buff_dma_sync_for_cpu(entry->xdp);
/* RX metadata with timestamps is in front of actual data,
* subtract metadata size to get length of actual data and
......
......@@ -55,7 +55,7 @@ static u32 dpaa2_xsk_run_xdp(struct dpaa2_eth_priv *priv,
xdp_set_data_meta_invalid(xdp_buff);
xdp_buff->rxq = &ch->xdp_rxq;
xsk_buff_dma_sync_for_cpu(xdp_buff, ch->xsk_pool);
xsk_buff_dma_sync_for_cpu(xdp_buff);
xdp_act = bpf_prog_run_xdp(xdp_prog, xdp_buff);
/* xdp.data pointer may have changed */
......
......@@ -482,7 +482,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
bi = *i40e_rx_bi(rx_ring, next_to_process);
xsk_buff_set_size(bi, size);
xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);
xsk_buff_dma_sync_for_cpu(bi);
if (!first)
first = bi;
......
......@@ -878,7 +878,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
ICE_RX_FLX_DESC_PKT_LEN_M;
xsk_buff_set_size(xdp, size);
xsk_buff_dma_sync_for_cpu(xdp, xsk_pool);
xsk_buff_dma_sync_for_cpu(xdp);
if (!first) {
first = xdp;
......
......@@ -2812,7 +2812,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
}
bi->xdp->data_end = bi->xdp->data + size;
xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
xsk_buff_dma_sync_for_cpu(bi->xdp);
res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
switch (res) {
......
......@@ -303,7 +303,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
}
bi->xdp->data_end = bi->xdp->data + size;
xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool);
xsk_buff_dma_sync_for_cpu(bi->xdp);
xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);
if (likely(xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))) {
......
......@@ -270,7 +270,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
mxbuf->cqe = cqe;
xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp);
net_prefetch(mxbuf->xdp.data);
/* Possible flows:
......@@ -319,7 +319,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
mxbuf->cqe = cqe;
xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp);
net_prefetch(mxbuf->xdp.data);
prog = rcu_dereference(rq->xdp_prog);
......
......@@ -917,7 +917,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
if (!rq->xsk_pool) {
count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk);
} else if (likely(!rq->xsk_pool->dma_need_sync)) {
} else if (likely(!dma_dev_need_sync(rq->pdev))) {
mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk);
count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
} else {
......
......@@ -184,7 +184,7 @@ nfp_nfd3_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget,
xrxbuf->xdp->data += meta_len;
xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len;
xdp_set_data_meta_invalid(xrxbuf->xdp);
xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool);
xsk_buff_dma_sync_for_cpu(xrxbuf->xdp);
net_prefetch(xrxbuf->xdp->data);
if (meta_len) {
......
......@@ -5361,7 +5361,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
/* RX buffer is good and fit into a XSK pool buffer */
buf->xdp->data_end = buf->xdp->data + buf1_len;
xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool);
xsk_buff_dma_sync_for_cpu(buf->xdp);
prog = READ_ONCE(priv->xdp_prog);
res = __stmmac_xdp_run_prog(priv, prog, buf->xdp);
......
......@@ -216,7 +216,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
*/
trace_swiotlb_bounced(dev, dev_addr, size);
map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
map = swiotlb_tbl_map_single(dev, phys, size, 0, dir, attrs);
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
......
......@@ -691,6 +691,7 @@ struct device_physical_location {
* and optionall (if the coherent mask is large enough) also
* for dma allocations. This flag is managed by the dma ops
* instance from ->dma_supported.
* @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
*
* At the lowest level, every device in a Linux system is represented by an
* instance of struct device. The device structure contains the information
......@@ -803,6 +804,9 @@ struct device {
#ifdef CONFIG_DMA_OPS_BYPASS
bool dma_ops_bypass : 1;
#endif
#ifdef CONFIG_DMA_NEED_SYNC
bool dma_skip_sync:1;
#endif
};
/**
......
......@@ -18,8 +18,11 @@ struct iommu_ops;
*
* DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can
* handle PCI P2PDMA pages in the map_sg/unmap_sg operation.
* DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is
* coherent and it's not an SWIOTLB buffer.
*/
#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0)
#define DMA_F_CAN_SKIP_SYNC (1 << 1)
struct dma_map_ops {
unsigned int flags;
......@@ -273,6 +276,15 @@ static inline bool dev_is_dma_coherent(struct device *dev)
}
#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */
static inline void dma_reset_need_sync(struct device *dev)
{
#ifdef CONFIG_DMA_NEED_SYNC
/* Reset it only once so that the function can be called on hotpath */
if (unlikely(dev->dma_skip_sync))
dev->dma_skip_sync = false;
#endif
}
/*
* Check whether potential kmalloc() buffers are safe for non-coherent DMA.
*/
......
......@@ -117,14 +117,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs);
void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs);
void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir);
void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir);
void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir);
void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir);
void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flag, unsigned long attrs);
void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
......@@ -147,7 +139,6 @@ u64 dma_get_required_mask(struct device *dev);
bool dma_addressing_limited(struct device *dev);
size_t dma_max_mapping_size(struct device *dev);
size_t dma_opt_mapping_size(struct device *dev);
bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
unsigned long dma_get_merge_boundary(struct device *dev);
struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
......@@ -195,22 +186,6 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
}
static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
}
static inline void dma_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
}
static inline void dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
{
}
static inline void dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
{
}
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return -ENOMEM;
......@@ -277,10 +252,6 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
{
return 0;
}
static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return false;
}
static inline unsigned long dma_get_merge_boundary(struct device *dev)
{
return 0;
......@@ -310,6 +281,82 @@ static inline int dma_mmap_noncontiguous(struct device *dev,
}
#endif /* CONFIG_HAS_DMA */
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir);
void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir);
void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir);
void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir);
bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
static inline bool dma_dev_need_sync(const struct device *dev)
{
/* Always call DMA sync operations when debugging is enabled */
return !dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG);
}
static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
if (dma_dev_need_sync(dev))
__dma_sync_single_for_cpu(dev, addr, size, dir);
}
static inline void dma_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
if (dma_dev_need_sync(dev))
__dma_sync_single_for_device(dev, addr, size, dir);
}
static inline void dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
{
if (dma_dev_need_sync(dev))
__dma_sync_sg_for_cpu(dev, sg, nelems, dir);
}
static inline void dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
{
if (dma_dev_need_sync(dev))
__dma_sync_sg_for_device(dev, sg, nelems, dir);
}
static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return dma_dev_need_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
}
#else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */
static inline bool dma_dev_need_sync(const struct device *dev)
{
return false;
}
static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
}
static inline void dma_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
}
static inline void dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
{
}
static inline void dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
{
}
static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return false;
}
#endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */
struct page *dma_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp);
void dma_free_pages(struct device *dev, size_t size, struct page *page,
......
......@@ -65,6 +65,11 @@ static inline size_t iova_align(struct iova_domain *iovad, size_t size)
return ALIGN(size, iovad->granule);
}
static inline size_t iova_align_down(struct iova_domain *iovad, size_t size)
{
return ALIGN_DOWN(size, iovad->granule);
}
static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova)
{
return (dma_addr_t)iova->pfn_lo << iova_shift(iovad);
......
......@@ -43,7 +43,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
extern void __init swiotlb_update_mem_attributes(void);
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size, size_t alloc_size,
size_t mapping_size,
unsigned int alloc_aligned_mask, enum dma_data_direction dir,
unsigned long attrs);
......
......@@ -45,7 +45,6 @@ struct pp_alloc_cache {
/**
* struct page_pool_params - page pool parameters
* @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
* @order: 2^order pages on allocation
* @pool_size: size of the ptr_ring
* @nid: NUMA node id to allocate from pages from
......@@ -55,10 +54,11 @@ struct pp_alloc_cache {
* @dma_dir: DMA mapping direction
* @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
* @netdev: corresponding &net_device for Netlink introspection
* @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL
*/
struct page_pool_params {
struct_group_tagged(page_pool_params_fast, fast,
unsigned int flags;
unsigned int order;
unsigned int pool_size;
int nid;
......@@ -70,6 +70,7 @@ struct page_pool_params {
);
struct_group_tagged(page_pool_params_slow, slow,
struct net_device *netdev;
unsigned int flags;
/* private: used by test code only */
void (*init_callback)(struct page *page, void *arg);
void *init_arg;
......@@ -130,12 +131,28 @@ struct page_pool {
struct page_pool_params_fast p;
int cpuid;
bool has_init_callback;
u32 pages_state_hold_cnt;
bool has_init_callback:1; /* slow::init_callback is set */
bool dma_map:1; /* Perform DMA mapping */
bool dma_sync:1; /* Perform DMA sync */
#ifdef CONFIG_PAGE_POOL_STATS
bool system:1; /* This is a global percpu pool */
#endif
/* The following block must stay within one cacheline. On 32-bit
* systems, sizeof(long) == sizeof(int), so that the block size is
* ``3 * sizeof(long)``. On 64-bit systems, the actual size is
* ``2 * sizeof(long) + sizeof(int)``. The closest pow-2 to both of
* them is ``4 * sizeof(long)``, so just use that one for simplicity.
* Having it aligned to a cacheline boundary may be excessive and
* doesn't bring any good.
*/
__cacheline_group_begin(frag) __aligned(4 * sizeof(long));
long frag_users;
struct page *frag_page;
unsigned int frag_offset;
u32 pages_state_hold_cnt;
__cacheline_group_end(frag);
struct delayed_work release_dw;
void (*disconnect)(void *pool);
......
......@@ -219,13 +219,10 @@ static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool
return meta;
}
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
if (!pool->dma_need_sync)
return;
xp_dma_sync_for_cpu(xskb);
}
......@@ -402,7 +399,7 @@ static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool
return NULL;
}
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
{
}
......
......@@ -43,7 +43,6 @@ struct xsk_dma_map {
refcount_t users;
struct list_head list; /* Protected by the RTNL_LOCK */
u32 dma_pages_cnt;
bool dma_need_sync;
};
struct xsk_buff_pool {
......@@ -82,7 +81,6 @@ struct xsk_buff_pool {
u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
bool unaligned;
bool tx_sw_csum;
void *addrs;
......@@ -155,21 +153,17 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
return xskb->frame_dma;
}
void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
{
xp_dma_sync_for_cpu_slow(xskb);
dma_sync_single_for_cpu(xskb->pool->dev, xskb->dma,
xskb->pool->frame_len,
DMA_BIDIRECTIONAL);
}
void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
size_t size);
static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_addr_t dma, size_t size)
{
if (!pool->dma_need_sync)
return;
xp_dma_sync_for_device_slow(pool, dma, size);
dma_sync_single_for_device(pool->dev, dma, size, DMA_BIDIRECTIONAL);
}
/* Masks for xdp_umem_page flags.
......
......@@ -107,6 +107,11 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
bool
depends on SWIOTLB
config DMA_NEED_SYNC
def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || DMA_OPS || \
SWIOTLB
config DMA_RESTRICTED_POOL
bool "DMA Restricted Pool"
depends on OF && OF_RESERVED_MEM && SWIOTLB
......
......@@ -329,7 +329,8 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
}
EXPORT_SYMBOL(dma_unmap_resource);
void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
#ifdef CONFIG_DMA_NEED_SYNC
void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
......@@ -341,9 +342,9 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
ops->sync_single_for_cpu(dev, addr, size, dir);
debug_dma_sync_single_for_cpu(dev, addr, size, dir);
}
EXPORT_SYMBOL(dma_sync_single_for_cpu);
EXPORT_SYMBOL(__dma_sync_single_for_cpu);
void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
......@@ -355,9 +356,9 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
ops->sync_single_for_device(dev, addr, size, dir);
debug_dma_sync_single_for_device(dev, addr, size, dir);
}
EXPORT_SYMBOL(dma_sync_single_for_device);
EXPORT_SYMBOL(__dma_sync_single_for_device);
void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
......@@ -369,9 +370,9 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
ops->sync_sg_for_cpu(dev, sg, nelems, dir);
debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
}
EXPORT_SYMBOL(dma_sync_sg_for_cpu);
EXPORT_SYMBOL(__dma_sync_sg_for_cpu);
void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
......@@ -383,7 +384,47 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
ops->sync_sg_for_device(dev, sg, nelems, dir);
debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
}
EXPORT_SYMBOL(dma_sync_sg_for_device);
EXPORT_SYMBOL(__dma_sync_sg_for_device);
bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_map_direct(dev, ops))
/*
* dma_skip_sync could've been reset on first SWIOTLB buffer
* mapping, but @dma_addr is not necessary an SWIOTLB buffer.
* In this case, fall back to more granular check.
*/
return dma_direct_need_sync(dev, dma_addr);
return true;
}
EXPORT_SYMBOL_GPL(__dma_need_sync);
static void dma_setup_need_sync(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC))
/*
* dma_skip_sync will be reset to %false on first SWIOTLB buffer
* mapping, if any. During the device initialization, it's
* enough to check only for the DMA coherence.
*/
dev->dma_skip_sync = dev_is_dma_coherent(dev);
else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu &&
!ops->sync_sg_for_device && !ops->sync_sg_for_cpu)
/*
* Synchronization is not possible when none of DMA sync ops
* is set.
*/
dev->dma_skip_sync = true;
else
dev->dma_skip_sync = false;
}
#else /* !CONFIG_DMA_NEED_SYNC */
static inline void dma_setup_need_sync(struct device *dev) { }
#endif /* !CONFIG_DMA_NEED_SYNC */
/*
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
......@@ -773,6 +814,8 @@ int dma_set_mask(struct device *dev, u64 mask)
arch_dma_set_mask(dev, mask);
*dev->dma_mask = mask;
dma_setup_need_sync(dev);
return 0;
}
EXPORT_SYMBOL(dma_set_mask);
......@@ -841,16 +884,6 @@ size_t dma_opt_mapping_size(struct device *dev)
}
EXPORT_SYMBOL_GPL(dma_opt_mapping_size);
bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_map_direct(dev, ops))
return dma_direct_need_sync(dev, dma_addr);
return ops->sync_single_for_cpu || ops->sync_single_for_device;
}
EXPORT_SYMBOL_GPL(dma_need_sync);
unsigned long dma_get_merge_boundary(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
......
......@@ -1340,15 +1340,40 @@ static unsigned long mem_used(struct io_tlb_mem *mem)
#endif /* CONFIG_DEBUG_FS */
/**
* swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area
* @dev: Device which maps the buffer.
* @orig_addr: Original (non-bounced) physical IO buffer address
* @mapping_size: Requested size of the actual bounce buffer, excluding
* any pre- or post-padding for alignment
* @alloc_align_mask: Required start and end alignment of the allocated buffer
* @dir: DMA direction
* @attrs: Optional DMA attributes for the map operation
*
* Find and allocate a suitable sequence of IO TLB slots for the request.
* The allocated space starts at an alignment specified by alloc_align_mask,
* and the size of the allocated space is rounded up so that the total amount
* of allocated space is a multiple of (alloc_align_mask + 1). If
* alloc_align_mask is zero, the allocated space may be at any alignment and
* the size is not rounded up.
*
* The returned address is within the allocated space and matches the bits
* of orig_addr that are specified in the DMA min_align_mask for the device. As
* such, this returned address may be offset from the beginning of the allocated
* space. The bounce buffer space starting at the returned address for
* mapping_size bytes is initialized to the contents of the original IO buffer
* area. Any pre-padding (due to an offset) and any post-padding (due to
* rounding-up the size) is not initialized.
*/
phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
unsigned int alloc_align_mask, enum dma_data_direction dir,
unsigned long attrs)
size_t mapping_size, unsigned int alloc_align_mask,
enum dma_data_direction dir, unsigned long attrs)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
unsigned int offset;
struct io_tlb_pool *pool;
unsigned int i;
size_t size;
int index;
phys_addr_t tlb_addr;
unsigned short pad_slots;
......@@ -1362,23 +1387,33 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
if (mapping_size > alloc_size) {
dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
mapping_size, alloc_size);
return (phys_addr_t)DMA_MAPPING_ERROR;
}
/*
* The default swiotlb memory pool is allocated with PAGE_SIZE
* alignment. If a mapping is requested with larger alignment,
* the mapping may be unable to use the initial slot(s) in all
* sets of IO_TLB_SEGSIZE slots. In such case, a mapping request
* of or near the maximum mapping size would always fail.
*/
dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK,
"Alloc alignment may prevent fulfilling requests with max mapping_size\n");
offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
index = swiotlb_find_slots(dev, orig_addr,
alloc_size + offset, alloc_align_mask, &pool);
size = ALIGN(mapping_size + offset, alloc_align_mask + 1);
index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool);
if (index == -1) {
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
alloc_size, mem->nslabs, mem_used(mem));
size, mem->nslabs, mem_used(mem));
return (phys_addr_t)DMA_MAPPING_ERROR;
}
/*
* If dma_skip_sync was set, reset it on first SWIOTLB buffer
* mapping to always sync SWIOTLB buffers.
*/
dma_reset_need_sync(dev);
/*
* Save away the mapping from the original address to the DMA address.
* This is needed when we sync the memory. Then we sync the buffer if
......@@ -1388,7 +1423,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
offset &= (IO_TLB_SIZE - 1);
index += pad_slots;
pool->slots[index].pad_slots = pad_slots;
for (i = 0; i < nr_slots(alloc_size + offset); i++)
for (i = 0; i < (nr_slots(size) - pad_slots); i++)
pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(pool->start, index) + offset;
/*
......@@ -1543,8 +1578,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size);
swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
attrs);
swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, 0, dir, attrs);
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
......
......@@ -173,19 +173,29 @@ static void page_pool_producer_unlock(struct page_pool *pool,
spin_unlock_bh(&pool->ring.producer_lock);
}
static void page_pool_struct_check(void)
{
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long));
}
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params,
int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
page_pool_struct_check();
memcpy(&pool->p, &params->fast, sizeof(pool->p));
memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
pool->cpuid = cpuid;
/* Validate only known flags were used */
if (pool->p.flags & ~(PP_FLAG_ALL))
if (pool->slow.flags & ~PP_FLAG_ALL)
return -EINVAL;
if (pool->p.pool_size)
......@@ -199,22 +209,26 @@ static int page_pool_init(struct page_pool *pool,
* DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
* which is the XDP_TX use-case.
*/
if (pool->p.flags & PP_FLAG_DMA_MAP) {
if (pool->slow.flags & PP_FLAG_DMA_MAP) {
if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
return -EINVAL;
pool->dma_map = true;
}
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
/* In order to request DMA-sync-for-device the page
* needs to be mapped
*/
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
if (!(pool->slow.flags & PP_FLAG_DMA_MAP))
return -EINVAL;
if (!pool->p.max_len)
return -EINVAL;
pool->dma_sync = true;
/* pool->p.offset has to be set according to the address
* offset used by the DMA engine to start copying rx data
*/
......@@ -223,7 +237,7 @@ static int page_pool_init(struct page_pool *pool,
pool->has_init_callback = !!pool->slow.init_callback;
#ifdef CONFIG_PAGE_POOL_STATS
if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) {
if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) {
pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
if (!pool->recycle_stats)
return -ENOMEM;
......@@ -233,12 +247,13 @@ static int page_pool_init(struct page_pool *pool,
* (also percpu) page pool instance.
*/
pool->recycle_stats = &pp_system_recycle_stats;
pool->system = true;
}
#endif
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
#ifdef CONFIG_PAGE_POOL_STATS
if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
if (!pool->system)
free_percpu(pool->recycle_stats);
#endif
return -ENOMEM;
......@@ -249,7 +264,7 @@ static int page_pool_init(struct page_pool *pool,
/* Driver calling page_pool_create() also call page_pool_destroy() */
refcount_set(&pool->user_cnt, 1);
if (pool->p.flags & PP_FLAG_DMA_MAP)
if (pool->dma_map)
get_device(pool->p.dev);
return 0;
......@@ -259,11 +274,11 @@ static void page_pool_uninit(struct page_pool *pool)
{
ptr_ring_cleanup(&pool->ring, NULL);
if (pool->p.flags & PP_FLAG_DMA_MAP)
if (pool->dma_map)
put_device(pool->p.dev);
#ifdef CONFIG_PAGE_POOL_STATS
if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
if (!pool->system)
free_percpu(pool->recycle_stats);
#endif
}
......@@ -384,16 +399,26 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
return page;
}
static void page_pool_dma_sync_for_device(const struct page_pool *pool,
static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
const struct page *page,
unsigned int dma_sync_size)
u32 dma_sync_size)
{
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
dma_addr_t dma_addr = page_pool_get_dma_addr(page);
dma_sync_size = min(dma_sync_size, pool->p.max_len);
dma_sync_single_range_for_device(pool->p.dev, dma_addr,
pool->p.offset, dma_sync_size,
pool->p.dma_dir);
__dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
dma_sync_size, pool->p.dma_dir);
#endif
}
static __always_inline void
page_pool_dma_sync_for_device(const struct page_pool *pool,
const struct page *page,
u32 dma_sync_size)
{
if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
__page_pool_dma_sync_for_device(pool, page, dma_sync_size);
}
static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
......@@ -415,7 +440,6 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
if (page_pool_set_dma_addr(page, dma))
goto unmap_failed;
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
return true;
......@@ -461,8 +485,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
if (unlikely(!page))
return NULL;
if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
unlikely(!page_pool_dma_map(pool, page))) {
if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) {
put_page(page);
return NULL;
}
......@@ -482,8 +505,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
gfp_t gfp)
{
const int bulk = PP_ALLOC_CACHE_REFILL;
unsigned int pp_flags = pool->p.flags;
unsigned int pp_order = pool->p.order;
bool dma_map = pool->dma_map;
struct page *page;
int i, nr_pages;
......@@ -508,8 +531,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
*/
for (i = 0; i < nr_pages; i++) {
page = pool->alloc.cache[i];
if ((pp_flags & PP_FLAG_DMA_MAP) &&
unlikely(!page_pool_dma_map(pool, page))) {
if (dma_map && unlikely(!page_pool_dma_map(pool, page))) {
put_page(page);
continue;
}
......@@ -582,7 +604,7 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
if (!pool->dma_map)
/* Always account for inflight pages, even if we didn't
* map them
*/
......@@ -665,7 +687,7 @@ static bool __page_pool_page_can_be_recycled(const struct page *page)
}
/* If the page refcnt == 1, this will try to recycle the page.
* if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
* If pool->dma_sync is set, we'll try to sync the DMA area for
* the configured size min(dma_sync_size, pool->max_len).
* If the page refcnt != 1, then the page will be returned to memory
* subsystem.
......@@ -688,9 +710,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
if (likely(__page_pool_page_can_be_recycled(page))) {
/* Read barrier done in page_ref_count / READ_ONCE */
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page,
dma_sync_size);
page_pool_dma_sync_for_device(pool, page, dma_sync_size);
if (allow_direct && page_pool_recycle_in_cache(page, pool))
return NULL;
......@@ -829,9 +849,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
return NULL;
if (__page_pool_page_can_be_recycled(page)) {
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page, -1);
return page;
}
......
......@@ -338,7 +338,6 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
dma_map->netdev = netdev;
dma_map->dev = dev;
dma_map->dma_need_sync = false;
dma_map->dma_pages_cnt = nr_pages;
refcount_set(&dma_map->users, 1);
list_add(&dma_map->list, &umem->xsk_dma_list);
......@@ -424,7 +423,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
pool->dev = dma_map->dev;
pool->dma_pages_cnt = dma_map->dma_pages_cnt;
pool->dma_need_sync = dma_map->dma_need_sync;
memcpy(pool->dma_pages, dma_map->dma_pages,
pool->dma_pages_cnt * sizeof(*pool->dma_pages));
......@@ -460,8 +458,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
__xp_dma_unmap(dma_map, attrs);
return -ENOMEM;
}
if (dma_need_sync(dev, dma))
dma_map->dma_need_sync = true;
dma_map->dma_pages[i] = dma;
}
......@@ -557,11 +553,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data_meta = xskb->xdp.data;
xskb->xdp.flags = 0;
if (pool->dma_need_sync) {
dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
pool->frame_len,
DMA_BIDIRECTIONAL);
}
if (pool->dev)
xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);
return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);
......@@ -633,7 +627,7 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
u32 nb_entries1 = 0, nb_entries2;
if (unlikely(pool->dma_need_sync)) {
if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) {
struct xdp_buff *buff;
/* Slow path */
......@@ -693,18 +687,3 @@ dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
(addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);
void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
{
dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
xskb->pool->frame_len, DMA_BIDIRECTIONAL);
}
EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
size_t size)
{
dma_sync_single_range_for_device(pool->dev, dma, 0,
size, DMA_BIDIRECTIONAL);
}
EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment