Commit 07da1223 authored by Maor Gottlieb, committed by Jason Gunthorpe

lib/scatterlist: Add support in dynamic allocation of SG table from pages

Extend __sg_alloc_table_from_pages to support dynamic allocation of an
SG table from pages. It should be used by drivers that cannot supply
all the pages at once.

The function now returns the last populated SGE in the table. On the
second and subsequent calls, users should pass this SGE back in as an
argument. As before, nents will equal the number of populated SGEs (chunks).

With this new extension, drivers can benefit from the optimization of
merging contiguous pages without needing to allocate all the pages in
advance and hold them in a large buffer.

E.g. the InfiniBand driver can allocate a single page to hold the page
pointers. For a 1TB memory registration, the temporary buffer would then
consume only 4KB instead of 2GB (1TB spans 256M 4KB pages, whose pointers
occupy 8 bytes each).
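
To illustrate the calling convention, here is a minimal usage sketch (not
part of this commit; total_pages, CHUNK_NR and chunk_pages() are
hypothetical stand-ins for a driver's page-pinning loop):

	struct sg_table sgt = {};
	struct scatterlist *last = NULL;	/* NULL on the first call */
	unsigned long left = total_pages;	/* pages still to be added */

	while (left) {
		unsigned int nr = min_t(unsigned long, left, CHUNK_NR);
		struct page **pages = chunk_pages(nr);	/* next pinned chunk */

		left -= nr;
		/* From the second call on, pass the previously returned SGE */
		last = __sg_alloc_table_from_pages(&sgt, pages, nr, 0,
				(unsigned long)nr << PAGE_SHIFT,
				SCATTERLIST_MAX_SEGMENT, last, left,
				GFP_KERNEL);
		if (IS_ERR(last)) {
			sg_free_table(&sgt);	/* free leftover allocations */
			return PTR_ERR(last);
		}
	}

Since left_pages reaches zero only on the final call, the table is
terminated (sg_mark_end) exactly once, after the last chunk is added.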

Link: https://lore.kernel.org/r/20201004154340.1080481-2-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 29d88681
drivers/gpu/drm/i915/gem/i915_gem_userptr.c

@@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 	unsigned int max_segment = i915_sg_segment_size();
 	struct sg_table *st;
 	unsigned int sg_page_sizes;
+	struct scatterlist *sg;
 	int ret;

 	st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 		return ERR_PTR(-ENOMEM);

 alloc_table:
-	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
-					  0, num_pages << PAGE_SHIFT,
-					  max_segment,
-					  GFP_KERNEL);
-	if (ret) {
+	sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
+					 num_pages << PAGE_SHIFT, max_segment,
+					 NULL, 0, GFP_KERNEL);
+	if (IS_ERR(sg)) {
 		kfree(st);
-		return ERR_PTR(ret);
+		return ERR_CAST(sg);
 	}

 	ret = i915_gem_gtt_prepare_pages(obj, st);
...
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c

@@ -419,6 +419,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 	int ret = 0;
 	static size_t sgl_size;
 	static size_t sgt_size;
+	struct scatterlist *sg;

 	if (vmw_tt->mapped)
 		return 0;
@@ -441,13 +442,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 		if (unlikely(ret != 0))
 			return ret;

-		ret = __sg_alloc_table_from_pages
-			(&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
-			 (unsigned long) vsgt->num_pages << PAGE_SHIFT,
-			 dma_get_max_seg_size(dev_priv->dev->dev),
-			 GFP_KERNEL);
-		if (unlikely(ret != 0))
+		sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
+				vsgt->num_pages, 0,
+				(unsigned long) vsgt->num_pages << PAGE_SHIFT,
+				dma_get_max_seg_size(dev_priv->dev->dev),
+				NULL, 0, GFP_KERNEL);
+		if (IS_ERR(sg)) {
+			ret = PTR_ERR(sg);
 			goto out_sg_alloc_fail;
+		}

 		if (vsgt->num_pages > vmw_tt->sgt.nents) {
 			uint64_t over_alloc =
...
include/linux/scatterlist.h

@@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 #define for_each_sgtable_dma_sg(sgt, sg, i)	\
 	for_each_sg((sgt)->sgl, sg, (sgt)->nents, i)

+static inline void __sg_chain(struct scatterlist *chain_sg,
+			      struct scatterlist *sgl)
+{
+	/*
+	 * offset and length are unused for chain entry. Clear them.
+	 */
+	chain_sg->offset = 0;
+	chain_sg->length = 0;
+
+	/*
+	 * Set lowest bit to indicate a link pointer, and make sure to clear
+	 * the termination bit if it happens to be set.
+	 */
+	chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END;
+}
+
 /**
  * sg_chain - Chain two sglists together
  * @prv:	First scatterlist
@@ -178,18 +194,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
 			    struct scatterlist *sgl)
 {
-	/*
-	 * offset and length are unused for chain entry. Clear them.
-	 */
-	prv[prv_nents - 1].offset = 0;
-	prv[prv_nents - 1].length = 0;
-
-	/*
-	 * Set lowest bit to indicate a link pointer, and make sure to clear
-	 * the termination bit if it happens to be set.
-	 */
-	prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN)
-					& ~SG_END;
+	__sg_chain(&prv[prv_nents - 1], sgl);
 }

 /**
@@ -286,9 +291,10 @@ void sg_free_table(struct sg_table *);
 int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
 		     struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *);
 int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
-				unsigned int n_pages, unsigned int offset,
-				unsigned long size, unsigned int max_segment,
-				gfp_t gfp_mask);
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+		struct page **pages, unsigned int n_pages, unsigned int offset,
+		unsigned long size, unsigned int max_segment,
+		struct scatterlist *prv, unsigned int left_pages,
+		gfp_t gfp_mask);
 int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 			      unsigned int n_pages, unsigned int offset,
...
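
As context for the __sg_chain() factoring above: chaining stays invisible to
consumers, because sg_next() and the iteration macros follow the SG_CHAIN
links for them. A hedged sketch, assuming a populated struct sg_table sgt:

	struct scatterlist *s;
	unsigned int i;

	/* sg_next() transparently dereferences SG_CHAIN link entries */
	for_each_sg(sgt.sgl, s, sgt.nents, i)
		pr_debug("segment %u: length %u\n", i, s->length);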
lib/scatterlist.c

@@ -365,6 +365,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(sg_alloc_table);

+static struct scatterlist *get_next_sg(struct sg_table *table,
+				       struct scatterlist *cur,
+				       unsigned long needed_sges,
+				       gfp_t gfp_mask)
+{
+	struct scatterlist *new_sg, *next_sg;
+	unsigned int alloc_size;
+
+	if (cur) {
+		next_sg = sg_next(cur);
+		/* Check if the last entry should be kept for chaining */
+		if (!sg_is_last(next_sg) || needed_sges == 1)
+			return next_sg;
+	}
+
+	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
+	new_sg = sg_kmalloc(alloc_size, gfp_mask);
+	if (!new_sg)
+		return ERR_PTR(-ENOMEM);
+	sg_init_table(new_sg, alloc_size);
+	if (cur) {
+		__sg_chain(next_sg, new_sg);
+		table->orig_nents += alloc_size - 1;
+	} else {
+		table->sgl = new_sg;
+		table->orig_nents = alloc_size;
+		table->nents = 0;
+	}
+	return new_sg;
+}
+
 /**
  * __sg_alloc_table_from_pages - Allocate and initialize an sg table from
  *				 an array of pages
@@ -374,29 +405,63 @@ EXPORT_SYMBOL(sg_alloc_table);
  * @offset:	 Offset from start of the first page to the start of a buffer
  * @size:	 Number of valid bytes in the buffer (after offset)
  * @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @prv:	 Last populated SGE in sgt, or NULL on the first call
+ * @left_pages:	 Number of pages the caller will add after this call
  * @gfp_mask:	 GFP allocation mask
  *
  * Description:
- *    Allocate and initialize an sg table from a list of pages. Contiguous
- *    ranges of the pages are squashed into a single scatterlist node up to the
- *    maximum size specified in @max_segment. An user may provide an offset at a
- *    start and a size of valid data in a buffer specified by the page array.
- *    The returned sg table is released by sg_free_table.
+ *    If @prv is NULL, allocate and initialize an sg table from a list of
+ *    pages; otherwise, continue populating the scatterlist passed in @prv.
+ *    Contiguous ranges of the pages are squashed into a single scatterlist
+ *    entry up to the maximum size specified in @max_segment. A user may
+ *    provide an offset at the start and a size of valid data in a buffer
+ *    specified by the page array.
 *
 * Returns:
- *   0 on success, negative error on failure
+ *   The last populated SGE in sgt on success; an ERR_PTR() value on failure.
+ *   The allocation in @sgt must be released by sg_free_table.
+ *
+ * Notes:
+ *   If this function returns an error, the caller must call sg_free_table()
+ *   to clean up any leftover allocations.
 */
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
-				unsigned int n_pages, unsigned int offset,
-				unsigned long size, unsigned int max_segment,
-				gfp_t gfp_mask)
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+		struct page **pages, unsigned int n_pages, unsigned int offset,
+		unsigned long size, unsigned int max_segment,
+		struct scatterlist *prv, unsigned int left_pages,
+		gfp_t gfp_mask)
 {
-	unsigned int chunks, cur_page, seg_len, i;
-	int ret;
-	struct scatterlist *s;
+	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
+	unsigned int added_nents = 0;
+	struct scatterlist *s = prv;

 	if (WARN_ON(!max_segment || offset_in_page(max_segment)))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
+
+	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (prv) {
+		unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
+				       prv->offset + prv->length) /
+				      PAGE_SIZE;
+
+		if (WARN_ON(offset))
+			return ERR_PTR(-EINVAL);
+
+		/* Merge contiguous pages into the last SG */
+		prv_len = prv->length;
+		while (n_pages && page_to_pfn(pages[0]) == paddr) {
+			if (prv->length + PAGE_SIZE > max_segment)
+				break;
+			prv->length += PAGE_SIZE;
+			paddr++;
+			pages++;
+			n_pages--;
+		}
+		if (!n_pages)
+			goto out;
+	}

 	/* compute number of contiguous chunks */
 	chunks = 1;
@@ -410,13 +475,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 		}
 	}

-	ret = sg_alloc_table(sgt, chunks, gfp_mask);
-	if (unlikely(ret))
-		return ret;
-
 	/* merging chunks and putting them into the scatterlist */
 	cur_page = 0;
-	for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+	for (i = 0; i < chunks; i++) {
 		unsigned int j, chunk_size;

 		/* look for the end of the current chunk */
@@ -429,15 +490,30 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 				break;
 		}

+		/* Pass how many chunks might be left */
+		s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+		if (IS_ERR(s)) {
+			/*
+			 * Restore the last entry's length to its value
+			 * before this function was called.
+			 */
+			if (prv)
+				prv->length = prv_len;
+			return s;
+		}
 		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
 		sg_set_page(s, pages[cur_page],
 			    min_t(unsigned long, size, chunk_size), offset);
+		added_nents++;
 		size -= chunk_size;
 		offset = 0;
 		cur_page = j;
 	}
+	sgt->nents += added_nents;
+out:
+	if (!left_pages)
+		sg_mark_end(s);

-	return 0;
+	return s;
 }
 EXPORT_SYMBOL(__sg_alloc_table_from_pages);

@@ -465,8 +541,9 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 			      unsigned int n_pages, unsigned int offset,
 			      unsigned long size, gfp_t gfp_mask)
 {
-	return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size,
-					   SCATTERLIST_MAX_SEGMENT, gfp_mask);
+	return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages,
+			offset, size, SCATTERLIST_MAX_SEGMENT,
+			NULL, 0, gfp_mask));
 }
 EXPORT_SYMBOL(sg_alloc_table_from_pages);
...
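
To make get_next_sg()'s accounting concrete, here is a standalone model
(illustrative only, not kernel code) of how orig_nents grows: every block
chained after the first donates its last entry as the chain link, hence the
alloc_size - 1 in the code above. SG_MAX_SINGLE_ALLOC == 128 is an assumption
(PAGE_SIZE / sizeof(struct scatterlist) on a typical 64-bit config):

	#include <assert.h>

	#define SG_MAX_SINGLE_ALLOC 128	/* assumed, config-dependent */

	/* usable (non-chain) entries after allocating 'blocks' full blocks */
	static unsigned int orig_nents_after(unsigned int blocks)
	{
		return SG_MAX_SINGLE_ALLOC +
		       (blocks - 1) * (SG_MAX_SINGLE_ALLOC - 1);
	}

	int main(void)
	{
		assert(orig_nents_after(1) == 128);
		assert(orig_nents_after(2) == 255);	/* 128 + 127 */
		return 0;
	}

nents, by contrast, only counts populated SGEs; __sg_alloc_table_from_pages()
folds added_nents into it once per call.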
tools/testing/scatterlist/main.c

@@ -79,14 +79,13 @@ int main(void)
 	for (i = 0, test = tests; test->expected_segments; test++, i++) {
 		struct page *pages[MAX_PAGES];
 		struct sg_table st;
-		int ret;
+		struct scatterlist *sg;

 		set_pages(pages, test->pfn, test->num_pages);

-		ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
-						  0, test->size, test->max_seg,
-						  GFP_KERNEL);
-		assert(ret == test->alloc_ret);
+		sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+				test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+		assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);

 		if (test->alloc_ret)
 			continue;
...
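
A hedged sketch of a possible follow-up test (not in this commit) that would
exercise the new append path, reusing the harness's pages[], st and test
fields and assuming its stub headers provide IS_ERR() and PAGE_SIZE:

	struct scatterlist *sg2;
	unsigned int half = test->num_pages / 2;

	if (half) {
		set_pages(pages, test->pfn, test->num_pages);
		/* Leave the table open: report pages still to come */
		sg2 = __sg_alloc_table_from_pages(&st, pages, half, 0,
				half * PAGE_SIZE, test->max_seg,
				NULL, test->num_pages - half, GFP_KERNEL);
		assert(!IS_ERR(sg2));
		/* Append the rest, continuing from the returned SGE */
		sg2 = __sg_alloc_table_from_pages(&st, pages + half,
				test->num_pages - half, 0,
				(test->num_pages - half) * PAGE_SIZE,
				test->max_seg, sg2, 0, GFP_KERNEL);
		assert(!IS_ERR(sg2));
		sg_free_table(&st);
	}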