Commit ea996974, authored by John Hubbard, committed by Jason Gunthorpe

RDMA: Convert put_page() to put_user_page*()

For infiniband code that retains pages via get_user_pages*(), release
those pages via the new put_user_page(), or put_user_pages*(), instead of
put_page().

This is a tiny part of the second step of fixing the problem described in
[1]. The steps are:

1) Provide put_user_page*() routines, intended to be used for releasing
   pages that were pinned via get_user_pages*().

2) Convert all of the call sites for get_user_pages*(), to invoke
   put_user_page*(), instead of put_page(). This involves dozens of call
   sites, and will take some time.

3) After (2) is complete, use get_user_pages*() and put_user_page*() to
   implement tracking of these pages. This tracking will be separate from
   the existing struct page refcounting.

4) Use the tracking and identification of these pages, to implement
   special handling (especially in writeback paths) when the pages are
   backed by a filesystem. Again, [1] provides details as to why that is
   desirable.

[1] https://lwn.net/Articles/753027/ : "The Trouble with get_user_pages()"
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Jérôme Glisse <jglisse@redhat.com>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
Tested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent cfcc048c
...@@ -54,9 +54,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d ...@@ -54,9 +54,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) { for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
page = sg_page_iter_page(&sg_iter); page = sg_page_iter_page(&sg_iter);
if (!PageDirty(page) && umem->writable && dirty) if (umem->writable && dirty)
set_page_dirty_lock(page); put_user_pages_dirty_lock(&page, 1);
put_page(page); else
put_user_page(page);
} }
sg_free_table(&umem->sg_head); sg_free_table(&umem->sg_head);
......
...@@ -482,7 +482,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) ...@@ -482,7 +482,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
* The function returns -EFAULT if the DMA mapping operation fails. It returns * The function returns -EFAULT if the DMA mapping operation fails. It returns
* -EAGAIN if a concurrent invalidation prevents us from updating the page. * -EAGAIN if a concurrent invalidation prevents us from updating the page.
* *
* The page is released via put_page even if the operation failed. For * The page is released via put_user_page even if the operation failed. For
* on-demand pinning, the page is released whenever it isn't stored in the * on-demand pinning, the page is released whenever it isn't stored in the
* umem. * umem.
*/ */
...@@ -530,7 +530,7 @@ static int ib_umem_odp_map_dma_single_page( ...@@ -530,7 +530,7 @@ static int ib_umem_odp_map_dma_single_page(
} }
out: out:
put_page(page); put_user_page(page);
if (remove_existing_mapping) { if (remove_existing_mapping) {
ib_umem_notifier_start_account(umem_odp); ib_umem_notifier_start_account(umem_odp);
...@@ -653,7 +653,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ...@@ -653,7 +653,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
ret = -EFAULT; ret = -EFAULT;
break; break;
} }
put_page(local_page_list[j]); put_user_page(local_page_list[j]);
continue; continue;
} }
...@@ -680,8 +680,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ...@@ -680,8 +680,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
* ib_umem_odp_map_dma_single_page(). * ib_umem_odp_map_dma_single_page().
*/ */
if (npages - (j + 1) > 0) if (npages - (j + 1) > 0)
release_pages(&local_page_list[j+1], put_user_pages(&local_page_list[j+1],
npages - (j + 1)); npages - (j + 1));
break; break;
} }
} }
......
...@@ -118,13 +118,10 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np ...@@ -118,13 +118,10 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty) size_t npages, bool dirty)
{ {
size_t i; if (dirty)
put_user_pages_dirty_lock(p, npages);
for (i = 0; i < npages; i++) { else
if (dirty) put_user_pages(p, npages);
set_page_dirty_lock(p[i]);
put_page(p[i]);
}
if (mm) { /* during close after signal, mm can be NULL */ if (mm) { /* during close after signal, mm can be NULL */
atomic64_sub(npages, &mm->pinned_vm); atomic64_sub(npages, &mm->pinned_vm);
......
...@@ -482,7 +482,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, ...@@ -482,7 +482,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
if (ret < 0) { if (ret < 0) {
put_page(pages[0]); put_user_page(pages[0]);
goto out; goto out;
} }
...@@ -490,7 +490,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, ...@@ -490,7 +490,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
mthca_uarc_virt(dev, uar, i)); mthca_uarc_virt(dev, uar, i));
if (ret) { if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
put_page(sg_page(&db_tab->page[i].mem)); put_user_page(sg_page(&db_tab->page[i].mem));
goto out; goto out;
} }
...@@ -556,7 +556,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, ...@@ -556,7 +556,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
if (db_tab->page[i].uvirt) { if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1); mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
put_page(sg_page(&db_tab->page[i].mem)); put_user_page(sg_page(&db_tab->page[i].mem));
} }
} }
......
...@@ -40,13 +40,10 @@ ...@@ -40,13 +40,10 @@
static void __qib_release_user_pages(struct page **p, size_t num_pages, static void __qib_release_user_pages(struct page **p, size_t num_pages,
int dirty) int dirty)
{ {
size_t i; if (dirty)
put_user_pages_dirty_lock(p, num_pages);
for (i = 0; i < num_pages; i++) { else
if (dirty) put_user_pages(p, num_pages);
set_page_dirty_lock(p[i]);
put_page(p[i]);
}
} }
/** /**
......
...@@ -317,7 +317,7 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, ...@@ -317,7 +317,7 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
* the caller can ignore this page. * the caller can ignore this page.
*/ */
if (put) { if (put) {
put_page(page); put_user_page(page);
} else { } else {
/* coalesce case */ /* coalesce case */
kunmap(page); kunmap(page);
...@@ -631,7 +631,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, ...@@ -631,7 +631,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
kunmap(pkt->addr[i].page); kunmap(pkt->addr[i].page);
if (pkt->addr[i].put_page) if (pkt->addr[i].put_page)
put_page(pkt->addr[i].page); put_user_page(pkt->addr[i].page);
else else
__free_page(pkt->addr[i].page); __free_page(pkt->addr[i].page);
} else if (pkt->addr[i].kvaddr) { } else if (pkt->addr[i].kvaddr) {
...@@ -706,7 +706,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, ...@@ -706,7 +706,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
/* if error, return all pages not managed by pkt */ /* if error, return all pages not managed by pkt */
free_pages: free_pages:
while (i < j) while (i < j)
put_page(pages[i++]); put_user_page(pages[i++]);
done: done:
return ret; return ret;
......
...@@ -75,9 +75,10 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) ...@@ -75,9 +75,10 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
for_each_sg(chunk->page_list, sg, chunk->nents, i) { for_each_sg(chunk->page_list, sg, chunk->nents, i) {
page = sg_page(sg); page = sg_page(sg);
pa = sg_phys(sg); pa = sg_phys(sg);
if (!PageDirty(page) && dirty) if (dirty)
set_page_dirty_lock(page); put_user_pages_dirty_lock(&page, 1);
put_page(page); else
put_user_page(page);
usnic_dbg("pa: %pa\n", &pa); usnic_dbg("pa: %pa\n", &pa);
} }
kfree(chunk); kfree(chunk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment