Commit c0b12405 authored by Jérôme Glisse's avatar Jérôme Glisse Committed by Linus Torvalds

mm/hmm/mirror: mirror process address space on device with HMM helpers

This is heterogeneous memory management (HMM) process address space
mirroring.  In a nutshell, this provides an API to mirror a process address
space on a device.  This boils down to keeping the CPU and device page tables
synchronized (we assume that both device and CPU are cache coherent, as
PCIe devices can be).

This patch provides a simple API for device drivers to achieve address space
mirroring, thus sparing each device driver from growing its own CPU page table
walker and its own CPU page table synchronization mechanism.

This is useful for NVidia GPU >= Pascal, Mellanox IB >= mlx5 and more
hardware in the future.

[jglisse@redhat.com: fix hmm for "mmu_notifier kill invalidate_page callback"]
  Link: http://lkml.kernel.org/r/20170830231955.GD9445@redhat.com
Link: http://lkml.kernel.org/r/20170817000548.32038-4-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Evgeny Baskakov <ebaskakov@nvidia.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Mark Hairgrove <mhairgrove@nvidia.com>
Signed-off-by: Sherry Cheung <SCheung@nvidia.com>
Signed-off-by: Subhash Gutti <sgutti@nvidia.com>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Nellans <dnellans@nvidia.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Bob Liu <liubo95@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 133ff0ea
...@@ -72,6 +72,7 @@ ...@@ -72,6 +72,7 @@
#if IS_ENABLED(CONFIG_HMM) #if IS_ENABLED(CONFIG_HMM)
struct hmm;
/* /*
* hmm_pfn_t - HMM uses its own pfn type to keep several flags per page * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
...@@ -134,6 +135,115 @@ static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn) ...@@ -134,6 +135,115 @@ static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
} }
#if IS_ENABLED(CONFIG_HMM_MIRROR)
/*
* Mirroring: how to synchronize device page table with CPU page table.
*
* A device driver that is participating in HMM mirroring must always
* synchronize with CPU page table updates. For this, device drivers can either
* directly use mmu_notifier APIs or they can use the hmm_mirror API. Device
* drivers can decide to register one mirror per device per process, or just
* one mirror per process for a group of devices. The pattern is:
*
* int device_bind_address_space(..., struct mm_struct *mm, ...)
* {
* struct device_address_space *das;
*
* // Device driver specific initialization, and allocation of das
* // which contains an hmm_mirror struct as one of its fields.
* ...
*
* // The ops pointer must be set before registering the mirror.
* das->mirror.ops = &device_mirror_ops;
* ret = hmm_mirror_register(&das->mirror, mm);
* if (ret) {
* // Cleanup on error
* return ret;
* }
*
* // Other device driver specific initialization
* ...
* }
*
* Once an hmm_mirror is registered for an address space, the device driver
* will get callbacks through sync_cpu_device_pagetables() operation (see
* hmm_mirror_ops struct).
*
* Device driver must not free the struct containing the hmm_mirror struct
* before calling hmm_mirror_unregister(). The expected usage is to do that when
* the device driver is unbinding from an address space.
*
*
* void device_unbind_address_space(struct device_address_space *das)
* {
* // Device driver specific cleanup
* ...
*
* hmm_mirror_unregister(&das->mirror);
*
* // Other device driver specific cleanup, and now das can be freed
* ...
* }
*/
struct hmm_mirror;
/*
 * enum hmm_update_type - type of update
 * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
 *
 * Passed as the update_type argument of the sync_cpu_device_pagetables()
 * callback in struct hmm_mirror_ops.
 */
enum hmm_update_type {
HMM_UPDATE_INVALIDATE,
};
/*
 * struct hmm_mirror_ops - HMM mirror device operations callback
 *
 * @sync_cpu_device_pagetables: callback to synchronize a range of the device
 *                              page table with the CPU page table
 */
struct hmm_mirror_ops {
/* sync_cpu_device_pagetables() - synchronize page tables
 *
 * @mirror: pointer to struct hmm_mirror
 * @update_type: type of update that occurred to the CPU page table
 * @start: virtual start address of the range to update
 * @end: virtual end address of the range to update
 *
 * This callback ultimately originates from mmu_notifiers when the CPU
 * page table is updated. The device driver must update its page table
 * in response to this callback. The update_type argument tells what
 * action to perform.
 *
 * The device driver must not return from this callback until the device
 * page tables are completely updated (TLBs flushed, etc); this is a
 * synchronous call.
 */
void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
enum hmm_update_type update_type,
unsigned long start,
unsigned long end);
};
/*
 * struct hmm_mirror - mirror struct for a device driver
 *
 * @hmm: pointer to struct hmm (which is unique per mm_struct); filled in by
 *       hmm_mirror_register()
 * @ops: device driver callback for HMM mirror operations; must be set by the
 *       driver before calling hmm_mirror_register(), which rejects a NULL ops
 * @list: for list of mirrors of a given mm
 *
 * Each address space (mm_struct) being mirrored by a device must register one
 * instance of an hmm_mirror struct with HMM. HMM will track the list of all
 * mirrors for each mm_struct.
 */
struct hmm_mirror {
struct hmm *hmm;
const struct hmm_mirror_ops *ops;
struct list_head list;
};
int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
void hmm_mirror_unregister(struct hmm_mirror *mirror);
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
/* Below are for HMM internal use only! Not to be used by device driver! */ /* Below are for HMM internal use only! Not to be used by device driver! */
void hmm_mm_destroy(struct mm_struct *mm); void hmm_mm_destroy(struct mm_struct *mm);
......
...@@ -705,6 +705,18 @@ config ARCH_HAS_HMM ...@@ -705,6 +705,18 @@ config ARCH_HAS_HMM
config HMM config HMM
bool bool
config HMM_MIRROR
bool "HMM mirror CPU page table into a device page table"
depends on ARCH_HAS_HMM
select MMU_NOTIFIER
select HMM
help
Select HMM_MIRROR if you want to mirror a range of the CPU page table of a
process into a device page table. Here, mirror means "keep synchronized".
Prerequisites: the device must provide the ability to write-protect its
page tables (at PAGE_SIZE granularity), and must be able to recover from
the resulting potential page faults.
config FRAME_VECTOR config FRAME_VECTOR
bool bool
......
...@@ -21,16 +21,27 @@ ...@@ -21,16 +21,27 @@
#include <linux/hmm.h> #include <linux/hmm.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/mmu_notifier.h>
#ifdef CONFIG_HMM #ifdef CONFIG_HMM
static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
/* /*
* struct hmm - HMM per mm struct * struct hmm - HMM per mm struct
* *
* @mm: mm struct this HMM struct is bound to * @mm: mm struct this HMM struct is bound to
* @sequence: we track updates to the CPU page table with a sequence number
* @mirrors: list of mirrors for this mm
* @mmu_notifier: mmu notifier to track updates to CPU page table
* @mirrors_sem: read/write semaphore protecting the mirrors list
*/ */
struct hmm { struct hmm {
struct mm_struct *mm; struct mm_struct *mm;
atomic_t sequence;
struct list_head mirrors;
struct mmu_notifier mmu_notifier;
struct rw_semaphore mirrors_sem;
}; };
/* /*
...@@ -43,27 +54,48 @@ struct hmm { ...@@ -43,27 +54,48 @@ struct hmm {
*/ */
static struct hmm *hmm_register(struct mm_struct *mm) static struct hmm *hmm_register(struct mm_struct *mm)
{ {
if (!mm->hmm) { struct hmm *hmm = READ_ONCE(mm->hmm);
struct hmm *hmm = NULL; bool cleanup = false;
/*
* The hmm struct can only be freed once the mm_struct goes away,
* hence we should always have pre-allocated an new hmm struct
* above.
*/
if (hmm)
return hmm;
hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
if (!hmm) if (!hmm)
return NULL; return NULL;
INIT_LIST_HEAD(&hmm->mirrors);
init_rwsem(&hmm->mirrors_sem);
atomic_set(&hmm->sequence, 0);
hmm->mmu_notifier.ops = NULL;
hmm->mm = mm; hmm->mm = mm;
/*
* We should only get here while holding the mmap_sem in write mode, i.e. on
* registration of the first mirror through hmm_mirror_register().
*/
hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
kfree(hmm);
return NULL;
}
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
if (!mm->hmm) if (!mm->hmm)
mm->hmm = hmm; mm->hmm = hmm;
else else
kfree(hmm); cleanup = true;
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
if (cleanup) {
mmu_notifier_unregister(&hmm->mmu_notifier, mm);
kfree(hmm);
} }
/*
* The hmm struct can only be freed once the mm_struct goes away,
* hence we should always have pre-allocated an new hmm struct
* above.
*/
return mm->hmm; return mm->hmm;
} }
...@@ -72,3 +104,94 @@ void hmm_mm_destroy(struct mm_struct *mm) ...@@ -72,3 +104,94 @@ void hmm_mm_destroy(struct mm_struct *mm)
kfree(mm->hmm); kfree(mm->hmm);
} }
#endif /* CONFIG_HMM */ #endif /* CONFIG_HMM */
#if IS_ENABLED(CONFIG_HMM_MIRROR)
static void hmm_invalidate_range(struct hmm *hmm,
enum hmm_update_type action,
unsigned long start,
unsigned long end)
{
struct hmm_mirror *mirror;
down_read(&hmm->mirrors_sem);
list_for_each_entry(mirror, &hmm->mirrors, list)
mirror->ops->sync_cpu_device_pagetables(mirror, action,
start, end);
up_read(&hmm->mirrors_sem);
}
/*
 * hmm_invalidate_range_start() - mmu notifier invalidate_range_start hook
 *
 * Only bumps the per-mm HMM sequence counter; mirrors are not called from
 * here. The mn, start and end arguments are unused.
 */
static void hmm_invalidate_range_start(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start,
				       unsigned long end)
{
	struct hmm *mm_hmm = mm->hmm;

	VM_BUG_ON(!mm_hmm);

	atomic_inc(&mm_hmm->sequence);
}
/*
 * hmm_invalidate_range_end() - mmu notifier invalidate_range_end hook
 *
 * Notifies every mirror registered against @mm that the range [@start, @end)
 * bounds passed by the notifier must be invalidated, using
 * HMM_UPDATE_INVALIDATE as the update type. The mn argument is unused.
 */
static void hmm_invalidate_range_end(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start,
				     unsigned long end)
{
	struct hmm *hmm = mm->hmm;

	VM_BUG_ON(!hmm);

	/* Use the pointer that was just checked rather than re-reading mm->hmm. */
	hmm_invalidate_range(hmm, HMM_UPDATE_INVALIDATE, start, end);
}
/*
 * mmu notifier callbacks used by HMM: the start hook bumps the sequence
 * counter, the end hook forwards the invalidation to the mirrors.
 */
static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
.invalidate_range_start = hmm_invalidate_range_start,
.invalidate_range_end = hmm_invalidate_range_end,
};
/*
* hmm_mirror_register() - register a mirror against an mm
*
* @mirror: new mirror struct to register
* @mm: mm to register against
*
* To start mirroring a process address space, the device driver must register
* an HMM mirror struct.
*
* THE mm->mmap_sem MUST BE HELD IN WRITE MODE !
*/
int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
{
/* Sanity check */
if (!mm || !mirror || !mirror->ops)
return -EINVAL;
mirror->hmm = hmm_register(mm);
if (!mirror->hmm)
return -ENOMEM;
down_write(&mirror->hmm->mirrors_sem);
list_add(&mirror->list, &mirror->hmm->mirrors);
up_write(&mirror->hmm->mirrors_sem);
return 0;
}
EXPORT_SYMBOL(hmm_mirror_register);
/*
 * hmm_mirror_unregister() - unregister a mirror
 *
 * @mirror: mirror struct to unregister
 *
 * Stop mirroring a process address space, and cleanup.
 *
 * Removes @mirror from the mirrors list of its struct hmm while holding
 * mirrors_sem for write. @mirror->hmm is dereferenced unconditionally, so
 * the mirror must have been successfully registered beforehand.
 */
void hmm_mirror_unregister(struct hmm_mirror *mirror)
{
struct hmm *hmm = mirror->hmm;
down_write(&hmm->mirrors_sem);
list_del(&mirror->list);
up_write(&hmm->mirrors_sem);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment