Commit 0a6cad5d authored by Dave Airlie

Merge branch 'vmwgfx-coherent' of git://people.freedesktop.org/~thomash/linux into drm-next

Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver
to provide coherent graphics memory, meaning that the GPU sees any
content written to the coherent memory on the next GPU operation that
touches that memory, and the CPU sees any content written by the GPU
to that memory immediately after any fence object trailing the GPU
operation is signaled.

Paravirtual drivers that otherwise require explicit synchronization need to
do this by hooking up dirty tracking to page-fault handlers and buffer
object validation.

Provide the mm helpers needed for this, which also allow for huge pmd and
pud entries (patches 1-3), and the associated vmwgfx code (patches 4-7).

The code has been tested and exercised by a tailored version of mesa
where we disable all explicit synchronization and assume graphics memory
is coherent. The performance loss varies of course; a typical number is
around 5%.
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Thomas Hellstrom <thomas_os@shipmail.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20191113131639.4653-1-thomas_os@shipmail.org
parents acc61b89 9ca7d19f
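
To show how the new helpers are meant to be composed, here is a minimal sketch of a driver fault() callback built on top of them. The function name is made up for illustration; the call pattern mirrors the ttm_bo_vm_fault() and vmw_bo_vm_fault() implementations further down in this diff.

static vm_fault_t example_drv_vm_fault(struct vm_fault *vmf)
{
	struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
	vm_fault_t ret;

	/* May drop mmap_sem and return VM_FAULT_RETRY or VM_FAULT_NOPAGE. */
	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	/* Insert PTEs; prefault count and page protection are driver policy. */
	ret = ttm_bo_vm_fault_reserved(vmf, vm_get_page_prot(vmf->vma->vm_flags),
				       TTM_BO_VM_NUM_PREFAULT);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;	/* The reservation has already been dropped. */

	dma_resv_unlock(bo->base.resv);
	return ret;
}
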
...@@ -42,8 +42,6 @@ ...@@ -42,8 +42,6 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/mem_encrypt.h> #include <linux/mem_encrypt.h>
#define TTM_BO_VM_NUM_PREFAULT 16
static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf) struct vm_fault *vmf)
{ {
...@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, ...@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset; + page_offset;
} }
static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) /**
* ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
* @bo: The buffer object
* @vmf: The fault structure handed to the callback
*
 * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
 * during long waits, and after the wait the callback will be restarted. This
 * is to allow other threads using the same virtual memory space to
 * concurrently map() and unmap() completely unrelated buffer objects. TTM
 * buffer object reservations sometimes wait for the GPU and should therefore
 * be considered long waits. This function reserves the buffer object
 * interruptibly, taking this into account. Starvation is avoided by the vm
 * system not allowing too many repeated restarts.
* This function is intended to be used in customized fault() and _mkwrite()
* handlers.
*
* Return:
* 0 on success and the bo was reserved.
 * VM_FAULT_RETRY if a blocking wait was needed and the fault will be retried.
 * VM_FAULT_NOPAGE if a blocking wait was needed but retrying was not allowed.
*/
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
vma->vm_private_data;
struct ttm_bo_device *bdev = bo->bdev;
unsigned long page_offset;
unsigned long page_last;
unsigned long pfn;
struct ttm_tt *ttm = NULL;
struct page *page;
int err;
int i;
vm_fault_t ret = VM_FAULT_NOPAGE;
unsigned long address = vmf->address;
struct ttm_mem_type_manager *man =
&bdev->man[bo->mem.mem_type];
struct vm_area_struct cvma;
/* /*
* Work around locking order reversal in fault / nopfn * Work around locking order reversal in fault / nopfn
* between mmap_sem and bo_reserve: Perform a trylock operation * between mmap_sem and bo_reserve: Perform a trylock operation
...@@ -151,14 +154,54 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ...@@ -151,14 +154,54 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE; return VM_FAULT_NOPAGE;
} }
return 0;
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);
/**
* ttm_bo_vm_fault_reserved - TTM fault helper
* @vmf: The struct vm_fault given as argument to the fault callback
* @prot: The page protection to be used for this memory area.
 * @num_prefault: Maximum number of prefault pages. The caller may want to
 * specify this based on madvise settings and the size of the GPU object
 * backed by the memory.
*
* This function inserts one or more page table entries pointing to the
* memory backing the buffer object, and then returns a return code
* instructing the caller to retry the page access.
*
* Return:
* VM_FAULT_NOPAGE on success or pending signal
* VM_FAULT_SIGBUS on unspecified error
* VM_FAULT_OOM on out-of-memory
* VM_FAULT_RETRY if retryable wait
*/
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgprot_t prot,
pgoff_t num_prefault)
{
struct vm_area_struct *vma = vmf->vma;
struct vm_area_struct cvma = *vma;
struct ttm_buffer_object *bo = vma->vm_private_data;
struct ttm_bo_device *bdev = bo->bdev;
unsigned long page_offset;
unsigned long page_last;
unsigned long pfn;
struct ttm_tt *ttm = NULL;
struct page *page;
int err;
pgoff_t i;
vm_fault_t ret = VM_FAULT_NOPAGE;
unsigned long address = vmf->address;
struct ttm_mem_type_manager *man =
&bdev->man[bo->mem.mem_type];
/* /*
* Refuse to fault imported pages. This should be handled * Refuse to fault imported pages. This should be handled
* (if at all) by redirecting mmap to the exporter. * (if at all) by redirecting mmap to the exporter.
*/ */
if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
ret = VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
goto out_unlock;
}
if (bdev->driver->fault_reserve_notify) { if (bdev->driver->fault_reserve_notify) {
struct dma_fence *moving = dma_fence_get(bo->moving); struct dma_fence *moving = dma_fence_get(bo->moving);
...@@ -169,11 +212,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ...@@ -169,11 +212,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
break; break;
case -EBUSY: case -EBUSY:
case -ERESTARTSYS: case -ERESTARTSYS:
ret = VM_FAULT_NOPAGE; return VM_FAULT_NOPAGE;
goto out_unlock;
default: default:
ret = VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
goto out_unlock;
} }
if (bo->moving != moving) { if (bo->moving != moving) {
...@@ -189,21 +230,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ...@@ -189,21 +230,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
* move. * move.
*/ */
ret = ttm_bo_vm_fault_idle(bo, vmf); ret = ttm_bo_vm_fault_idle(bo, vmf);
if (unlikely(ret != 0)) { if (unlikely(ret != 0))
if (ret == VM_FAULT_RETRY &&
!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* The BO has already been unreserved. */
return ret; return ret;
}
goto out_unlock;
}
err = ttm_mem_io_lock(man, true); err = ttm_mem_io_lock(man, true);
if (unlikely(err != 0)) { if (unlikely(err != 0))
ret = VM_FAULT_NOPAGE; return VM_FAULT_NOPAGE;
goto out_unlock;
}
err = ttm_mem_io_reserve_vm(bo); err = ttm_mem_io_reserve_vm(bo);
if (unlikely(err != 0)) { if (unlikely(err != 0)) {
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
...@@ -220,18 +252,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ...@@ -220,18 +252,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
goto out_io_unlock; goto out_io_unlock;
} }
/* cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
* Make a local vma copy to modify the page_prot member if (!bo->mem.bus.is_iomem) {
* and vm_flags if necessary. The vma parameter is protected
* by mmap_sem in write mode.
*/
cvma = *vma;
cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
if (bo->mem.bus.is_iomem) {
cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
cvma.vm_page_prot);
} else {
struct ttm_operation_ctx ctx = { struct ttm_operation_ctx ctx = {
.interruptible = false, .interruptible = false,
.no_wait_gpu = false, .no_wait_gpu = false,
...@@ -240,24 +262,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ...@@ -240,24 +262,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
}; };
ttm = bo->ttm; ttm = bo->ttm;
cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, if (ttm_tt_populate(bo->ttm, &ctx)) {
cvma.vm_page_prot);
/* Allocate all page at once, most common usage */
if (ttm_tt_populate(ttm, &ctx)) {
ret = VM_FAULT_OOM; ret = VM_FAULT_OOM;
goto out_io_unlock; goto out_io_unlock;
} }
} else {
/* Iomem should not be marked encrypted */
cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
} }
/* /*
* Speculatively prefault a number of pages. Only error on * Speculatively prefault a number of pages. Only error on
* first page. * first page.
*/ */
for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) { for (i = 0; i < num_prefault; ++i) {
if (bo->mem.bus.is_iomem) { if (bo->mem.bus.is_iomem) {
/* Iomem should not be marked encrypted */
cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
pfn = ttm_bo_io_mem_pfn(bo, page_offset); pfn = ttm_bo_io_mem_pfn(bo, page_offset);
} else { } else {
page = ttm->pages[page_offset]; page = ttm->pages[page_offset];
...@@ -293,28 +312,49 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ...@@ -293,28 +312,49 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE; ret = VM_FAULT_NOPAGE;
out_io_unlock: out_io_unlock:
ttm_mem_io_unlock(man); ttm_mem_io_unlock(man);
out_unlock: return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
pgprot_t prot;
struct ttm_buffer_object *bo = vma->vm_private_data;
vm_fault_t ret;
ret = ttm_bo_vm_reserve(bo, vmf);
if (ret)
return ret;
prot = vm_get_page_prot(vma->vm_flags);
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
dma_resv_unlock(bo->base.resv); dma_resv_unlock(bo->base.resv);
return ret; return ret;
} }
static void ttm_bo_vm_open(struct vm_area_struct *vma) void ttm_bo_vm_open(struct vm_area_struct *vma)
{ {
struct ttm_buffer_object *bo = struct ttm_buffer_object *bo = vma->vm_private_data;
(struct ttm_buffer_object *)vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping); WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
ttm_bo_get(bo); ttm_bo_get(bo);
} }
EXPORT_SYMBOL(ttm_bo_vm_open);
static void ttm_bo_vm_close(struct vm_area_struct *vma) void ttm_bo_vm_close(struct vm_area_struct *vma)
{ {
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data; struct ttm_buffer_object *bo = vma->vm_private_data;
ttm_bo_put(bo); ttm_bo_put(bo);
vma->vm_private_data = NULL; vma->vm_private_data = NULL;
} }
EXPORT_SYMBOL(ttm_bo_vm_close);
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
unsigned long offset, unsigned long offset,
......
...@@ -8,6 +8,7 @@ config DRM_VMWGFX ...@@ -8,6 +8,7 @@ config DRM_VMWGFX
select FB_CFB_IMAGEBLIT select FB_CFB_IMAGEBLIT
select DRM_TTM select DRM_TTM
select FB select FB
select MAPPING_DIRTY_HELPERS
# Only needed for the transitional use of drm_crtc_init - can be removed # Only needed for the transitional use of drm_crtc_init - can be removed
# again once vmwgfx sets up the primary plane itself. # again once vmwgfx sets up the primary plane itself.
select DRM_KMS_HELPER select DRM_KMS_HELPER
......
...@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ ...@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \ vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \ vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \ vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
vmwgfx_validation.o \ vmwgfx_validation.o vmwgfx_page_dirty.o \
ttm_object.o ttm_lock.o ttm_object.o ttm_lock.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
...@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format, ...@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset; return offset;
} }
static inline u32 static inline u32
svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
surf_size_struct baseLevelSize, surf_size_struct baseLevelSize,
...@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format) ...@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format); return svga3dsurface_is_dx_screen_target_format(format);
} }
/**
 * struct svga3dsurface_mip - Mipmap level information
* @bytes: Bytes required in the backing store of this mipmap level.
* @img_stride: Byte stride per image.
* @row_stride: Byte stride per block row.
* @size: The size of the mipmap.
*/
struct svga3dsurface_mip {
size_t bytes;
size_t img_stride;
size_t row_stride;
struct drm_vmw_size size;
};
/**
* struct svga3dsurface_cache - Cached surface information
* @desc: Pointer to the surface descriptor
* @mip: Array of mipmap level information. Valid size is @num_mip_levels.
* @mip_chain_bytes: Bytes required in the backing store for the whole chain
* of mip levels.
* @sheet_bytes: Bytes required in the backing store for a sheet
* representing a single sample.
* @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
* a chain.
* @num_layers: Number of slices in an array texture or number of faces in
* a cubemap texture.
*/
struct svga3dsurface_cache {
const struct svga3d_surface_desc *desc;
struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
size_t mip_chain_bytes;
size_t sheet_bytes;
u32 num_mip_levels;
u32 num_layers;
};
/**
* struct svga3dsurface_loc - Surface location
* @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
* mip_level.
* @x: X coordinate.
* @y: Y coordinate.
* @z: Z coordinate.
*/
struct svga3dsurface_loc {
u32 sub_resource;
u32 x, y, z;
};
/**
* svga3dsurface_subres - Compute the subresource from layer and mipmap.
* @cache: Surface layout data.
* @mip_level: The mipmap level.
* @layer: The surface layer (face or array slice).
*
* Return: The subresource.
*/
static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
u32 mip_level, u32 layer)
{
return cache->num_mip_levels * layer + mip_level;
}
/**
* svga3dsurface_setup_cache - Build a surface cache entry
* @size: The surface base level dimensions.
* @format: The surface format.
* @num_mip_levels: Number of mipmap levels.
 * @num_layers: Number of layers.
 * @num_samples: Number of samples.
 * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
*
* Return: Zero on success, -EINVAL on invalid surface layout.
*/
static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
SVGA3dSurfaceFormat format,
u32 num_mip_levels,
u32 num_layers,
u32 num_samples,
struct svga3dsurface_cache *cache)
{
const struct svga3d_surface_desc *desc;
u32 i;
memset(cache, 0, sizeof(*cache));
cache->desc = desc = svga3dsurface_get_desc(format);
cache->num_mip_levels = num_mip_levels;
cache->num_layers = num_layers;
for (i = 0; i < cache->num_mip_levels; i++) {
struct svga3dsurface_mip *mip = &cache->mip[i];
mip->size = svga3dsurface_get_mip_size(*size, i);
mip->bytes = svga3dsurface_get_image_buffer_size
(desc, &mip->size, 0);
mip->row_stride =
__KERNEL_DIV_ROUND_UP(mip->size.width,
desc->block_size.width) *
desc->bytes_per_block * num_samples;
if (!mip->row_stride)
goto invalid_dim;
mip->img_stride =
__KERNEL_DIV_ROUND_UP(mip->size.height,
desc->block_size.height) *
mip->row_stride;
if (!mip->img_stride)
goto invalid_dim;
cache->mip_chain_bytes += mip->bytes;
}
cache->sheet_bytes = cache->mip_chain_bytes * num_layers;
if (!cache->sheet_bytes)
goto invalid_dim;
return 0;
invalid_dim:
VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n");
return -EINVAL;
}
/**
* svga3dsurface_get_loc - Get a surface location from an offset into the
* backing store
* @cache: Surface layout data.
* @loc: Pointer to a struct svga3dsurface_loc to be filled in.
* @offset: Offset into the surface backing store.
*/
static inline void
svga3dsurface_get_loc(const struct svga3dsurface_cache *cache,
struct svga3dsurface_loc *loc,
size_t offset)
{
const struct svga3dsurface_mip *mip = &cache->mip[0];
const struct svga3d_surface_desc *desc = cache->desc;
u32 layer;
int i;
if (offset >= cache->sheet_bytes)
offset %= cache->sheet_bytes;
layer = offset / cache->mip_chain_bytes;
offset -= layer * cache->mip_chain_bytes;
for (i = 0; i < cache->num_mip_levels; ++i, ++mip) {
if (mip->bytes > offset)
break;
offset -= mip->bytes;
}
loc->sub_resource = svga3dsurface_subres(cache, i, layer);
loc->z = offset / mip->img_stride;
offset -= loc->z * mip->img_stride;
loc->z *= desc->block_size.depth;
loc->y = offset / mip->row_stride;
offset -= loc->y * mip->row_stride;
loc->y *= desc->block_size.height;
loc->x = offset / desc->bytes_per_block;
loc->x *= desc->block_size.width;
}
/**
 * svga3dsurface_inc_loc - Clamp-increment a surface location by one block
 * size in each dimension.
 * @cache: Surface layout data.
 * @loc: Pointer to a struct svga3dsurface_loc to be incremented.
*
 * When computing the size of a range as size = end - start, the range does not
 * include the end element. However, a location representing the last byte
 * of a touched region in the backing store *is* included in the range.
 * This function modifies such a location to match the end definition
 * given as start + size, which is the one used in an SVGA3dBox.
*/
static inline void
svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache,
struct svga3dsurface_loc *loc)
{
const struct svga3d_surface_desc *desc = cache->desc;
u32 mip = loc->sub_resource % cache->num_mip_levels;
const struct drm_vmw_size *size = &cache->mip[mip].size;
loc->sub_resource++;
loc->x += desc->block_size.width;
if (loc->x > size->width)
loc->x = size->width;
loc->y += desc->block_size.height;
if (loc->y > size->height)
loc->y = size->height;
loc->z += desc->block_size.depth;
if (loc->z > size->depth)
loc->z = size->depth;
}
/**
* svga3dsurface_min_loc - The start location in a subresource
* @cache: Surface layout data.
* @sub_resource: The subresource.
* @loc: Pointer to a struct svga3dsurface_loc to be filled in.
*/
static inline void
svga3dsurface_min_loc(const struct svga3dsurface_cache *cache,
u32 sub_resource,
struct svga3dsurface_loc *loc)
{
loc->sub_resource = sub_resource;
loc->x = loc->y = loc->z = 0;
}
/**
 * svga3dsurface_max_loc - The end location in a subresource
* @cache: Surface layout data.
* @sub_resource: The subresource.
* @loc: Pointer to a struct svga3dsurface_loc to be filled in.
*
 * Following the end definition given in svga3dsurface_inc_loc(), compute
 * the end location of a surface subresource.
*/
static inline void
svga3dsurface_max_loc(const struct svga3dsurface_cache *cache,
u32 sub_resource,
struct svga3dsurface_loc *loc)
{
const struct drm_vmw_size *size;
u32 mip;
loc->sub_resource = sub_resource + 1;
mip = sub_resource % cache->num_mip_levels;
size = &cache->mip[mip].size;
loc->x = size->width;
loc->y = size->height;
loc->z = size->depth;
}
#endif /* _SVGA3D_SURFACEDEFS_H_ */ #endif /* _SVGA3D_SURFACEDEFS_H_ */
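
As an aside, the location helpers above are intended to be combined roughly as follows when translating a dirty byte range in the backing store into an SVGA3dBox-style start/end pair; the function below is a sketch with an assumed name, not code from this series.

static inline void
example_range_to_locs(const struct svga3dsurface_cache *cache,
		      size_t offset, size_t size,
		      struct svga3dsurface_loc *start,
		      struct svga3dsurface_loc *end)
{
	/* Location of the first touched byte. */
	svga3dsurface_get_loc(cache, start, offset);
	/* Location of the last touched byte, ... */
	svga3dsurface_get_loc(cache, end, offset + size - 1);
	/* ... clamp-incremented into the exclusive start + size form. */
	svga3dsurface_inc_loc(cache, end);
}
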
...@@ -462,6 +462,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) ...@@ -462,6 +462,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
{ {
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
WARN_ON(vmw_bo->dirty);
WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo); vmw_bo_unmap(vmw_bo);
kfree(vmw_bo); kfree(vmw_bo);
} }
...@@ -475,8 +477,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) ...@@ -475,8 +477,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
{ {
struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo); struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
vmw_bo_unmap(&vmw_user_bo->vbo); WARN_ON(vbo->dirty);
WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime); ttm_prime_object_kfree(vmw_user_bo, prime);
} }
...@@ -511,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv, ...@@ -511,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo)); memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3; vmw_bo->base.priority = 3;
vmw_bo->res_tree = RB_ROOT;
INIT_LIST_HEAD(&vmw_bo->res_list);
ret = ttm_bo_init(bdev, &vmw_bo->base, size, ret = ttm_bo_init(bdev, &vmw_bo->base, size,
ttm_bo_type_device, placement, ttm_bo_type_device, placement,
......
...@@ -56,9 +56,9 @@ ...@@ -56,9 +56,9 @@
#define VMWGFX_DRIVER_NAME "vmwgfx" #define VMWGFX_DRIVER_NAME "vmwgfx"
#define VMWGFX_DRIVER_DATE "20180704" #define VMWGFX_DRIVER_DATE "20190328"
#define VMWGFX_DRIVER_MAJOR 2 #define VMWGFX_DRIVER_MAJOR 2
#define VMWGFX_DRIVER_MINOR 15 #define VMWGFX_DRIVER_MINOR 16
#define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_DRIVER_PATCHLEVEL 0
#define VMWGFX_FIFO_STATIC_SIZE (1024*1024) #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
#define VMWGFX_MAX_RELOCATIONS 2048 #define VMWGFX_MAX_RELOCATIONS 2048
...@@ -100,17 +100,18 @@ struct vmw_fpriv { ...@@ -100,17 +100,18 @@ struct vmw_fpriv {
/** /**
* struct vmw_buffer_object - TTM buffer object with vmwgfx additions * struct vmw_buffer_object - TTM buffer object with vmwgfx additions
* @base: The TTM buffer object * @base: The TTM buffer object
* @res_list: List of resources using this buffer object as a backing MOB * @res_tree: RB tree of resources using this buffer object as a backing MOB
* @pin_count: pin depth * @pin_count: pin depth
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when * @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation. * increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
* @map: Kmap object for semi-persistent mappings * @map: Kmap object for semi-persistent mappings
* @res_prios: Eviction priority counts for attached resources * @res_prios: Eviction priority counts for attached resources
* @dirty: structure for user-space dirty-tracking
*/ */
struct vmw_buffer_object { struct vmw_buffer_object {
struct ttm_buffer_object base; struct ttm_buffer_object base;
struct list_head res_list; struct rb_root res_tree;
s32 pin_count; s32 pin_count;
atomic_t cpu_writers; atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */ /* Not ref-counted. Protected by binding_mutex */
...@@ -118,6 +119,7 @@ struct vmw_buffer_object { ...@@ -118,6 +119,7 @@ struct vmw_buffer_object {
/* Protected by reservation */ /* Protected by reservation */
struct ttm_bo_kmap_obj map; struct ttm_bo_kmap_obj map;
u32 res_prios[TTM_MAX_BO_PRIORITY]; u32 res_prios[TTM_MAX_BO_PRIORITY];
struct vmw_bo_dirty *dirty;
}; };
/** /**
...@@ -148,7 +150,8 @@ struct vmw_res_func; ...@@ -148,7 +150,8 @@ struct vmw_res_func;
* @res_dirty: Resource contains data not yet in the backup buffer. Protected * @res_dirty: Resource contains data not yet in the backup buffer. Protected
* by resource reserved. * by resource reserved.
* @backup_dirty: Backup buffer contains data not yet in the HW resource. * @backup_dirty: Backup buffer contains data not yet in the HW resource.
* Protecte by resource reserved. * Protected by resource reserved.
* @coherent: Emulate coherency by tracking vm accesses.
* @backup: The backup buffer if any. Protected by resource reserved. * @backup: The backup buffer if any. Protected by resource reserved.
* @backup_offset: Offset into the backup buffer if any. Protected by resource * @backup_offset: Offset into the backup buffer if any. Protected by resource
* reserved. Note that only a few resource types can have a @backup_offset * reserved. Note that only a few resource types can have a @backup_offset
...@@ -157,29 +160,32 @@ struct vmw_res_func; ...@@ -157,29 +160,32 @@ struct vmw_res_func;
* pin-count greater than zero. It is not on the resource LRU lists and its * pin-count greater than zero. It is not on the resource LRU lists and its
* backup buffer is pinned. Hence it can't be evicted. * backup buffer is pinned. Hence it can't be evicted.
* @func: Method vtable for this resource. Immutable. * @func: Method vtable for this resource. Immutable.
 * @mob_node: Node for the MOB backup rbtree. Protected by @backup reserved.
* @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock.
* @mob_head: List head for the MOB backup list. Protected by @backup reserved.
* @binding_head: List head for the context binding list. Protected by * @binding_head: List head for the context binding list. Protected by
* the @dev_priv::binding_mutex * the @dev_priv::binding_mutex
* @res_free: The resource destructor. * @res_free: The resource destructor.
* @hw_destroy: Callback to destroy the resource on the device, as part of * @hw_destroy: Callback to destroy the resource on the device, as part of
* resource destruction. * resource destruction.
*/ */
struct vmw_resource_dirty;
struct vmw_resource { struct vmw_resource {
struct kref kref; struct kref kref;
struct vmw_private *dev_priv; struct vmw_private *dev_priv;
int id; int id;
u32 used_prio; u32 used_prio;
unsigned long backup_size; unsigned long backup_size;
bool res_dirty; u32 res_dirty : 1;
bool backup_dirty; u32 backup_dirty : 1;
u32 coherent : 1;
struct vmw_buffer_object *backup; struct vmw_buffer_object *backup;
unsigned long backup_offset; unsigned long backup_offset;
unsigned long pin_count; unsigned long pin_count;
const struct vmw_res_func *func; const struct vmw_res_func *func;
struct rb_node mob_node;
struct list_head lru_head; struct list_head lru_head;
struct list_head mob_head;
struct list_head binding_head; struct list_head binding_head;
struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res); void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res); void (*hw_destroy) (struct vmw_resource *res);
}; };
...@@ -678,7 +684,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res); ...@@ -678,7 +684,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
extern struct vmw_resource * extern struct vmw_resource *
vmw_resource_reference_unless_doomed(struct vmw_resource *res); vmw_resource_reference_unless_doomed(struct vmw_resource *res);
extern int vmw_resource_validate(struct vmw_resource *res, bool intr); extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
bool dirtying);
extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup); bool no_backup);
extern bool vmw_resource_needs_backup(const struct vmw_resource *res); extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
...@@ -720,6 +727,10 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv); ...@@ -720,6 +727,10 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo); extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_attach(struct vmw_resource *res);
void vmw_resource_mob_detach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res);
void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
pgoff_t end);
int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
pgoff_t end, pgoff_t *num_prefault);
/** /**
* vmw_resource_mob_attached - Whether a resource currently has a mob attached * vmw_resource_mob_attached - Whether a resource currently has a mob attached
...@@ -729,7 +740,7 @@ void vmw_resource_mob_detach(struct vmw_resource *res); ...@@ -729,7 +740,7 @@ void vmw_resource_mob_detach(struct vmw_resource *res);
*/ */
static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
{ {
return !list_empty(&res->mob_head); return !RB_EMPTY_NODE(&res->mob_node);
} }
/** /**
...@@ -1407,6 +1418,17 @@ int vmw_host_log(const char *log); ...@@ -1407,6 +1418,17 @@ int vmw_host_log(const char *log);
#define VMW_DEBUG_USER(fmt, ...) \ #define VMW_DEBUG_USER(fmt, ...) \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
/* Resource dirtying - vmwgfx_page_dirty.c */
void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
void vmw_bo_dirty_clear_res(struct vmw_resource *res);
void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
pgoff_t start, pgoff_t end);
vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
/** /**
* VMW_DEBUG_KMS - Debug output for kernel mode-setting * VMW_DEBUG_KMS - Debug output for kernel mode-setting
* *
......
...@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv, ...@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv,
offsetof(typeof(*cmd), sid)); offsetof(typeof(*cmd), sid));
cmd = container_of(header, typeof(*cmd), header); cmd = container_of(header, typeof(*cmd), header);
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
VMW_RES_DIRTY_NONE, user_surface_converter, VMW_RES_DIRTY_NONE, user_surface_converter,
&cmd->sid, NULL); &cmd->sid, NULL);
......
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
*
* Copyright 2019 VMware, Inc., Palo Alto, CA., USA
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "vmwgfx_drv.h"
/*
* Different methods for tracking dirty:
* VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
* VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
* accesses in the VM mkwrite() callback
*/
enum vmw_bo_dirty_method {
VMW_BO_DIRTY_PAGETABLE,
VMW_BO_DIRTY_MKWRITE,
};
/*
 * A scan that finds no dirtied pages triggers a transition to the _MKWRITE
 * method; similarly, a scan that finds more than a certain percentage of
 * dirty pages triggers a transition to the _PAGETABLE method. This is the
 * number of consecutive triggers we wait for before changing method.
*/
#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2
/* Percentage to trigger a transition to the _PAGETABLE method */
#define VMW_DIRTY_PERCENTAGE 10
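As a rough worked example (the buffer size is assumed for illustration): a method switch requires change_count to exceed VMW_DIRTY_NUM_CHANGE_TRIGGERS, i.e. three consecutive triggering scans, and for a 1024-page buffer object in _MKWRITE mode a scan only counts as a trigger if more than 10% of its pages - at least 103 of 1024 - were written since the previous scan.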
/**
* struct vmw_bo_dirty - Dirty information for buffer objects
* @start: First currently dirty bit
* @end: Last currently dirty bit + 1
* @method: The currently used dirty method
* @change_count: Number of consecutive method change triggers
* @ref_count: Reference count for this structure
 * @bitmap_size: The size of the bitmap in bits. Typically equal to the
 * number of pages in the bo.
* @size: The accounting size for this struct.
* @bitmap: A bitmap where each bit represents a page. A set bit means a
* dirty page.
*/
struct vmw_bo_dirty {
unsigned long start;
unsigned long end;
enum vmw_bo_dirty_method method;
unsigned int change_count;
unsigned int ref_count;
unsigned long bitmap_size;
size_t size;
unsigned long bitmap[0];
};
/**
* vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
* @vbo: The buffer object to scan
*
* Scans the pagetable for dirty bits. Clear those bits and modify the
* dirty structure with the results. This function may change the
* dirty-tracking method.
*/
static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
{
struct vmw_bo_dirty *dirty = vbo->dirty;
pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
struct address_space *mapping = vbo->base.bdev->dev_mapping;
pgoff_t num_marked;
num_marked = clean_record_shared_mapping_range
(mapping,
offset, dirty->bitmap_size,
offset, &dirty->bitmap[0],
&dirty->start, &dirty->end);
if (num_marked == 0)
dirty->change_count++;
else
dirty->change_count = 0;
if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
dirty->change_count = 0;
dirty->method = VMW_BO_DIRTY_MKWRITE;
wp_shared_mapping_range(mapping,
offset, dirty->bitmap_size);
clean_record_shared_mapping_range(mapping,
offset, dirty->bitmap_size,
offset, &dirty->bitmap[0],
&dirty->start, &dirty->end);
}
}
/**
* vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
* @vbo: The buffer object to scan
*
 * Write-protect pages written to so that subsequent write accesses will
 * trigger a call to mkwrite.
*
* This function may change the dirty-tracking method.
*/
static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
{
struct vmw_bo_dirty *dirty = vbo->dirty;
unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
struct address_space *mapping = vbo->base.bdev->dev_mapping;
pgoff_t num_marked;
if (dirty->end <= dirty->start)
return;
num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping,
dirty->start + offset,
dirty->end - dirty->start);
if (100UL * num_marked / dirty->bitmap_size >
VMW_DIRTY_PERCENTAGE) {
dirty->change_count++;
} else {
dirty->change_count = 0;
}
if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
pgoff_t start = 0;
pgoff_t end = dirty->bitmap_size;
dirty->method = VMW_BO_DIRTY_PAGETABLE;
clean_record_shared_mapping_range(mapping, offset, end, offset,
&dirty->bitmap[0],
&start, &end);
bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
if (dirty->start < dirty->end)
bitmap_set(&dirty->bitmap[0], dirty->start,
dirty->end - dirty->start);
dirty->change_count = 0;
}
}
/**
* vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
* tracking structure
* @vbo: The buffer object to scan
*
* This function may change the dirty tracking method.
*/
void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
{
struct vmw_bo_dirty *dirty = vbo->dirty;
if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
vmw_bo_dirty_scan_pagetable(vbo);
else
vmw_bo_dirty_scan_mkwrite(vbo);
}
/**
* vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
* an unmap_mapping_range operation.
* @vbo: The buffer object,
* @start: First page of the range within the buffer object.
* @end: Last page of the range within the buffer object + 1.
*
* If we're using the _PAGETABLE scan method, we may leak dirty pages
* when calling unmap_mapping_range(). This function makes sure we pick
* up all dirty pages.
*/
static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
pgoff_t start, pgoff_t end)
{
struct vmw_bo_dirty *dirty = vbo->dirty;
unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
struct address_space *mapping = vbo->base.bdev->dev_mapping;
if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
return;
wp_shared_mapping_range(mapping, start + offset, end - start);
clean_record_shared_mapping_range(mapping, start + offset,
end - start, offset,
&dirty->bitmap[0], &dirty->start,
&dirty->end);
}
/**
* vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
* @vbo: The buffer object,
* @start: First page of the range within the buffer object.
* @end: Last page of the range within the buffer object + 1.
*
* This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
*/
void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
pgoff_t start, pgoff_t end)
{
unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
struct address_space *mapping = vbo->base.bdev->dev_mapping;
vmw_bo_dirty_pre_unmap(vbo, start, end);
unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
(loff_t) (end - start) << PAGE_SHIFT);
}
/**
* vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
* @vbo: The buffer object
*
* This function registers a dirty-tracking user to a buffer object.
* A user can be for example a resource or a vma in a special user-space
* mapping.
*
* Return: Zero on success, -ENOMEM on memory allocation failure.
*/
int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
{
struct vmw_bo_dirty *dirty = vbo->dirty;
pgoff_t num_pages = vbo->base.num_pages;
size_t size, acc_size;
int ret;
static struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false
};
if (dirty) {
dirty->ref_count++;
return 0;
}
size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
acc_size = ttm_round_pot(size);
ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
if (ret) {
VMW_DEBUG_USER("Out of graphics memory for buffer object "
"dirty tracker.\n");
return ret;
}
dirty = kvzalloc(size, GFP_KERNEL);
if (!dirty) {
ret = -ENOMEM;
goto out_no_dirty;
}
dirty->size = acc_size;
dirty->bitmap_size = num_pages;
dirty->start = dirty->bitmap_size;
dirty->end = 0;
dirty->ref_count = 1;
if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
dirty->method = VMW_BO_DIRTY_PAGETABLE;
} else {
struct address_space *mapping = vbo->base.bdev->dev_mapping;
pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
dirty->method = VMW_BO_DIRTY_MKWRITE;
/* Write-protect and then pick up already dirty bits */
wp_shared_mapping_range(mapping, offset, num_pages);
clean_record_shared_mapping_range(mapping, offset, num_pages,
offset,
&dirty->bitmap[0],
&dirty->start, &dirty->end);
}
vbo->dirty = dirty;
return 0;
out_no_dirty:
ttm_mem_global_free(&ttm_mem_glob, acc_size);
return ret;
}
/**
* vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
* @vbo: The buffer object
*
* This function releases a dirty-tracking user from a buffer object.
* If the reference count reaches zero, then the dirty-tracking object is
* freed and the pointer to it cleared.
*/
void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
{
struct vmw_bo_dirty *dirty = vbo->dirty;
if (dirty && --dirty->ref_count == 0) {
size_t acc_size = dirty->size;
kvfree(dirty);
ttm_mem_global_free(&ttm_mem_glob, acc_size);
vbo->dirty = NULL;
}
}
/**
* vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
* its backing mob.
* @res: The resource
*
 * This function will pick up all dirty ranges affecting the resource from
 * its backup mob, and call vmw_resource_dirty_update() once for each
* range. The transferred ranges will be cleared from the backing mob's
* dirty tracking.
*/
void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
{
struct vmw_buffer_object *vbo = res->backup;
struct vmw_bo_dirty *dirty = vbo->dirty;
pgoff_t start, cur, end;
unsigned long res_start = res->backup_offset;
unsigned long res_end = res->backup_offset + res->backup_size;
WARN_ON_ONCE(res_start & ~PAGE_MASK);
res_start >>= PAGE_SHIFT;
res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
if (res_start >= dirty->end || res_end <= dirty->start)
return;
cur = max(res_start, dirty->start);
res_end = max(res_end, dirty->end);
while (cur < res_end) {
unsigned long num;
start = find_next_bit(&dirty->bitmap[0], res_end, cur);
if (start >= res_end)
break;
end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
cur = end + 1;
num = end - start;
bitmap_clear(&dirty->bitmap[0], start, num);
vmw_resource_dirty_update(res, start, end);
}
if (res_start <= dirty->start && res_end > dirty->start)
dirty->start = res_end;
if (res_start < dirty->end && res_end >= dirty->end)
dirty->end = res_start;
}
/**
* vmw_bo_dirty_clear_res - Clear a resource's dirty region from
* its backing mob.
* @res: The resource
*
 * This function will clear all dirty ranges affecting the resource from
 * its backup mob's dirty tracking.
*/
void vmw_bo_dirty_clear_res(struct vmw_resource *res)
{
unsigned long res_start = res->backup_offset;
unsigned long res_end = res->backup_offset + res->backup_size;
struct vmw_buffer_object *vbo = res->backup;
struct vmw_bo_dirty *dirty = vbo->dirty;
res_start >>= PAGE_SHIFT;
res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
if (res_start >= dirty->end || res_end <= dirty->start)
return;
res_start = max(res_start, dirty->start);
res_end = min(res_end, dirty->end);
bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);
if (res_start <= dirty->start && res_end > dirty->start)
dirty->start = res_end;
if (res_start < dirty->end && res_end >= dirty->end)
dirty->end = res_start;
}
vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
vma->vm_private_data;
vm_fault_t ret;
unsigned long page_offset;
unsigned int save_flags;
struct vmw_buffer_object *vbo =
container_of(bo, typeof(*vbo), base);
/*
 * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly, so
 * make the TTM helpers aware of that by clearing FAULT_FLAG_ALLOW_RETRY here.
*/
save_flags = vmf->flags;
vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
ret = ttm_bo_vm_reserve(bo, vmf);
vmf->flags = save_flags;
if (ret)
return ret;
page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
if (unlikely(page_offset >= bo->num_pages)) {
ret = VM_FAULT_SIGBUS;
goto out_unlock;
}
if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
!test_bit(page_offset, &vbo->dirty->bitmap[0])) {
struct vmw_bo_dirty *dirty = vbo->dirty;
__set_bit(page_offset, &dirty->bitmap[0]);
dirty->start = min(dirty->start, page_offset);
dirty->end = max(dirty->end, page_offset + 1);
}
out_unlock:
dma_resv_unlock(bo->base.resv);
return ret;
}
vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
vma->vm_private_data;
struct vmw_buffer_object *vbo =
container_of(bo, struct vmw_buffer_object, base);
pgoff_t num_prefault;
pgprot_t prot;
vm_fault_t ret;
ret = ttm_bo_vm_reserve(bo, vmf);
if (ret)
return ret;
num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
TTM_BO_VM_NUM_PREFAULT;
if (vbo->dirty) {
pgoff_t allowed_prefault;
unsigned long page_offset;
page_offset = vmf->pgoff -
drm_vma_node_start(&bo->base.vma_node);
if (page_offset >= bo->num_pages ||
vmw_resources_clean(vbo, page_offset,
page_offset + PAGE_SIZE,
&allowed_prefault)) {
ret = VM_FAULT_SIGBUS;
goto out_unlock;
}
num_prefault = min(num_prefault, allowed_prefault);
}
/*
 * If we don't track dirty using the MKWRITE method, make sure
 * the page protection is write-enabled so we don't get
 * a lot of unnecessary write faults.
*/
if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
prot = vma->vm_page_prot;
else
prot = vm_get_page_prot(vma->vm_flags);
ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
out_unlock:
dma_resv_unlock(bo->base.resv);
return ret;
}
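
For completeness, a sketch of how these handlers are wired up at mmap time; the hookup lives in a part of the series not shown in this excerpt, so the struct below is illustrative rather than a verbatim quote.

static const struct vm_operations_struct vmw_vm_ops = {
	.pfn_mkwrite = vmw_bo_vm_mkwrite,
	.page_mkwrite = vmw_bo_vm_mkwrite,
	.fault = vmw_bo_vm_fault,
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
};
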
...@@ -40,11 +40,24 @@ ...@@ -40,11 +40,24 @@
void vmw_resource_mob_attach(struct vmw_resource *res) void vmw_resource_mob_attach(struct vmw_resource *res)
{ {
struct vmw_buffer_object *backup = res->backup; struct vmw_buffer_object *backup = res->backup;
struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
dma_resv_assert_held(res->backup->base.base.resv); dma_resv_assert_held(res->backup->base.base.resv);
res->used_prio = (res->res_dirty) ? res->func->dirty_prio : res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
res->func->prio; res->func->prio;
list_add_tail(&res->mob_head, &backup->res_list);
while (*new) {
struct vmw_resource *this =
container_of(*new, struct vmw_resource, mob_node);
parent = *new;
new = (res->backup_offset < this->backup_offset) ?
&((*new)->rb_left) : &((*new)->rb_right);
}
rb_link_node(&res->mob_node, parent, new);
rb_insert_color(&res->mob_node, &backup->res_tree);
vmw_bo_prio_add(backup, res->used_prio); vmw_bo_prio_add(backup, res->used_prio);
} }
...@@ -58,7 +71,8 @@ void vmw_resource_mob_detach(struct vmw_resource *res) ...@@ -58,7 +71,8 @@ void vmw_resource_mob_detach(struct vmw_resource *res)
dma_resv_assert_held(backup->base.base.resv); dma_resv_assert_held(backup->base.base.resv);
if (vmw_resource_mob_attached(res)) { if (vmw_resource_mob_attached(res)) {
list_del_init(&res->mob_head); rb_erase(&res->mob_node, &backup->res_tree);
RB_CLEAR_NODE(&res->mob_node);
vmw_bo_prio_del(backup, res->used_prio); vmw_bo_prio_del(backup, res->used_prio);
} }
} }
...@@ -119,6 +133,10 @@ static void vmw_resource_release(struct kref *kref) ...@@ -119,6 +133,10 @@ static void vmw_resource_release(struct kref *kref)
} }
res->backup_dirty = false; res->backup_dirty = false;
vmw_resource_mob_detach(res); vmw_resource_mob_detach(res);
if (res->dirty)
res->func->dirty_free(res);
if (res->coherent)
vmw_bo_dirty_release(res->backup);
ttm_bo_unreserve(bo); ttm_bo_unreserve(bo);
vmw_bo_unreference(&res->backup); vmw_bo_unreference(&res->backup);
} }
...@@ -200,15 +218,17 @@ int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res, ...@@ -200,15 +218,17 @@ int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res,
res->res_free = res_free; res->res_free = res_free;
res->dev_priv = dev_priv; res->dev_priv = dev_priv;
res->func = func; res->func = func;
RB_CLEAR_NODE(&res->mob_node);
INIT_LIST_HEAD(&res->lru_head); INIT_LIST_HEAD(&res->lru_head);
INIT_LIST_HEAD(&res->mob_head);
INIT_LIST_HEAD(&res->binding_head); INIT_LIST_HEAD(&res->binding_head);
res->id = -1; res->id = -1;
res->backup = NULL; res->backup = NULL;
res->backup_offset = 0; res->backup_offset = 0;
res->backup_dirty = false; res->backup_dirty = false;
res->res_dirty = false; res->res_dirty = false;
res->coherent = false;
res->used_prio = 3; res->used_prio = 3;
res->dirty = NULL;
if (delay_id) if (delay_id)
return 0; return 0;
else else
...@@ -373,7 +393,8 @@ static int vmw_resource_buf_alloc(struct vmw_resource *res, ...@@ -373,7 +393,8 @@ static int vmw_resource_buf_alloc(struct vmw_resource *res,
* should be retried once resources have been freed up. * should be retried once resources have been freed up.
*/ */
static int vmw_resource_do_validate(struct vmw_resource *res, static int vmw_resource_do_validate(struct vmw_resource *res,
struct ttm_validate_buffer *val_buf) struct ttm_validate_buffer *val_buf,
bool dirtying)
{ {
int ret = 0; int ret = 0;
const struct vmw_res_func *func = res->func; const struct vmw_res_func *func = res->func;
...@@ -395,6 +416,39 @@ static int vmw_resource_do_validate(struct vmw_resource *res, ...@@ -395,6 +416,39 @@ static int vmw_resource_do_validate(struct vmw_resource *res,
vmw_resource_mob_attach(res); vmw_resource_mob_attach(res);
} }
/*
* Handle the case where the backup mob is marked coherent but
* the resource isn't.
*/
if (func->dirty_alloc && vmw_resource_mob_attached(res) &&
!res->coherent) {
if (res->backup->dirty && !res->dirty) {
ret = func->dirty_alloc(res);
if (ret)
return ret;
} else if (!res->backup->dirty && res->dirty) {
func->dirty_free(res);
}
}
/*
* Transfer the dirty regions to the resource and update
* the resource.
*/
if (res->dirty) {
if (dirtying && !res->res_dirty) {
pgoff_t start = res->backup_offset >> PAGE_SHIFT;
pgoff_t end = __KERNEL_DIV_ROUND_UP
(res->backup_offset + res->backup_size,
PAGE_SIZE);
vmw_bo_dirty_unmap(res->backup, start, end);
}
vmw_bo_dirty_transfer_to_res(res);
return func->dirty_sync(res);
}
return 0; return 0;
out_bind_failed: out_bind_failed:
...@@ -433,16 +487,28 @@ void vmw_resource_unreserve(struct vmw_resource *res, ...@@ -433,16 +487,28 @@ void vmw_resource_unreserve(struct vmw_resource *res,
if (switch_backup && new_backup != res->backup) { if (switch_backup && new_backup != res->backup) {
if (res->backup) { if (res->backup) {
vmw_resource_mob_detach(res); vmw_resource_mob_detach(res);
if (res->coherent)
vmw_bo_dirty_release(res->backup);
vmw_bo_unreference(&res->backup); vmw_bo_unreference(&res->backup);
} }
if (new_backup) { if (new_backup) {
res->backup = vmw_bo_reference(new_backup); res->backup = vmw_bo_reference(new_backup);
/*
* The validation code should already have added a
* dirty tracker here.
*/
WARN_ON(res->coherent && !new_backup->dirty);
vmw_resource_mob_attach(res); vmw_resource_mob_attach(res);
} else { } else {
res->backup = NULL; res->backup = NULL;
} }
} else if (switch_backup && res->coherent) {
vmw_bo_dirty_release(res->backup);
} }
if (switch_backup) if (switch_backup)
res->backup_offset = new_backup_offset; res->backup_offset = new_backup_offset;
...@@ -622,6 +688,7 @@ static int vmw_resource_do_evict(struct ww_acquire_ctx *ticket, ...@@ -622,6 +688,7 @@ static int vmw_resource_do_evict(struct ww_acquire_ctx *ticket,
* to the device. * to the device.
* @res: The resource to make visible to the device. * @res: The resource to make visible to the device.
* @intr: Perform waits interruptible if possible. * @intr: Perform waits interruptible if possible.
* @dirtying: Pending GPU operation will dirty the resource
* *
* On succesful return, any backup DMA buffer pointed to by @res->backup will * On succesful return, any backup DMA buffer pointed to by @res->backup will
* be reserved and validated. * be reserved and validated.
...@@ -631,7 +698,8 @@ static int vmw_resource_do_evict(struct ww_acquire_ctx *ticket, ...@@ -631,7 +698,8 @@ static int vmw_resource_do_evict(struct ww_acquire_ctx *ticket,
* Return: Zero on success, -ERESTARTSYS if interrupted, negative error code * Return: Zero on success, -ERESTARTSYS if interrupted, negative error code
* on failure. * on failure.
*/ */
int vmw_resource_validate(struct vmw_resource *res, bool intr) int vmw_resource_validate(struct vmw_resource *res, bool intr,
bool dirtying)
{ {
int ret; int ret;
struct vmw_resource *evict_res; struct vmw_resource *evict_res;
...@@ -648,7 +716,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr) ...@@ -648,7 +716,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr)
if (res->backup) if (res->backup)
val_buf.bo = &res->backup->base; val_buf.bo = &res->backup->base;
do { do {
ret = vmw_resource_do_validate(res, &val_buf); ret = vmw_resource_do_validate(res, &val_buf, dirtying);
if (likely(ret != -EBUSY)) if (likely(ret != -EBUSY))
break; break;
...@@ -711,19 +779,20 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr) ...@@ -711,19 +779,20 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr)
*/ */
void vmw_resource_unbind_list(struct vmw_buffer_object *vbo) void vmw_resource_unbind_list(struct vmw_buffer_object *vbo)
{ {
struct vmw_resource *res, *next;
struct ttm_validate_buffer val_buf = { struct ttm_validate_buffer val_buf = {
.bo = &vbo->base, .bo = &vbo->base,
.num_shared = 0 .num_shared = 0
}; };
dma_resv_assert_held(vbo->base.base.resv); dma_resv_assert_held(vbo->base.base.resv);
list_for_each_entry_safe(res, next, &vbo->res_list, mob_head) { while (!RB_EMPTY_ROOT(&vbo->res_tree)) {
if (!res->func->unbind) struct rb_node *node = vbo->res_tree.rb_node;
continue; struct vmw_resource *res =
container_of(node, struct vmw_resource, mob_node);
if (!WARN_ON_ONCE(!res->func->unbind))
(void) res->func->unbind(res, res->res_dirty, &val_buf); (void) res->func->unbind(res, res->res_dirty, &val_buf);
res->backup_dirty = true; res->backup_dirty = true;
res->res_dirty = false; res->res_dirty = false;
vmw_resource_mob_detach(res); vmw_resource_mob_detach(res);
...@@ -947,7 +1016,7 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible) ...@@ -947,7 +1016,7 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible)
/* Do we really need to pin the MOB as well? */ /* Do we really need to pin the MOB as well? */
vmw_bo_pin_reserved(vbo, true); vmw_bo_pin_reserved(vbo, true);
} }
ret = vmw_resource_validate(res, interruptible); ret = vmw_resource_validate(res, interruptible, true);
if (vbo) if (vbo)
ttm_bo_unreserve(&vbo->base); ttm_bo_unreserve(&vbo->base);
if (ret) if (ret)
...@@ -1007,3 +1076,101 @@ enum vmw_res_type vmw_res_type(const struct vmw_resource *res) ...@@ -1007,3 +1076,101 @@ enum vmw_res_type vmw_res_type(const struct vmw_resource *res)
{ {
return res->func->res_type; return res->func->res_type;
} }
/**
 * vmw_resource_dirty_update - Update a resource's dirty tracker with a
 * sequential range of touched backing store memory.
* @res: The resource.
* @start: The first page touched.
* @end: The last page touched + 1.
*/
void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
pgoff_t end)
{
if (res->dirty)
res->func->dirty_range_add(res, start << PAGE_SHIFT,
end << PAGE_SHIFT);
}
/**
* vmw_resources_clean - Clean resources intersecting a mob range
* @vbo: The mob buffer object
* @start: The mob page offset starting the range
* @end: The mob page offset ending the range
* @num_prefault: Returns how many pages including the first have been
* cleaned and are ok to prefault
*/
int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
pgoff_t end, pgoff_t *num_prefault)
{
struct rb_node *cur = vbo->res_tree.rb_node;
struct vmw_resource *found = NULL;
unsigned long res_start = start << PAGE_SHIFT;
unsigned long res_end = end << PAGE_SHIFT;
unsigned long last_cleaned = 0;
/*
* Find the resource with lowest backup_offset that intersects the
* range.
*/
while (cur) {
struct vmw_resource *cur_res =
container_of(cur, struct vmw_resource, mob_node);
if (cur_res->backup_offset >= res_end) {
cur = cur->rb_left;
} else if (cur_res->backup_offset + cur_res->backup_size <=
res_start) {
cur = cur->rb_right;
} else {
found = cur_res;
cur = cur->rb_left;
/* Continue to look for resources with lower offsets */
}
}
/*
 * In order of increasing backup_offset, clean dirty resources
* intersecting the range.
*/
while (found) {
if (found->res_dirty) {
int ret;
if (!found->func->clean)
return -EINVAL;
ret = found->func->clean(found);
if (ret)
return ret;
found->res_dirty = false;
}
last_cleaned = found->backup_offset + found->backup_size;
cur = rb_next(&found->mob_node);
if (!cur)
break;
found = container_of(cur, struct vmw_resource, mob_node);
if (found->backup_offset >= res_end)
break;
}
/*
 * Set the number of pages allowed for prefaulting and fence the buffer object.
*/
*num_prefault = 1;
if (last_cleaned > res_start) {
struct ttm_buffer_object *bo = &vbo->base;
*num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
PAGE_SIZE);
vmw_bo_fence_single(bo, NULL);
if (bo->moving)
dma_fence_put(bo->moving);
bo->moving = dma_fence_get
(dma_resv_get_excl(bo->base.resv));
}
return 0;
}
...@@ -71,6 +71,13 @@ struct vmw_user_resource_conv { ...@@ -71,6 +71,13 @@ struct vmw_user_resource_conv {
* @commit_notify: If the resource is a command buffer managed resource, * @commit_notify: If the resource is a command buffer managed resource,
* callback to notify that a define or remove command * callback to notify that a define or remove command
* has been committed to the device. * has been committed to the device.
* @dirty_alloc: Allocate a dirty tracker. NULL if dirty-tracking is not
* supported.
* @dirty_free: Free the dirty tracker.
* @dirty_sync: Upload the dirty mob contents to the resource.
 * @dirty_range_add: Add a sequential dirty range to the resource
 * dirty tracker.
* @clean: Clean the resource.
*/ */
struct vmw_res_func { struct vmw_res_func {
enum vmw_res_type res_type; enum vmw_res_type res_type;
...@@ -90,6 +97,12 @@ struct vmw_res_func { ...@@ -90,6 +97,12 @@ struct vmw_res_func {
struct ttm_validate_buffer *val_buf); struct ttm_validate_buffer *val_buf);
void (*commit_notify)(struct vmw_resource *res, void (*commit_notify)(struct vmw_resource *res,
enum vmw_cmdbuf_res_state state); enum vmw_cmdbuf_res_state state);
int (*dirty_alloc)(struct vmw_resource *res);
void (*dirty_free)(struct vmw_resource *res);
int (*dirty_sync)(struct vmw_resource *res);
void (*dirty_range_add)(struct vmw_resource *res, size_t start,
size_t end);
int (*clean)(struct vmw_resource *res);
}; };
/** /**
......
...@@ -68,6 +68,20 @@ struct vmw_surface_offset { ...@@ -68,6 +68,20 @@ struct vmw_surface_offset {
uint32_t bo_offset; uint32_t bo_offset;
}; };
/**
* vmw_surface_dirty - Surface dirty-tracker
* @cache: Cached layout information of the surface.
* @size: Accounting size for the struct vmw_surface_dirty.
* @num_subres: Number of subresources.
* @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource.
*/
struct vmw_surface_dirty {
struct svga3dsurface_cache cache;
size_t size;
u32 num_subres;
SVGA3dBox boxes[0];
};
static void vmw_user_surface_free(struct vmw_resource *res); static void vmw_user_surface_free(struct vmw_resource *res);
static struct vmw_resource * static struct vmw_resource *
vmw_user_surface_base_to_res(struct ttm_base_object *base); vmw_user_surface_base_to_res(struct ttm_base_object *base);
...@@ -96,6 +110,13 @@ vmw_gb_surface_reference_internal(struct drm_device *dev, ...@@ -96,6 +110,13 @@ vmw_gb_surface_reference_internal(struct drm_device *dev,
struct drm_vmw_gb_surface_ref_ext_rep *rep, struct drm_vmw_gb_surface_ref_ext_rep *rep,
struct drm_file *file_priv); struct drm_file *file_priv);
static void vmw_surface_dirty_free(struct vmw_resource *res);
static int vmw_surface_dirty_alloc(struct vmw_resource *res);
static int vmw_surface_dirty_sync(struct vmw_resource *res);
static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
size_t end);
static int vmw_surface_clean(struct vmw_resource *res);
static const struct vmw_user_resource_conv user_surface_conv = { static const struct vmw_user_resource_conv user_surface_conv = {
.object_type = VMW_RES_SURFACE, .object_type = VMW_RES_SURFACE,
.base_obj_to_res = vmw_user_surface_base_to_res, .base_obj_to_res = vmw_user_surface_base_to_res,
...@@ -133,7 +154,12 @@ static const struct vmw_res_func vmw_gb_surface_func = { ...@@ -133,7 +154,12 @@ static const struct vmw_res_func vmw_gb_surface_func = {
.create = vmw_gb_surface_create, .create = vmw_gb_surface_create,
.destroy = vmw_gb_surface_destroy, .destroy = vmw_gb_surface_destroy,
.bind = vmw_gb_surface_bind, .bind = vmw_gb_surface_bind,
.unbind = vmw_gb_surface_unbind .unbind = vmw_gb_surface_unbind,
.dirty_alloc = vmw_surface_dirty_alloc,
.dirty_free = vmw_surface_dirty_free,
.dirty_sync = vmw_surface_dirty_sync,
.dirty_range_add = vmw_surface_dirty_range_add,
.clean = vmw_surface_clean,
}; };
/** /**
...@@ -639,6 +665,7 @@ static void vmw_user_surface_free(struct vmw_resource *res) ...@@ -639,6 +665,7 @@ static void vmw_user_surface_free(struct vmw_resource *res)
struct vmw_private *dev_priv = srf->res.dev_priv; struct vmw_private *dev_priv = srf->res.dev_priv;
uint32_t size = user_srf->size; uint32_t size = user_srf->size;
WARN_ON_ONCE(res->dirty);
if (user_srf->master) if (user_srf->master)
drm_master_put(&user_srf->master); drm_master_put(&user_srf->master);
kfree(srf->offsets); kfree(srf->offsets);
...@@ -1166,10 +1193,16 @@ static int vmw_gb_surface_bind(struct vmw_resource *res, ...@@ -1166,10 +1193,16 @@ static int vmw_gb_surface_bind(struct vmw_resource *res,
cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_SURFACE; cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_SURFACE;
cmd2->header.size = sizeof(cmd2->body); cmd2->header.size = sizeof(cmd2->body);
cmd2->body.sid = res->id; cmd2->body.sid = res->id;
res->backup_dirty = false;
} }
vmw_fifo_commit(dev_priv, submit_size); vmw_fifo_commit(dev_priv, submit_size);
if (res->backup->dirty && res->backup_dirty) {
/* We've just made a full upload. Clear dirty regions. */
vmw_bo_dirty_clear_res(res);
}
res->backup_dirty = false;
return 0; return 0;
} }
...@@ -1634,7 +1667,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev, ...@@ -1634,7 +1667,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
} }
} }
} else if (req->base.drm_surface_flags & } else if (req->base.drm_surface_flags &
drm_vmw_surface_flag_create_buffer) (drm_vmw_surface_flag_create_buffer |
drm_vmw_surface_flag_coherent))
ret = vmw_user_bo_alloc(dev_priv, tfile, ret = vmw_user_bo_alloc(dev_priv, tfile,
res->backup_size, res->backup_size,
req->base.drm_surface_flags & req->base.drm_surface_flags &
...@@ -1648,6 +1682,26 @@ vmw_gb_surface_define_internal(struct drm_device *dev, ...@@ -1648,6 +1682,26 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
goto out_unlock; goto out_unlock;
} }
if (req->base.drm_surface_flags & drm_vmw_surface_flag_coherent) {
struct vmw_buffer_object *backup = res->backup;
ttm_bo_reserve(&backup->base, false, false, NULL);
if (!res->func->dirty_alloc)
ret = -EINVAL;
if (!ret)
ret = vmw_bo_dirty_add(backup);
if (!ret) {
res->coherent = true;
ret = res->func->dirty_alloc(res);
}
ttm_bo_unreserve(&backup->base);
if (ret) {
vmw_resource_unreference(&res);
goto out_unlock;
}
}
tmp = vmw_resource_reference(res); tmp = vmw_resource_reference(res);
ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime, ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime,
req->base.drm_surface_flags & req->base.drm_surface_flags &
...@@ -1756,3 +1810,338 @@ vmw_gb_surface_reference_internal(struct drm_device *dev, ...@@ -1756,3 +1810,338 @@ vmw_gb_surface_reference_internal(struct drm_device *dev,
return ret; return ret;
} }
/**
* vmw_subres_dirty_add - Add a dirty region to a subresource
 * @dirty: The surface's dirty tracker.
* @loc_start: The location corresponding to the start of the region.
* @loc_end: The location corresponding to the end of the region.
*
* As we are assuming that @loc_start and @loc_end represent a sequential
* range of backing store memory, if the region spans multiple lines then
* regardless of the x coordinate, the full lines are dirtied.
* Correspondingly if the region spans multiple z slices, then full rather
* than partial z slices are dirtied.
*/
static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty,
const struct svga3dsurface_loc *loc_start,
const struct svga3dsurface_loc *loc_end)
{
const struct svga3dsurface_cache *cache = &dirty->cache;
SVGA3dBox *box = &dirty->boxes[loc_start->sub_resource];
u32 mip = loc_start->sub_resource % cache->num_mip_levels;
const struct drm_vmw_size *size = &cache->mip[mip].size;
u32 box_c2 = box->z + box->d;
if (WARN_ON(loc_start->sub_resource >= dirty->num_subres))
return;
if (box->d == 0 || box->z > loc_start->z)
box->z = loc_start->z;
if (box_c2 < loc_end->z)
box->d = loc_end->z - box->z;
if (loc_start->z + 1 == loc_end->z) {
box_c2 = box->y + box->h;
if (box->h == 0 || box->y > loc_start->y)
box->y = loc_start->y;
if (box_c2 < loc_end->y)
box->h = loc_end->y - box->y;
if (loc_start->y + 1 == loc_end->y) {
box_c2 = box->x + box->w;
if (box->w == 0 || box->x > loc_start->x)
box->x = loc_start->x;
if (box_c2 < loc_end->x)
box->w = loc_end->x - box->x;
} else {
box->x = 0;
box->w = size->width;
}
} else {
box->y = 0;
box->h = size->height;
box->x = 0;
box->w = size->width;
}
}
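A small worked example, with illustrative values, of the x-axis handling above:

/*
 * Worked example (illustrative values): an existing box with x = 4,
 * w = 2, extended by a range touching x = 1..6 on the same mip line
 * (so loc_start->x = 1 and loc_end->x = 7, since loc_end is exclusive):
 *
 *   box_c2 = 4 + 2 = 6;     old right edge
 *   box->x = 1;             loc_start->x < box->x, move the left edge
 *   box->w = 7 - 1 = 6;     box_c2 < loc_end->x, move the right edge
 *
 * The box always grows to the union of its old extent and the new
 * range; it never shrinks.
 */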
/**
* vmw_subres_dirty_full - Mark a full subresource as dirty
* @dirty: The surface's dirty tracker.
* @subres: The subresource
*/
static void vmw_subres_dirty_full(struct vmw_surface_dirty *dirty, u32 subres)
{
const struct svga3dsurface_cache *cache = &dirty->cache;
u32 mip = subres % cache->num_mip_levels;
const struct drm_vmw_size *size = &cache->mip[mip].size;
SVGA3dBox *box = &dirty->boxes[subres];
box->x = 0;
box->y = 0;
box->z = 0;
box->w = size->width;
box->h = size->height;
box->d = size->depth;
}
/*
 * vmw_surface_tex_dirty_range_add - The dirty_range_add callback for texture
* surfaces.
*/
static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res,
size_t start, size_t end)
{
struct vmw_surface_dirty *dirty =
(struct vmw_surface_dirty *) res->dirty;
size_t backup_end = res->backup_offset + res->backup_size;
struct svga3dsurface_loc loc1, loc2;
const struct svga3dsurface_cache *cache;
start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
end = min(end, backup_end) - res->backup_offset;
cache = &dirty->cache;
svga3dsurface_get_loc(cache, &loc1, start);
svga3dsurface_get_loc(cache, &loc2, end - 1);
svga3dsurface_inc_loc(cache, &loc2);
if (loc1.sub_resource + 1 == loc2.sub_resource) {
/* Dirty range covers a single sub-resource */
vmw_subres_dirty_add(dirty, &loc1, &loc2);
} else {
/* Dirty range covers multiple sub-resources */
struct svga3dsurface_loc loc_min, loc_max;
u32 sub_res;
svga3dsurface_max_loc(cache, loc1.sub_resource, &loc_max);
vmw_subres_dirty_add(dirty, &loc1, &loc_max);
svga3dsurface_min_loc(cache, loc2.sub_resource - 1, &loc_min);
vmw_subres_dirty_add(dirty, &loc_min, &loc2);
for (sub_res = loc1.sub_resource + 1;
sub_res < loc2.sub_resource - 1; ++sub_res)
vmw_subres_dirty_full(dirty, sub_res);
}
}
/*
 * vmw_surface_buf_dirty_range_add - The dirty_range_add callback for buffer
* surfaces.
*/
static void vmw_surface_buf_dirty_range_add(struct vmw_resource *res,
size_t start, size_t end)
{
struct vmw_surface_dirty *dirty =
(struct vmw_surface_dirty *) res->dirty;
const struct svga3dsurface_cache *cache = &dirty->cache;
size_t backup_end = res->backup_offset + cache->mip_chain_bytes;
SVGA3dBox *box = &dirty->boxes[0];
u32 box_c2;
box->h = box->d = 1;
start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
end = min(end, backup_end) - res->backup_offset;
box_c2 = box->x + box->w;
if (box->w == 0 || box->x > start)
box->x = start;
if (box_c2 < end)
box->w = end - box->x;
}
/*
 * vmw_surface_dirty_range_add - The dirty_range_add callback for surfaces
*/
static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
size_t end)
{
struct vmw_surface *srf = vmw_res_to_srf(res);
if (WARN_ON(end <= res->backup_offset ||
start >= res->backup_offset + res->backup_size))
return;
if (srf->format == SVGA3D_BUFFER)
vmw_surface_buf_dirty_range_add(res, start, end);
else
vmw_surface_tex_dirty_range_add(res, start, end);
}
/*
* vmw_surface_dirty_sync - The surface's dirty_sync callback.
*/
static int vmw_surface_dirty_sync(struct vmw_resource *res)
{
struct vmw_private *dev_priv = res->dev_priv;
bool has_dx = false;
u32 i, num_dirty;
struct vmw_surface_dirty *dirty =
(struct vmw_surface_dirty *) res->dirty;
size_t alloc_size;
const struct svga3dsurface_cache *cache = &dirty->cache;
struct {
SVGA3dCmdHeader header;
SVGA3dCmdDXUpdateSubResource body;
} *cmd1;
struct {
SVGA3dCmdHeader header;
SVGA3dCmdUpdateGBImage body;
} *cmd2;
void *cmd;
num_dirty = 0;
for (i = 0; i < dirty->num_subres; ++i) {
const SVGA3dBox *box = &dirty->boxes[i];
if (box->d)
num_dirty++;
}
if (!num_dirty)
goto out;
alloc_size = num_dirty * ((has_dx) ? sizeof(*cmd1) : sizeof(*cmd2));
cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
if (!cmd)
return -ENOMEM;
cmd1 = cmd;
cmd2 = cmd;
for (i = 0; i < dirty->num_subres; ++i) {
const SVGA3dBox *box = &dirty->boxes[i];
if (!box->d)
continue;
/*
* DX_UPDATE_SUBRESOURCE is aware of array surfaces.
* UPDATE_GB_IMAGE is not.
*/
if (has_dx) {
cmd1->header.id = SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE;
cmd1->header.size = sizeof(cmd1->body);
cmd1->body.sid = res->id;
cmd1->body.subResource = i;
cmd1->body.box = *box;
cmd1++;
} else {
cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
cmd2->header.size = sizeof(cmd2->body);
cmd2->body.image.sid = res->id;
cmd2->body.image.face = i / cache->num_mip_levels;
cmd2->body.image.mipmap = i -
(cache->num_mip_levels * cmd2->body.image.face);
cmd2->body.box = *box;
cmd2++;
}
}
vmw_fifo_commit(dev_priv, alloc_size);
out:
memset(&dirty->boxes[0], 0, sizeof(dirty->boxes[0]) *
dirty->num_subres);
return 0;
}
/*
* vmw_surface_dirty_alloc - The surface's dirty_alloc callback.
*/
static int vmw_surface_dirty_alloc(struct vmw_resource *res)
{
struct vmw_surface *srf = vmw_res_to_srf(res);
struct vmw_surface_dirty *dirty;
u32 num_layers = 1;
u32 num_mip;
u32 num_subres;
u32 num_samples;
size_t dirty_size, acc_size;
static struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false
};
int ret;
if (srf->array_size)
num_layers = srf->array_size;
else if (srf->flags & SVGA3D_SURFACE_CUBEMAP)
num_layers *= SVGA3D_MAX_SURFACE_FACES;
num_mip = srf->mip_levels[0];
if (!num_mip)
num_mip = 1;
num_subres = num_layers * num_mip;
dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]);
acc_size = ttm_round_pot(dirty_size);
ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv),
acc_size, &ctx);
if (ret) {
VMW_DEBUG_USER("Out of graphics memory for surface "
"dirty tracker.\n");
return ret;
}
dirty = kvzalloc(dirty_size, GFP_KERNEL);
if (!dirty) {
ret = -ENOMEM;
goto out_no_dirty;
}
num_samples = max_t(u32, 1, srf->multisample_count);
ret = svga3dsurface_setup_cache(&srf->base_size, srf->format, num_mip,
num_layers, num_samples, &dirty->cache);
if (ret)
goto out_no_cache;
dirty->num_subres = num_subres;
dirty->size = acc_size;
res->dirty = (struct vmw_resource_dirty *) dirty;
return 0;
out_no_cache:
kvfree(dirty);
out_no_dirty:
ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
return ret;
}
/*
* vmw_surface_dirty_free - The surface's dirty_free callback
*/
static void vmw_surface_dirty_free(struct vmw_resource *res)
{
struct vmw_surface_dirty *dirty =
(struct vmw_surface_dirty *) res->dirty;
size_t acc_size = dirty->size;
kvfree(dirty);
ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
res->dirty = NULL;
}
/*
* vmw_surface_clean - The surface's clean callback
*/
static int vmw_surface_clean(struct vmw_resource *res)
{
struct vmw_private *dev_priv = res->dev_priv;
size_t alloc_size;
struct {
SVGA3dCmdHeader header;
SVGA3dCmdReadbackGBSurface body;
} *cmd;
alloc_size = sizeof(*cmd);
cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
if (!cmd)
return -ENOMEM;
cmd->header.id = SVGA_3D_CMD_READBACK_GB_SURFACE;
cmd->header.size = sizeof(cmd->body);
cmd->body.sid = res->id;
vmw_fifo_commit(dev_priv, alloc_size);
return 0;
}
...@@ -29,10 +29,23 @@ ...@@ -29,10 +29,23 @@
int vmw_mmap(struct file *filp, struct vm_area_struct *vma) int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
{ {
static const struct vm_operations_struct vmw_vm_ops = {
.pfn_mkwrite = vmw_bo_vm_mkwrite,
.page_mkwrite = vmw_bo_vm_mkwrite,
.fault = vmw_bo_vm_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close
};
struct drm_file *file_priv = filp->private_data; struct drm_file *file_priv = filp->private_data;
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
int ret = ttm_bo_mmap(filp, vma, &dev_priv->bdev);
return ttm_bo_mmap(filp, vma, &dev_priv->bdev); if (ret)
return ret;
vma->vm_ops = &vmw_vm_ops;
return 0;
} }
/* struct vmw_validation_mem callback */ /* struct vmw_validation_mem callback */
......
...@@ -33,6 +33,8 @@ ...@@ -33,6 +33,8 @@
* struct vmw_validation_bo_node - Buffer object validation metadata. * struct vmw_validation_bo_node - Buffer object validation metadata.
* @base: Metadata used for TTM reservation- and validation. * @base: Metadata used for TTM reservation- and validation.
* @hash: A hash entry used for the duplicate detection hash table. * @hash: A hash entry used for the duplicate detection hash table.
* @coherent_count: If switching backup buffers, number of new coherent
* resources that will have this buffer as a backup buffer.
* @as_mob: Validate as mob. * @as_mob: Validate as mob.
* @cpu_blit: Validate for cpu blit access. * @cpu_blit: Validate for cpu blit access.
* *
...@@ -42,6 +44,7 @@ ...@@ -42,6 +44,7 @@
struct vmw_validation_bo_node { struct vmw_validation_bo_node {
struct ttm_validate_buffer base; struct ttm_validate_buffer base;
struct drm_hash_item hash; struct drm_hash_item hash;
unsigned int coherent_count;
u32 as_mob : 1; u32 as_mob : 1;
u32 cpu_blit : 1; u32 cpu_blit : 1;
}; };
...@@ -459,6 +462,19 @@ int vmw_validation_res_reserve(struct vmw_validation_context *ctx, ...@@ -459,6 +462,19 @@ int vmw_validation_res_reserve(struct vmw_validation_context *ctx,
if (ret) if (ret)
goto out_unreserve; goto out_unreserve;
} }
if (val->switching_backup && val->new_backup &&
res->coherent) {
struct vmw_validation_bo_node *bo_node =
vmw_validation_find_bo_dup(ctx,
val->new_backup);
if (WARN_ON(!bo_node)) {
ret = -EINVAL;
goto out_unreserve;
}
bo_node->coherent_count++;
}
} }
return 0; return 0;
...@@ -565,6 +581,9 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr) ...@@ -565,6 +581,9 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
int ret; int ret;
list_for_each_entry(entry, &ctx->bo_list, base.head) { list_for_each_entry(entry, &ctx->bo_list, base.head) {
struct vmw_buffer_object *vbo =
container_of(entry->base.bo, typeof(*vbo), base);
if (entry->cpu_blit) { if (entry->cpu_blit) {
struct ttm_operation_ctx ctx = { struct ttm_operation_ctx ctx = {
.interruptible = intr, .interruptible = intr,
...@@ -579,6 +598,27 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr) ...@@ -579,6 +598,27 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
} }
if (ret) if (ret)
return ret; return ret;
/*
* Rather than having the resource code allocate the bo dirty tracker
* in resource_unreserve(), where we can't fail, do it here when
* validating the buffer object.
*/
if (entry->coherent_count) {
unsigned int coherent_count = entry->coherent_count;
while (coherent_count) {
ret = vmw_bo_dirty_add(vbo);
if (ret)
return ret;
coherent_count--;
}
entry->coherent_count -= coherent_count;
}
if (vbo->dirty)
vmw_bo_dirty_scan(vbo);
} }
return 0; return 0;
} }
...@@ -604,7 +644,8 @@ int vmw_validation_res_validate(struct vmw_validation_context *ctx, bool intr) ...@@ -604,7 +644,8 @@ int vmw_validation_res_validate(struct vmw_validation_context *ctx, bool intr)
struct vmw_resource *res = val->res; struct vmw_resource *res = val->res;
struct vmw_buffer_object *backup = res->backup; struct vmw_buffer_object *backup = res->backup;
ret = vmw_resource_validate(res, intr); ret = vmw_resource_validate(res, intr, val->dirty_set &&
val->dirty);
if (ret) { if (ret) {
if (ret != -ERESTARTSYS) if (ret != -ERESTARTSYS)
DRM_ERROR("Failed to validate resource.\n"); DRM_ERROR("Failed to validate resource.\n");
...@@ -831,3 +872,34 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx, ...@@ -831,3 +872,34 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
ctx->mem_size_left += size; ctx->mem_size_left += size;
return 0; return 0;
} }
/**
* vmw_validation_bo_backoff - Unreserve buffer objects registered with a
* validation context
* @ctx: The validation context
*
* This function unreserves the buffer objects previously reserved using
* vmw_validation_bo_reserve. It's typically used as part of an error path
*/
void vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
{
struct vmw_validation_bo_node *entry;
/*
* Switching coherent resource backup buffers failed.
* Release corresponding buffer object dirty trackers.
*/
list_for_each_entry(entry, &ctx->bo_list, base.head) {
if (entry->coherent_count) {
unsigned int coherent_count = entry->coherent_count;
struct vmw_buffer_object *vbo =
container_of(entry->base.bo, typeof(*vbo),
base);
while (coherent_count--)
vmw_bo_dirty_release(vbo);
}
}
ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
}
...@@ -173,20 +173,6 @@ vmw_validation_bo_reserve(struct vmw_validation_context *ctx, ...@@ -173,20 +173,6 @@ vmw_validation_bo_reserve(struct vmw_validation_context *ctx,
NULL); NULL);
} }
/**
* vmw_validation_bo_backoff - Unreserve buffer objects registered with a
* validation context
* @ctx: The validation context
*
* This function unreserves the buffer objects previously reserved using
* vmw_validation_bo_reserve. It's typically used as part of an error path
*/
static inline void
vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
{
ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
}
/** /**
* vmw_validation_bo_fence - Unreserve and fence buffer objects registered * vmw_validation_bo_fence - Unreserve and fence buffer objects registered
* with a validation context * with a validation context
...@@ -269,4 +255,6 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx, ...@@ -269,4 +255,6 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
unsigned int size); unsigned int size);
void vmw_validation_res_set_dirty(struct vmw_validation_context *ctx, void vmw_validation_res_set_dirty(struct vmw_validation_context *ctx,
void *val_private, u32 dirty); void *val_private, u32 dirty);
void vmw_validation_bo_backoff(struct vmw_validation_context *ctx);
#endif #endif
...@@ -727,4 +727,18 @@ static inline bool ttm_bo_uses_embedded_gem_object(struct ttm_buffer_object *bo) ...@@ -727,4 +727,18 @@ static inline bool ttm_bo_uses_embedded_gem_object(struct ttm_buffer_object *bo)
{ {
return bo->base.dev != NULL; return bo->base.dev != NULL;
} }
/* Default number of pre-faulted pages in the TTM fault handler */
#define TTM_BO_VM_NUM_PREFAULT 16
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
struct vm_fault *vmf);
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgprot_t prot,
pgoff_t num_prefault);
void ttm_bo_vm_open(struct vm_area_struct *vma);
void ttm_bo_vm_close(struct vm_area_struct *vma);
#endif #endif
...@@ -216,7 +216,6 @@ static inline int is_swap_pmd(pmd_t pmd) ...@@ -216,7 +216,6 @@ static inline int is_swap_pmd(pmd_t pmd)
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
return __pmd_trans_huge_lock(pmd, vma); return __pmd_trans_huge_lock(pmd, vma);
else else
...@@ -225,7 +224,6 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, ...@@ -225,7 +224,6 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (pud_trans_huge(*pud) || pud_devmap(*pud)) if (pud_trans_huge(*pud) || pud_devmap(*pud))
return __pud_trans_huge_lock(pud, vma); return __pud_trans_huge_lock(pud, vma);
else else
......
...@@ -2632,7 +2632,6 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); ...@@ -2632,7 +2632,6 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data); unsigned long size, pte_fn_t fn, void *data);
#ifdef CONFIG_PAGE_POISONING #ifdef CONFIG_PAGE_POISONING
extern bool page_poisoning_enabled(void); extern bool page_poisoning_enabled(void);
extern void kernel_poison_pages(struct page *page, int numpages, int enable); extern void kernel_poison_pages(struct page *page, int numpages, int enable);
...@@ -2873,5 +2872,17 @@ static inline int pages_identical(struct page *page1, struct page *page2) ...@@ -2873,5 +2872,17 @@ static inline int pages_identical(struct page *page1, struct page *page2)
return !memcmp_pages(page1, page2); return !memcmp_pages(page1, page2);
} }
#ifdef CONFIG_MAPPING_DIRTY_HELPERS
unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
pgoff_t first_index, pgoff_t nr,
pgoff_t bitmap_pgoff,
unsigned long *bitmap,
pgoff_t *start,
pgoff_t *end);
unsigned long wp_shared_mapping_range(struct address_space *mapping,
pgoff_t first_index, pgoff_t nr);
#endif
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */ #endif /* _LINUX_MM_H */
...@@ -24,6 +24,9 @@ struct mm_walk; ...@@ -24,6 +24,9 @@ struct mm_walk;
* "do page table walk over the current vma", returning * "do page table walk over the current vma", returning
* a negative value means "abort current page table walk * a negative value means "abort current page table walk
* right now" and returning 1 means "skip the current vma" * right now" and returning 1 means "skip the current vma"
* @pre_vma: if set, called before starting walk on a non-null vma.
* @post_vma: if set, called after a walk on a non-null vma, provided
* that @pre_vma and the vma walk succeeded.
*/ */
struct mm_walk_ops { struct mm_walk_ops {
int (*pud_entry)(pud_t *pud, unsigned long addr, int (*pud_entry)(pud_t *pud, unsigned long addr,
...@@ -39,6 +42,9 @@ struct mm_walk_ops { ...@@ -39,6 +42,9 @@ struct mm_walk_ops {
struct mm_walk *walk); struct mm_walk *walk);
int (*test_walk)(unsigned long addr, unsigned long next, int (*test_walk)(unsigned long addr, unsigned long next,
struct mm_walk *walk); struct mm_walk *walk);
int (*pre_vma)(unsigned long start, unsigned long end,
struct mm_walk *walk);
void (*post_vma)(struct mm_walk *walk);
}; };
/** /**
...@@ -62,5 +68,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, ...@@ -62,5 +68,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
void *private); void *private);
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
void *private); void *private);
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
pgoff_t nr, const struct mm_walk_ops *ops,
void *private);
#endif /* _LINUX_PAGEWALK_H */ #endif /* _LINUX_PAGEWALK_H */
...@@ -891,11 +891,13 @@ struct drm_vmw_shader_arg { ...@@ -891,11 +891,13 @@ struct drm_vmw_shader_arg {
* surface. * surface.
* @drm_vmw_surface_flag_create_buffer: Create a backup buffer if none is * @drm_vmw_surface_flag_create_buffer: Create a backup buffer if none is
* given. * given.
* @drm_vmw_surface_flag_coherent: Back surface with coherent memory.
*/ */
enum drm_vmw_surface_flags { enum drm_vmw_surface_flags {
drm_vmw_surface_flag_shareable = (1 << 0), drm_vmw_surface_flag_shareable = (1 << 0),
drm_vmw_surface_flag_scanout = (1 << 1), drm_vmw_surface_flag_scanout = (1 << 1),
drm_vmw_surface_flag_create_buffer = (1 << 2) drm_vmw_surface_flag_create_buffer = (1 << 2),
drm_vmw_surface_flag_coherent = (1 << 3),
}; };
/** /**
......
...@@ -736,4 +736,7 @@ config ARCH_HAS_PTE_SPECIAL ...@@ -736,4 +736,7 @@ config ARCH_HAS_PTE_SPECIAL
config ARCH_HAS_HUGEPD config ARCH_HAS_HUGEPD
bool bool
config MAPPING_DIRTY_HELPERS
bool
endmenu endmenu
...@@ -107,3 +107,4 @@ obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o ...@@ -107,3 +107,4 @@ obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
obj-$(CONFIG_ZONE_DEVICE) += memremap.o obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_HMM_MIRROR) += hmm.o obj-$(CONFIG_HMM_MIRROR) += hmm.o
obj-$(CONFIG_MEMFD_CREATE) += memfd.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o
obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/bitops.h>
#include <linux/mmu_notifier.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
/**
* struct wp_walk - Private struct for pagetable walk callbacks
* @range: Range for mmu notifiers
* @tlbflush_start: Address of first modified pte
* @tlbflush_end: Address of last modified pte + 1
* @total: Total number of modified ptes
*/
struct wp_walk {
struct mmu_notifier_range range;
unsigned long tlbflush_start;
unsigned long tlbflush_end;
unsigned long total;
};
/**
* wp_pte - Write-protect a pte
* @pte: Pointer to the pte
* @addr: The virtual page address
* @walk: pagetable walk callback argument
*
* The function write-protects a pte and records the range in
* virtual address space of touched ptes for efficient range TLB flushes.
*/
static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct wp_walk *wpwalk = walk->private;
pte_t ptent = *pte;
if (pte_write(ptent)) {
pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);
ptent = pte_wrprotect(old_pte);
ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
wpwalk->total++;
wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
addr + PAGE_SIZE);
}
return 0;
}
/**
* struct clean_walk - Private struct for the clean_record_pte function.
* @base: struct wp_walk we derive from
 * @bitmap_pgoff: Address_space page offset of the first bit in @bitmap
* @bitmap: Bitmap with one bit for each page offset in the address_space range
* covered.
* @start: Address_space page offset of first modified pte relative
* to @bitmap_pgoff
* @end: Address_space page offset of last modified pte relative
* to @bitmap_pgoff
*/
struct clean_walk {
struct wp_walk base;
pgoff_t bitmap_pgoff;
unsigned long *bitmap;
pgoff_t start;
pgoff_t end;
};
#define to_clean_walk(_wpwalk) container_of(_wpwalk, struct clean_walk, base)
/**
* clean_record_pte - Clean a pte and record its address space offset in a
* bitmap
* @pte: Pointer to the pte
* @addr: The virtual page address
* @walk: pagetable walk callback argument
*
* The function cleans a pte and records the range in
* virtual address space of touched ptes for efficient TLB flushes.
* It also records dirty ptes in a bitmap representing page offsets
* in the address_space, as well as the first and last of the bits
* touched.
*/
static int clean_record_pte(pte_t *pte, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct wp_walk *wpwalk = walk->private;
struct clean_walk *cwalk = to_clean_walk(wpwalk);
pte_t ptent = *pte;
if (pte_dirty(ptent)) {
pgoff_t pgoff = ((addr - walk->vma->vm_start) >> PAGE_SHIFT) +
walk->vma->vm_pgoff - cwalk->bitmap_pgoff;
pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);
ptent = pte_mkclean(old_pte);
ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
wpwalk->total++;
wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
addr + PAGE_SIZE);
__set_bit(pgoff, cwalk->bitmap);
cwalk->start = min(cwalk->start, pgoff);
cwalk->end = max(cwalk->end, pgoff + 1);
}
return 0;
}
/* wp_clean_pmd_entry - The pagewalk pmd callback. */
static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
/* Dirty-tracking should be handled on the pte level */
pmd_t pmdval = pmd_read_atomic(pmd);
if (pmd_trans_huge(pmdval) || pmd_devmap(pmdval))
WARN_ON(pmd_write(pmdval) || pmd_dirty(pmdval));
return 0;
}
/* wp_clean_pud_entry - The pagewalk pud callback. */
static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
/* Dirty-tracking should be handled on the pte level */
pud_t pudval = READ_ONCE(*pud);
if (pud_trans_huge(pudval) || pud_devmap(pudval))
WARN_ON(pud_write(pudval) || pud_dirty(pudval));
return 0;
}
/*
* wp_clean_pre_vma - The pagewalk pre_vma callback.
*
* The pre_vma callback performs the cache flush, stages the tlb flush
* and calls the necessary mmu notifiers.
*/
static int wp_clean_pre_vma(unsigned long start, unsigned long end,
struct mm_walk *walk)
{
struct wp_walk *wpwalk = walk->private;
wpwalk->tlbflush_start = end;
wpwalk->tlbflush_end = start;
mmu_notifier_range_init(&wpwalk->range, MMU_NOTIFY_PROTECTION_PAGE, 0,
walk->vma, walk->mm, start, end);
mmu_notifier_invalidate_range_start(&wpwalk->range);
flush_cache_range(walk->vma, start, end);
/*
* We're not using tlb_gather_mmu() since typically
* only a small subrange of PTEs are affected, whereas
* tlb_gather_mmu() records the full range.
*/
inc_tlb_flush_pending(walk->mm);
return 0;
}
/*
* wp_clean_post_vma - The pagewalk post_vma callback.
*
* The post_vma callback performs the tlb flush and calls necessary mmu
* notifiers.
*/
static void wp_clean_post_vma(struct mm_walk *walk)
{
struct wp_walk *wpwalk = walk->private;
if (mm_tlb_flush_nested(walk->mm))
flush_tlb_range(walk->vma, wpwalk->range.start,
wpwalk->range.end);
else if (wpwalk->tlbflush_end > wpwalk->tlbflush_start)
flush_tlb_range(walk->vma, wpwalk->tlbflush_start,
wpwalk->tlbflush_end);
mmu_notifier_invalidate_range_end(&wpwalk->range);
dec_tlb_flush_pending(walk->mm);
}
/*
* wp_clean_test_walk - The pagewalk test_walk callback.
*
* Won't perform dirty-tracking on COW, read-only or HUGETLB vmas.
*/
static int wp_clean_test_walk(unsigned long start, unsigned long end,
struct mm_walk *walk)
{
unsigned long vm_flags = READ_ONCE(walk->vma->vm_flags);
/* Skip non-applicable VMAs */
if ((vm_flags & (VM_SHARED | VM_MAYWRITE | VM_HUGETLB)) !=
(VM_SHARED | VM_MAYWRITE))
return 1;
return 0;
}
static const struct mm_walk_ops clean_walk_ops = {
.pte_entry = clean_record_pte,
.pmd_entry = wp_clean_pmd_entry,
.pud_entry = wp_clean_pud_entry,
.test_walk = wp_clean_test_walk,
.pre_vma = wp_clean_pre_vma,
.post_vma = wp_clean_post_vma
};
static const struct mm_walk_ops wp_walk_ops = {
.pte_entry = wp_pte,
.pmd_entry = wp_clean_pmd_entry,
.pud_entry = wp_clean_pud_entry,
.test_walk = wp_clean_test_walk,
.pre_vma = wp_clean_pre_vma,
.post_vma = wp_clean_post_vma
};
/**
* wp_shared_mapping_range - Write-protect all ptes in an address space range
* @mapping: The address_space we want to write protect
* @first_index: The first page offset in the range
* @nr: Number of incremental page offsets to cover
*
* Note: This function currently skips transhuge page-table entries, since
* it's intended for dirty-tracking on the PTE level. It will warn on
* encountering transhuge write-enabled entries, though, and can easily be
* extended to handle them as well.
*
* Return: The number of ptes actually write-protected. Note that
* already write-protected ptes are not counted.
*/
unsigned long wp_shared_mapping_range(struct address_space *mapping,
pgoff_t first_index, pgoff_t nr)
{
struct wp_walk wpwalk = { .total = 0 };
i_mmap_lock_read(mapping);
WARN_ON(walk_page_mapping(mapping, first_index, nr, &wp_walk_ops,
&wpwalk));
i_mmap_unlock_read(mapping);
return wpwalk.total;
}
EXPORT_SYMBOL_GPL(wp_shared_mapping_range);
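A possible caller, sketched under the assumption that the driver tracks an object mapped through @mapping; the function and variable names are illustrative only:

static void example_start_dirty_tracking(struct address_space *mapping,
					 pgoff_t first_page,
					 pgoff_t num_pages)
{
	unsigned long wrprotected;

	/* Write-protect the shared, writable ptes of the range. The next
	 * CPU write to any of these pages faults and can be recorded by
	 * the driver's page_mkwrite() / pfn_mkwrite() handler.
	 */
	wrprotected = wp_shared_mapping_range(mapping, first_page, num_pages);

	pr_debug("write-protected %lu ptes\n", wrprotected);
}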
/**
* clean_record_shared_mapping_range - Clean and record all ptes in an
* address space range
* @mapping: The address_space we want to clean
* @first_index: The first page offset in the range
* @nr: Number of incremental page offsets to cover
* @bitmap_pgoff: The page offset of the first bit in @bitmap
* @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to
* cover the whole range @first_index..@first_index + @nr.
 * @start: Pointer to the page offset, relative to @bitmap_pgoff, of the first
 * set bit in @bitmap; if no bits are set, *@start must be >= *@end. The value
 * is modified as new bits are set by the function.
 * @end: Pointer to one past the page offset, relative to @bitmap_pgoff, of the
 * last set bit in @bitmap; if no bits are set, *@end must be <= *@start. The
 * value is modified as new bits are set by the function.
*
* Note: When this function returns there is no guarantee that a CPU has
* not already dirtied new ptes. However it will not clean any ptes not
* reported in the bitmap. The guarantees are as follows:
* a) All ptes dirty when the function starts executing will end up recorded
* in the bitmap.
* b) All ptes dirtied after that will either remain dirty, be recorded in the
* bitmap or both.
*
* If a caller needs to make sure all dirty ptes are picked up and none
* additional are added, it first needs to write-protect the address-space
* range and make sure new writers are blocked in page_mkwrite() or
* pfn_mkwrite(). And then after a TLB flush following the write-protection
* pick up all dirty bits.
*
* Note: This function currently skips transhuge page-table entries, since
* it's intended for dirty-tracking on the PTE level. It will warn on
* encountering transhuge dirty entries, though, and can easily be extended
* to handle them as well.
*
* Return: The number of dirty ptes actually cleaned.
*/
unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
pgoff_t first_index, pgoff_t nr,
pgoff_t bitmap_pgoff,
unsigned long *bitmap,
pgoff_t *start,
pgoff_t *end)
{
bool none_set = (*start >= *end);
struct clean_walk cwalk = {
.base = { .total = 0 },
.bitmap_pgoff = bitmap_pgoff,
.bitmap = bitmap,
.start = none_set ? nr : *start,
.end = none_set ? 0 : *end,
};
i_mmap_lock_read(mapping);
WARN_ON(walk_page_mapping(mapping, first_index, nr, &clean_walk_ops,
&cwalk.base));
i_mmap_unlock_read(mapping);
*start = cwalk.start;
*end = cwalk.end;
return cwalk.base.total;
}
EXPORT_SYMBOL_GPL(clean_record_shared_mapping_range);
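And a matching sketch for the clean-and-record side; again the helper name, the bitmap ownership and the offsets are assumptions for illustration:

static unsigned long example_scan_dirty(struct address_space *mapping,
					pgoff_t first_page, pgoff_t num_pages,
					unsigned long *bitmap)
{
	/* start >= end tells the helper that no bits are set yet. */
	pgoff_t start = num_pages;
	pgoff_t end = 0;
	unsigned long cleaned;

	cleaned = clean_record_shared_mapping_range(mapping, first_page,
						    num_pages, first_page,
						    bitmap, &start, &end);

	/* On return, bits start..end - 1 of bitmap (relative to
	 * bitmap_pgoff == first_page) mark pages that were dirty and are
	 * now clean; they still need to be flushed to the device.
	 */
	return cleaned;
}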
...@@ -10,8 +10,9 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -10,8 +10,9 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte; pte_t *pte;
int err = 0; int err = 0;
const struct mm_walk_ops *ops = walk->ops; const struct mm_walk_ops *ops = walk->ops;
spinlock_t *ptl;
pte = pte_offset_map(pmd, addr); pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (;;) { for (;;) {
err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk); err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
if (err) if (err)
...@@ -22,7 +23,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -22,7 +23,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte++; pte++;
} }
pte_unmap(pte); pte_unmap_unlock(pte, ptl);
return err; return err;
} }
...@@ -253,13 +254,23 @@ static int __walk_page_range(unsigned long start, unsigned long end, ...@@ -253,13 +254,23 @@ static int __walk_page_range(unsigned long start, unsigned long end,
{ {
int err = 0; int err = 0;
struct vm_area_struct *vma = walk->vma; struct vm_area_struct *vma = walk->vma;
const struct mm_walk_ops *ops = walk->ops;
if (vma && ops->pre_vma) {
err = ops->pre_vma(start, end, walk);
if (err)
return err;
}
if (vma && is_vm_hugetlb_page(vma)) { if (vma && is_vm_hugetlb_page(vma)) {
if (walk->ops->hugetlb_entry) if (ops->hugetlb_entry)
err = walk_hugetlb_range(start, end, walk); err = walk_hugetlb_range(start, end, walk);
} else } else
err = walk_pgd_range(start, end, walk); err = walk_pgd_range(start, end, walk);
if (vma && ops->post_vma)
ops->post_vma(walk);
return err; return err;
} }
...@@ -290,6 +301,11 @@ static int __walk_page_range(unsigned long start, unsigned long end, ...@@ -290,6 +301,11 @@ static int __walk_page_range(unsigned long start, unsigned long end,
* its vm_flags. walk_page_test() and @ops->test_walk() are used for this * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
* purpose. * purpose.
* *
* If operations need to be staged before and committed after a vma is walked,
* there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
* since it is intended to handle commit-type operations, can't return any
* errors.
*
* struct mm_walk keeps current values of some common data like vma and pmd, * struct mm_walk keeps current values of some common data like vma and pmd,
* which are useful for the access from callbacks. If you want to pass some * which are useful for the access from callbacks. If you want to pass some
* caller-specific data to callbacks, @private should be helpful. * caller-specific data to callbacks, @private should be helpful.
...@@ -376,3 +392,80 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, ...@@ -376,3 +392,80 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
return err; return err;
return __walk_page_range(vma->vm_start, vma->vm_end, &walk); return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
} }
/**
* walk_page_mapping - walk all memory areas mapped into a struct address_space.
* @mapping: Pointer to the struct address_space
* @first_index: First page offset in the address_space
* @nr: Number of incremental page offsets to cover
* @ops: operation to call during the walk
* @private: private data for callbacks' usage
*
* This function walks all memory areas mapped into a struct address_space.
* The walk is limited to only the given page-size index range, but if
* the index boundaries cross a huge page-table entry, that entry will be
* included.
*
* Also see walk_page_range() for additional information.
*
* Locking:
* This function can't require that the struct mm_struct::mmap_sem is held,
* since @mapping may be mapped by multiple processes. Instead
* @mapping->i_mmap_rwsem must be held. This might have implications in the
 * callbacks, and it's up to the caller to ensure that the
* struct mm_struct::mmap_sem is not needed.
*
* Also this means that a caller can't rely on the struct
* vm_area_struct::vm_flags to be constant across a call,
* except for immutable flags. Callers requiring this shouldn't use
* this function.
*
* Return: 0 on success, negative error code on failure, positive number on
* caller defined premature termination.
*/
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
pgoff_t nr, const struct mm_walk_ops *ops,
void *private)
{
struct mm_walk walk = {
.ops = ops,
.private = private,
};
struct vm_area_struct *vma;
pgoff_t vba, vea, cba, cea;
unsigned long start_addr, end_addr;
int err = 0;
lockdep_assert_held(&mapping->i_mmap_rwsem);
vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
first_index + nr - 1) {
/* Clip to the vma */
vba = vma->vm_pgoff;
vea = vba + vma_pages(vma);
cba = first_index;
cba = max(cba, vba);
cea = first_index + nr;
cea = min(cea, vea);
start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
if (start_addr >= end_addr)
continue;
walk.vma = vma;
walk.mm = vma->vm_mm;
err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
if (err > 0) {
err = 0;
break;
} else if (err < 0)
break;
err = __walk_page_range(start_addr, end_addr, &walk);
if (err)
break;
}
return err;
}