Commit 676deb38 authored by Dennis Li, committed by Alex Deucher

drm/amdgpu: fix the issue of reserving bad pages failed

In amdgpu_ras_reset_gpu, bad pages may not have been freed yet, so
reserving them has a high probability of failing.

Change the logic to reserve bad pages when the VRAM they occupy is freed
(see the sketch after the sign-offs).

v2:
1. avoid allocating the drm_mm node outside of amdgpu_vram_mgr.c;
2. move bad page reservation into amdgpu_ras_add_bad_pages: if the VRAM
   manager fails to reserve a bad page, put it on the pending list,
   otherwise on the processed list;
3. remove amdgpu_ras_release_bad_pages, because the retired pages' info
   has been moved into amdgpu_vram_mgr.

v3:
1. fix code style;
2. rename amdgpu_vram_reserve_scope to amdgpu_vram_reservation;
3. rename scope_pending to reservations_pending;
4. rename scope_processed to reserved_pages;
5. change to iterate over all pending reservations and try to insert them
   with drm_mm_reserve_node().

v4:
1. rename amdgpu_vram_mgr_reserve_scope to amdgpu_vram_mgr_reserve_range;
2. remove the unused include "amdgpu_ras.h";
3. rename amdgpu_vram_mgr_check_and_reserve to amdgpu_vram_mgr_do_reserve;
4. refine amdgpu_vram_mgr_reserve_range to call amdgpu_vram_mgr_do_reserve.
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Hawking Zhang <hawking.zhang@amd.com>
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 5eeb4593
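
The idea in one sentence: a retired page that cannot be reserved right away is parked on a pending list, and the reservation is retried from the VRAM manager every time memory is freed. Below is a minimal userspace sketch of that deferred-reservation pattern; the toy page allocator and every name in it are illustrative only, not the real drm_mm/amdgpu API.

/* Toy model of "reserve bad pages when VRAM is freed" (illustrative only). */
#include <stdbool.h>
#include <stdio.h>

#define NPAGES 8

static bool page_in_use[NPAGES];   /* toy VRAM: one flag per page */
static bool page_pending[NPAGES];  /* reservations we could not do yet */
static bool page_reserved[NPAGES]; /* successfully reserved bad pages */

/* Retry all pending reservations; called whenever a page is freed. */
static void do_reserve(void)
{
        for (int p = 0; p < NPAGES; p++) {
                if (!page_pending[p] || page_in_use[p])
                        continue;
                page_in_use[p] = true;     /* take the page out of the allocator */
                page_pending[p] = false;   /* move pending -> reserved */
                page_reserved[p] = true;
                printf("reserved bad page %d\n", p);
        }
}

/* Ask for a bad page to be reserved; defer if it is still allocated. */
static void reserve_range(int page)
{
        if (page_in_use[page]) {
                page_pending[page] = true; /* like the reservations_pending list */
                printf("page %d busy, reservation pending\n", page);
        } else {
                page_in_use[page] = true;
                page_reserved[page] = true;
                printf("reserved bad page %d\n", page);
        }
}

/* Free a page and immediately retry pending reservations. */
static void free_page_toy(int page)
{
        page_in_use[page] = false;
        do_reserve();
}

int main(void)
{
        page_in_use[3] = true;  /* page 3 is currently owned by some buffer */
        reserve_range(3);       /* cannot reserve yet -> goes to pending */
        reserve_range(5);       /* free page -> reserved immediately */
        free_page_toy(3);       /* freeing page 3 completes the reservation */

        for (int p = 0; p < NPAGES; p++)
                if (page_reserved[p])
                        printf("page %d is retired\n", p);
        return 0;
}
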
@@ -80,6 +80,8 @@ enum amdgpu_ras_retire_page_reservation {
 atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
 
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+				uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
 				uint64_t addr);
@@ -1551,10 +1553,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
 			.size = AMDGPU_GPU_PAGE_SIZE,
 			.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
 		};
-
-		if (data->last_reserved <= i)
+		ret = amdgpu_vram_mgr_query_page_status(
+				ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+				data->bps[i].retired_page);
+		if (ret == -EBUSY)
 			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
-		else if (data->bps_bo[i] == NULL)
+		else if (ret == -ENOENT)
 			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
 	}
@@ -1606,12 +1610,9 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
 	unsigned int new_space = old_space + pages;
 	unsigned int align_space = ALIGN(new_space, 512);
 	void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
-	struct amdgpu_bo **bps_bo =
-			kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
 
-	if (!bps || !bps_bo) {
+	if (!bps) {
 		kfree(bps);
-		kfree(bps_bo);
 		return -ENOMEM;
 	}
@@ -1620,14 +1621,8 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
 				data->count * sizeof(*data->bps));
 		kfree(data->bps);
 	}
-	if (data->bps_bo) {
-		memcpy(bps_bo, data->bps_bo,
-				data->count * sizeof(*data->bps_bo));
-		kfree(data->bps_bo);
-	}
 
 	data->bps = bps;
-	data->bps_bo = bps_bo;
 	data->space_left += align_space - old_space;
 	return 0;
 }
@@ -1639,6 +1634,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_err_handler_data *data;
 	int ret = 0;
+	uint32_t i;
 
 	if (!con || !con->eh_data || !bps || pages <= 0)
 		return 0;
@@ -1648,16 +1644,26 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 	if (!data)
 		goto out;
 
-	if (data->space_left <= pages)
-		if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) {
+	for (i = 0; i < pages; i++) {
+		if (amdgpu_ras_check_bad_page_unlock(con,
+			bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+			continue;
+
+		if (!data->space_left &&
+			amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
 			ret = -ENOMEM;
 			goto out;
 		}
 
-	memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
-	data->count += pages;
-	data->space_left -= pages;
+		amdgpu_vram_mgr_reserve_range(
+			ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+			bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+			AMDGPU_GPU_PAGE_SIZE);
+
+		memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+		data->count++;
+		data->space_left--;
+	}
 out:
 	mutex_unlock(&con->recovery_lock);
@@ -1730,6 +1736,20 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
 	return ret;
 }
 
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+				uint64_t addr)
+{
+	struct ras_err_handler_data *data = con->eh_data;
+	int i;
+
+	addr >>= AMDGPU_GPU_PAGE_SHIFT;
+	for (i = 0; i < data->count; i++)
+		if (addr == data->bps[i].retired_page)
+			return true;
+
+	return false;
+}
+
 /*
  * check if an address belongs to bad page
  *
@@ -1739,26 +1759,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
 				uint64_t addr)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct ras_err_handler_data *data;
-	int i;
 	bool ret = false;
 
 	if (!con || !con->eh_data)
 		return ret;
 
 	mutex_lock(&con->recovery_lock);
-	data = con->eh_data;
-	if (!data)
-		goto out;
-
-	addr >>= AMDGPU_GPU_PAGE_SHIFT;
-	for (i = 0; i < data->count; i++)
-		if (addr == data->bps[i].retired_page) {
-			ret = true;
-			goto out;
-		}
-out:
+	ret = amdgpu_ras_check_bad_page_unlock(con, addr);
 	mutex_unlock(&con->recovery_lock);
 	return ret;
 }
@@ -1804,77 +1811,6 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
 	}
 }
 
-/* called in gpu recovery/init */
-int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
-{
-	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct ras_err_handler_data *data;
-	uint64_t bp;
-	struct amdgpu_bo *bo = NULL;
-	int i, ret = 0;
-
-	/* Not reserve bad page when amdgpu_bad_page_threshold == 0. */
-	if (!con || !con->eh_data || (amdgpu_bad_page_threshold == 0))
-		return 0;
-
-	mutex_lock(&con->recovery_lock);
-	data = con->eh_data;
-	if (!data)
-		goto out;
-
-	/* reserve vram at driver post stage. */
-	for (i = data->last_reserved; i < data->count; i++) {
-		bp = data->bps[i].retired_page;
-
-		/* There are two cases of reserve error should be ignored:
-		 * 1) a ras bad page has been allocated (used by someone);
-		 * 2) a ras bad page has been reserved (duplicate error injection
-		 *    for one page);
-		 */
-		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
-					       AMDGPU_GPU_PAGE_SIZE,
-					       AMDGPU_GEM_DOMAIN_VRAM,
-					       &bo, NULL))
-			dev_warn(adev->dev, "RAS WARN: reserve vram for "
-					"retired page %llx fail\n", bp);
-
-		data->bps_bo[i] = bo;
-		data->last_reserved = i + 1;
-		bo = NULL;
-	}
-out:
-	mutex_unlock(&con->recovery_lock);
-	return ret;
-}
-
-/* called when driver unload */
-static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
-{
-	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct ras_err_handler_data *data;
-	struct amdgpu_bo *bo;
-	int i;
-
-	if (!con || !con->eh_data)
-		return 0;
-
-	mutex_lock(&con->recovery_lock);
-	data = con->eh_data;
-	if (!data)
-		goto out;
-
-	for (i = data->last_reserved - 1; i >= 0; i--) {
-		bo = data->bps_bo[i];
-
-		amdgpu_bo_free_kernel(&bo, NULL, NULL);
-
-		data->bps_bo[i] = bo;
-		data->last_reserved = i;
-	}
-out:
-	mutex_unlock(&con->recovery_lock);
-	return 0;
-}
-
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1914,18 +1850,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 		ret = amdgpu_ras_load_bad_pages(adev);
 		if (ret)
 			goto free;
-
-		ret = amdgpu_ras_reserve_bad_pages(adev);
-		if (ret)
-			goto release;
 	}
 
 	return 0;
 
-release:
-	amdgpu_ras_release_bad_pages(adev);
 free:
 	kfree((*data)->bps);
-	kfree((*data)->bps_bo);
 	kfree(*data);
 	con->eh_data = NULL;
 out:
@@ -1953,12 +1883,10 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 		return 0;
 
 	cancel_work_sync(&con->recovery_work);
-	amdgpu_ras_release_bad_pages(adev);
 	mutex_lock(&con->recovery_lock);
 	con->eh_data = NULL;
 	kfree(data->bps);
-	kfree(data->bps_bo);
 	kfree(data);
 	mutex_unlock(&con->recovery_lock);
...
@@ -362,14 +362,10 @@ struct ras_err_data {
 struct ras_err_handler_data {
 	/* point to bad page records array */
 	struct eeprom_table_record *bps;
-	/* point to reserved bo array */
-	struct amdgpu_bo **bps_bo;
 	/* the count of entries */
 	int count;
 	/* the space can place new entries */
 	int space_left;
-	/* last reserved entry's index + 1 */
-	int last_reserved;
 };
 
 typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
@@ -506,15 +502,11 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 		struct eeprom_table_record *bps, int pages);
 
 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
-int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
 
 static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-	if (in_task())
-		amdgpu_ras_reserve_bad_pages(adev);
-
 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
 		schedule_work(&ras->recovery_work);
 	return 0;
...
@@ -41,10 +41,17 @@
 #define AMDGPU_POISON	0xd0bed0be
 
+struct amdgpu_vram_reservation {
+	struct list_head node;
+	struct drm_mm_node mm_node;
+};
+
 struct amdgpu_vram_mgr {
 	struct ttm_resource_manager manager;
 	struct drm_mm mm;
 	spinlock_t lock;
+	struct list_head reservations_pending;
+	struct list_head reserved_pages;
 	atomic64_t usage;
 	atomic64_t vis_usage;
 };
@@ -122,6 +129,10 @@ void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
 			      struct sg_table *sgt);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man);
+int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+				  uint64_t start, uint64_t size);
+int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+				      uint64_t start);
 
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_late_init(struct amdgpu_device *adev);
...
@@ -187,6 +187,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 	drm_mm_init(&mgr->mm, 0, man->size);
 	spin_lock_init(&mgr->lock);
+	INIT_LIST_HEAD(&mgr->reservations_pending);
+	INIT_LIST_HEAD(&mgr->reserved_pages);
 
 	/* Add the two VRAM-related sysfs files */
 	ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
@@ -211,6 +213,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
 	struct ttm_resource_manager *man = &mgr->manager;
 	int ret;
+	struct amdgpu_vram_reservation *rsv, *temp;
 
 	ttm_resource_manager_set_used(man, false);
@@ -219,6 +222,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 		return;
 
 	spin_lock(&mgr->lock);
+	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
+		kfree(rsv);
+
+	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
+		drm_mm_remove_node(&rsv->mm_node);
+		kfree(rsv);
+	}
 	drm_mm_takedown(&mgr->mm);
 	spin_unlock(&mgr->lock);
@@ -277,6 +287,101 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 	return usage;
 }
 
+static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct drm_mm *mm = &mgr->mm;
+	struct amdgpu_vram_reservation *rsv, *temp;
+	uint64_t vis_usage;
+
+	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
+		if (drm_mm_reserve_node(mm, &rsv->mm_node))
+			continue;
+
+		dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Successed\n",
+			rsv->mm_node.start, rsv->mm_node.size);
+
+		vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
+		atomic64_add(vis_usage, &mgr->vis_usage);
+		atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
+		list_move(&rsv->node, &mgr->reserved_pages);
+	}
+}
+
+/**
+ * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
+ *
+ * @man: TTM memory type manager
+ * @start: start address of the range in VRAM
+ * @size: size of the range
+ *
+ * Reserve memory from start address with the specified size in VRAM
+ */
+int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+				  uint64_t start, uint64_t size)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_vram_reservation *rsv;
+
+	rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
+	if (!rsv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&rsv->node);
+	rsv->mm_node.start = start >> PAGE_SHIFT;
+	rsv->mm_node.size = size >> PAGE_SHIFT;
+
+	spin_lock(&mgr->lock);
+	list_add_tail(&mgr->reservations_pending, &rsv->node);
+	amdgpu_vram_mgr_do_reserve(man);
+	spin_unlock(&mgr->lock);
+
+	return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_query_page_status - query the reservation status
+ *
+ * @man: TTM memory type manager
+ * @start: start address of a page in VRAM
+ *
+ * Returns:
+ *	-EBUSY: the page is still held on the pending list
+ *	0: the page has been reserved
+ *	-ENOENT: the input page is not a reservation
+ */
+int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+				      uint64_t start)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_vram_reservation *rsv;
+	int ret;
+
+	spin_lock(&mgr->lock);
+
+	list_for_each_entry(rsv, &mgr->reservations_pending, node) {
+		if ((rsv->mm_node.start <= start) &&
+		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	list_for_each_entry(rsv, &mgr->reserved_pages, node) {
+		if ((rsv->mm_node.start <= start) &&
+		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+			ret = 0;
+			goto out;
+		}
+	}
+
+	ret = -ENOENT;
+out:
+	spin_unlock(&mgr->lock);
+	return ret;
+}
+
 /**
  * amdgpu_vram_mgr_virt_start - update virtual start address
  *
@@ -447,6 +552,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 		vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
 		++nodes;
 	}
+	amdgpu_vram_mgr_do_reserve(man);
 	spin_unlock(&mgr->lock);
 
 	atomic64_sub(usage, &mgr->usage);
...
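
For reference, a hypothetical caller-side helper (not part of this patch) that maps the amdgpu_vram_mgr_query_page_status() return codes documented above onto the RAS retire-page flags, the same way amdgpu_ras_badpages_read() does in the earlier hunk:

/* Hypothetical helper, shown only to illustrate the return-code contract. */
static uint32_t ras_page_state(struct ttm_resource_manager *man, uint64_t page)
{
	int ret = amdgpu_vram_mgr_query_page_status(man, page);

	if (ret == -EBUSY)
		return AMDGPU_RAS_RETIRE_PAGE_PENDING;	/* still on reservations_pending */
	if (ret == -ENOENT)
		return AMDGPU_RAS_RETIRE_PAGE_FAULT;	/* never handed to the VRAM manager */
	return AMDGPU_RAS_RETIRE_PAGE_RESERVED;		/* ret == 0: node is on reserved_pages */
}
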