Commit e1a1ef84 authored by Alexey Kardashevskiy's avatar Alexey Kardashevskiy Committed by Paul Mackerras

KVM: PPC: Book3S: Allocate guest TCEs on demand too

We already allocate hardware TCE tables in multiple levels and skip
intermediate levels when we can, now it is a turn of the KVM TCE tables.
Thankfully these are allocated already in 2 levels.

This moves the table's last level allocation from the creating helper to
kvmppc_tce_put() and kvm_spapr_tce_fault(). Since such allocation cannot
be done in real mode, this creates a virtual mode version of
kvmppc_tce_put() which handles allocations.

This adds kvmppc_rm_ioba_validate() to do an additional test if
the consequent kvmppc_tce_put() needs a page which has not been allocated;
if this is the case, we bail out to virtual mode handlers.

The allocations are protected by a new mutex as kvm->lock is not suitable
for the task because the fault handler is called with the mmap_sem held
but kvmhv_setup_mmu() locks kvm->lock and mmap_sem in the reverse order.
Signed-off-by: default avatarAlexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: default avatarPaul Mackerras <paulus@ozlabs.org>
parent 2001825e
...@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table { ...@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
struct kref kref; struct kref kref;
}; };
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
struct kvmppc_spapr_tce_table { struct kvmppc_spapr_tce_table {
struct list_head list; struct list_head list;
struct kvm *kvm; struct kvm *kvm;
...@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table { ...@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
u64 offset; /* in pages */ u64 offset; /* in pages */
u64 size; /* window size in pages */ u64 size; /* window size in pages */
struct list_head iommu_tables; struct list_head iommu_tables;
struct mutex alloc_lock;
struct page *pages[0]; struct page *pages[0];
}; };
......
...@@ -197,8 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table( ...@@ -197,8 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \ (stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS) H_PARAMETER : H_SUCCESS)
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce); unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
......
...@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head) ...@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
unsigned long i, npages = kvmppc_tce_pages(stt->size); unsigned long i, npages = kvmppc_tce_pages(stt->size);
for (i = 0; i < npages; i++) for (i = 0; i < npages; i++)
if (stt->pages[i])
__free_page(stt->pages[i]); __free_page(stt->pages[i]);
kfree(stt); kfree(stt);
} }
static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
unsigned long sttpage)
{
struct page *page = stt->pages[sttpage];
if (page)
return page;
mutex_lock(&stt->alloc_lock);
page = stt->pages[sttpage];
if (!page) {
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
WARN_ON_ONCE(!page);
if (page)
stt->pages[sttpage] = page;
}
mutex_unlock(&stt->alloc_lock);
return page;
}
static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{ {
struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
...@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) ...@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
page = stt->pages[vmf->pgoff]; page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
if (!page)
return VM_FAULT_OOM;
get_page(page); get_page(page);
vmf->page = page; vmf->page = page;
return 0; return 0;
...@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvmppc_spapr_tce_table *siter; struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size = args->size; unsigned long npages, size = args->size;
int ret = -ENOMEM; int ret = -ENOMEM;
int i;
if (!args->size || args->page_shift < 12 || args->page_shift > 34 || if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift))) (args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
...@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->offset = args->offset; stt->offset = args->offset;
stt->size = size; stt->size = size;
stt->kvm = kvm; stt->kvm = kvm;
mutex_init(&stt->alloc_lock);
INIT_LIST_HEAD_RCU(&stt->iommu_tables); INIT_LIST_HEAD_RCU(&stt->iommu_tables);
for (i = 0; i < npages; i++) {
stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!stt->pages[i])
goto fail;
}
mutex_lock(&kvm->lock); mutex_lock(&kvm->lock);
/* Check this LIOBN hasn't been previously allocated */ /* Check this LIOBN hasn't been previously allocated */
...@@ -352,11 +371,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -352,11 +371,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (ret >= 0) if (ret >= 0)
return ret; return ret;
fail:
for (i = 0; i < npages; i++)
if (stt->pages[i])
__free_page(stt->pages[i]);
kfree(stt); kfree(stt);
fail_acct: fail_acct:
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
...@@ -413,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, ...@@ -413,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS; return H_SUCCESS;
} }
/*
* Handles TCE requests for emulated devices.
* Puts guest TCE values to the table and expects user space to convert them.
* Cannot fail so kvmppc_tce_validate must be called before it.
*/
static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
u64 *tbl;
unsigned long sttpage;
idx -= stt->offset;
sttpage = idx / TCES_PER_PAGE;
page = stt->pages[sttpage];
if (!page) {
/* We allow any TCE, not just with read|write permissions */
if (!tce)
return;
page = kvm_spapr_get_tce_page(stt, sttpage);
if (!page)
return;
}
tbl = page_to_virt(page);
tbl[idx % TCES_PER_PAGE] = tce;
}
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry) unsigned long entry)
{ {
......
...@@ -66,8 +66,6 @@ ...@@ -66,8 +66,6 @@
#endif #endif
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
/* /*
* Finds a TCE table descriptor by LIOBN. * Finds a TCE table descriptor by LIOBN.
* *
...@@ -148,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, ...@@ -148,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS; return H_SUCCESS;
} }
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
/* Note on the use of page_address() in real mode, /* Note on the use of page_address() in real mode,
* *
...@@ -180,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page) ...@@ -180,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page)
/* /*
* Handles TCE requests for emulated devices. * Handles TCE requests for emulated devices.
* Puts guest TCE values to the table and expects user space to convert them. * Puts guest TCE values to the table and expects user space to convert them.
* Called in both real and virtual modes. * Cannot fail so kvmppc_rm_tce_validate must be called before it.
* Cannot fail so kvmppc_tce_validate must be called before it.
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/ */
void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce) unsigned long idx, unsigned long tce)
{ {
struct page *page; struct page *page;
...@@ -194,13 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, ...@@ -194,13 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
idx -= stt->offset; idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE]; page = stt->pages[idx / TCES_PER_PAGE];
/*
* page must not be NULL in real mode,
* kvmppc_rm_ioba_validate() must have taken care of this.
*/
WARN_ON_ONCE_RM(!page);
tbl = kvmppc_page_address(page); tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce; tbl[idx % TCES_PER_PAGE] = tce;
} }
EXPORT_SYMBOL_GPL(kvmppc_tce_put);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /*
* TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so
* in real mode.
* Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is
* allocated or not required (when clearing a tce entry).
*/
static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long ioba, unsigned long npages, bool clearing)
{
unsigned long i, idx, sttpage, sttpages;
unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);
if (ret)
return ret;
/*
* clearing==true says kvmppc_rm_tce_put won't be allocating pages
* for empty tces.
*/
if (clearing)
return H_SUCCESS;
idx = (ioba >> stt->page_shift) - stt->offset;
sttpage = idx / TCES_PER_PAGE;
sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
TCES_PER_PAGE;
for (i = sttpage; i < sttpage + sttpages; ++i)
if (!stt->pages[i])
return H_TOO_HARD;
return H_SUCCESS;
}
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa, unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction) enum dma_data_direction *direction)
...@@ -378,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -378,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (!stt) if (!stt)
return H_TOO_HARD; return H_TOO_HARD;
ret = kvmppc_ioba_validate(stt, ioba, 1); ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
return ret; return ret;
...@@ -406,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -406,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
} }
} }
kvmppc_tce_put(stt, entry, tce); kvmppc_rm_tce_put(stt, entry, tce);
return H_SUCCESS; return H_SUCCESS;
} }
...@@ -477,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -477,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (tce_list & (SZ_4K - 1)) if (tce_list & (SZ_4K - 1))
return H_PARAMETER; return H_PARAMETER;
ret = kvmppc_ioba_validate(stt, ioba, npages); ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
return ret; return ret;
...@@ -554,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -554,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
} }
} }
kvmppc_tce_put(stt, entry + i, tce); kvmppc_rm_tce_put(stt, entry + i, tce);
} }
unlock_exit: unlock_exit:
...@@ -580,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, ...@@ -580,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
if (!stt) if (!stt)
return H_TOO_HARD; return H_TOO_HARD;
ret = kvmppc_ioba_validate(stt, ioba, npages); ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
return ret; return ret;
...@@ -607,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, ...@@ -607,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
} }
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
return H_SUCCESS; return H_SUCCESS;
} }
...@@ -632,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -632,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
idx = (ioba >> stt->page_shift) - stt->offset; idx = (ioba >> stt->page_shift) - stt->offset;
page = stt->pages[idx / TCES_PER_PAGE]; page = stt->pages[idx / TCES_PER_PAGE];
if (!page) {
vcpu->arch.regs.gpr[4] = 0;
return H_SUCCESS;
}
tbl = (u64 *)page_address(page); tbl = (u64 *)page_address(page);
vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment