Commit 081f359e authored by Linus Torvalds

Merge tag 'hyperv-fixes-signed-20221125' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv fixes from Wei Liu:

 - Fix IRTE allocation in Hyper-V PCI controller (Dexuan Cui)

 - Fix handling of SCSI srb_status and capacity change events (Michael
   Kelley)

 - Restore VP assist page after CPU offlining and onlining (Vitaly
   Kuznetsov)

 - Fix some memory leak issues in VMBus (Yang Yingliang)

* tag 'hyperv-fixes-signed-20221125' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
  Drivers: hv: vmbus: fix possible memory leak in vmbus_device_register()
  Drivers: hv: vmbus: fix double free in the error path of vmbus_add_channel_work()
  PCI: hv: Only reuse existing IRTE allocation for Multi-MSI
  scsi: storvsc: Fix handling of srb_status and capacity change events
  x86/hyperv: Restore VP assist page after cpu offlining/onlining
parents 0b1dcc2c 25c94b05
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
 static int hv_cpu_init(unsigned int cpu)
 {
         union hv_vp_assist_msr_contents msr = { 0 };
-        struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
+        struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
         int ret;
 
         ret = hv_common_cpu_init(cpu);
@@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu)
         if (!hv_vp_assist_page)
                 return 0;
 
-        if (!*hvp) {
-                if (hv_root_partition) {
-                        /*
-                         * For root partition we get the hypervisor provided VP assist
-                         * page, instead of allocating a new page.
-                         */
-                        rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
-                        *hvp = memremap(msr.pfn <<
-                                        HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
-                                        PAGE_SIZE, MEMREMAP_WB);
-                } else {
-                        /*
-                         * The VP assist page is an "overlay" page (see Hyper-V TLFS's
-                         * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
-                         * out to make sure we always write the EOI MSR in
-                         * hv_apic_eoi_write() *after* the EOI optimization is disabled
-                         * in hv_cpu_die(), otherwise a CPU may not be stopped in the
-                         * case of CPU offlining and the VM will hang.
-                         */
-                        *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
-                        if (*hvp)
-                                msr.pfn = vmalloc_to_pfn(*hvp);
-                }
-                WARN_ON(!(*hvp));
-                if (*hvp) {
-                        msr.enable = 1;
-                        wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
-                }
+        if (hv_root_partition) {
+                /*
+                 * For root partition we get the hypervisor provided VP assist
+                 * page, instead of allocating a new page.
+                 */
+                rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+                *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
+                                PAGE_SIZE, MEMREMAP_WB);
+        } else {
+                /*
+                 * The VP assist page is an "overlay" page (see Hyper-V TLFS's
+                 * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
+                 * out to make sure we always write the EOI MSR in
+                 * hv_apic_eoi_write() *after* the EOI optimization is disabled
+                 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
+                 * case of CPU offlining and the VM will hang.
+                 */
+                if (!*hvp)
+                        *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
+                if (*hvp)
+                        msr.pfn = vmalloc_to_pfn(*hvp);
+        }
+
+        if (!WARN_ON(!(*hvp))) {
+                msr.enable = 1;
+                wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
         }
 
         return hyperv_init_ghcb();
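Note: the hv_init.c change works because the VP assist page allocated on first onlining is deliberately kept across offline/online cycles, while the enable MSR is rewritten on every onlining; indexing by the callback's cpu parameter instead of smp_processor_id() also makes the lookup independent of which CPU the callback happens to run on. A minimal sketch of that pattern with the generic CPU hotplug API, using hypothetical example_* names rather than the Hyper-V code:

/* Sketch only: hypothetical example_* driver, not arch/x86/hyperv code. */
#include <linux/cpuhotplug.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

static void *example_page[NR_CPUS];

static int example_cpu_online(unsigned int cpu)
{
        /* Reuse the page left behind by a previous online/offline cycle. */
        if (!example_page[cpu])
                example_page[cpu] = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
        if (!example_page[cpu])
                return -ENOMEM;

        /* Re-arm the per-CPU resource on *every* onlining (e.g. an MSR write). */
        return 0;
}

static int example_cpu_offline(unsigned int cpu)
{
        /* Disable the resource but keep example_page[cpu] allocated. */
        return 0;
}

static int __init example_init(void)
{
        int ret;

        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
                                example_cpu_online, example_cpu_offline);
        return ret < 0 ? ret : 0;
}

The key point, mirrored by the hunk above, is that the allocation check no longer short-circuits the enable step, so a re-onlined CPU gets its assist page re-enabled.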
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work)
          * Add the new device to the bus. This will kick off device-driver
          * binding which eventually invokes the device driver's AddDevice()
          * method.
+         *
+         * If vmbus_device_register() fails, the 'device_obj' is freed in
+         * vmbus_device_release() as called by device_unregister() in the
+         * error path of vmbus_device_register(). In the outside error
+         * path, there's no need to free it.
          */
         ret = vmbus_device_register(newchannel->device_obj);
         if (ret != 0) {
                 pr_err("unable to add child device object (relid %d)\n",
                         newchannel->offermsg.child_relid);
-                kfree(newchannel->device_obj);
                 goto err_deq_chan;
         }
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
         ret = device_register(&child_device_obj->device);
         if (ret) {
                 pr_err("Unable to register child device\n");
+                put_device(&child_device_obj->device);
                 return ret;
         }
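Note: both VMBus hunks follow the standard driver-core ownership rule: once device_register() (which calls device_initialize()) has run on an embedded struct device, the memory belongs to the device's release callback and must be dropped with put_device(), never kfree()'d directly. That is why the kfree() is removed from vmbus_add_channel_work() and a put_device() is added to vmbus_device_register()'s error path. A minimal sketch of the rule, with hypothetical my_* names rather than the VMBus code:

/* Sketch only: hypothetical my_* names, not drivers/hv code. */
#include <linux/device.h>
#include <linux/slab.h>

struct my_dev {
        struct device dev;
};

static void my_release(struct device *dev)
{
        /* The release callback, not the caller, frees the object. */
        kfree(container_of(dev, struct my_dev, dev));
}

static int my_dev_add(struct device *parent)
{
        struct my_dev *md = kzalloc(sizeof(*md), GFP_KERNEL);
        int ret;

        if (!md)
                return -ENOMEM;

        md->dev.parent = parent;
        md->dev.release = my_release;
        dev_set_name(&md->dev, "my_dev0");

        ret = device_register(&md->dev);
        if (ret) {
                /*
                 * device_register() initialized a reference that must be
                 * dropped with put_device() so my_release() frees the
                 * object; a direct kfree() here would bypass the release
                 * callback and can turn into a double free.
                 */
                put_device(&md->dev);
                return ret;
        }
        return 0;
}

The same reasoning is spelled out in the comment added to vmbus_add_channel_work(): once vmbus_device_register() has failed, its own error path already ends in the release callback, so the caller must not free device_obj again.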
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -1613,7 +1613,7 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp,
 }
 
 static u32 hv_compose_msi_req_v1(
-        struct pci_create_interrupt *int_pkt, const struct cpumask *affinity,
+        struct pci_create_interrupt *int_pkt,
         u32 slot, u8 vector, u16 vector_count)
 {
         int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
@@ -1631,6 +1631,35 @@ static u32 hv_compose_msi_req_v1(
         return sizeof(*int_pkt);
 }
 
+/*
+ * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and
+ * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be
+ * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V
+ * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is
+ * not irrelevant because Hyper-V chooses the physical CPU to handle the
+ * interrupts based on the vCPU specified in message sent to the vPCI VSP in
+ * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest,
+ * but assigning too many vPCI device interrupts to the same pCPU can cause a
+ * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V
+ * to spread out the pCPUs that it selects.
+ *
+ * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu()
+ * to always return the same dummy vCPU, because a second call to
+ * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a
+ * new pCPU for the interrupt. But for the multi-MSI case, the second call to
+ * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the
+ * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that
+ * the pCPUs are spread out. All interrupts for a multi-MSI device end up using
+ * the same pCPU, even though the vCPUs will be spread out by later calls
+ * to hv_irq_unmask(), but that is the best we can do now.
+ *
+ * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not*
+ * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an
+ * enhancement is planned for a future version. With that enhancement, the
+ * dummy vCPU selection won't matter, and interrupts for the same multi-MSI
+ * device will be spread across multiple pCPUs.
+ */
+
 /*
  * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
  * by subsequent retarget in hv_irq_unmask().
@@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
         return cpumask_first_and(affinity, cpu_online_mask);
 }
 
-static u32 hv_compose_msi_req_v2(
-        struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity,
-        u32 slot, u8 vector, u16 vector_count)
+/*
+ * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
+ */
+static int hv_compose_multi_msi_req_get_cpu(void)
 {
+        static DEFINE_SPINLOCK(multi_msi_cpu_lock);
+
+        /* -1 means starting with CPU 0 */
+        static int cpu_next = -1;
+
+        unsigned long flags;
         int cpu;
 
+        spin_lock_irqsave(&multi_msi_cpu_lock, flags);
+
+        cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
+                                     false);
+        cpu = cpu_next;
+
+        spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
+
+        return cpu;
+}
+
+static u32 hv_compose_msi_req_v2(
+        struct pci_create_interrupt2 *int_pkt, int cpu,
+        u32 slot, u8 vector, u16 vector_count)
+{
         int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
         int_pkt->wslot.slot = slot;
         int_pkt->int_desc.vector = vector;
         int_pkt->int_desc.vector_count = vector_count;
         int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
-        cpu = hv_compose_msi_req_get_cpu(affinity);
         int_pkt->int_desc.processor_array[0] =
                 hv_cpu_number_to_vp_number(cpu);
         int_pkt->int_desc.processor_count = 1;
@@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2(
 }
 
 static u32 hv_compose_msi_req_v3(
-        struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity,
+        struct pci_create_interrupt3 *int_pkt, int cpu,
         u32 slot, u32 vector, u16 vector_count)
 {
-        int cpu;
-
         int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
         int_pkt->wslot.slot = slot;
         int_pkt->int_desc.vector = vector;
         int_pkt->int_desc.reserved = 0;
         int_pkt->int_desc.vector_count = vector_count;
         int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
-        cpu = hv_compose_msi_req_get_cpu(affinity);
         int_pkt->int_desc.processor_array[0] =
                 hv_cpu_number_to_vp_number(cpu);
         int_pkt->int_desc.processor_count = 1;
@@ -1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
                         struct pci_create_interrupt3 v3;
                 } int_pkts;
         } __packed ctxt;
+        bool multi_msi;
         u64 trans_id;
         u32 size;
         int ret;
+        int cpu;
+
+        msi_desc  = irq_data_get_msi_desc(data);
+        multi_msi = !msi_desc->pci.msi_attrib.is_msix &&
+                    msi_desc->nvec_used > 1;
 
         /* Reuse the previous allocation */
-        if (data->chip_data) {
+        if (data->chip_data && multi_msi) {
                 int_desc = data->chip_data;
                 msg->address_hi = int_desc->address >> 32;
                 msg->address_lo = int_desc->address & 0xffffffff;
@@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
                 return;
         }
 
-        msi_desc = irq_data_get_msi_desc(data);
         pdev = msi_desc_to_pci_dev(msi_desc);
         dest = irq_data_get_effective_affinity_mask(data);
         pbus = pdev->bus;
@@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
         if (!hpdev)
                 goto return_null_message;
 
+        /* Free any previous message that might have already been composed. */
+        if (data->chip_data && !multi_msi) {
+                int_desc = data->chip_data;
+                data->chip_data = NULL;
+                hv_int_desc_free(hpdev, int_desc);
+        }
+
         int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
         if (!int_desc)
                 goto drop_reference;
 
-        if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) {
+        if (multi_msi) {
                 /*
                  * If this is not the first MSI of Multi MSI, we already have
                  * a mapping. Can exit early.
@@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
                  */
                 vector = 32;
                 vector_count = msi_desc->nvec_used;
+                cpu = hv_compose_multi_msi_req_get_cpu();
         } else {
                 vector = hv_msi_get_int_vector(data);
                 vector_count = 1;
+                cpu = hv_compose_msi_req_get_cpu(dest);
         }
 
         /*
@@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
         switch (hbus->protocol_version) {
         case PCI_PROTOCOL_VERSION_1_1:
                 size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
-                                        dest,
                                         hpdev->desc.win_slot.slot,
                                         (u8)vector,
                                         vector_count);
@@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
         case PCI_PROTOCOL_VERSION_1_2:
         case PCI_PROTOCOL_VERSION_1_3:
                 size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
-                                        dest,
+                                        cpu,
                                         hpdev->desc.win_slot.slot,
                                         (u8)vector,
                                         vector_count);
@@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
         case PCI_PROTOCOL_VERSION_1_4:
                 size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
-                                        dest,
+                                        cpu,
                                         hpdev->desc.win_slot.slot,
                                         vector,
                                         vector_count);
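Note: the comment block added above explains why the dummy vCPU matters even though it is only a hint: Hyper-V derives the servicing pCPU from it, and for multi-MSI the hint from the first hv_compose_msi_msg() call is never replaced. A plain-C illustration of the round-robin idea (not the driver code; pick_dummy_cpu() and num_cpus are made-up names):

/* Illustration only: why advancing the hint per request spreads the load. */
#include <stdio.h>

static int next_hint = -1;        /* -1 means "start with CPU 0" */

static int pick_dummy_cpu(int num_cpus)
{
        next_hint = (next_hint + 1) % num_cpus;   /* wrap around the CPUs */
        return next_hint;
}

int main(void)
{
        /* Four multi-MSI devices, 3 online CPUs: hints 0, 1, 2, 0 */
        for (int dev = 0; dev < 4; dev++)
                printf("device %d -> dummy CPU hint %d\n", dev, pick_dummy_cpu(3));
        return 0;
}

hv_compose_multi_msi_req_get_cpu() in the hunk above implements the same idea over cpu_online_mask, with a spinlock protecting the shared cpu_next counter against concurrent callers.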
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -303,16 +303,21 @@ enum storvsc_request_type {
 };
 
 /*
- * SRB status codes and masks; a subset of the codes used here.
+ * SRB status codes and masks. In the 8-bit field, the two high order bits
+ * are flags, while the remaining 6 bits are an integer status code. The
+ * definitions here include only the subset of the integer status codes that
+ * are tested for in this driver.
  */
 #define SRB_STATUS_AUTOSENSE_VALID      0x80
 #define SRB_STATUS_QUEUE_FROZEN         0x40
-#define SRB_STATUS_INVALID_LUN          0x20
-#define SRB_STATUS_SUCCESS              0x01
-#define SRB_STATUS_ABORTED              0x02
-#define SRB_STATUS_ERROR                0x04
-#define SRB_STATUS_DATA_OVERRUN         0x12
+
+/* SRB status integer codes */
+#define SRB_STATUS_SUCCESS              0x01
+#define SRB_STATUS_ABORTED              0x02
+#define SRB_STATUS_ERROR                0x04
+#define SRB_STATUS_INVALID_REQUEST      0x06
+#define SRB_STATUS_DATA_OVERRUN         0x12
+#define SRB_STATUS_INVALID_LUN          0x20
 
 #define SRB_STATUS(status) \
         (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
@@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
         void (*process_err_fn)(struct work_struct *work);
         struct hv_host_device *host_dev = shost_priv(host);
 
-        /*
-         * In some situations, Hyper-V sets multiple bits in the
-         * srb_status, such as ABORTED and ERROR. So process them
-         * individually, with the most specific bits first.
-         */
-
-        if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) {
-                set_host_byte(scmnd, DID_NO_CONNECT);
-                process_err_fn = storvsc_remove_lun;
-                goto do_work;
-        }
-
-        if (vm_srb->srb_status & SRB_STATUS_ABORTED) {
-                if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
-                    /* Capacity data has changed */
-                    (asc == 0x2a) && (ascq == 0x9)) {
-                        process_err_fn = storvsc_device_scan;
-                        /*
-                         * Retry the I/O that triggered this.
-                         */
-                        set_host_byte(scmnd, DID_REQUEUE);
-                        goto do_work;
-                }
-        }
-
-        if (vm_srb->srb_status & SRB_STATUS_ERROR) {
-                /*
-                 * Let upper layer deal with error when
-                 * sense message is present.
-                 */
-                if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
-                        return;
-        }
+        switch (SRB_STATUS(vm_srb->srb_status)) {
+        case SRB_STATUS_ERROR:
+        case SRB_STATUS_ABORTED:
+        case SRB_STATUS_INVALID_REQUEST:
+                if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
+                        /* Check for capacity change */
+                        if ((asc == 0x2a) && (ascq == 0x9)) {
+                                process_err_fn = storvsc_device_scan;
+                                /* Retry the I/O that triggered this. */
+                                set_host_byte(scmnd, DID_REQUEUE);
+                                goto do_work;
+                        }
+
+                        /*
+                         * Otherwise, let upper layer deal with the
+                         * error when sense message is present
+                         */
+                        return;
+                }
 
                 /*
                  * If there is an error; offline the device since all
@@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
                 default:
                         set_host_byte(scmnd, DID_ERROR);
                 }
+                return;
+
+        case SRB_STATUS_INVALID_LUN:
+                set_host_byte(scmnd, DID_NO_CONNECT);
+                process_err_fn = storvsc_remove_lun;
+                goto do_work;
         }
 
         return;
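Note: the reworked storvsc error handling relies on the layout described in the new comment: srb_status packs two flag bits on top of a 6-bit integer code, and SRB_STATUS() strips the flags so the switch sees only the integer code. A standalone snippet (values copied from the defines above; main() is just for illustration) showing how a status of 0x84 decodes:

/* Illustration only: decoding an srb_status byte the way the driver does. */
#include <stdio.h>

#define SRB_STATUS_AUTOSENSE_VALID      0x80
#define SRB_STATUS_QUEUE_FROZEN         0x40
#define SRB_STATUS_ERROR                0x04
#define SRB_STATUS(status) \
        ((status) & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))

int main(void)
{
        unsigned char srb_status = 0x84;        /* ERROR with valid sense data */

        /* Flags are tested directly; the switch sees only the integer code. */
        printf("autosense valid: %d\n", !!(srb_status & SRB_STATUS_AUTOSENSE_VALID));
        printf("integer status:  0x%02x (SRB_STATUS_ERROR = 0x%02x)\n",
               (unsigned int)SRB_STATUS(srb_status), SRB_STATUS_ERROR);
        return 0;
}

A response of 0x84 therefore enters the SRB_STATUS_ERROR case with the autosense flag still testable, which is the path that now detects the 0x2a/0x09 capacity-change sense code and requeues the I/O.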