Commit d271ab29 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-5.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from Juergen Gross:

 - fix a bug introduced in 5.5 in the Xen gntdev driver

 - fix the Xen balloon driver when running on ancient Xen versions

 - allow Xen stubdoms to control interrupt enable flags of
   passed-through PCI cards

 - release resources in Xen backends under memory pressure

* tag 'for-linus-5.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/blkback: Consistently insert one empty line between functions
  xen/blkback: Remove unnecessary static variable name prefixes
  xen/blkback: Squeeze page pools if a memory pressure is detected
  xenbus/backend: Protect xenbus callback with lock
  xenbus/backend: Add memory pressure handler callback
  xen/gntdev: Do not use mm notifiers with autotranslating guests
  xen/balloon: Support xend-based toolstack take two
  xen-pciback: optionally allow interrupt enable flag writes
parents 2634744b 8557bbe5
...@@ -11,3 +11,16 @@ Description: ...@@ -11,3 +11,16 @@ Description:
#echo 00:19.0-E0:2:FF > /sys/bus/pci/drivers/pciback/quirks #echo 00:19.0-E0:2:FF > /sys/bus/pci/drivers/pciback/quirks
will allow the guest to read and write to the configuration will allow the guest to read and write to the configuration
register 0x0E. register 0x0E.
What: /sys/bus/pci/drivers/pciback/allow_interrupt_control
Date: Jan 2020
KernelVersion: 5.6
Contact: xen-devel@lists.xenproject.org
Description:
List of devices which can have their interrupt control flags (INTx,
MSI, MSI-X) set by a connected guest. It is meant to be set
only when the guest is a stubdomain hosting a device model (qemu)
and the actual device is assigned to an HVM guest. It is not safe
(similar to the permissive attribute) to set this for a device assigned
to a PV guest. The device is automatically removed from this
list when the connected pcifront terminates.
...@@ -25,3 +25,13 @@ Description: ...@@ -25,3 +25,13 @@ Description:
allocated without being in use. The time is in allocated without being in use. The time is in
seconds, 0 means indefinitely long. seconds, 0 means indefinitely long.
The default is 60 seconds. The default is 60 seconds.
What: /sys/module/xen_blkback/parameters/buffer_squeeze_duration_ms
Date: December 2019
KernelVersion: 5.6
Contact: SeongJae Park <sjpark@amazon.de>
Description:
When memory pressure is reported to blkback this option
controls the duration in milliseconds that blkback will not
cache any page not backed by a grant mapping.
The default is 10ms.
...@@ -62,8 +62,8 @@ ...@@ -62,8 +62,8 @@
* IO workloads. * IO workloads.
*/ */
static int xen_blkif_max_buffer_pages = 1024; static int max_buffer_pages = 1024;
module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644); module_param_named(max_buffer_pages, max_buffer_pages, int, 0644);
MODULE_PARM_DESC(max_buffer_pages, MODULE_PARM_DESC(max_buffer_pages,
"Maximum number of free pages to keep in each block backend buffer"); "Maximum number of free pages to keep in each block backend buffer");
...@@ -78,8 +78,8 @@ MODULE_PARM_DESC(max_buffer_pages, ...@@ -78,8 +78,8 @@ MODULE_PARM_DESC(max_buffer_pages,
* algorithm. * algorithm.
*/ */
static int xen_blkif_max_pgrants = 1056; static int max_pgrants = 1056;
module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); module_param_named(max_persistent_grants, max_pgrants, int, 0644);
MODULE_PARM_DESC(max_persistent_grants, MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently"); "Maximum number of grants to map persistently");
...@@ -88,8 +88,8 @@ MODULE_PARM_DESC(max_persistent_grants, ...@@ -88,8 +88,8 @@ MODULE_PARM_DESC(max_persistent_grants,
* use. The time is in seconds, 0 means indefinitely long. * use. The time is in seconds, 0 means indefinitely long.
*/ */
static unsigned int xen_blkif_pgrant_timeout = 60; static unsigned int pgrant_timeout = 60;
module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout, module_param_named(persistent_grant_unused_seconds, pgrant_timeout,
uint, 0644); uint, 0644);
MODULE_PARM_DESC(persistent_grant_unused_seconds, MODULE_PARM_DESC(persistent_grant_unused_seconds,
"Time in seconds an unused persistent grant is allowed to " "Time in seconds an unused persistent grant is allowed to "
...@@ -137,9 +137,8 @@ module_param(log_stats, int, 0644); ...@@ -137,9 +137,8 @@ module_param(log_stats, int, 0644);
static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt) static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
{ {
return xen_blkif_pgrant_timeout && return pgrant_timeout && (jiffies - persistent_gnt->last_used >=
(jiffies - persistent_gnt->last_used >= HZ * pgrant_timeout);
HZ * xen_blkif_pgrant_timeout);
} }
static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page) static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
...@@ -234,7 +233,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring, ...@@ -234,7 +233,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *this; struct persistent_gnt *this;
struct xen_blkif *blkif = ring->blkif; struct xen_blkif *blkif = ring->blkif;
if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) { if (ring->persistent_gnt_c >= max_pgrants) {
if (!blkif->vbd.overflow_max_grants) if (!blkif->vbd.overflow_max_grants)
blkif->vbd.overflow_max_grants = 1; blkif->vbd.overflow_max_grants = 1;
return -EBUSY; return -EBUSY;
...@@ -397,14 +396,13 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) ...@@ -397,14 +396,13 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
goto out; goto out;
} }
if (ring->persistent_gnt_c < xen_blkif_max_pgrants || if (ring->persistent_gnt_c < max_pgrants ||
(ring->persistent_gnt_c == xen_blkif_max_pgrants && (ring->persistent_gnt_c == max_pgrants &&
!ring->blkif->vbd.overflow_max_grants)) { !ring->blkif->vbd.overflow_max_grants)) {
num_clean = 0; num_clean = 0;
} else { } else {
num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; num_clean = (max_pgrants / 100) * LRU_PERCENT_CLEAN;
num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean = ring->persistent_gnt_c - max_pgrants + num_clean;
num_clean;
num_clean = min(ring->persistent_gnt_c, num_clean); num_clean = min(ring->persistent_gnt_c, num_clean);
pr_debug("Going to purge at least %u persistent grants\n", pr_debug("Going to purge at least %u persistent grants\n",
num_clean); num_clean);
...@@ -599,8 +597,7 @@ static void print_stats(struct xen_blkif_ring *ring) ...@@ -599,8 +597,7 @@ static void print_stats(struct xen_blkif_ring *ring)
current->comm, ring->st_oo_req, current->comm, ring->st_oo_req,
ring->st_rd_req, ring->st_wr_req, ring->st_rd_req, ring->st_wr_req,
ring->st_f_req, ring->st_ds_req, ring->st_f_req, ring->st_ds_req,
ring->persistent_gnt_c, ring->persistent_gnt_c, max_pgrants);
xen_blkif_max_pgrants);
ring->st_print = jiffies + msecs_to_jiffies(10 * 1000); ring->st_print = jiffies + msecs_to_jiffies(10 * 1000);
ring->st_rd_req = 0; ring->st_rd_req = 0;
ring->st_wr_req = 0; ring->st_wr_req = 0;
...@@ -656,8 +653,11 @@ int xen_blkif_schedule(void *arg) ...@@ -656,8 +653,11 @@ int xen_blkif_schedule(void *arg)
ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL); ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
} }
/* Shrink if we have more than xen_blkif_max_buffer_pages */ /* Shrink the free pages pool if it is too large. */
shrink_free_pagepool(ring, xen_blkif_max_buffer_pages); if (time_before(jiffies, blkif->buffer_squeeze_end))
shrink_free_pagepool(ring, 0);
else
shrink_free_pagepool(ring, max_buffer_pages);
if (log_stats && time_after(jiffies, ring->st_print)) if (log_stats && time_after(jiffies, ring->st_print))
print_stats(ring); print_stats(ring);
...@@ -884,7 +884,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, ...@@ -884,7 +884,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
continue; continue;
} }
if (use_persistent_gnts && if (use_persistent_gnts &&
ring->persistent_gnt_c < xen_blkif_max_pgrants) { ring->persistent_gnt_c < max_pgrants) {
/* /*
* We are using persistent grants, the grant is * We are using persistent grants, the grant is
* not mapped but we might have room for it. * not mapped but we might have room for it.
...@@ -911,7 +911,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, ...@@ -911,7 +911,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
pages[seg_idx]->persistent_gnt = persistent_gnt; pages[seg_idx]->persistent_gnt = persistent_gnt;
pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n", pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
persistent_gnt->gnt, ring->persistent_gnt_c, persistent_gnt->gnt, ring->persistent_gnt_c,
xen_blkif_max_pgrants); max_pgrants);
goto next; goto next;
} }
if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) { if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
......
...@@ -319,6 +319,7 @@ struct xen_blkif { ...@@ -319,6 +319,7 @@ struct xen_blkif {
/* All rings for this device. */ /* All rings for this device. */
struct xen_blkif_ring *rings; struct xen_blkif_ring *rings;
unsigned int nr_rings; unsigned int nr_rings;
unsigned long buffer_squeeze_end;
}; };
struct seg_buf { struct seg_buf {
......
...@@ -467,7 +467,6 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev) ...@@ -467,7 +467,6 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev)
device_remove_file(&dev->dev, &dev_attr_physical_device); device_remove_file(&dev->dev, &dev_attr_physical_device);
} }
static void xen_vbd_free(struct xen_vbd *vbd) static void xen_vbd_free(struct xen_vbd *vbd)
{ {
if (vbd->bdev) if (vbd->bdev)
...@@ -524,6 +523,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, ...@@ -524,6 +523,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
handle, blkif->domid); handle, blkif->domid);
return 0; return 0;
} }
static int xen_blkbk_remove(struct xenbus_device *dev) static int xen_blkbk_remove(struct xenbus_device *dev)
{ {
struct backend_info *be = dev_get_drvdata(&dev->dev); struct backend_info *be = dev_get_drvdata(&dev->dev);
...@@ -607,6 +607,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info ...@@ -607,6 +607,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
if (err) if (err)
dev_warn(&dev->dev, "writing feature-discard (%d)", err); dev_warn(&dev->dev, "writing feature-discard (%d)", err);
} }
int xen_blkbk_barrier(struct xenbus_transaction xbt, int xen_blkbk_barrier(struct xenbus_transaction xbt,
struct backend_info *be, int state) struct backend_info *be, int state)
{ {
...@@ -691,7 +692,6 @@ static int xen_blkbk_probe(struct xenbus_device *dev, ...@@ -691,7 +692,6 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
return err; return err;
} }
/* /*
* Callback received when the hotplug scripts have placed the physical-device * Callback received when the hotplug scripts have placed the physical-device
* node. Read it and the mode node, and create a vbd. If the frontend is * node. Read it and the mode node, and create a vbd. If the frontend is
...@@ -783,7 +783,6 @@ static void backend_changed(struct xenbus_watch *watch, ...@@ -783,7 +783,6 @@ static void backend_changed(struct xenbus_watch *watch,
} }
} }
/* /*
* Callback received when the frontend's state changes. * Callback received when the frontend's state changes.
*/ */
...@@ -858,9 +857,27 @@ static void frontend_changed(struct xenbus_device *dev, ...@@ -858,9 +857,27 @@ static void frontend_changed(struct xenbus_device *dev,
} }
} }
/* Once a memory pressure is detected, squeeze free page pools for a while. */
static unsigned int buffer_squeeze_duration_ms = 10;
module_param_named(buffer_squeeze_duration_ms,
buffer_squeeze_duration_ms, int, 0644);
MODULE_PARM_DESC(buffer_squeeze_duration_ms,
"Duration in ms to squeeze pages buffer when a memory pressure is detected");
/* ** Connection ** */ /*
* Callback received when the memory pressure is detected.
*/
static void reclaim_memory(struct xenbus_device *dev)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	/* No backend info is attached to this device: nothing to squeeze. */
	if (be) {
		unsigned long squeeze_len =
			msecs_to_jiffies(buffer_squeeze_duration_ms);

		/* Record the deadline until which the pool stays squeezed. */
		be->blkif->buffer_squeeze_end = jiffies + squeeze_len;
	}
}
/* ** Connection ** */
/* /*
* Write the physical details regarding the block device to the store, and * Write the physical details regarding the block device to the store, and
...@@ -1152,6 +1169,7 @@ static struct xenbus_driver xen_blkbk_driver = { ...@@ -1152,6 +1169,7 @@ static struct xenbus_driver xen_blkbk_driver = {
.remove = xen_blkbk_remove, .remove = xen_blkbk_remove,
.otherend_changed = frontend_changed, .otherend_changed = frontend_changed,
.allow_rebind = true, .allow_rebind = true,
.reclaim_memory = reclaim_memory,
}; };
int xen_blkif_xenbus_init(void) int xen_blkif_xenbus_init(void)
......
...@@ -1006,19 +1006,19 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) ...@@ -1006,19 +1006,19 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
} }
mutex_unlock(&priv->lock); mutex_unlock(&priv->lock);
/*
* gntdev takes the address of the PTE in find_grant_ptes() and passes
* it to the hypervisor in gntdev_map_grant_pages(). The purpose of
* the notifier is to prevent the hypervisor pointer to the PTE from
* going stale.
*
* Since this vma's mappings can't be touched without the mmap_sem,
* and we are holding it now, there is no need for the notifier_range
* locking pattern.
*/
mmu_interval_read_begin(&map->notifier);
if (use_ptemod) { if (use_ptemod) {
/*
* gntdev takes the address of the PTE in find_grant_ptes() and
* passes it to the hypervisor in gntdev_map_grant_pages(). The
* purpose of the notifier is to prevent the hypervisor pointer
* to the PTE from going stale.
*
* Since this vma's mappings can't be touched without the
* mmap_sem, and we are holding it now, there is no need for
* the notifier_range locking pattern.
*/
mmu_interval_read_begin(&map->notifier);
map->pages_vm_start = vma->vm_start; map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start, err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, vma->vm_end - vma->vm_start,
......
...@@ -94,7 +94,7 @@ static void watch_target(struct xenbus_watch *watch, ...@@ -94,7 +94,7 @@ static void watch_target(struct xenbus_watch *watch,
"%llu", &static_max) == 1)) "%llu", &static_max) == 1))
static_max >>= PAGE_SHIFT - 10; static_max >>= PAGE_SHIFT - 10;
else else
static_max = new_target; static_max = balloon_stats.current_pages;
target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0
: static_max - balloon_stats.target_pages; : static_max - balloon_stats.target_pages;
......
...@@ -286,6 +286,43 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) ...@@ -286,6 +286,43 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
return xen_pcibios_err_to_errno(err); return xen_pcibios_err_to_errno(err);
} }
int xen_pcibk_get_interrupt_type(struct pci_dev *dev)
{
int err;
u16 val;
int ret = 0;
err = pci_read_config_word(dev, PCI_COMMAND, &val);
if (err)
return err;
if (!(val & PCI_COMMAND_INTX_DISABLE))
ret |= INTERRUPT_TYPE_INTX;
/*
* Do not trust dev->msi(x)_enabled here, as enabling could be done
* bypassing the pci_*msi* functions, by the qemu.
*/
if (dev->msi_cap) {
err = pci_read_config_word(dev,
dev->msi_cap + PCI_MSI_FLAGS,
&val);
if (err)
return err;
if (val & PCI_MSI_FLAGS_ENABLE)
ret |= INTERRUPT_TYPE_MSI;
}
if (dev->msix_cap) {
err = pci_read_config_word(dev,
dev->msix_cap + PCI_MSIX_FLAGS,
&val);
if (err)
return err;
if (val & PCI_MSIX_FLAGS_ENABLE)
ret |= INTERRUPT_TYPE_MSIX;
}
return ret;
}
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev) void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
{ {
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
......
...@@ -65,6 +65,11 @@ struct config_field_entry { ...@@ -65,6 +65,11 @@ struct config_field_entry {
void *data; void *data;
}; };
/*
 * Interrupt types reported by xen_pcibk_get_interrupt_type().
 *
 * That function builds its result by starting from 0 and OR-ing in one bit
 * per enabled mechanism, so "nothing enabled" must be 0 for comparisons
 * against INTERRUPT_TYPE_NONE to match.  The remaining values are single,
 * distinct bits.
 */
#define INTERRUPT_TYPE_NONE (0)
#define INTERRUPT_TYPE_INTX (1<<0)
#define INTERRUPT_TYPE_MSI  (1<<1)
#define INTERRUPT_TYPE_MSIX (1<<2)
extern bool xen_pcibk_permissive; extern bool xen_pcibk_permissive;
#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) #define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
...@@ -126,4 +131,6 @@ int xen_pcibk_config_capability_init(void); ...@@ -126,4 +131,6 @@ int xen_pcibk_config_capability_init(void);
int xen_pcibk_config_header_add_fields(struct pci_dev *dev); int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev); int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
int xen_pcibk_get_interrupt_type(struct pci_dev *dev);
#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ #endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
...@@ -189,6 +189,85 @@ static const struct config_field caplist_pm[] = { ...@@ -189,6 +189,85 @@ static const struct config_field caplist_pm[] = {
{} {}
}; };
/*
 * Per-capability parameters handed to msi_msix_flags_write() through the
 * config field's data pointer.
 */
struct msi_msix_field_config {
	u16 enable_bit;		/* bit for enabling MSI/MSI-X */
	unsigned int int_type;	/* interrupt type for exclusiveness check */
};

/* Parameters for the MSI capability. */
static struct msi_msix_field_config msi_field_config = {
	.enable_bit	= PCI_MSI_FLAGS_ENABLE,
	.int_type	= INTERRUPT_TYPE_MSI,
};

/* Parameters for the MSI-X capability. */
static struct msi_msix_field_config msix_field_config = {
	.enable_bit	= PCI_MSIX_FLAGS_ENABLE,
	.int_type	= INTERRUPT_TYPE_MSIX,
};
/*
 * .init hook of the MSI flags field: hands back the shared MSI parameter
 * block.  Neither @dev nor @offset is used.
 */
static void *msi_field_init(struct pci_dev *dev, int offset)
{
return &msi_field_config;
}
/*
 * .init hook of the MSI-X flags field: hands back the shared MSI-X parameter
 * block.  Neither @dev nor @offset is used.
 */
static void *msix_field_init(struct pci_dev *dev, int offset)
{
return &msix_field_config;
}
/*
 * Write handler for the MSI / MSI-X flags word.
 *
 * @dev:       device whose config space is written
 * @offset:    config space offset of the flags word
 * @new_value: value the guest wants to write
 * @data:      struct msi_msix_field_config describing the enable bit and the
 *             interrupt type of this capability
 *
 * In permissive mode (global or per device) the value is written unchanged.
 * Otherwise a write that changes nothing returns 0 without touching the
 * device, and a change is only accepted when the device has
 * allow_interrupt_control set, only the enable bit differs, and - when
 * enabling - no other interrupt type is currently enabled.
 *
 * Returns the result of the config space access, 0 for a no-op write, or
 * PCIBIOS_SET_FAILED when the write is refused.
 */
static int msi_msix_flags_write(struct pci_dev *dev, int offset, u16 new_value,
				void *data)
{
	int err;
	u16 old_value;
	const struct msi_msix_field_config *field_config = data;
	const struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);

	/* Missing driver data is treated as "not permissive". */
	if (xen_pcibk_permissive || (dev_data && dev_data->permissive))
		goto write;

	err = pci_read_config_word(dev, offset, &old_value);
	if (err)
		return err;

	if (new_value == old_value)
		return 0;

	/* Only the enable bit may change, and only if explicitly allowed. */
	if (!dev_data || !dev_data->allow_interrupt_control ||
	    (new_value ^ old_value) & ~field_config->enable_bit)
		return PCIBIOS_SET_FAILED;

	if (new_value & field_config->enable_bit) {
		/* don't allow enabling together with other interrupt types */
		int int_type = xen_pcibk_get_interrupt_type(dev);

		if (int_type != INTERRUPT_TYPE_NONE &&
		    int_type != field_config->int_type)
			return PCIBIOS_SET_FAILED;
	}

write:
	return pci_write_config_word(dev, offset, new_value);
}
/*
 * Config fields of the MSI-X capability: a single 2-byte field at
 * PCI_MSIX_FLAGS.  Reads go through xen_pcibk_read_config_word, writes are
 * filtered by msi_msix_flags_write.  The empty entry terminates the list.
 */
static const struct config_field caplist_msix[] = {
{
.offset = PCI_MSIX_FLAGS,
.size = 2,
.init = msix_field_init,
.u.w.read = xen_pcibk_read_config_word,
.u.w.write = msi_msix_flags_write,
},
{}
};
/*
 * Config fields of the MSI capability: a single 2-byte field at
 * PCI_MSI_FLAGS.  Reads go through xen_pcibk_read_config_word, writes are
 * filtered by msi_msix_flags_write.  The empty entry terminates the list.
 */
static const struct config_field caplist_msi[] = {
{
.offset = PCI_MSI_FLAGS,
.size = 2,
.init = msi_field_init,
.u.w.read = xen_pcibk_read_config_word,
.u.w.write = msi_msix_flags_write,
},
{}
};
static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = { static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
.capability = PCI_CAP_ID_PM, .capability = PCI_CAP_ID_PM,
.fields = caplist_pm, .fields = caplist_pm,
...@@ -197,11 +276,21 @@ static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = { ...@@ -197,11 +276,21 @@ static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
.capability = PCI_CAP_ID_VPD, .capability = PCI_CAP_ID_VPD,
.fields = caplist_vpd, .fields = caplist_vpd,
}; };
/* Ties the MSI capability ID to its config field list (caplist_msi). */
static struct xen_pcibk_config_capability xen_pcibk_config_capability_msi = {
.capability = PCI_CAP_ID_MSI,
.fields = caplist_msi,
};
/* Ties the MSI-X capability ID to its config field list (caplist_msix). */
static struct xen_pcibk_config_capability xen_pcibk_config_capability_msix = {
.capability = PCI_CAP_ID_MSIX,
.fields = caplist_msix,
};
int xen_pcibk_config_capability_init(void) int xen_pcibk_config_capability_init(void)
{ {
register_capability(&xen_pcibk_config_capability_vpd); register_capability(&xen_pcibk_config_capability_vpd);
register_capability(&xen_pcibk_config_capability_pm); register_capability(&xen_pcibk_config_capability_pm);
register_capability(&xen_pcibk_config_capability_msi);
register_capability(&xen_pcibk_config_capability_msix);
return 0; return 0;
} }
...@@ -117,6 +117,25 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) ...@@ -117,6 +117,25 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
pci_clear_mwi(dev); pci_clear_mwi(dev);
} }
if (dev_data && dev_data->allow_interrupt_control) {
if ((cmd->val ^ value) & PCI_COMMAND_INTX_DISABLE) {
if (value & PCI_COMMAND_INTX_DISABLE) {
pci_intx(dev, 0);
} else {
/* Do not allow enabling INTx together with MSI or MSI-X. */
switch (xen_pcibk_get_interrupt_type(dev)) {
case INTERRUPT_TYPE_NONE:
pci_intx(dev, 1);
break;
case INTERRUPT_TYPE_INTX:
break;
default:
return PCIBIOS_SET_FAILED;
}
}
}
}
cmd->val = value; cmd->val = value;
if (!xen_pcibk_permissive && (!dev_data || !dev_data->permissive)) if (!xen_pcibk_permissive && (!dev_data || !dev_data->permissive))
......
...@@ -304,6 +304,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev) ...@@ -304,6 +304,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
xen_pcibk_config_reset_dev(dev); xen_pcibk_config_reset_dev(dev);
xen_pcibk_config_free_dyn_fields(dev); xen_pcibk_config_free_dyn_fields(dev);
dev_data->allow_interrupt_control = 0;
xen_unregister_device_domain_owner(dev); xen_unregister_device_domain_owner(dev);
spin_lock_irqsave(&found_psdev->lock, flags); spin_lock_irqsave(&found_psdev->lock, flags);
...@@ -1431,6 +1433,65 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf) ...@@ -1431,6 +1433,65 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
} }
static DRIVER_ATTR_RW(permissive); static DRIVER_ATTR_RW(permissive);
/*
 * sysfs store handler: parse a device address from @buf and set the
 * allow_interrupt_control flag on the matching pcistub device.
 *
 * Returns @count on success, the str_to_slot() error if parsing fails,
 * -ENODEV if no matching device is found, or -ENXIO if the device has no
 * driver data.
 */
static ssize_t allow_interrupt_control_store(struct device_driver *drv,
					     const char *buf, size_t count)
{
	int domain, bus, slot, func;
	int rc;
	struct pcistub_device *psdev;
	struct xen_pcibk_dev_data *dev_data;

	rc = str_to_slot(buf, &domain, &bus, &slot, &func);
	if (rc)
		return rc;

	psdev = pcistub_device_find(domain, bus, slot, func);
	if (!psdev)
		return -ENODEV;

	/* the driver data for a device should never be null at this point */
	dev_data = pci_get_drvdata(psdev->dev);
	if (dev_data)
		dev_data->allow_interrupt_control = 1;
	else
		rc = -ENXIO;

	/* Drop the reference obtained by pcistub_device_find(). */
	pcistub_device_put(psdev);

	if (!rc)
		rc = count;
	return rc;
}
/*
 * sysfs show handler: print, one per line, the name of every pcistub device
 * that has allow_interrupt_control set.  Output is capped at PAGE_SIZE.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t allow_interrupt_control_show(struct device_driver *drv,
					    char *buf)
{
	struct pcistub_device *psdev;
	struct xen_pcibk_dev_data *dev_data;
	size_t len = 0;
	unsigned long flags;

	spin_lock_irqsave(&pcistub_devices_lock, flags);
	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		/* Stop once the output page is full. */
		if (len >= PAGE_SIZE)
			break;
		if (!psdev->dev)
			continue;

		dev_data = pci_get_drvdata(psdev->dev);
		if (dev_data && dev_data->allow_interrupt_control)
			len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
					 pci_name(psdev->dev));
	}
	spin_unlock_irqrestore(&pcistub_devices_lock, flags);

	return len;
}
static DRIVER_ATTR_RW(allow_interrupt_control);
static void pcistub_exit(void) static void pcistub_exit(void)
{ {
driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot); driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot);
...@@ -1440,6 +1501,8 @@ static void pcistub_exit(void) ...@@ -1440,6 +1501,8 @@ static void pcistub_exit(void)
driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks); driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks);
driver_remove_file(&xen_pcibk_pci_driver.driver, driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_permissive); &driver_attr_permissive);
driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_allow_interrupt_control);
driver_remove_file(&xen_pcibk_pci_driver.driver, driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_irq_handlers); &driver_attr_irq_handlers);
driver_remove_file(&xen_pcibk_pci_driver.driver, driver_remove_file(&xen_pcibk_pci_driver.driver,
...@@ -1530,6 +1593,9 @@ static int __init pcistub_init(void) ...@@ -1530,6 +1593,9 @@ static int __init pcistub_init(void)
if (!err) if (!err)
err = driver_create_file(&xen_pcibk_pci_driver.driver, err = driver_create_file(&xen_pcibk_pci_driver.driver,
&driver_attr_permissive); &driver_attr_permissive);
if (!err)
err = driver_create_file(&xen_pcibk_pci_driver.driver,
&driver_attr_allow_interrupt_control);
if (!err) if (!err)
err = driver_create_file(&xen_pcibk_pci_driver.driver, err = driver_create_file(&xen_pcibk_pci_driver.driver,
......
...@@ -45,6 +45,7 @@ struct xen_pcibk_dev_data { ...@@ -45,6 +45,7 @@ struct xen_pcibk_dev_data {
struct list_head config_fields; struct list_head config_fields;
struct pci_saved_state *pci_saved_state; struct pci_saved_state *pci_saved_state;
unsigned int permissive:1; unsigned int permissive:1;
unsigned int allow_interrupt_control:1;
unsigned int warned_on_write:1; unsigned int warned_on_write:1;
unsigned int enable_intx:1; unsigned int enable_intx:1;
unsigned int isr_on:1; /* Whether the IRQ handler is installed. */ unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
......
...@@ -239,7 +239,9 @@ int xenbus_dev_probe(struct device *_dev) ...@@ -239,7 +239,9 @@ int xenbus_dev_probe(struct device *_dev)
goto fail; goto fail;
} }
spin_lock(&dev->reclaim_lock);
err = drv->probe(dev, id); err = drv->probe(dev, id);
spin_unlock(&dev->reclaim_lock);
if (err) if (err)
goto fail_put; goto fail_put;
...@@ -268,8 +270,11 @@ int xenbus_dev_remove(struct device *_dev) ...@@ -268,8 +270,11 @@ int xenbus_dev_remove(struct device *_dev)
free_otherend_watch(dev); free_otherend_watch(dev);
if (drv->remove) if (drv->remove) {
spin_lock(&dev->reclaim_lock);
drv->remove(dev); drv->remove(dev);
spin_unlock(&dev->reclaim_lock);
}
module_put(drv->driver.owner); module_put(drv->driver.owner);
...@@ -468,6 +473,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, ...@@ -468,6 +473,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
goto fail; goto fail;
dev_set_name(&xendev->dev, "%s", devname); dev_set_name(&xendev->dev, "%s", devname);
spin_lock_init(&xendev->reclaim_lock);
/* Register with generic device framework. */ /* Register with generic device framework. */
err = device_register(&xendev->dev); err = device_register(&xendev->dev);
......
...@@ -247,6 +247,41 @@ static int backend_probe_and_watch(struct notifier_block *notifier, ...@@ -247,6 +247,41 @@ static int backend_probe_and_watch(struct notifier_block *notifier,
return NOTIFY_DONE; return NOTIFY_DONE;
} }
/*
 * Per-device callback for bus_for_each_dev(): invoke the bound driver's
 * reclaim_memory() hook, if it has one.
 *
 * The device's reclaim_lock is only tried, never waited for; when it is
 * already held the device is simply skipped.  Always returns 0 so that the
 * bus iteration continues with the next device.
 */
static int backend_reclaim_memory(struct device *dev, void *data)
{
	const struct xenbus_driver *drv;
	struct xenbus_device *xdev;

	/* No driver bound to this device. */
	if (!dev->driver)
		return 0;

	drv = to_xenbus_driver(dev->driver);
	if (!drv || !drv->reclaim_memory)
		return 0;

	xdev = to_xenbus_device(dev);
	if (spin_trylock(&xdev->reclaim_lock)) {
		drv->reclaim_memory(xdev);
		spin_unlock(&xdev->reclaim_lock);
	}

	return 0;
}
/*
* Shrinker count_objects callback, used here purely as a memory pressure
* notification: it runs backend_reclaim_memory() on every device of the
* xenbus backend bus.
*
* Returns 0 always because we are using shrinker to only detect memory
* pressure. Neither @shrinker nor @sc is used.
*/
static unsigned long backend_shrink_memory_count(struct shrinker *shrinker,
struct shrink_control *sc)
{
bus_for_each_dev(&xenbus_backend.bus, NULL, NULL,
backend_reclaim_memory);
return 0;
}
/*
* Shrinker registered by the xenbus backend. Only count_objects is set here;
* that callback notifies the backend drivers and always reports 0 objects.
*/
static struct shrinker backend_memory_shrinker = {
.count_objects = backend_shrink_memory_count,
.seeks = DEFAULT_SEEKS,
};
static int __init xenbus_probe_backend_init(void) static int __init xenbus_probe_backend_init(void)
{ {
static struct notifier_block xenstore_notifier = { static struct notifier_block xenstore_notifier = {
...@@ -263,6 +298,9 @@ static int __init xenbus_probe_backend_init(void) ...@@ -263,6 +298,9 @@ static int __init xenbus_probe_backend_init(void)
register_xenstore_notifier(&xenstore_notifier); register_xenstore_notifier(&xenstore_notifier);
if (register_shrinker(&backend_memory_shrinker))
pr_warn("shrinker registration failed\n");
return 0; return 0;
} }
subsys_initcall(xenbus_probe_backend_init); subsys_initcall(xenbus_probe_backend_init);
...@@ -76,6 +76,7 @@ struct xenbus_device { ...@@ -76,6 +76,7 @@ struct xenbus_device {
enum xenbus_state state; enum xenbus_state state;
struct completion down; struct completion down;
struct work_struct work; struct work_struct work;
spinlock_t reclaim_lock;
}; };
static inline struct xenbus_device *to_xenbus_device(struct device *dev) static inline struct xenbus_device *to_xenbus_device(struct device *dev)
...@@ -105,6 +106,7 @@ struct xenbus_driver { ...@@ -105,6 +106,7 @@ struct xenbus_driver {
struct device_driver driver; struct device_driver driver;
int (*read_otherend_details)(struct xenbus_device *dev); int (*read_otherend_details)(struct xenbus_device *dev);
int (*is_ready)(struct xenbus_device *dev); int (*is_ready)(struct xenbus_device *dev);
void (*reclaim_memory)(struct xenbus_device *dev);
}; };
static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment