Commit 5b285415 authored by Yinghai Lu, committed by Bjorn Helgaas

PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources

This patch changes the way we handle 64-bit prefetchable bridge windows to
make it more likely that we can assign space to all devices.

Previously we put all prefetchable resources in the prefetchable bridge
window.  If any of those resources was 32-bit only, we restricted the
window to be below 4GB.

After this patch, we only put 64-bit prefetchable resources in a 64-bit
prefetchable window.  We put all 32-bit prefetchable resources in the
non-prefetchable window, even if there are no 64-bit prefetchable
resources.

With the previous approach, if there was a 32-bit prefetchable resource
behind a bridge, we forced the bridge's prefetchable window below 4GB,
which meant that even if there was plenty of space above 4GB available, we
couldn't use it, and assignment of large 64-bit resources could fail, as
in the bugzilla below.

The new strategy is:

  1) If the prefetchable window is 64 bits wide, we put only 64-bit
     prefetchable resources in it.  Any 32-bit prefetchable resources go in
     the non-prefetchable window.

  2) If the prefetchable window is 32 bits wide, we put both 32- and 64-bit
     prefetchable resources in it.

  3) If there is no prefetchable window, all MMIO resources go in the
     non-prefetchable window.

This reduces performance for 32-bit prefetchable resources below a bridge
with a 64-bit prefetchable window.  We previously assigned prefetchable
space, but now we'll assign non-prefetchable space.  This is the case even
if there are no 64-bit prefetchable resources, or if they would all fit
below 4GB.  In those cases, the old strategy would work and would have
better performance.

[bhelgaas: write changelog, add bugzilla link, fold in mem64_mask removal]
Link: https://bugzilla.kernel.org/show_bug.cgi?id=74151
Tested-by: Guo Chao <yan@linux.vnet.ibm.com>
Tested-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
parent 14c8530d
...@@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(struct pci_bus *bus) ...@@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(struct pci_bus *bus)
bus resource of a given type. Note: we intentionally skip bus resource of a given type. Note: we intentionally skip
the bus resources which have already been assigned (that is, the bus resources which have already been assigned (that is,
have non-NULL parent resource). */ have non-NULL parent resource). */
static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned long type) static struct resource *find_free_bus_resource(struct pci_bus *bus,
unsigned long type_mask, unsigned long type)
{ {
int i; int i;
struct resource *r; struct resource *r;
unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
IORESOURCE_PREFETCH;
pci_bus_for_each_resource(bus, r, i) { pci_bus_for_each_resource(bus, r, i) {
if (r == &ioport_resource || r == &iomem_resource) if (r == &ioport_resource || r == &iomem_resource)
...@@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, ...@@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
resource_size_t add_size, struct list_head *realloc_head) resource_size_t add_size, struct list_head *realloc_head)
{ {
struct pci_dev *dev; struct pci_dev *dev;
struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
IORESOURCE_IO);
resource_size_t size = 0, size0 = 0, size1 = 0; resource_size_t size = 0, size0 = 0, size1 = 0;
resource_size_t children_add_size = 0; resource_size_t children_add_size = 0;
resource_size_t min_align, align; resource_size_t min_align, align;
...@@ -907,6 +907,8 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns, ...@@ -907,6 +907,8 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
* @bus : the bus * @bus : the bus
* @mask: mask the resource flag, then compare it with type * @mask: mask the resource flag, then compare it with type
* @type: the type of free resource from bridge * @type: the type of free resource from bridge
* @type2: second match type
* @type3: third match type
* @min_size : the minimum memory window that must to be allocated * @min_size : the minimum memory window that must to be allocated
* @add_size : additional optional memory window * @add_size : additional optional memory window
* @realloc_head : track the additional memory window on this list * @realloc_head : track the additional memory window on this list
...@@ -915,16 +917,17 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns, ...@@ -915,16 +917,17 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
* guarantees that all child resources fit in this size. * guarantees that all child resources fit in this size.
*/ */
static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
unsigned long type, resource_size_t min_size, unsigned long type, unsigned long type2,
resource_size_t add_size, unsigned long type3,
struct list_head *realloc_head) resource_size_t min_size, resource_size_t add_size,
struct list_head *realloc_head)
{ {
struct pci_dev *dev; struct pci_dev *dev;
resource_size_t min_align, align, size, size0, size1; resource_size_t min_align, align, size, size0, size1;
resource_size_t aligns[14]; /* Alignments from 1Mb to 8Gb */ resource_size_t aligns[14]; /* Alignments from 1Mb to 8Gb */
int order, max_order; int order, max_order;
struct resource *b_res = find_free_bus_resource(bus, type); struct resource *b_res = find_free_bus_resource(bus,
unsigned int mem64_mask = 0; mask | IORESOURCE_PREFETCH, type);
resource_size_t children_add_size = 0; resource_size_t children_add_size = 0;
if (!b_res) if (!b_res)
...@@ -934,9 +937,6 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, ...@@ -934,9 +937,6 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
max_order = 0; max_order = 0;
size = 0; size = 0;
mem64_mask = b_res->flags & IORESOURCE_MEM_64;
b_res->flags &= ~IORESOURCE_MEM_64;
list_for_each_entry(dev, &bus->devices, bus_list) { list_for_each_entry(dev, &bus->devices, bus_list) {
int i; int i;
...@@ -944,7 +944,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, ...@@ -944,7 +944,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
struct resource *r = &dev->resource[i]; struct resource *r = &dev->resource[i];
resource_size_t r_size; resource_size_t r_size;
if (r->parent || (r->flags & mask) != type) if (r->parent || ((r->flags & mask) != type &&
(r->flags & mask) != type2 &&
(r->flags & mask) != type3))
continue; continue;
r_size = resource_size(r); r_size = resource_size(r);
#ifdef CONFIG_PCI_IOV #ifdef CONFIG_PCI_IOV
...@@ -981,7 +983,6 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, ...@@ -981,7 +983,6 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
aligns[order] += align; aligns[order] += align;
if (order > max_order) if (order > max_order)
max_order = order; max_order = order;
mem64_mask &= r->flags & IORESOURCE_MEM_64;
if (realloc_head) if (realloc_head)
children_add_size += get_res_add_size(realloc_head, r); children_add_size += get_res_add_size(realloc_head, r);
...@@ -1006,7 +1007,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, ...@@ -1006,7 +1007,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
} }
b_res->start = min_align; b_res->start = min_align;
b_res->end = size0 + min_align - 1; b_res->end = size0 + min_align - 1;
b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; b_res->flags |= IORESOURCE_STARTALIGN;
if (size1 > size0 && realloc_head) { if (size1 > size0 && realloc_head) {
add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align); add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window " dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
...@@ -1122,8 +1123,9 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, ...@@ -1122,8 +1123,9 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
struct list_head *realloc_head) struct list_head *realloc_head)
{ {
struct pci_dev *dev; struct pci_dev *dev;
unsigned long mask, prefmask; unsigned long mask, prefmask, type2 = 0, type3 = 0;
resource_size_t additional_mem_size = 0, additional_io_size = 0; resource_size_t additional_mem_size = 0, additional_io_size = 0;
struct resource *b_res;
list_for_each_entry(dev, &bus->devices, bus_list) { list_for_each_entry(dev, &bus->devices, bus_list) {
struct pci_bus *b = dev->subordinate; struct pci_bus *b = dev->subordinate;
...@@ -1168,15 +1170,37 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, ...@@ -1168,15 +1170,37 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
has already been allocated by arch code, try has already been allocated by arch code, try
non-prefetchable range for both types of PCI memory non-prefetchable range for both types of PCI memory
resources. */ resources. */
b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
mask = IORESOURCE_MEM; mask = IORESOURCE_MEM;
prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH; prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
if (pbus_size_mem(bus, prefmask, prefmask, if (b_res[2].flags & IORESOURCE_MEM_64) {
prefmask |= IORESOURCE_MEM_64;
if (pbus_size_mem(bus, prefmask, prefmask,
prefmask, prefmask,
realloc_head ? 0 : additional_mem_size, realloc_head ? 0 : additional_mem_size,
additional_mem_size, realloc_head)) additional_mem_size, realloc_head)) {
mask = prefmask; /* Success, size non-prefetch only. */ /*
else * Success, with pref mmio64,
additional_mem_size += additional_mem_size; * next will size non-pref or
pbus_size_mem(bus, mask, IORESOURCE_MEM, * non-mmio64 */
mask = prefmask;
type2 = prefmask & ~IORESOURCE_MEM_64;
type3 = prefmask & ~IORESOURCE_PREFETCH;
}
}
if (!type2) {
prefmask &= ~IORESOURCE_MEM_64;
if (pbus_size_mem(bus, prefmask, prefmask,
prefmask, prefmask,
realloc_head ? 0 : additional_mem_size,
additional_mem_size, realloc_head)) {
/* Success, next will size non-prefetch. */
mask = prefmask;
} else
additional_mem_size += additional_mem_size;
type2 = type3 = IORESOURCE_MEM;
}
pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
realloc_head ? 0 : additional_mem_size, realloc_head ? 0 : additional_mem_size,
additional_mem_size, realloc_head); additional_mem_size, realloc_head);
break; break;
...@@ -1262,42 +1286,66 @@ static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge, ...@@ -1262,42 +1286,66 @@ static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
static void pci_bridge_release_resources(struct pci_bus *bus, static void pci_bridge_release_resources(struct pci_bus *bus,
unsigned long type) unsigned long type)
{ {
int idx; struct pci_dev *dev = bus->self;
bool changed = false;
struct pci_dev *dev;
struct resource *r; struct resource *r;
unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM | unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
IORESOURCE_PREFETCH; IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
unsigned old_flags = 0;
struct resource *b_res;
int idx = 1;
dev = bus->self; b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
for (idx = PCI_BRIDGE_RESOURCES; idx <= PCI_BRIDGE_RESOURCE_END;
idx++) { /*
r = &dev->resource[idx]; * 1. if there is io port assign fail, will release bridge
if ((r->flags & type_mask) != type) * io port.
continue; * 2. if there is non pref mmio assign fail, release bridge
if (!r->parent) * nonpref mmio.
continue; * 3. if there is 64bit pref mmio assign fail, and bridge pref
/* * is 64bit, release bridge pref mmio.
* if there are children under that, we should release them * 4. if there is pref mmio assign fail, and bridge pref is
* all * 32bit mmio, release bridge pref mmio
*/ * 5. if there is pref mmio assign fail, and bridge pref is not
release_child_resources(r); * assigned, release bridge nonpref mmio.
if (!release_resource(r)) { */
dev_printk(KERN_DEBUG, &dev->dev, if (type & IORESOURCE_IO)
"resource %d %pR released\n", idx, r); idx = 0;
/* keep the old size */ else if (!(type & IORESOURCE_PREFETCH))
r->end = resource_size(r) - 1; idx = 1;
r->start = 0; else if ((type & IORESOURCE_MEM_64) &&
r->flags = 0; (b_res[2].flags & IORESOURCE_MEM_64))
changed = true; idx = 2;
} else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
} (b_res[2].flags & IORESOURCE_PREFETCH))
idx = 2;
else
idx = 1;
r = &b_res[idx];
if (!r->parent)
return;
/*
* if there are children under that, we should release them
* all
*/
release_child_resources(r);
if (!release_resource(r)) {
type = old_flags = r->flags & type_mask;
dev_printk(KERN_DEBUG, &dev->dev, "resource %d %pR released\n",
PCI_BRIDGE_RESOURCES + idx, r);
/* keep the old size */
r->end = resource_size(r) - 1;
r->start = 0;
r->flags = 0;
if (changed) {
/* avoiding touch the one without PREF */ /* avoiding touch the one without PREF */
if (type & IORESOURCE_PREFETCH) if (type & IORESOURCE_PREFETCH)
type = IORESOURCE_PREFETCH; type = IORESOURCE_PREFETCH;
__pci_setup_bridge(bus, type); __pci_setup_bridge(bus, type);
/* for next child res under same bridge */
r->flags = old_flags;
} }
} }
...@@ -1476,7 +1524,7 @@ void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus) ...@@ -1476,7 +1524,7 @@ void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus)
LIST_HEAD(fail_head); LIST_HEAD(fail_head);
struct pci_dev_resource *fail_res; struct pci_dev_resource *fail_res;
unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM | unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
IORESOURCE_PREFETCH; IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
int pci_try_num = 1; int pci_try_num = 1;
enum enable_type enable_local; enum enable_type enable_local;
......
...@@ -211,15 +211,31 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, ...@@ -211,15 +211,31 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
/* First, try exact prefetching match.. */ /* First, try exact prefetching match.. */
ret = pci_bus_alloc_resource(bus, res, size, align, min, ret = pci_bus_alloc_resource(bus, res, size, align, min,
IORESOURCE_PREFETCH, IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
pcibios_align_resource, dev); pcibios_align_resource, dev);
if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) { if (ret < 0 &&
(res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
(IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
/*
* That failed.
*
* Try 32bit pref
*/
ret = pci_bus_alloc_resource(bus, res, size, align, min,
IORESOURCE_PREFETCH,
pcibios_align_resource, dev);
}
if (ret < 0 &&
(res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))) {
/* /*
* That failed. * That failed.
* *
* But a prefetching area can handle a non-prefetching * But a prefetching area can handle a non-prefetching
* window (it will just not perform as well). * window (it will just not perform as well).
*
* Also can put 64bit under 32bit range. (below 4g).
*/ */
ret = pci_bus_alloc_resource(bus, res, size, align, min, 0, ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
pcibios_align_resource, dev); pcibios_align_resource, dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment