Commit e76e5b2c authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6

* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6: (88 commits)
  PCI: fix HT MSI mapping fix
  PCI: don't enable too much HT MSI mapping
  x86/PCI: make pci=lastbus=255 work when acpi is on
  PCI: save and restore PCIe 2.0 registers
  PCI: update fakephp for bus_id removal
  PCI: fix kernel oops on bridge removal
  PCI: fix conflict between SR-IOV and config space sizing
  powerpc/PCI: include pci.h in powerpc MSI implementation
  PCI Hotplug: schedule fakephp for feature removal
  PCI Hotplug: rename legacy_fakephp to fakephp
  PCI Hotplug: restore fakephp interface with complete reimplementation
  PCI: Introduce /sys/bus/pci/devices/.../rescan
  PCI: Introduce /sys/bus/pci/devices/.../remove
  PCI: Introduce /sys/bus/pci/rescan
  PCI: Introduce pci_rescan_bus()
  PCI: do not enable bridges more than once
  PCI: do not initialize bridges more than once
  PCI: always scan child buses
  PCI: pci_scan_slot() returns newly found devices
  PCI: don't scan existing devices
  ...

Fix trivial append-only conflict in Documentation/feature-removal-schedule.txt
parents 32527bc0 eeafda70
@@ -41,6 +41,49 @@ Description:
for the device and attempt to bind to it. For example:
# echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id
What: /sys/bus/pci/drivers/.../remove_id
Date: February 2009
Contact: Chris Wright <chrisw@sous-sol.org>
Description:
Writing a device ID to this file will remove an ID
that was dynamically added via the new_id sysfs entry.
The format for the device ID is:
VVVV DDDD SVVV SDDD CCCC MMMM. That is Vendor ID, Device
ID, Subsystem Vendor ID, Subsystem Device ID, Class,
and Class Mask. The Vendor ID and Device ID fields are
required, the rest are optional. After successfully
removing an ID, the driver will no longer support the
device. This is useful to ensure auto probing won't
match the driver to the device. For example:
# echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id
What: /sys/bus/pci/rescan
Date: January 2009
Contact: Linux PCI developers <linux-pci@vger.kernel.org>
Description:
Writing a non-zero value to this attribute will
force a rescan of all PCI buses in the system, and
re-discover previously removed devices.
Depends on CONFIG_HOTPLUG.
What: /sys/bus/pci/devices/.../remove
Date: January 2009
Contact: Linux PCI developers <linux-pci@vger.kernel.org>
Description:
Writing a non-zero value to this attribute will
hot-remove the PCI device and any of its children.
Depends on CONFIG_HOTPLUG.
What: /sys/bus/pci/devices/.../rescan
Date: January 2009
Contact: Linux PCI developers <linux-pci@vger.kernel.org>
Description:
Writing a non-zero value to this attribute will
force a rescan of the device's parent bus and all
child buses, and re-discover devices removed earlier
from this part of the device tree.
Depends on CONFIG_HOTPLUG.
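		For example, to hot-remove the device at the
		(hypothetical) address 0000:00:02.0 and then
		re-discover it via a full bus rescan:
		# echo 1 > /sys/bus/pci/devices/0000:00:02.0/remove
		# echo 1 > /sys/bus/pci/rescan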
What: /sys/bus/pci/devices/.../vpd
Date: February 2008
Contact: Ben Hutchings <bhutchings@solarflare.com>
@@ -52,3 +95,30 @@ Description:
that some devices may have malformed data. If the
underlying VPD has a writable section then the
corresponding section of this file will be writable.
What: /sys/bus/pci/devices/.../virtfnN
Date: March 2009
Contact: Yu Zhao <yu.zhao@intel.com>
Description:
This symbolic link appears when hardware supports the SR-IOV
capability and the Physical Function driver has enabled it.
The symbolic link points to the PCI device sysfs entry of the
Virtual Function whose index is N (0...MaxVFs-1).
What: /sys/bus/pci/devices/.../dep_link
Date: March 2009
Contact: Yu Zhao <yu.zhao@intel.com>
Description:
This symbolic link appears when hardware supports the SR-IOV
capability and the Physical Function driver has enabled it,
and this device has vendor-specific dependencies on other
devices. The symbolic link points to the PCI device sysfs
entry of the Physical Function this device depends on.
What: /sys/bus/pci/devices/.../physfn
Date: March 2009
Contact: Yu Zhao <yu.zhao@intel.com>
Description:
This symbolic link appears when a device is a Virtual Function.
The symbolic link points to the PCI device sysfs entry of the
Physical Function with which this device is associated.
@@ -199,6 +199,7 @@ X!Edrivers/pci/hotplug.c
-->
!Edrivers/pci/probe.c
!Edrivers/pci/rom.c
!Edrivers/pci/iov.c
</sect1>
<sect1><title>PCI Hotplug Support Library</title>
!Edrivers/pci/hotplug/pci_hotplug_core.c
......
@@ -4,506 +4,356 @@
Revised Feb 12, 2004 by Martine Silbermann
email: Martine.Silbermann@hp.com
Revised Jun 25, 2004 by Tom L Nguyen
Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com>
Copyright 2003, 2008 Intel Corporation
1. About this guide
This guide describes the basics of Message Signaled Interrupts (MSI),
the advantages of using MSI over traditional interrupt mechanisms,
and how to enable your driver to use MSI or MSI-X. Also included is
a Frequently Asked Questions (FAQ) section.
1.1 Terminology
PCI devices can be single-function or multi-function. In either case,
when this text talks about enabling or disabling MSI on a "device
function," it is referring to one specific PCI device and function and
not to all functions on a PCI device (unless the PCI device has only
one function).
2. Copyright 2003 Intel Corporation
3. What is MSI/MSI-X?
Message Signaled Interrupt (MSI), as described in the PCI Local Bus
Specification Revision 2.3 or later, is an optional feature, and a
required feature for PCI Express devices. MSI enables a device function
to request service by sending an Inbound Memory Write on its PCI bus to
the FSB as a Message Signal Interrupt transaction. Because MSI is
generated in the form of a Memory Write, all transaction conditions,
such as a Retry, Master-Abort, Target-Abort or normal completion, are
supported.
A PCI device that supports MSI must also support the pin IRQ assertion
interrupt mechanism to provide backward compatibility for systems that
do not support MSI. In systems which support MSI, the bus driver is
responsible for initializing the message address and message data of
the device function's MSI/MSI-X capability structure during device
initial configuration.
An MSI capable device function indicates MSI support by implementing
the MSI/MSI-X capability structure in its PCI capability list. The
device function may implement both the MSI capability structure and
the MSI-X capability structure; however, the bus driver should not
enable both.
The MSI capability structure contains Message Control register,
Message Address register and Message Data register. These registers
provide the bus driver control over MSI. The Message Control register
indicates the MSI capability supported by the device. The Message
Address register specifies the target address and the Message Data
register specifies the characteristics of the message. To request
service, the device function writes the content of the Message Data
register to the target address. The device and its software driver
are prohibited from writing to these registers.
The MSI-X capability structure is an optional extension to MSI. It
uses an independent and separate capability structure. There are
some key advantages to implementing the MSI-X capability structure
over the MSI capability structure as described below.
- Support a larger maximum number of vectors per function.
- Provide the ability for system software to configure
each vector with an independent message address and message
data, specified by a table that resides in Memory Space.
- MSI and MSI-X both support per-vector masking. Per-vector
masking is an optional extension of MSI but a required
feature for MSI-X. Per-vector masking provides the kernel the
ability to mask/unmask a single MSI while running its
interrupt service routine. If per-vector masking is
not supported, then the device driver should provide the
hardware/software synchronization to ensure that the device
generates MSI when the driver wants it to do so.
4. Why use MSI?
By simplifying board design, MSI allows board designers to remove
out-of-band interrupt routing. MSI is another step towards a
legacy-free environment.
Due to increasing pressure on chipset and processor packages to
reduce pin count, the need for interrupt pins is expected to
diminish over time. Devices, due to pin constraints, may implement
messages to increase performance.
PCI Express endpoints use INTx emulation (in-band messages) instead
of IRQ pin assertion. Using INTx emulation requires interrupt
sharing among devices connected to the same node (PCI bridge) while
MSI is unique (non-shared) and does not require BIOS configuration
support. As a result, the PCI Express technology requires MSI
support for better interrupt performance.
Using MSI enables the device functions to support two or more
vectors, which can be configured to target different CPUs to
increase scalability.
5. Configuring a driver to use MSI/MSI-X
By default, the kernel will not enable MSI/MSI-X on all devices that
support this capability. The CONFIG_PCI_MSI kernel option
must be selected to enable MSI/MSI-X support.
5.1 Including MSI/MSI-X support into the kernel
To allow MSI/MSI-X capable device drivers to selectively enable
MSI/MSI-X (using pci_enable_msi()/pci_enable_msix() as described
below), the VECTOR based scheme needs to be enabled by setting
CONFIG_PCI_MSI during kernel config.
Since the target of the inbound message is the local APIC,
CONFIG_X86_LOCAL_APIC must be enabled as well as CONFIG_PCI_MSI.
5.2 Configuring for MSI support
Due to the non-contiguous fashion of vector assignment in the
existing Linux kernel, this version does not support multiple
messages regardless of whether a device function is capable of supporting
more than one vector. To enable MSI on a device function's MSI
capability structure requires a device driver to call the function
pci_enable_msi() explicitly.
5.2.1 API pci_enable_msi
This guide describes the basics of Message Signaled Interrupts (MSIs),
the advantages of using MSI over traditional interrupt mechanisms, how
to change your driver to use MSI or MSI-X and some basic diagnostics to
try if a device doesn't support MSIs.
int pci_enable_msi(struct pci_dev *dev)
With this new API, a device driver that wants to have MSI
enabled on its device function must call this API to enable MSI.
A successful call will initialize the MSI capability structure
with ONE vector, regardless of whether a device function is
capable of supporting multiple messages. This vector replaces the
pre-assigned dev->irq with a new MSI vector. To avoid a conflict
between the newly assigned vector and the existing pre-assigned vector,
a device driver must call this API before calling request_irq().
2. What are MSIs?
5.2.2 API pci_disable_msi
A Message Signaled Interrupt is a write from the device to a special
address which causes an interrupt to be received by the CPU.
void pci_disable_msi(struct pci_dev *dev)
The MSI capability was first specified in PCI 2.2 and was later enhanced
in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X
capability was also introduced with PCI 3.0. It supports more interrupts
per device than MSI and allows interrupts to be independently configured.
This API should always be used to undo the effect of pci_enable_msi()
when a device driver is unloading. This API restores dev->irq with
the pre-assigned IOAPIC vector and switches a device's interrupt
mode to PCI pin-irq assertion/INTx emulation mode.
Note that a device driver should always call free_irq() on the MSI vector
that it has done request_irq() on before calling this API. Failure to do
so results in a BUG_ON() and a device will be left with MSI enabled and
leaks its vector.
5.2.3 MSI mode vs. legacy mode diagram
The below diagram shows the events which switch the interrupt
mode on the MSI-capable device function between MSI mode and
PIN-IRQ assertion mode.
  ------------   pci_enable_msi    ------------------------
 |            | <===============   |                        |
 |  MSI MODE  |                    | PIN-IRQ ASSERTION MODE |
 |            | ===============>   |                        |
  ------------   pci_disable_msi   ------------------------
Figure 1. MSI Mode vs. Legacy Mode
In Figure 1, a device operates by default in legacy mode. Legacy
in this context means PCI pin-irq assertion or PCI-Express INTx
emulation. A successful MSI request (using pci_enable_msi()) switches
a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
stored in dev->irq will be saved by the PCI subsystem and a new
assigned MSI vector will replace dev->irq.
To return to its default mode, a device driver should always call
pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
device driver should always call free_irq() on the MSI vector it has
done request_irq() on before calling pci_disable_msi(). Failure to do
so results in a BUG_ON() and a device will be left with MSI enabled and
leaks its vector. Otherwise, the PCI subsystem restores a device's
dev->irq with a pre-assigned IOAPIC vector and marks the released
MSI vector as unused.
Once being marked as unused, there is no guarantee that the PCI
subsystem will reserve this MSI vector for a device. Depending on
the availability of current PCI vector resources and the number of
MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
For the case where the PCI subsystem re-assigns this MSI vector to
another driver, a request to switch back to MSI mode may result
in being assigned a different MSI vector or a failure if no more
vectors are available.
5.3 Configuring for MSI-X support
Due to the ability of the system software to configure each vector of
the MSI-X capability structure with an independent message address
and message data, the non-contiguous fashion in vector assignment of
the existing Linux kernel has no impact on supporting multiple
messages on an MSI-X capable device function. To enable MSI-X on
a device function's MSI-X capability structure requires its device
driver to call the function pci_enable_msix() explicitly.
The function pci_enable_msix(), once invoked, enables either
all or nothing, depending on the current availability of PCI vector
resources. If the PCI vector resources are available for the number
of vectors requested by a device driver, this function will configure
the MSI-X table of the MSI-X capability structure of a device with
requested messages. For example, a device may be capable of
supporting a maximum of 32 vectors while its software driver may
request only 4 vectors. It is recommended
that the device driver should call this function once during the
initialization phase of the device driver.
Unlike the function pci_enable_msi(), the function pci_enable_msix()
does not replace the pre-assigned IOAPIC dev->irq with a new MSI
vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
into the field vector of each element contained in the second argument.
Note that the pre-assigned IOAPIC dev->irq is valid only if the device
operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt
by the device driver to use dev->irq to request interrupt service
may result in unpredictable behavior.
For each MSI-X vector granted, a device driver is responsible for calling
other functions like request_irq(), enable_irq(), etc. to enable
this vector with its corresponding interrupt service handler. It is
a device driver's choice to assign all vectors with the same
interrupt service handler or each vector with a unique interrupt
service handler.
5.3.1 Handling MMIO address space of MSI-X Table
The PCI 3.0 specification has implementation notes that MMIO address
space for a device's MSI-X structure should be isolated so that the
software system can set different pages for controlling accesses to the
MSI-X structure. The implementation of MSI support requires the PCI
subsystem, not a device driver, to maintain full control of the MSI-X
table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
table/MSI-X PBA. A device driver should not access the MMIO address
space of the MSI-X table/MSI-X PBA.
5.3.2 API pci_enable_msix
Devices may support both MSI and MSI-X, but only one can be enabled at
a time.
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
This API enables a device driver to request the PCI subsystem
to enable MSI-X messages on its hardware device. Depending on
the availability of PCI vectors resources, the PCI subsystem enables
either all or none of the requested vectors.
3. Why use MSIs?
There are three reasons why using MSIs can give an advantage over
traditional pin-based interrupts.
Pin-based PCI interrupts are often shared amongst several devices.
To support this, the kernel must call each interrupt handler associated
with an interrupt, which leads to reduced performance for the system as
a whole. MSIs are never shared, so this problem cannot arise.
When a device writes data to memory, then raises a pin-based interrupt,
it is possible that the interrupt may arrive before all the data has
arrived in memory (this becomes more likely with devices behind PCI-PCI
bridges). In order to ensure that all the data has arrived in memory,
the interrupt handler must read a register on the device which raised
the interrupt. PCI transaction ordering rules require that all the data
arrives in memory before the value can be returned from the register.
Using MSIs avoids this problem as the interrupt-generating write cannot
pass the data writes, so by the time the interrupt is raised, the driver
knows that all the data has arrived in memory.
PCI devices can only support a single pin-based interrupt per function.
Often drivers have to query the device to find out what event has
occurred, slowing down interrupt handling for the common case. With
MSIs, a device can support more interrupts, allowing each interrupt
to be specialised to a different purpose. One possible design gives
infrequent conditions (such as errors) their own interrupt which allows
the driver to handle the normal interrupt handling path more efficiently.
Other possible designs include giving one interrupt to each packet queue
in a network card or each port in a storage controller.
4. How to use MSIs
PCI devices are initialised to use pin-based interrupts. The device
driver has to set up the device to use MSI or MSI-X. Not all machines
support MSIs correctly, and for those machines, the APIs described below
will simply fail and the device will continue to use pin-based interrupts.
4.1 Include kernel support for MSIs
To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
option enabled. This option is only available on some architectures,
and it may depend on some other options also being set. For example,
on x86, you must also enable X86_UP_APIC or SMP in order to see the
CONFIG_PCI_MSI option.
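A minimal configuration fragment, assuming an x86 SMP build (for
illustration only):

	CONFIG_SMP=y
	CONFIG_PCI=y
	CONFIG_PCI_MSI=y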
4.2 Using MSI
Most of the hard work is done for the driver in the PCI layer. It simply
has to request that the PCI layer set up the MSI capability for this
device.
4.2.1 pci_enable_msi
int pci_enable_msi(struct pci_dev *dev)
A successful call will allocate ONE interrupt to the device, regardless
of how many MSIs the device supports. The device will be switched from
pin-based interrupt mode to MSI mode. The dev->irq number is changed
to a new number which represents the message signaled interrupt.
This function should be called before the driver calls request_irq()
since enabling MSIs disables the pin-based IRQ and the driver will not
receive interrupts on the old interrupt.
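A minimal sketch of that ordering, falling back to the pin-based
interrupt when MSI cannot be enabled ('struct foo_dev', its
'msi_enabled' field and foo_interrupt() are hypothetical driver names):

static int foo_setup_irq(struct pci_dev *pdev, struct foo_dev *foo)
{
	int err;

	/* Try MSI first; if it fails, dev->irq still holds the pin-based IRQ. */
	foo->msi_enabled = !pci_enable_msi(pdev);

	/* MSIs are never shared, so IRQF_SHARED is only needed for INTx. */
	err = request_irq(pdev->irq, foo_interrupt,
			  foo->msi_enabled ? 0 : IRQF_SHARED, "foo", foo);
	if (err && foo->msi_enabled)
		pci_disable_msi(pdev);
	return err;
}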
4.2.2 pci_enable_msi_block
int pci_enable_msi_block(struct pci_dev *dev, int count)
This variation on the above call allows a device driver to request multiple
MSIs. The MSI specification only allows interrupts to be allocated in
powers of two, up to a maximum of 2^5 (32).
If this function returns 0, it has succeeded in allocating at least as many
interrupts as the driver requested (it may have allocated more in order
to satisfy the power-of-two requirement). In this case, the function
enables MSI on this device and updates dev->irq to be the lowest of
the new interrupts assigned to it. The other interrupts assigned to
the device are in the range dev->irq to dev->irq + count - 1.
If this function returns a negative number, it indicates an error and
the driver should not attempt to request any more MSI interrupts for
this device. If this function returns a positive number, it will be
less than 'count' and indicate the number of interrupts that could have
been allocated. In neither case will the irq value have been
updated, nor will the device have been switched into MSI mode.
The device driver must decide what action to take if
pci_enable_msi_block() returns a value less than the number asked for.
Some devices can make use of fewer interrupts than the maximum they
request; in this case the driver should call pci_enable_msi_block()
again. Note that it is not guaranteed to succeed, even when the
'count' has been reduced to the value returned from a previous call to
pci_enable_msi_block(). This is because there are multiple constraints
on the number of vectors that can be allocated; pci_enable_msi_block()
will return as soon as it finds any constraint that doesn't allow the
call to succeed.
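One way to structure that retry, as a sketch (the foo_* names are
hypothetical; the return-value semantics are those described above):

static int foo_enable_msi_block(struct pci_dev *pdev, int nvec)
{
	int rc;

	while (nvec >= 1) {
		rc = pci_enable_msi_block(pdev, nvec);
		if (rc == 0)
			return nvec;	/* dev->irq .. dev->irq + nvec - 1 usable */
		if (rc < 0)
			return rc;	/* hard failure; give up */
		nvec = rc;	/* retry with the count that might succeed */
	}

	return -ENOSPC;
}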
4.2.3 pci_disable_msi
void pci_disable_msi(struct pci_dev *dev)
Argument 'dev' points to the device (pci_dev) structure.
This function should be used to undo the effect of pci_enable_msi() or
pci_enable_msi_block(). Calling it restores dev->irq to the pin-based
interrupt number and frees the previously allocated message signaled
interrupt(s). The interrupt may subsequently be assigned to another
device, so drivers should not cache the value of dev->irq.
Argument 'entries' is a pointer to an array of msix_entry structs.
The number of entries is indicated in argument 'nvec'.
struct msix_entry is defined in drivers/pci/msi.h:
A device driver must always call free_irq() on the interrupt(s)
for which it has called request_irq() before calling this function.
Failure to do so will result in a BUG_ON(), the device will be left with
MSI enabled and will leak its vector.
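The corresponding teardown, in the required order (foo names are
hypothetical):

static void foo_teardown_irq(struct pci_dev *pdev, struct foo_dev *foo)
{
	/* free_irq() must precede pci_disable_msi(), see above. */
	free_irq(pdev->irq, foo);
	pci_disable_msi(pdev);
}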
4.3 Using MSI-X
The MSI-X capability is much more flexible than the MSI capability.
It supports up to 2048 interrupts, each of which can be controlled
independently. To support this flexibility, drivers must use an array of
`struct msix_entry':
struct msix_entry {
u16 vector; /* kernel uses to write alloc vector */
u16 entry; /* driver uses to specify entry */
};
A device driver is responsible for initializing the field 'entry' of
each element with a unique entry number supported by the MSI-X table.
Otherwise, -EINVAL will be returned. A successful return of zero
indicates that the PCI subsystem completed initializing each of the
requested entries of the MSI-X table with message address and message data.
Last but not least, the PCI subsystem will write the 1:1
vector-to-entry mapping into the field 'vector' of each element. A
device driver is responsible for keeping track of allocated MSI-X
vectors in its internal data structure.
A return of zero indicates that the requested number of MSI-X
vectors was successfully allocated. A return greater than zero
indicates an MSI-X vector shortage, and a return less than zero
indicates a failure. A failure may be the result of duplicate
entries specified in the second argument, of no vectors being
available, or of failing to initialize the MSI-X table entries.
5.3.3 API pci_disable_msix
This allows the device to use these interrupts in a sparse fashion;
for example it could use interrupts 3 and 1027 and allocate only a
two-element array. The driver is expected to fill in the 'entry' value
in each element of the array to indicate which entries it wants the kernel
to assign interrupts for. It is invalid to fill in two entries with the
same number.
4.3.1 pci_enable_msix
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
Calling this function asks the PCI subsystem to allocate 'nvec' MSIs.
The 'entries' argument is a pointer to an array of msix_entry structs
which should be at least 'nvec' entries in size. On success, the
function will return 0 and the device will have been switched into
MSI-X interrupt mode. The 'vector' elements in each entry will have
been filled in with the interrupt number. The driver should then call
request_irq() for each 'vector' that it decides to use.
If this function returns a negative number, it indicates an error and
the driver should not attempt to allocate any more MSI-X interrupts for
this device. If it returns a positive number, it indicates the maximum
number of interrupt vectors that could have been allocated. See example
below.
This function, in contrast with pci_enable_msi(), does not adjust
dev->irq. The device will not generate interrupts for this interrupt
number once MSI-X is enabled. The device driver is responsible for
keeping track of the interrupts assigned to the MSI-X vectors so it can
free them again later.
Device drivers should normally call this function once per device
during the initialization phase.
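Putting the pieces together, a driver requesting a fixed number of
vectors might look like the following sketch (FOO_NVEC, the
foo_adapter fields and foo_msix_interrupt() are hypothetical):

static int foo_setup_msix(struct foo_adapter *adapter)
{
	int i, err;

	/* Ask for entries 0 .. FOO_NVEC - 1 of the device's MSI-X table. */
	for (i = 0; i < FOO_NVEC; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev, adapter->msix_entries, FOO_NVEC);
	if (err)
		return err;	/* > 0: vector shortage, < 0: error */

	for (i = 0; i < FOO_NVEC; i++) {
		err = request_irq(adapter->msix_entries[i].vector,
				  foo_msix_interrupt, 0, "foo", adapter);
		if (err)
			goto undo;
	}
	return 0;

undo:
	while (--i >= 0)
		free_irq(adapter->msix_entries[i].vector, adapter);
	pci_disable_msix(adapter->pdev);
	return err;
}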
It is ideal if drivers can cope with a variable number of MSI-X
interrupts; there are many reasons why the platform may not be able
to provide the exact number that a driver asks for.
A request loop to achieve that might look like:
static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
{
	int rc;

	while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
		rc = pci_enable_msix(adapter->pdev,
				     adapter->msix_entries, nvec);
		if (rc > 0)
			nvec = rc;
		else
			return rc;
	}

	return -ENOSPC;
}
4.3.2 pci_disable_msix
void pci_disable_msix(struct pci_dev *dev)
This API should always be used to undo the effect of pci_enable_msix()
when a device driver is unloading. Note that a device driver should
always call free_irq() on all MSI-X vectors it has done request_irq()
on before calling this API. Failure to do so results in a BUG_ON() and
a device will be left with MSI-X enabled and leaks its vectors.
5.3.4 MSI-X mode vs. legacy mode diagram
The below diagram shows the events which switch the interrupt
mode on the MSI-X capable device function between MSI-X mode and
PIN-IRQ assertion mode (legacy).
  ------------   pci_enable_msix(,,n)   ------------------------
 |            | <===============        |                        |
 | MSI-X MODE |                         | PIN-IRQ ASSERTION MODE |
 |            | ===============>        |                        |
  ------------   pci_disable_msix       ------------------------
Figure 2. MSI-X Mode vs. Legacy Mode
In Figure 2, a device operates by default in legacy mode. A
successful MSI-X request (using pci_enable_msix()) switches a
device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector
stored in dev->irq will be saved by the PCI subsystem; however,
unlike MSI mode, the PCI subsystem will not replace dev->irq with an
assigned MSI-X vector because the PCI subsystem already writes the 1:1
vector-to-entry mapping into the field 'vector' of each element
specified in the second argument.
To return to its default mode, a device driver should always call
pci_disable_msix() to undo the effect of pci_enable_msix(). Note that
a device driver should always call free_irq() on all MSI-X vectors it
has done request_irq() on before calling pci_disable_msix(). Failure
to do so results in a BUG_ON() and a device will be left with MSI-X
enabled and leaks its vectors. Otherwise, the PCI subsystem switches a
device function's interrupt mode from MSI-X mode to legacy mode and
marks all allocated MSI-X vectors as unused.
Once being marked as unused, there is no guarantee that the PCI
subsystem will reserve these MSI-X vectors for a device. Depending on
the availability of current PCI vector resources and the number of
MSI/MSI-X requests from other drivers, these MSI-X vectors may be
re-assigned.
For the case where the PCI subsystem has re-assigned these MSI-X
vectors to other drivers, a request to switch back to MSI-X mode may
result in being assigned another set of MSI-X vectors, or in a
failure if no more vectors are available.
5.4 Handling a function implementing both MSI and MSI-X capabilities
For the case where a function implements both MSI and MSI-X
capabilities, the PCI subsystem enables a device to run either in MSI
mode or MSI-X mode but not both. A device driver determines whether it
wants MSI or MSI-X enabled on its hardware device. Once a device
driver requests MSI, for example, it is prohibited from requesting
MSI-X; in other words, a device driver is not permitted to ping-pong
between MSI and MSI-X modes at run time.
5.5 Hardware requirements for MSI/MSI-X support
MSI/MSI-X support requires support from both system hardware and
individual hardware device functions.
5.5.1 Required x86 hardware support
Since the target of the MSI address is the local APIC CPU, enabling
MSI/MSI-X support in the Linux kernel depends on whether the existing
system hardware supports a local APIC. Users should verify that their
system supports local APIC operation by testing that it runs when
CONFIG_X86_LOCAL_APIC=y.
In an SMP environment, CONFIG_X86_LOCAL_APIC is set automatically;
however, in a UP environment, users must set it
manually. Once CONFIG_X86_LOCAL_APIC=y, setting
CONFIG_PCI_MSI enables the VECTOR based scheme and the option for
MSI-capable device drivers to selectively enable MSI/MSI-X.
Note that the CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X
vectors are allocated anew at runtime and MSI/MSI-X support does not
depend on BIOS support. This key independence enables MSI/MSI-X
support on future IOxAPIC-free platforms.
5.5.2 Device hardware support
The hardware device function supports MSI by indicating the
MSI/MSI-X capability structure in its PCI capability list. By
default, this capability structure will not be initialized by
the kernel to enable MSI during system boot. In other words,
the device function runs in its default pin assertion mode.
Note that in many cases hardware supporting MSI has bugs,
which may result in system hangs. The software driver for specific
MSI-capable hardware is responsible for deciding whether to call
pci_enable_msi() or not. A return of zero indicates that the kernel
successfully initialized the MSI/MSI-X capability structure of the
device function. The device function is now running in MSI/MSI-X mode.
5.6 How to tell whether MSI/MSI-X is enabled on a device function
At the driver level, a return of zero from the function call of
pci_enable_msi()/pci_enable_msix() indicates to a device driver that
its device function is initialized successfully and ready to run in
MSI/MSI-X mode.
At the user level, users can use the command 'cat /proc/interrupts'
to display the vectors allocated for devices and their interrupt
MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is
enabled on a SCSI Adaptec 39320D Ultra320 controller.
           CPU0       CPU1
  0:     324639          0    IO-APIC-edge   timer
  1:       1186          0    IO-APIC-edge   i8042
  2:          0          0          XT-PIC   cascade
 12:       2797          0    IO-APIC-edge   i8042
 14:       6543          0    IO-APIC-edge   ide0
 15:          1          0    IO-APIC-edge   ide1
169:          0          0   IO-APIC-level   uhci-hcd
185:          0          0   IO-APIC-level   uhci-hcd
193:        138         10         PCI-MSI   aic79xx
201:         30          0         PCI-MSI   aic79xx
225:         30          0   IO-APIC-level   aic7xxx
233:         30          0   IO-APIC-level   aic7xxx
NMI:          0          0
LOC:     324553     325068
ERR:          0
MIS:          0
6. MSI quirks
Several PCI chipsets or devices are known to not support MSI.
The PCI stack provides 3 possible levels of MSI disabling:
* on a single device
* on all devices behind a specific bridge
* globally
6.1. Disabling MSI on a single device
Under some circumstances it might be required to disable MSI on a
single device. This may be achieved either by not calling
pci_enable_msi() at all, or by setting the pci_dev->no_msi flag
beforehand (most of the time in a quirk).
6.2. Disabling MSI below a bridge
The vast majority of MSI quirks are required because PCI bridges are
not able to route MSI between buses. In this case, MSI has to be
disabled on all devices behind the bridge. This is achieved by setting
the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge's
subordinate bus. There is no need to set the same flag on bridges that
are below the broken bridge. When pci_enable_msi() is called to enable
MSI on a device, pci_msi_supported() takes care of checking the NO_MSI
flag in all parent buses of the device.
Some bridges actually support dynamically enabling/disabling MSI
by changing some bits in their PCI configuration space (especially
the HyperTransport chipsets such as the nVidia nForce and Serverworks
HT2000). It may then be required to update the NO_MSI flag on the
corresponding devices in the sysfs hierarchy. To enable MSI support
on device "0000:00:0e", do:
echo 1 > /sys/bus/pci/devices/0000:00:0e/msi_bus
To disable MSI support, echo 0 instead of 1. Note that it should be
used with caution since changing this value might break interrupts.
6.3. Disabling MSI globally
Some extreme cases may require disabling MSI globally on the system.
For now, the only known case is a Serverworks PCI-X chipset (MSI is
not supported on several buses that are not all connected to the
chipset in the Linux PCI hierarchy). In the vast majority of other
cases, disabling MSI only behind a specific bridge is enough.
For debugging purposes, the user may also pass pci=nomsi on the
kernel command line to explicitly disable MSI globally. But, once the
appropriate quirks are added to the kernel, this option should not be
required anymore.
6.4. Finding why MSI cannot be enabled on a device
Assuming that MSI is not enabled on a device, you should look at
dmesg to find messages that quirks may output when disabling MSI
on some devices, some bridges or even globally.
Then, lspci -t gives the list of bridges above a device. Reading
/sys/bus/pci/devices/0000:00:0e/msi_bus will tell you whether MSI
is enabled (1) or disabled (0). If 0 is found in a single bridge's
msi_bus file above the device, MSI cannot be enabled.
7. FAQ
Q1. Are there any limitations on using MSI?
A1. If the PCI device supports MSI and conforms to the
specification and the platform supports the APIC local bus,
then using MSI should work.
Q2. Will it work on all the Pentium processors (P3, P4, Xeon,
AMD processors)? In P3 IPI's are transmitted on the APIC local
bus and in P4 and Xeon they are transmitted on the system
bus. Are there any implications with this?
A2. MSI support enables a PCI device to send an inbound
memory write (0xfeexxxxx as target address) on its PCI bus
directly to the FSB. Since the message address has the
redirection hint bit cleared, it should work.
Q3. The target address 0xfeexxxxx will be translated by the
Host Bridge into an interrupt message. Are there any
limitations on the chipsets such as Intel 8xx, Intel e7xxx,
or VIA?
A3. If these chipsets support an inbound memory write with
the target address set to 0xfeexxxxx, conforming to PCI
specification 2.3 or later, then it should work.
Q4. From the driver point of view, if the MSI is lost because
of errors occurring during inbound memory write, then it may
wait forever. Is there a mechanism for it to recover?
A4. Since the target of the transaction is an inbound memory
write, all transaction termination conditions (Retry,
Master-Abort, Target-Abort, or normal completion) are
supported. A device sending an MSI must abide by all the PCI
rules and conditions regarding that inbound memory write. So,
if a retry is signaled it must retry, etc. We believe that
the recommendation for Abort is also a retry (refer to PCI
specification 2.3 or later).
This API should be used to undo the effect of pci_enable_msix(). It frees
the previously allocated message signaled interrupts. The interrupts may
subsequently be assigned to another device, so drivers should not cache
the value of the 'vector' elements over a call to pci_disable_msix().
A device driver must always call free_irq() on the interrupt(s)
for which it has called request_irq() before calling this function.
Failure to do so will result in a BUG_ON(), the device will be left with
MSI enabled and will leak its vector.
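A teardown sketch following those rules (the foo names and the 'nvec'
bookkeeping are hypothetical):

static void foo_teardown_msix(struct foo_adapter *adapter)
{
	int i;

	/* Free every vector we did request_irq() on, then disable MSI-X. */
	for (i = 0; i < adapter->nvec; i++)
		free_irq(adapter->msix_entries[i].vector, adapter);

	pci_disable_msix(adapter->pdev);
}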
4.3.3 The MSI-X Table
The MSI-X capability specifies a BAR and offset within that BAR for the
MSI-X Table. This address is mapped by the PCI subsystem, and should not
be accessed directly by the device driver. If the driver wishes to
mask or unmask an interrupt, it should call disable_irq() / enable_irq().
4.4 Handling devices implementing both MSI and MSI-X capabilities
If a device implements both MSI and MSI-X capabilities, it can
run in either MSI mode or MSI-X mode but not both simultaneously.
This is a requirement of the PCI spec, and it is enforced by the
PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or
pci_enable_msix() when MSI is already enabled will result in an error.
If a device driver wishes to switch between MSI and MSI-X at runtime,
it must first quiesce the device, then switch it back to pin-interrupt
mode, before calling pci_enable_msi() or pci_enable_msix() and resuming
operation. This is not expected to be a common operation but may be
useful for debugging or testing during development.
4.5 Considerations when using MSIs
4.5.1 Choosing between MSI-X and MSI
If your device supports both MSI-X and MSI capabilities, you should use
the MSI-X facilities in preference to the MSI facilities. As mentioned
above, MSI-X supports any number of interrupts between 1 and 2048.
In contrast, MSI is restricted to a maximum of 32 interrupts (and the
number must be a power of two). In addition, the MSI interrupt vectors must
be allocated consecutively, so the system may not be able to allocate
as many vectors for MSI as it could for MSI-X. On some platforms, MSI
interrupts must all be targeted at the same set of CPUs whereas MSI-X
interrupts can all be targeted at different CPUs.
4.5.2 Spinlocks
Most device drivers have a per-device spinlock which is taken in the
interrupt handler. With pin-based interrupts or a single MSI, it is not
necessary to disable interrupts (Linux guarantees the same interrupt will
not be re-entered). If a device uses multiple interrupts, the driver
must disable interrupts while the lock is held. If the device sends
a different interrupt, the driver will deadlock trying to recursively
acquire the spinlock.
There are two solutions. The first is to take the lock with
spin_lock_irqsave() or spin_lock_irq() (see
Documentation/DocBook/kernel-locking). The second is to specify
IRQF_DISABLED to request_irq() so that the kernel runs the entire
interrupt routine with interrupts disabled.
If your MSI interrupt routine does not hold the lock for the whole time
it is running, the first solution may be best. The second solution is
normally preferred as it avoids making two transitions from interrupt
disabled to enabled and back again.
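For the first solution, the handler might look like this sketch (foo
names are hypothetical):

static irqreturn_t foo_interrupt(int irq, void *data)
{
	struct foo_adapter *adapter = data;
	unsigned long flags;

	/* Disabling local interrupts prevents another of this device's
	 * MSIs from interrupting the handler and deadlocking on the lock. */
	spin_lock_irqsave(&adapter->lock, flags);
	/* ... service the device ... */
	spin_unlock_irqrestore(&adapter->lock, flags);

	return IRQ_HANDLED;
}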
4.6 How to tell whether MSI/MSI-X is enabled on a device
Using 'lspci -v' (as root) may show some devices with "MSI", "Message
Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
has an 'Enable' flag which will be followed with either "+" (enabled)
or "-" (disabled).
5. MSI quirks
Several PCI chipsets or devices are known not to support MSIs.
The PCI stack provides three ways to disable MSIs:
1. globally
2. on all devices behind a specific bridge
3. on a single device
5.1. Disabling MSIs globally
Some host chipsets simply don't support MSIs properly. If we're
lucky, the manufacturer knows this and has indicated it in the ACPI
FADT table. In this case, Linux will automatically disable MSIs.
Some boards don't include this information in the table and so we have
to detect them ourselves. The complete list of these is found near the
quirk_disable_all_msi() function in drivers/pci/quirks.c.
If you have a board which has problems with MSIs, you can pass pci=nomsi
on the kernel command line to disable MSIs on all devices. It would be
in your best interests to report the problem to linux-pci@vger.kernel.org
including a full 'lspci -v' so we can add the quirks to the kernel.
5.2. Disabling MSIs below a bridge
Some PCI bridges are not able to route MSIs between busses properly.
In this case, MSIs must be disabled on all devices behind the bridge.
Some bridges allow you to enable MSIs by changing some bits in their
PCI configuration space (especially the Hypertransport chipsets such
as the nVidia nForce and Serverworks HT2000). As with host chipsets,
Linux mostly knows about them and automatically enables MSIs if it can.
If you have a bridge which Linux doesn't yet know about, you can enable
MSIs in configuration space using whatever method you know works, then
enable MSIs on that bridge by doing:
echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
where $bridge is the PCI address of the bridge you've enabled
(e.g. 0000:00:0e.0).
To disable MSIs, echo 0 instead of 1. Changing this value should be
done with caution as it can break interrupt handling for all devices
below this bridge.
Again, please notify linux-pci@vger.kernel.org of any bridges that need
special handling.
5.3. Disabling MSIs on a single device
Some devices are known to have faulty MSI implementations. Usually this
is handled in the individual device driver but occasionally it's necessary
to handle this with a quirk. Some drivers have an option to disable use
of MSI. While this is a convenient workaround for the driver author,
it is not good practice, and should not be emulated.
5.4. Finding why MSIs are disabled on a device
From the above three sections, you can see that there are many reasons
why MSIs may not be enabled for a given device. Your first step should
be to examine your dmesg carefully to determine whether MSIs are enabled
for your machine. You should also check your .config to be sure you
have enabled CONFIG_PCI_MSI.
Then, 'lspci -t' gives the list of bridges above a device. Reading
/sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1)
or disabled (0). If 0 is found in any of the msi_bus files belonging
to bridges between the PCI root and the device, MSIs are disabled.
It is also worth checking the device driver to see whether it supports MSIs.
For example, it may contain calls to pci_enable_msi(), pci_enable_msix() or
pci_enable_msi_block().
PCI Express I/O Virtualization Howto
Copyright (C) 2009 Intel Corporation
Yu Zhao <yu.zhao@intel.com>
1. Overview
1.1 What is SR-IOV
Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
capability which makes one physical device appear as multiple virtual
devices. The physical device is referred to as the Physical Function
(PF) while the virtual devices are referred to as Virtual Functions (VFs).
Allocation of VFs can be dynamically controlled by the PF via
registers encapsulated in the capability. By default, this feature is
not enabled and the PF behaves as a traditional PCIe device. Once it's
turned on, each VF's PCI configuration space can be accessed using its
own Bus, Device and Function Number (Routing ID). Each VF also has PCI
Memory Space, which is used to map its register set. The VF device
driver operates on the register set so the VF can be functional and
appear as a real PCI device.
2. User Guide
2.1 How can I enable SR-IOV capability
The device driver (PF driver) controls the enabling and disabling
of the capability via an API provided by the SR-IOV core. If the
hardware has SR-IOV capability, loading its PF driver will enable it
and all VFs associated with the PF.
2.2 How can I use the Virtual Functions
VFs are treated as hot-plugged PCI devices in the kernel, so they
should be able to work in the same way as real PCI devices. A VF
requires a device driver, just like a normal PCI device.
3. Developer Guide
3.1 SR-IOV API
To enable SR-IOV capability:
int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
'nr_virtfn' is the number of VFs to be enabled.
To disable SR-IOV capability:
void pci_disable_sriov(struct pci_dev *dev);
To notify SR-IOV core of Virtual Function Migration:
irqreturn_t pci_sriov_migration(struct pci_dev *dev);
3.2 Usage example
The following piece of code illustrates the usage of the SR-IOV API.
static int __devinit dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
pci_enable_sriov(dev, NR_VIRTFN);
...
return 0;
}
static void __devexit dev_remove(struct pci_dev *dev)
{
pci_disable_sriov(dev);
...
}
static int dev_suspend(struct pci_dev *dev, pm_message_t state)
{
...
return 0;
}
static int dev_resume(struct pci_dev *dev)
{
...
return 0;
}
static void dev_shutdown(struct pci_dev *dev)
{
...
}
static struct pci_driver dev_driver = {
.name = "SR-IOV Physical Function driver",
.id_table = dev_id_table,
.probe = dev_probe,
.remove = __devexit_p(dev_remove),
.suspend = dev_suspend,
.resume = dev_resume,
.shutdown = dev_shutdown,
};
@@ -392,3 +392,35 @@ Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t)
have been kept around for migration reasons. After more than two years
it's time to remove them finally
Who: Thomas Gleixner <tglx@linutronix.de>
---------------------------
What: fakephp and associated sysfs files in /sys/bus/pci/slots/
When: 2011
Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to
represent a machine's physical PCI slots. The change in semantics
had userspace implications, as the hotplug core no longer allowed
drivers to create multiple sysfs files per physical slot (required
for multi-function devices, for example). fakephp was seen as a developer's
tool only, and its interface changed. Too late, we learned that
there were some users of the fakephp interface.
In 2.6.30, the original fakephp interface was restored. At the same
time, the PCI core gained the ability that fakephp provided, namely
function-level hot-remove and hot-add.
Since the PCI core now provides the same functionality, exposed in:
/sys/bus/pci/rescan
/sys/bus/pci/devices/.../remove
/sys/bus/pci/devices/.../rescan
there is no functional reason to maintain fakephp as well.
We will keep the existing module so that 'modprobe fakephp' will
present the old /sys/bus/pci/slots/... interface for compatibility,
but users are urged to migrate their applications to the API above.
After a reasonable transition period, we will remove the legacy
fakephp interface.
Who: Alex Chiang <achiang@hp.com>
@@ -12,6 +12,7 @@ that support it. For example, a given bus might look like this:
| |-- enable
| |-- irq
| |-- local_cpus
| |-- remove
| |-- resource
| |-- resource0
| |-- resource1
@@ -36,6 +37,7 @@ files, each with their own function.
enable Whether the device is enabled (ascii, rw)
irq IRQ number (ascii, ro)
local_cpus nearby CPU mask (cpumask, ro)
remove remove device from kernel's list (ascii, wo)
resource PCI resource host addresses (ascii, ro)
resource0..N PCI resource N, if present (binary, mmap)
resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap)
@@ -46,6 +48,7 @@ files, each with their own function.
ro - read only file
rw - file is readable and writable
wo - write only file
mmap - file is mmapable
ascii - file contains ascii text
binary - file contains binary data
@@ -73,6 +76,13 @@ that the device must be enabled for a rom read to return data successfully.
In the event a driver is not bound to the device, it can be enabled using the
'enable' file, documented above.
The 'remove' file is used to remove the PCI device by writing a non-zero
integer to the file. This does not involve any kind of hot-plug functionality,
e.g. powering off the device. The device is removed from the kernel's list of
PCI devices, its sysfs directory is removed, and the device is detached
from any driver bound to it. Removal of PCI root buses is
disallowed.
Accessing legacy resources through sysfs
----------------------------------------
......
@@ -1695,6 +1695,8 @@ and is between 256 and 4096 characters. It is defined in the file
See also Documentation/blockdev/paride.txt.
pci=option[,option...] [PCI] various PCI subsystem options:
earlydump [X86] dump PCI config space before the kernel
changes anything
off [X86] don't probe for the PCI bus
bios [X86-32] force use of PCI BIOS, don't access
the hardware directly. Use this if your machine
@@ -1794,6 +1796,15 @@ and is between 256 and 4096 characters. It is defined in the file
cbmemsize=nn[KMG] The fixed amount of bus space which is
reserved for the CardBus bridge's memory
window. The default value is 64 megabytes.
resource_alignment=
Format:
[<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
Specifies the alignment and the device for which
to reassign aligned memory resources.
If <order of align> is not specified,
PAGE_SIZE is used as the alignment.
A PCI-PCI bridge can be specified if resource
windows need to be expanded.
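For example (with a hypothetical device
address), pci=resource_alignment=20@0000:00:02.0
requests 2^20-byte (1 MB) alignment for the
device at domain 0000, bus 00, device 02,
function 0.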
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
Management.
......
@@ -273,4 +273,18 @@ struct pci_dev *alpha_gendev_to_pci(struct device *dev);
extern struct pci_dev *isa_bridge;
extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
size_t count);
extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
size_t count);
extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
struct vm_area_struct *vma,
enum pci_mmap_state mmap_state);
extern void pci_adjust_legacy_attr(struct pci_bus *bus,
enum pci_mmap_state mmap_type);
#define HAVE_PCI_LEGACY 1
extern int pci_create_resource_files(struct pci_dev *dev);
extern void pci_remove_resource_files(struct pci_dev *dev);
#endif /* __ALPHA_PCI_H */
@@ -12,7 +12,7 @@ obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
obj-$(CONFIG_VGA_HOSE) += console.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PCI) += pci.o pci_iommu.o
obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o
obj-$(CONFIG_SRM_ENV) += srm_env.o
obj-$(CONFIG_MODULES) += module.o
......
/*
* arch/alpha/kernel/pci-sysfs.c
*
* Copyright (C) 2009 Ivan Kokshaysky
*
* Alpha PCI resource files.
*
* Loosely based on generic HAVE_PCI_MMAP implementation in
* drivers/pci/pci-sysfs.c
*/
#include <linux/sched.h>
#include <linux/pci.h>
static int hose_mmap_page_range(struct pci_controller *hose,
struct vm_area_struct *vma,
enum pci_mmap_state mmap_type, int sparse)
{
unsigned long base;
if (mmap_type == pci_mmap_mem)
base = sparse ? hose->sparse_mem_base : hose->dense_mem_base;
else
base = sparse ? hose->sparse_io_base : hose->dense_io_base;
vma->vm_pgoff += base >> PAGE_SHIFT;
vma->vm_flags |= (VM_IO | VM_RESERVED);
return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
}
static int __pci_mmap_fits(struct pci_dev *pdev, int num,
struct vm_area_struct *vma, int sparse)
{
unsigned long nr, start, size;
int shift = sparse ? 5 : 0;
nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
start = vma->vm_pgoff;
size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1;
if (start < size && size - start >= nr)
return 1;
WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d "
"(size 0x%08lx)\n",
current->comm, sparse ? " sparse" : "", start, start + nr,
pci_name(pdev), num, size);
return 0;
}
/**
* pci_mmap_resource - map a PCI resource into user memory space
* @kobj: kobject for mapping
* @attr: struct bin_attribute for the file being mapped
* @vma: struct vm_area_struct passed into the mmap
* @sparse: address space type
*
* Use the bus mapping routines to map a PCI resource into userspace.
*/
static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
struct vm_area_struct *vma, int sparse)
{
struct pci_dev *pdev = to_pci_dev(container_of(kobj,
struct device, kobj));
struct resource *res = (struct resource *)attr->private;
enum pci_mmap_state mmap_type;
struct pci_bus_region bar;
int i;
for (i = 0; i < PCI_ROM_RESOURCE; i++)
if (res == &pdev->resource[i])
break;
if (i >= PCI_ROM_RESOURCE)
return -ENODEV;
if (!__pci_mmap_fits(pdev, i, vma, sparse))
return -EINVAL;
if (iomem_is_exclusive(res->start))
return -EINVAL;
pcibios_resource_to_bus(pdev, &bar, res);
vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0));
mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse);
}
static int pci_mmap_resource_sparse(struct kobject *kobj,
struct bin_attribute *attr,
struct vm_area_struct *vma)
{
return pci_mmap_resource(kobj, attr, vma, 1);
}
static int pci_mmap_resource_dense(struct kobject *kobj,
struct bin_attribute *attr,
struct vm_area_struct *vma)
{
return pci_mmap_resource(kobj, attr, vma, 0);
}
/**
* pci_remove_resource_files - cleanup resource files
* @pdev: device to clean up
*
* If we created resource files for @pdev, remove them from sysfs and
* free their resources.
*/
void pci_remove_resource_files(struct pci_dev *pdev)
{
int i;
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
struct bin_attribute *res_attr;
res_attr = pdev->res_attr[i];
if (res_attr) {
sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
kfree(res_attr);
}
res_attr = pdev->res_attr_wc[i];
if (res_attr) {
sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
kfree(res_attr);
}
}
}
static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num)
{
struct pci_bus_region bar;
struct pci_controller *hose = pdev->sysdata;
long dense_offset;
unsigned long sparse_size;
pcibios_resource_to_bus(pdev, &bar, &pdev->resource[num]);
/* All core logic chips have 4G sparse address space, except
CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM
definitions in asm/core_xxx.h files). This corresponds
to 128M or 512M of the bus space. */
dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base);
sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000;
return bar.end < sparse_size;
}
static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name,
char *suffix, struct bin_attribute *res_attr,
unsigned long sparse)
{
size_t size = pci_resource_len(pdev, num);
sprintf(name, "resource%d%s", num, suffix);
res_attr->mmap = sparse ? pci_mmap_resource_sparse :
pci_mmap_resource_dense;
res_attr->attr.name = name;
res_attr->attr.mode = S_IRUSR | S_IWUSR;
res_attr->size = sparse ? size << 5 : size;
res_attr->private = &pdev->resource[num];
return sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
}
static int pci_create_attr(struct pci_dev *pdev, int num)
{
/* allocate attribute structure, piggyback attribute name */
int retval, nlen1, nlen2 = 0, res_count = 1;
unsigned long sparse_base, dense_base;
struct bin_attribute *attr;
struct pci_controller *hose = pdev->sysdata;
char *suffix, *attr_name;
suffix = ""; /* Assume bwx machine, normal resourceN files. */
nlen1 = 10;
if (pdev->resource[num].flags & IORESOURCE_MEM) {
sparse_base = hose->sparse_mem_base;
dense_base = hose->dense_mem_base;
if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) {
sparse_base = 0;
suffix = "_dense";
nlen1 = 16; /* resourceN_dense */
}
} else {
sparse_base = hose->sparse_io_base;
dense_base = hose->dense_io_base;
}
if (sparse_base) {
suffix = "_sparse";
nlen1 = 17;
if (dense_base) {
nlen2 = 16; /* resourceN_dense */
res_count = 2;
}
}
attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC);
if (!attr)
return -ENOMEM;
/* Create bwx, sparse or single dense file */
attr_name = (char *)(attr + res_count);
pdev->res_attr[num] = attr;
retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr,
sparse_base);
if (retval || res_count == 1)
return retval;
/* Create dense file */
attr_name += nlen1;
attr++;
pdev->res_attr_wc[num] = attr;
return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0);
}
/**
* pci_create_resource_files - create resource files in sysfs for @pdev
* @pdev: device in question
*
* Walk the resources in @pdev creating files for each resource available.
*/
int pci_create_resource_files(struct pci_dev *pdev)
{
int i;
int retval;
/* Expose the PCI resources from this device as files */
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
/* skip empty resources */
if (!pci_resource_len(pdev, i))
continue;
retval = pci_create_attr(pdev, i);
if (retval) {
pci_remove_resource_files(pdev);
return retval;
}
}
return 0;
}
/* Legacy I/O bus mapping stuff. */
static int __legacy_mmap_fits(struct pci_controller *hose,
struct vm_area_struct *vma,
unsigned long res_size, int sparse)
{
unsigned long nr, start, size;
nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
start = vma->vm_pgoff;
size = ((res_size - 1) >> PAGE_SHIFT) + 1;
if (start < size && size - start >= nr)
return 1;
WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %d "
"(size 0x%08lx)\n",
current->comm, sparse ? " sparse" : "", start, start + nr,
hose->index, size);
return 0;
}
static inline int has_sparse(struct pci_controller *hose,
enum pci_mmap_state mmap_type)
{
unsigned long base;
base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base :
hose->sparse_io_base;
return base != 0;
}
int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
enum pci_mmap_state mmap_type)
{
struct pci_controller *hose = bus->sysdata;
int sparse = has_sparse(hose, mmap_type);
unsigned long res_size;
res_size = (mmap_type == pci_mmap_mem) ? bus->legacy_mem->size :
bus->legacy_io->size;
if (!__legacy_mmap_fits(hose, vma, res_size, sparse))
return -EINVAL;
return hose_mmap_page_range(hose, vma, mmap_type, sparse);
}
/**
* pci_adjust_legacy_attr - adjustment of legacy file attributes
* @bus: bus whose legacy files need adjusting
* @mmap_type: I/O port or memory
*
* Adjust file name and size for sparse mappings.
*/
void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type)
{
struct pci_controller *hose = bus->sysdata;
if (!has_sparse(hose, mmap_type))
return;
if (mmap_type == pci_mmap_mem) {
bus->legacy_mem->attr.name = "legacy_mem_sparse";
bus->legacy_mem->size <<= 5;
} else {
bus->legacy_io->attr.name = "legacy_io_sparse";
bus->legacy_io->size <<= 5;
}
return;
}
/* Legacy I/O bus read/write functions */
int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
{
struct pci_controller *hose = bus->sysdata;
port += hose->io_space->start;
switch(size) {
case 1:
*((u8 *)val) = inb(port);
return 1;
case 2:
if (port & 1)
return -EINVAL;
*((u16 *)val) = inw(port);
return 2;
case 4:
if (port & 3)
return -EINVAL;
*((u32 *)val) = inl(port);
return 4;
}
return -EINVAL;
}
int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
{
struct pci_controller *hose = bus->sysdata;
port += hose->io_space->start;
switch(size) {
case 1:
outb(val, port);
return 1;
case 2:
if (port & 1)
return -EINVAL;
outw(val, port);
return 2;
case 4:
if (port & 3)
return -EINVAL;
outl(val, port);
return 4;
}
return -EINVAL;
}
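The read/write pair above gives user space byte/word/longword access to legacy I/O ports through a bus's legacy_io file, with the file offset selecting the port. A hedged user-space sketch (the sysfs path and the register read are illustrative only):

/*
 * User-space sketch: read the VGA miscellaneous output register
 * (port 0x3cc) through a bus's legacy_io file.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/class/pci_bus/0000:00/legacy_io", O_RDONLY);
	unsigned char v;

	if (fd < 0)
		return 1;
	/* The offset is the port number; a 1-byte read maps to inb(). */
	if (pread(fd, &v, 1, 0x3cc) != 1) {
		close(fd);
		return 1;
	}
	printf("port 0x3cc = 0x%02x\n", v);
	close(fd);
	return 0;
}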
......@@ -114,6 +114,10 @@ extern int pci_domain_nr(struct pci_bus *bus);
/* Decide whether to display the domain number in /proc */
extern int pci_proc_domain(struct pci_bus *bus);
/* MSI arch hooks */
#define arch_setup_msi_irqs arch_setup_msi_irqs
#define arch_teardown_msi_irqs arch_teardown_msi_irqs
#define arch_msi_check_device arch_msi_check_device
struct vm_area_struct;
/* Map a range of PCI memory or I/O space for a device into user space */
......
......@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <asm/machdep.h>
......@@ -19,6 +20,10 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
return -ENOSYS;
}
/* PowerPC doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
if (ppc_md.msi_check_device) {
pr_debug("msi: Using platform check routine.\n");
return ppc_md.msi_check_device(dev, nvec, type);
......
......@@ -86,6 +86,9 @@ static inline void early_quirks(void) { }
extern void pci_iommu_alloc(void);
/* MSI arch hook */
#define arch_setup_msi_irqs arch_setup_msi_irqs
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_32
......
......@@ -3468,6 +3468,10 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
struct intel_iommu *iommu = NULL;
int index = 0;
/* x86 doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
irq_want = nr_irqs_gsi;
sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
......
......@@ -300,8 +300,7 @@ fs_initcall(pci_iommu_init);
static __devinit void via_no_dac(struct pci_dev *dev)
{
if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
printk(KERN_INFO
"PCI: VIA PCI bridge detected. Disabling DAC.\n");
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
forbid_dac = 1;
}
}
......
......@@ -69,11 +69,12 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func)
int j;
u32 val;
printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func);
printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:",
bus, slot, func);
for (i = 0; i < 256; i += 4) {
if (!(i & 0x0f))
printk("\n%04x:",i);
printk("\n %02x:",i);
val = read_pci_config(bus, slot, func, i);
for (j = 0; j < 4; j++) {
......@@ -96,20 +97,22 @@ void early_dump_pci_devices(void)
for (func = 0; func < 8; func++) {
u32 class;
u8 type;
class = read_pci_config(bus, slot, func,
PCI_CLASS_REVISION);
if (class == 0xffffffff)
break;
continue;
early_dump_pci_device(bus, slot, func);
/* No multi-function device? */
type = read_pci_config_byte(bus, slot, func,
			    PCI_HEADER_TYPE);
if (!(type & 0x80))
	break;
if (func == 0) {
	type = read_pci_config_byte(bus, slot, func,
				    PCI_HEADER_TYPE);
	if (!(type & 0x80))
		break;
}
}
}
}
}
......@@ -494,26 +494,6 @@ static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
pci_siemens_interrupt_controller);
/*
* Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
* 4096 bytes of configuration space for each of their processor
* configuration functions.
*/
static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
{
dev->cfg_size = pci_cfg_space_size_ext(dev);
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
/*
* SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
* confusing the PCI engine:
......
......@@ -50,8 +50,6 @@ static int __init pci_legacy_init(void)
if (pci_root_bus)
pci_bus_add_devices(pci_root_bus);
pcibios_fixup_peer_bridges();
return 0;
}
......@@ -67,6 +65,7 @@ int __init pci_subsys_init(void)
pci_visws_init();
#endif
pci_legacy_init();
pcibios_fixup_peer_bridges();
pcibios_irq_init();
pcibios_init();
......
......@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/bitmap.h>
#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>
......@@ -24,24 +25,49 @@
/* Indicate if the mmcfg resources have been placed into the resource table. */
static int __initdata pci_mmcfg_resources_inserted;
static __init int extend_mmcfg(int num)
{
struct acpi_mcfg_allocation *new;
int new_num = pci_mmcfg_config_num + num;
new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL);
if (!new)
return -1;
if (pci_mmcfg_config) {
memcpy(new, pci_mmcfg_config,
       sizeof(pci_mmcfg_config[0]) * pci_mmcfg_config_num);
kfree(pci_mmcfg_config);
}
pci_mmcfg_config = new;
return 0;
}
static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end)
{
int i = pci_mmcfg_config_num;
pci_mmcfg_config_num++;
pci_mmcfg_config[i].address = addr;
pci_mmcfg_config[i].pci_segment = segment;
pci_mmcfg_config[i].start_bus_number = start;
pci_mmcfg_config[i].end_bus_number = end;
}
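Together, extend_mmcfg() and fill_one_mmcfg() replace the open-coded kzalloc of pci_mmcfg_config in each probe: grow the table, then record one aperture per call. A minimal sketch of the idiom, with made-up values:

/*
 * Sketch only: how a probe would use the two helpers above.  The
 * 0xe0000000 base and the 0-255 bus range are example values.
 */
static const char __init *pci_mmcfg_example(void)
{
	if (extend_mmcfg(1) == -1)	/* grow the table by one entry */
		return NULL;
	fill_one_mmcfg(0xe0000000ULL, 0, 0, 255); /* seg 0, buses 0-255 */
	return "Example Host Bridge";
}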
static const char __init *pci_mmcfg_e7520(void)
{
u32 win;
raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win);
win = win & 0xf000;
if (win == 0x0000 || win == 0xf000)
pci_mmcfg_config_num = 0;
else {
pci_mmcfg_config_num = 1;
pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
if (!pci_mmcfg_config)
return NULL;
pci_mmcfg_config[0].address = win << 16;
pci_mmcfg_config[0].pci_segment = 0;
pci_mmcfg_config[0].start_bus_number = 0;
pci_mmcfg_config[0].end_bus_number = 255;
}
if (win == 0x0000 || win == 0xf000)
return NULL;
if (extend_mmcfg(1) == -1)
return NULL;
fill_one_mmcfg(win << 16, 0, 0, 255);
return "Intel Corporation E7520 Memory Controller Hub";
}
......@@ -50,13 +76,11 @@ static const char __init *pci_mmcfg_intel_945(void)
{
u32 pciexbar, mask = 0, len = 0;
pci_mmcfg_config_num = 1;
raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar);
/* Enable bit */
if (!(pciexbar & 1))
pci_mmcfg_config_num = 0;
return NULL;
/* Size bits */
switch ((pciexbar >> 1) & 3) {
......@@ -73,28 +97,23 @@ static const char __init *pci_mmcfg_intel_945(void)
len = 0x04000000U;
break;
default:
pci_mmcfg_config_num = 0;
return NULL;
}
/* Errata #2, things break when not aligned on a 256Mb boundary */
/* Can only happen in 64M/128M mode */
if ((pciexbar & mask) & 0x0fffffffU)
pci_mmcfg_config_num = 0;
return NULL;
/* Don't hit the APIC registers and their friends */
if ((pciexbar & mask) >= 0xf0000000U)
pci_mmcfg_config_num = 0;
if (pci_mmcfg_config_num) {
pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
if (!pci_mmcfg_config)
return NULL;
pci_mmcfg_config[0].address = pciexbar & mask;
pci_mmcfg_config[0].pci_segment = 0;
pci_mmcfg_config[0].start_bus_number = 0;
pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1;
}
return NULL;
if (extend_mmcfg(1) == -1)
return NULL;
fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1);
return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
}
......@@ -138,22 +157,77 @@ static const char __init *pci_mmcfg_amd_fam10h(void)
busnbits = 8;
}
pci_mmcfg_config_num = (1 << segnbits);
pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) *
pci_mmcfg_config_num, GFP_KERNEL);
if (!pci_mmcfg_config)
if (extend_mmcfg(1 << segnbits) == -1)
return NULL;
for (i = 0; i < (1 << segnbits); i++) {
pci_mmcfg_config[i].address = base + (1<<28) * i;
pci_mmcfg_config[i].pci_segment = i;
pci_mmcfg_config[i].start_bus_number = 0;
pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1;
}
for (i = 0; i < (1 << segnbits); i++)
fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1);
return "AMD Family 10h NB";
}
static bool __initdata mcp55_checked;
static const char __init *pci_mmcfg_nvidia_mcp55(void)
{
int bus;
int mcp55_mmconf_found = 0;
static const u32 extcfg_regnum = 0x90;
static const u32 extcfg_regsize = 4;
static const u32 extcfg_enable_mask = 1<<31;
static const u32 extcfg_start_mask = 0xff<<16;
static const int extcfg_start_shift = 16;
static const u32 extcfg_size_mask = 0x3<<28;
static const int extcfg_size_shift = 28;
static const int extcfg_sizebus[] = {0x100, 0x80, 0x40, 0x20};
static const u32 extcfg_base_mask[] = {0x7ff8, 0x7ffc, 0x7ffe, 0x7fff};
static const int extcfg_base_lshift = 25;
/*
* check whether the AMD Fam10h probe above already took over
*/
if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked)
return NULL;
mcp55_checked = true;
for (bus = 0; bus < 256; bus++) {
u64 base;
u32 l, extcfg;
u16 vendor, device;
int start, size_index, end;
raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), 0, 4, &l);
vendor = l & 0xffff;
device = (l >> 16) & 0xffff;
if (PCI_VENDOR_ID_NVIDIA != vendor || 0x0369 != device)
continue;
raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), extcfg_regnum,
extcfg_regsize, &extcfg);
if (!(extcfg & extcfg_enable_mask))
continue;
if (extend_mmcfg(1) == -1)
continue;
size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift;
base = extcfg & extcfg_base_mask[size_index];
/* base could be > 4G */
base <<= extcfg_base_lshift;
start = (extcfg & extcfg_start_mask) >> extcfg_start_shift;
end = start + extcfg_sizebus[size_index] - 1;
fill_one_mmcfg(base, 0, start, end);
mcp55_mmconf_found++;
}
if (!mcp55_mmconf_found)
return NULL;
return "nVidia MCP55";
}
struct pci_mmcfg_hostbridge_probe {
u32 bus;
u32 devfn;
......@@ -171,8 +245,52 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
0x1200, pci_mmcfg_amd_fam10h },
{ 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD,
0x1200, pci_mmcfg_amd_fam10h },
{ 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_NVIDIA,
0x0369, pci_mmcfg_nvidia_mcp55 },
};
static int __init cmp_mmcfg(const void *x1, const void *x2)
{
const typeof(pci_mmcfg_config[0]) *m1 = x1;
const typeof(pci_mmcfg_config[0]) *m2 = x2;
int start1, start2;
start1 = m1->start_bus_number;
start2 = m2->start_bus_number;
return start1 - start2;
}
static void __init pci_mmcfg_check_end_bus_number(void)
{
int i;
typeof(pci_mmcfg_config[0]) *cfg, *cfgx;
/* sort them by start_bus_number first */
sort(pci_mmcfg_config, pci_mmcfg_config_num,
sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL);
/* last one */
if (pci_mmcfg_config_num > 0) {
i = pci_mmcfg_config_num - 1;
cfg = &pci_mmcfg_config[i];
if (cfg->end_bus_number < cfg->start_bus_number)
cfg->end_bus_number = 255;
}
/* don't overlap please */
for (i = 0; i < pci_mmcfg_config_num - 1; i++) {
cfg = &pci_mmcfg_config[i];
cfgx = &pci_mmcfg_config[i+1];
if (cfg->end_bus_number < cfg->start_bus_number)
cfg->end_bus_number = 255;
if (cfg->end_bus_number >= cfgx->start_bus_number)
cfg->end_bus_number = cfgx->start_bus_number - 1;
}
}
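A quick worked example of the clamping above, with made-up bus ranges:

/*
 * Hypothetical entries:
 *   before sort/fix: { start 0x40, end 0x3f }, { start 0x00, end 0xff }
 *   after sort:      { 0x00-0xff }, { 0x40-0x3f }
 *   last entry wraps (end < start), so end_bus_number = 255: { 0x40-0xff }
 *   overlap pass clamps the first entry: { 0x00-0x3f }, { 0x40-0xff }
 */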
static int __init pci_mmcfg_check_hostbridge(void)
{
u32 l;
......@@ -186,31 +304,33 @@ static int __init pci_mmcfg_check_hostbridge(void)
pci_mmcfg_config_num = 0;
pci_mmcfg_config = NULL;
name = NULL;
for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
bus = pci_mmcfg_probes[i].bus;
devfn = pci_mmcfg_probes[i].devfn;
raw_pci_ops->read(0, bus, devfn, 0, 4, &l);
vendor = l & 0xffff;
device = (l >> 16) & 0xffff;
name = NULL;
if (pci_mmcfg_probes[i].vendor == vendor &&
pci_mmcfg_probes[i].device == device)
name = pci_mmcfg_probes[i].probe();
}
if (name) {
printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n",
name, pci_mmcfg_config_num ? "with" : "without");
if (name)
printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n",
name);
}
return name != NULL;
/* some end_bus_number is crazy, fix it */
pci_mmcfg_check_end_bus_number();
return pci_mmcfg_config_num != 0;
}
static void __init pci_mmcfg_insert_resources(void)
{
#define PCI_MMCFG_RESOURCE_NAME_LEN 19
#define PCI_MMCFG_RESOURCE_NAME_LEN 24
int i;
struct resource *res;
char *names;
......@@ -228,9 +348,10 @@ static void __init pci_mmcfg_insert_resources(void)
struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
res->name = names;
snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
cfg->pci_segment);
res->start = cfg->address;
snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN,
"PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment,
cfg->start_bus_number, cfg->end_bus_number);
res->start = cfg->address + (cfg->start_bus_number << 20);
res->end = res->start + (num_buses << 20) - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
insert_resource(&iomem_resource, res);
......@@ -354,8 +475,6 @@ static void __init pci_mmcfg_reject_broken(int early)
(pci_mmcfg_config[0].address == 0))
return;
cfg = &pci_mmcfg_config[0];
for (i = 0; i < pci_mmcfg_config_num; i++) {
int valid = 0;
u64 addr, size;
......@@ -423,10 +542,10 @@ static void __init __pci_mmcfg_init(int early)
known_bridge = 1;
}
if (!known_bridge) {
if (!known_bridge)
acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
pci_mmcfg_reject_broken(early);
}
pci_mmcfg_reject_broken(early);
if ((pci_mmcfg_config_num == 0) ||
(pci_mmcfg_config == NULL) ||
......
......@@ -112,13 +112,18 @@ static struct pci_raw_ops pci_mmcfg = {
static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
{
void __iomem *addr;
u32 size;
size = (cfg->end_bus_number + 1) << 20;
addr = ioremap_nocache(cfg->address, size);
u64 start, size;
start = cfg->start_bus_number;
start <<= 20;
start += cfg->address;
size = cfg->end_bus_number + 1 - cfg->start_bus_number;
size <<= 20;
addr = ioremap_nocache(start, size);
if (addr) {
printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n",
cfg->address, cfg->address + size - 1);
start, start + size - 1);
addr -= cfg->start_bus_number << 20;
}
return addr;
}
......@@ -157,7 +162,7 @@ void __init pci_mmcfg_arch_free(void)
for (i = 0; i < pci_mmcfg_config_num; ++i) {
if (pci_mmcfg_virt[i].virt) {
iounmap(pci_mmcfg_virt[i].virt);
iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20));
pci_mmcfg_virt[i].virt = NULL;
pci_mmcfg_virt[i].cfg = NULL;
}
......
......@@ -66,11 +66,18 @@ struct acpi_pci_root {
struct acpi_device * device;
struct acpi_pci_id id;
struct pci_bus *bus;
u32 osc_support_set; /* _OSC state of support bits */
u32 osc_control_set; /* _OSC state of control bits */
u32 osc_control_qry; /* the latest _OSC query result */
u32 osc_queried:1; /* has _OSC control been queried? */
};
static LIST_HEAD(acpi_pci_roots);
static struct acpi_pci_driver *sub_driver;
static DEFINE_MUTEX(osc_lock);
int acpi_pci_register_driver(struct acpi_pci_driver *driver)
{
......@@ -185,6 +192,175 @@ static void acpi_pci_bridge_scan(struct acpi_device *device)
}
}
static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
static acpi_status acpi_pci_run_osc(acpi_handle handle,
const u32 *capbuf, u32 *retval)
{
acpi_status status;
struct acpi_object_list input;
union acpi_object in_params[4];
struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
union acpi_object *out_obj;
u32 errors;
/* Setting up input parameters */
input.count = 4;
input.pointer = in_params;
in_params[0].type = ACPI_TYPE_BUFFER;
in_params[0].buffer.length = 16;
in_params[0].buffer.pointer = OSC_UUID;
in_params[1].type = ACPI_TYPE_INTEGER;
in_params[1].integer.value = 1;
in_params[2].type = ACPI_TYPE_INTEGER;
in_params[2].integer.value = 3;
in_params[3].type = ACPI_TYPE_BUFFER;
in_params[3].buffer.length = 12;
in_params[3].buffer.pointer = (u8 *)capbuf;
status = acpi_evaluate_object(handle, "_OSC", &input, &output);
if (ACPI_FAILURE(status))
return status;
if (!output.length)
return AE_NULL_OBJECT;
out_obj = output.pointer;
if (out_obj->type != ACPI_TYPE_BUFFER) {
printk(KERN_DEBUG "_OSC evaluation returned wrong type\n");
status = AE_TYPE;
goto out_kfree;
}
/* Need to ignore the bit0 in result code */
errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
if (errors) {
if (errors & OSC_REQUEST_ERROR)
printk(KERN_DEBUG "_OSC request failed\n");
if (errors & OSC_INVALID_UUID_ERROR)
printk(KERN_DEBUG "_OSC invalid UUID\n");
if (errors & OSC_INVALID_REVISION_ERROR)
printk(KERN_DEBUG "_OSC invalid revision\n");
if (errors & OSC_CAPABILITIES_MASK_ERROR) {
if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE)
goto out_success;
printk(KERN_DEBUG
"Firmware did not grant requested _OSC control\n");
status = AE_SUPPORT;
goto out_kfree;
}
status = AE_ERROR;
goto out_kfree;
}
out_success:
*retval = *((u32 *)(out_obj->buffer.pointer + 8));
status = AE_OK;
out_kfree:
kfree(output.pointer);
return status;
}
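For orientation, the capabilities buffer passed to and returned by _OSC is three DWORDs; the layout assumed by the code above:

/*
 * capbuf layout used above (indices are the OSC_*_TYPE constants):
 *   capbuf[OSC_QUERY_TYPE]   - bit 0 set means query only, no commit
 *   capbuf[OSC_SUPPORT_TYPE] - features the OS supports
 *   capbuf[OSC_CONTROL_TYPE] - controls the OS wants to own
 * The returned buffer mirrors this layout, which is why the granted
 * control set is read from byte offset 8 (the third DWORD).
 */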
static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags)
{
acpi_status status;
u32 support_set, result, capbuf[3];
/* do _OSC query for all possible controls */
support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS);
capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
capbuf[OSC_SUPPORT_TYPE] = support_set;
capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
if (ACPI_SUCCESS(status)) {
root->osc_support_set = support_set;
root->osc_control_qry = result;
root->osc_queried = 1;
}
return status;
}
static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags)
{
acpi_status status;
acpi_handle tmp;
status = acpi_get_handle(root->device->handle, "_OSC", &tmp);
if (ACPI_FAILURE(status))
return status;
mutex_lock(&osc_lock);
status = acpi_pci_query_osc(root, flags);
mutex_unlock(&osc_lock);
return status;
}
static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
{
struct acpi_pci_root *root;
list_for_each_entry(root, &acpi_pci_roots, node) {
if (root->device->handle == handle)
return root;
}
return NULL;
}
/**
* acpi_pci_osc_control_set - commit requested control to Firmware
* @handle: acpi_handle for the target ACPI object
* @flags: driver's requested control bits
*
* Attempt to take over the requested control bits from firmware.
**/
acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags)
{
acpi_status status;
u32 control_req, result, capbuf[3];
acpi_handle tmp;
struct acpi_pci_root *root;
status = acpi_get_handle(handle, "_OSC", &tmp);
if (ACPI_FAILURE(status))
return status;
control_req = (flags & OSC_CONTROL_MASKS);
if (!control_req)
return AE_TYPE;
root = acpi_pci_find_root(handle);
if (!root)
return AE_NOT_EXIST;
mutex_lock(&osc_lock);
/* No need to evaluate _OSC if the control was already granted. */
if ((root->osc_control_set & control_req) == control_req)
goto out;
/* Need to query controls first before requesting them */
if (!root->osc_queried) {
status = acpi_pci_query_osc(root, root->osc_support_set);
if (ACPI_FAILURE(status))
goto out;
}
if ((root->osc_control_qry & control_req) != control_req) {
printk(KERN_DEBUG
"Firmware did not grant requested _OSC control\n");
status = AE_SUPPORT;
goto out;
}
capbuf[OSC_QUERY_TYPE] = 0;
capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set;
capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req;
status = acpi_pci_run_osc(handle, capbuf, &result);
if (ACPI_SUCCESS(status))
root->osc_control_set = result;
out:
mutex_unlock(&osc_lock);
return status;
}
EXPORT_SYMBOL(acpi_pci_osc_control_set);
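A hedged sketch of how a hotplug driver might use the new export; the root-bridge handle lookup is assumed to have happened elsewhere:

/* Sketch: request native PCIe hotplug control from firmware. */
static int example_request_native_hp(acpi_handle root_handle)
{
	acpi_status status;

	status = acpi_pci_osc_control_set(root_handle,
			OSC_PCI_EXPRESS_NATIVE_HP_CONTROL);
	if (ACPI_FAILURE(status))
		return -ENODEV;		/* firmware kept control */
	return 0;
}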
static int __devinit acpi_pci_root_add(struct acpi_device *device)
{
int result = 0;
......@@ -217,7 +393,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
* PCI domains, so we indicate this in _OSC support capabilities.
*/
flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
pci_acpi_osc_support(device->handle, flags);
acpi_pci_osc_support(root, flags);
/*
* Segment
......@@ -353,7 +529,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
if (pci_msi_enabled())
flags |= OSC_MSI_SUPPORT;
if (flags != base_flags)
pci_acpi_osc_support(device->handle, flags);
acpi_pci_osc_support(root, flags);
end:
if (result) {
......
......@@ -59,3 +59,13 @@ config HT_IRQ
This allows native hypertransport devices to use interrupts.
If unsure say Y.
config PCI_IOV
bool "PCI IOV support"
depends on PCI
help
I/O Virtualization is a PCI feature supported by some devices
that allows them to create virtual devices sharing their
physical resources.
If unsure, say N.
......@@ -29,6 +29,8 @@ obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
obj-$(CONFIG_PCI_IOV) += iov.o
#
# Some architectures use the generic PCI setup functions
#
......
......@@ -133,7 +133,7 @@ int pci_bus_add_child(struct pci_bus *bus)
*
* Call hotplug for each new devices.
*/
void pci_bus_add_devices(struct pci_bus *bus)
void pci_bus_add_devices(const struct pci_bus *bus)
{
struct pci_dev *dev;
struct pci_bus *child;
......@@ -184,8 +184,10 @@ void pci_enable_bridges(struct pci_bus *bus)
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->subordinate) {
retval = pci_enable_device(dev);
pci_set_master(dev);
if (atomic_read(&dev->enable_cnt) == 0) {
retval = pci_enable_device(dev);
pci_set_master(dev);
}
pci_enable_bridges(dev->subordinate);
}
}
......
......@@ -30,9 +30,8 @@
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <linux/acpi.h>
#include <linux/pci-acpi.h>
#include <acpi/acpi.h>
#include <acpi/acpi_bus.h>
#define MY_NAME "acpi_pcihp"
......@@ -333,19 +332,14 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
{
acpi_status status = AE_NOT_FOUND;
acpi_handle handle, phandle;
struct pci_bus *pbus = bus;
struct pci_dev *pdev;
do {
pdev = pbus->self;
if (!pdev) {
handle = acpi_get_pci_rootbridge_handle(
pci_domain_nr(pbus), pbus->number);
struct pci_bus *pbus;
handle = NULL;
for (pbus = bus; pbus; pbus = pbus->parent) {
handle = acpi_pci_get_bridge_handle(pbus);
if (handle)
break;
}
handle = DEVICE_ACPI_HANDLE(&(pdev->dev));
pbus = pbus->parent;
} while (!handle);
}
/*
* _HPP settings apply to all child buses, until another _HPP is
......@@ -378,12 +372,10 @@ EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware);
*
* Attempt to take hotplug control from firmware.
*/
int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
{
acpi_status status;
acpi_handle chandle, handle;
struct pci_dev *pdev = dev;
struct pci_bus *parent;
struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |
......@@ -408,33 +400,25 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
dbg("Trying to get hotplug control for %s\n",
(char *)string.pointer);
status = pci_osc_control_set(handle, flags);
status = acpi_pci_osc_control_set(handle, flags);
if (ACPI_SUCCESS(status))
goto got_one;
kfree(string.pointer);
string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
}
pdev = dev;
handle = DEVICE_ACPI_HANDLE(&dev->dev);
while (!handle) {
handle = DEVICE_ACPI_HANDLE(&pdev->dev);
if (!handle) {
/*
* This hotplug controller was not listed in the ACPI name
* space at all. Try to get acpi handle of parent pci bus.
*/
if (!pdev || !pdev->bus->parent)
break;
parent = pdev->bus->parent;
dbg("Could not find %s in acpi namespace, trying parent\n",
pci_name(pdev));
if (!parent->self)
/* Parent must be a host bridge */
handle = acpi_get_pci_rootbridge_handle(
pci_domain_nr(parent),
parent->number);
else
handle = DEVICE_ACPI_HANDLE(&(parent->self->dev));
pdev = parent->self;
struct pci_bus *pbus;
for (pbus = pdev->bus; pbus; pbus = pbus->parent) {
handle = acpi_pci_get_bridge_handle(pbus);
if (handle)
break;
}
}
while (handle) {
......@@ -453,13 +437,13 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
}
dbg("Cannot get control of hotplug hardware for pci %s\n",
pci_name(dev));
pci_name(pdev));
kfree(string.pointer);
return -ENODEV;
got_one:
dbg("Gained control for hotplug HW for pci %s (%s)\n", pci_name(dev),
(char *)string.pointer);
dbg("Gained control for hotplug HW for pci %s (%s)\n",
pci_name(pdev), (char *)string.pointer);
kfree(string.pointer);
return 0;
}
......
/*
* Fake PCI Hot Plug Controller Driver
*
* Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
* Copyright (C) 2003 IBM Corp.
* Copyright (C) 2003 Rolf Eike Beer <eike-kernel@sf-tec.de>
*
* Based on ideas and code from:
* Vladimir Kondratiev <vladimir.kondratiev@intel.com>
* Rolf Eike Beer <eike-kernel@sf-tec.de>
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2 of the License.
*
* Send feedback to <greg@kroah.com>
*/
/* Works like the fakephp driver used to, except a little better.
*
* - It's possible to remove devices with subordinate busses.
* - New PCI devices that appear via any method, not just a fakephp triggered
* rescan, will be noticed.
* - Devices that are removed via any method, not just a fakephp triggered
* removal, will also be noticed.
*
* Uses nothing from the pci-hotplug subsystem.
*/
/*
*
* This driver will "emulate" removing PCI devices from the system. If
* the "power" file is written to with "0" then the specified PCI device
* will be completely removed from the kernel.
*
* WARNING, this does NOT turn off the power to the PCI device. This is
* a "logical" removal, not a physical or electrical removal.
*
* Use this module at your own risk, you have been warned!
*
* Enabling PCI devices is left as an exercise for the reader...
*
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/pci.h>
#include <linux/device.h>
#include "../pci.h"
#if !defined(MODULE)
#define MY_NAME "fakephp"
#else
#define MY_NAME THIS_MODULE->name
#endif
#define dbg(format, arg...) \
do { \
if (debug) \
printk(KERN_DEBUG "%s: " format, \
MY_NAME , ## arg); \
} while (0)
#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg)
#define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>"
#define DRIVER_DESC "Fake PCI Hot Plug Controller Driver"
struct dummy_slot {
struct list_head node;
struct hotplug_slot *slot;
struct pci_dev *dev;
struct work_struct remove_work;
unsigned long removed;
struct legacy_slot {
struct kobject kobj;
struct pci_dev *dev;
struct list_head list;
};
static int debug;
static int dup_slots;
static LIST_HEAD(slot_list);
static struct workqueue_struct *dummyphp_wq;
static void pci_rescan_worker(struct work_struct *work);
static DECLARE_WORK(pci_rescan_work, pci_rescan_worker);
static int enable_slot (struct hotplug_slot *slot);
static int disable_slot (struct hotplug_slot *slot);
static LIST_HEAD(legacy_list);
static struct hotplug_slot_ops dummy_hotplug_slot_ops = {
.owner = THIS_MODULE,
.enable_slot = enable_slot,
.disable_slot = disable_slot,
};
static void dummy_release(struct hotplug_slot *slot)
static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct dummy_slot *dslot = slot->private;
list_del(&dslot->node);
kfree(dslot->slot->info);
kfree(dslot->slot);
pci_dev_put(dslot->dev);
kfree(dslot);
struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
strcpy(buf, "1\n");
return 2;
}
#define SLOT_NAME_SIZE 8
static int add_slot(struct pci_dev *dev)
static void remove_callback(void *data)
{
struct dummy_slot *dslot;
struct hotplug_slot *slot;
char name[SLOT_NAME_SIZE];
int retval = -ENOMEM;
static int count = 1;
slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
if (!slot)
goto error;
slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL);
if (!slot->info)
goto error_slot;
slot->info->power_status = 1;
slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL);
if (!dslot)
goto error_info;
if (dup_slots)
snprintf(name, SLOT_NAME_SIZE, "fake");
else
snprintf(name, SLOT_NAME_SIZE, "fake%d", count++);
dbg("slot->name = %s\n", name);
slot->ops = &dummy_hotplug_slot_ops;
slot->release = &dummy_release;
slot->private = dslot;
retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn), name);
if (retval) {
err("pci_hp_register failed with error %d\n", retval);
goto error_dslot;
}
dbg("slot->name = %s\n", hotplug_slot_name(slot));
dslot->slot = slot;
dslot->dev = pci_dev_get(dev);
list_add (&dslot->node, &slot_list);
return retval;
error_dslot:
kfree(dslot);
error_info:
kfree(slot->info);
error_slot:
kfree(slot);
error:
return retval;
pci_remove_bus_device((struct pci_dev *)data);
}
static int __init pci_scan_buses(void)
static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t len)
{
struct pci_dev *dev = NULL;
int lastslot = 0;
struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
unsigned long val;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
if (PCI_FUNC(dev->devfn) > 0 &&
lastslot == PCI_SLOT(dev->devfn))
continue;
lastslot = PCI_SLOT(dev->devfn);
add_slot(dev);
}
if (strict_strtoul(buf, 0, &val) < 0)
return -EINVAL;
return 0;
if (val)
pci_rescan_bus(slot->dev->bus);
else
sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback,
slot->dev, THIS_MODULE);
return len;
}
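From user space the legacy interface keeps its old shape: writing "0" to a slot's power file logically removes the device, while a non-zero value rescans its bus. A small sketch (the slot name is a made-up example):

/* User-space sketch: logically remove a device via its fake slot. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* Slot names match dev_name(), e.g. "0000:00:01.0" (example). */
	int fd = open("/sys/bus/pci/slots/0000:00:01.0/power", O_WRONLY);

	if (fd < 0)
		return 1;
	/* "0" removes; any non-zero value rescans the parent bus. */
	if (write(fd, "0", 1) != 1) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}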
static void remove_slot(struct dummy_slot *dslot)
{
int retval;
dbg("removing slot %s\n", hotplug_slot_name(dslot->slot));
retval = pci_hp_deregister(dslot->slot);
if (retval)
err("Problem unregistering a slot %s\n",
hotplug_slot_name(dslot->slot));
}
static struct attribute *legacy_attrs[] = {
&(struct attribute){ .name = "power", .mode = 0644 },
NULL,
};
/* called from the single-threaded workqueue handler to remove a slot */
static void remove_slot_worker(struct work_struct *work)
static void legacy_release(struct kobject *kobj)
{
struct dummy_slot *dslot =
container_of(work, struct dummy_slot, remove_work);
remove_slot(dslot);
}
struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
/**
* pci_rescan_slot - Rescan slot
* @temp: device template; its bus and devfn fields must be set
*
* Tries hard not to re-enable already existing devices;
* also handles scanning of subfunctions.
*/
static int pci_rescan_slot(struct pci_dev *temp)
{
struct pci_bus *bus = temp->bus;
struct pci_dev *dev;
int func;
u8 hdr_type;
int count = 0;
if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) {
temp->hdr_type = hdr_type & 0x7f;
if ((dev = pci_get_slot(bus, temp->devfn)) != NULL)
pci_dev_put(dev);
else {
dev = pci_scan_single_device(bus, temp->devfn);
if (dev) {
dbg("New device on %s function %x:%x\n",
bus->name, temp->devfn >> 3,
temp->devfn & 7);
count++;
}
}
/* multifunction device? */
if (!(hdr_type & 0x80))
return count;
/* continue scanning for other functions */
for (func = 1, temp->devfn++; func < 8; func++, temp->devfn++) {
if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type))
continue;
temp->hdr_type = hdr_type & 0x7f;
if ((dev = pci_get_slot(bus, temp->devfn)) != NULL)
pci_dev_put(dev);
else {
dev = pci_scan_single_device(bus, temp->devfn);
if (dev) {
dbg("New device on %s function %x:%x\n",
bus->name, temp->devfn >> 3,
temp->devfn & 7);
count++;
}
}
}
}
return count;
pci_dev_put(slot->dev);
kfree(slot);
}
static struct kobj_type legacy_ktype = {
.sysfs_ops = &(struct sysfs_ops){
.store = legacy_store, .show = legacy_show
},
.release = &legacy_release,
.default_attrs = legacy_attrs,
};
/**
* pci_rescan_bus - Rescan PCI bus
* @bus: the PCI bus to rescan
*
* Call pci_rescan_slot() for each possible slot on the bus.
*/
static void pci_rescan_bus(const struct pci_bus *bus)
static int legacy_add_slot(struct pci_dev *pdev)
{
unsigned int devfn;
struct pci_dev *dev;
int retval;
int found = 0;
dev = alloc_pci_dev();
if (!dev)
return;
struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL);
dev->bus = (struct pci_bus*)bus;
dev->sysdata = bus->sysdata;
for (devfn = 0; devfn < 0x100; devfn += 8) {
dev->devfn = devfn;
found += pci_rescan_slot(dev);
}
if (found) {
pci_bus_assign_resources(bus);
list_for_each_entry(dev, &bus->devices, bus_list) {
/* Skip already-added devices */
if (dev->is_added)
continue;
retval = pci_bus_add_device(dev);
if (retval)
dev_err(&dev->dev,
"Error adding device, continuing\n");
else
add_slot(dev);
}
pci_bus_add_devices(bus);
}
kfree(dev);
}
if (!slot)
return -ENOMEM;
/* recursively scan all buses */
static void pci_rescan_buses(const struct list_head *list)
{
const struct list_head *l;
list_for_each(l,list) {
const struct pci_bus *b = pci_bus_b(l);
pci_rescan_bus(b);
pci_rescan_buses(&b->children);
if (kobject_init_and_add(&slot->kobj, &legacy_ktype,
&pci_slots_kset->kobj, "%s",
dev_name(&pdev->dev))) {
dev_warn(&pdev->dev, "Failed to created legacy fake slot\n");
return -EINVAL;
}
}
slot->dev = pci_dev_get(pdev);
/* initiate rescan of all pci buses */
static inline void pci_rescan(void) {
pci_rescan_buses(&pci_root_buses);
}
/* called from the single-threaded workqueue handler to rescan all pci buses */
static void pci_rescan_worker(struct work_struct *work)
{
pci_rescan();
}
list_add(&slot->list, &legacy_list);
static int enable_slot(struct hotplug_slot *hotplug_slot)
{
/* mis-use enable_slot for rescanning of the pci bus */
cancel_work_sync(&pci_rescan_work);
queue_work(dummyphp_wq, &pci_rescan_work);
return 0;
}
static int disable_slot(struct hotplug_slot *slot)
static int legacy_notify(struct notifier_block *nb,
unsigned long action, void *data)
{
struct dummy_slot *dslot;
struct pci_dev *dev;
int func;
if (!slot)
return -ENODEV;
dslot = slot->private;
dbg("%s - physical_slot = %s\n", __func__, hotplug_slot_name(slot));
struct pci_dev *pdev = to_pci_dev(data);
for (func = 7; func >= 0; func--) {
dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func);
if (!dev)
continue;
if (action == BUS_NOTIFY_ADD_DEVICE) {
legacy_add_slot(pdev);
} else if (action == BUS_NOTIFY_DEL_DEVICE) {
struct legacy_slot *slot;
if (test_and_set_bit(0, &dslot->removed)) {
dbg("Slot already scheduled for removal\n");
pci_dev_put(dev);
return -ENODEV;
}
list_for_each_entry(slot, &legacy_list, list)
if (slot->dev == pdev)
goto found;
/* remove the device from the pci core */
pci_remove_bus_device(dev);
/* queue work item to blow away this sysfs entry and other
* parts.
*/
INIT_WORK(&dslot->remove_work, remove_slot_worker);
queue_work(dummyphp_wq, &dslot->remove_work);
pci_dev_put(dev);
dev_warn(&pdev->dev, "Missing legacy fake slot?");
return -ENODEV;
found:
kobject_del(&slot->kobj);
list_del(&slot->list);
kobject_put(&slot->kobj);
}
return 0;
}
static void cleanup_slots (void)
{
struct list_head *tmp;
struct list_head *next;
struct dummy_slot *dslot;
destroy_workqueue(dummyphp_wq);
list_for_each_safe (tmp, next, &slot_list) {
dslot = list_entry (tmp, struct dummy_slot, node);
remove_slot(dslot);
}
}
static struct notifier_block legacy_notifier = {
.notifier_call = legacy_notify
};
static int __init dummyphp_init(void)
static int __init init_legacy(void)
{
info(DRIVER_DESC "\n");
struct pci_dev *pdev = NULL;
dummyphp_wq = create_singlethread_workqueue(MY_NAME);
if (!dummyphp_wq)
return -ENOMEM;
/* Add existing devices */
while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev)))
legacy_add_slot(pdev);
return pci_scan_buses();
/* Be alerted of any new ones */
bus_register_notifier(&pci_bus_type, &legacy_notifier);
return 0;
}
module_init(init_legacy);
static void __exit dummyphp_exit(void)
static void __exit remove_legacy(void)
{
cleanup_slots();
struct legacy_slot *slot, *tmp;
bus_unregister_notifier(&pci_bus_type, &legacy_notifier);
list_for_each_entry_safe(slot, tmp, &legacy_list, list) {
list_del(&slot->list);
kobject_del(&slot->kobj);
kobject_put(&slot->kobj);
}
}
module_exit(remove_legacy);
module_init(dummyphp_init);
module_exit(dummyphp_exit);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_AUTHOR("Trent Piepho <xyzzy@speakeasy.org>");
MODULE_DESCRIPTION("Legacy version of the fakephp interface");
MODULE_LICENSE("GPL");
module_param(debug, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
module_param(dup_slots, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dup_slots, "Force duplicate slot names for debugging");
......@@ -46,10 +46,10 @@ extern int pciehp_force;
extern struct workqueue_struct *pciehp_wq;
#define dbg(format, arg...) \
do { \
if (pciehp_debug) \
printk("%s: " format, MY_NAME , ## arg); \
} while (0)
do { \
if (pciehp_debug) \
printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \
} while (0)
#define err(format, arg...) \
printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
#define info(format, arg...) \
......@@ -60,7 +60,7 @@ extern struct workqueue_struct *pciehp_wq;
#define ctrl_dbg(ctrl, format, arg...) \
do { \
if (pciehp_debug) \
dev_printk(, &ctrl->pcie->device, \
dev_printk(KERN_DEBUG, &ctrl->pcie->device, \
format, ## arg); \
} while (0)
#define ctrl_err(ctrl, format, arg...) \
......@@ -108,10 +108,11 @@ struct controller {
u32 slot_cap;
u8 cap_base;
struct timer_list poll_timer;
int cmd_busy;
unsigned int cmd_busy:1;
unsigned int no_cmd_complete:1;
unsigned int link_active_reporting:1;
unsigned int notification_enabled:1;
unsigned int power_fault_detected;
};
#define INT_BUTTON_IGNORE 0
......
......@@ -67,37 +67,27 @@ static int __init parse_detect_mode(void)
return PCIEHP_DETECT_DEFAULT;
}
static struct pcie_port_service_id __initdata port_pci_ids[] = {
{
.vendor = PCI_ANY_ID,
.device = PCI_ANY_ID,
.port_type = PCIE_ANY_PORT,
.service_type = PCIE_PORT_SERVICE_HP,
.driver_data = 0,
}, { /* end: all zeroes */ }
};
static int __initdata dup_slot_id;
static int __initdata acpi_slot_detected;
static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots);
/* Dummy driver for duplicate name detection */
static int __init dummy_probe(struct pcie_device *dev,
const struct pcie_port_service_id *id)
static int __init dummy_probe(struct pcie_device *dev)
{
int pos;
u32 slot_cap;
struct slot *slot, *tmp;
struct pci_dev *pdev = dev->port;
struct pci_bus *pbus = pdev->subordinate;
if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL)))
return -ENOMEM;
/* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
if (pciehp_get_hp_hw_control_from_firmware(pdev))
return -ENODEV;
if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP)))
return -ENODEV;
pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap);
slot = kzalloc(sizeof(*slot), GFP_KERNEL);
if (!slot)
return -ENOMEM;
slot->number = slot_cap >> 19;
list_for_each_entry(tmp, &dummy_slots, slot_list) {
if (tmp->number == slot->number)
......@@ -111,7 +101,8 @@ static int __init dummy_probe(struct pcie_device *dev,
static struct pcie_port_service_driver __initdata dummy_driver = {
.name = "pciehp_dummy",
.id_table = port_pci_ids,
.port_type = PCIE_ANY_PORT,
.service = PCIE_PORT_SERVICE_HP,
.probe = dummy_probe,
};
......
......@@ -401,7 +401,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
return 0;
}
static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_id *id)
static int pciehp_probe(struct pcie_device *dev)
{
int rc;
struct controller *ctrl;
......@@ -475,7 +475,7 @@ static void pciehp_remove (struct pcie_device *dev)
}
#ifdef CONFIG_PM
static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
static int pciehp_suspend (struct pcie_device *dev)
{
dev_info(&dev->device, "%s ENTRY\n", __func__);
return 0;
......@@ -503,20 +503,12 @@ static int pciehp_resume (struct pcie_device *dev)
}
return 0;
}
#endif
static struct pcie_port_service_id port_pci_ids[] = { {
.vendor = PCI_ANY_ID,
.device = PCI_ANY_ID,
.port_type = PCIE_ANY_PORT,
.service_type = PCIE_PORT_SERVICE_HP,
.driver_data = 0,
}, { /* end: all zeroes */ }
};
#endif /* PM */
static struct pcie_port_service_driver hpdriver_portdrv = {
.name = PCIE_MODULE_NAME,
.id_table = &port_pci_ids[0],
.port_type = PCIE_ANY_PORT,
.service = PCIE_PORT_SERVICE_HP,
.probe = pciehp_probe,
.remove = pciehp_remove,
......
......@@ -548,23 +548,21 @@ static int hpc_power_on_slot(struct slot * slot)
slot_cmd = POWER_ON;
cmd_mask = PCI_EXP_SLTCTL_PCC;
/* Enable detection that we turned off at slot power-off time */
if (!pciehp_poll_mode) {
slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
PCI_EXP_SLTCTL_PDCE);
cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
PCI_EXP_SLTCTL_PDCE);
/* Enable power fault detection turned off at power off time */
slot_cmd |= PCI_EXP_SLTCTL_PFDE;
cmd_mask |= PCI_EXP_SLTCTL_PFDE;
}
retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
if (retval) {
ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd);
return -1;
return retval;
}
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
ctrl->power_fault_detected = 0;
return retval;
}
......@@ -621,18 +619,10 @@ static int hpc_power_off_slot(struct slot * slot)
slot_cmd = POWER_OFF;
cmd_mask = PCI_EXP_SLTCTL_PCC;
/*
* If we get MRL or presence detect interrupts now, the isr
* will notice the sticky power-fault bit too and issue power
* indicator change commands. This will lead to an endless loop
* of command completions, since the power-fault bit remains on
* till the slot is powered on again.
*/
if (!pciehp_poll_mode) {
slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
PCI_EXP_SLTCTL_PDCE);
cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
PCI_EXP_SLTCTL_PDCE);
/* Disable power fault detection */
slot_cmd &= ~PCI_EXP_SLTCTL_PFDE;
cmd_mask |= PCI_EXP_SLTCTL_PFDE;
}
retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
......@@ -672,10 +662,11 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC |
PCI_EXP_SLTSTA_CC);
detected &= ~intr_loc;
intr_loc |= detected;
if (!intr_loc)
return IRQ_NONE;
if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) {
if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, intr_loc)) {
ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
__func__);
return IRQ_NONE;
......@@ -709,9 +700,10 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
pciehp_handle_presence_change(p_slot);
/* Check Power Fault Detected */
if (intr_loc & PCI_EXP_SLTSTA_PFD)
if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
ctrl->power_fault_detected = 1;
pciehp_handle_power_fault(p_slot);
}
return IRQ_HANDLED;
}
......
......@@ -48,10 +48,10 @@ extern int shpchp_debug;
extern struct workqueue_struct *shpchp_wq;
#define dbg(format, arg...) \
do { \
if (shpchp_debug) \
printk("%s: " format, MY_NAME , ## arg); \
} while (0)
do { \
if (shpchp_debug) \
printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \
} while (0)
#define err(format, arg...) \
printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
#define info(format, arg...) \
......@@ -62,7 +62,7 @@ extern struct workqueue_struct *shpchp_wq;
#define ctrl_dbg(ctrl, format, arg...) \
do { \
if (shpchp_debug) \
dev_printk(, &ctrl->pci_dev->dev, \
dev_printk(KERN_DEBUG, &ctrl->pci_dev->dev, \
format, ## arg); \
} while (0)
#define ctrl_err(ctrl, format, arg...) \
......
......@@ -137,7 +137,7 @@ int __ref shpchp_configure_device(struct slot *p_slot)
busnr))
break;
}
if (busnr >= end) {
if (busnr > end) {
ctrl_err(ctrl,
"No free bus for hot-added bridge\n");
pci_dev_put(dev);
......
......@@ -1782,7 +1782,7 @@ static inline void iommu_prepare_isa(void)
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
if (ret)
printk("IOMMU: Failed to create 0-64M identity map, "
printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, "
"floppy might not work\n");
}
......
/*
* drivers/pci/iov.c
*
* Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
*
* PCI Express I/O Virtualization (IOV) support.
* Single Root IOV 1.0
*/
#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/delay.h>
#include "pci.h"
#define VIRTFN_ID_LEN 16
static inline u8 virtfn_bus(struct pci_dev *dev, int id)
{
return dev->bus->number + ((dev->devfn + dev->sriov->offset +
dev->sriov->stride * id) >> 8);
}
static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
{
return (dev->devfn + dev->sriov->offset +
dev->sriov->stride * id) & 0xff;
}
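The two helpers encode the SR-IOV routing-ID arithmetic: a VF's function number is the PF's devfn plus offset plus stride * id, with any carry out of the low eight bits moving to a higher bus number. A worked example with made-up values:

/*
 * Hypothetical PF: bus 0x10, devfn 0x00, offset 1, stride 1.
 *   VF 0:   0x00 + 1 + 1*0   = 0x001 -> bus 0x10, devfn 0x01
 *   VF 254: 0x00 + 1 + 1*254 = 0x0ff -> bus 0x10, devfn 0xff
 *   VF 255: 0x00 + 1 + 1*255 = 0x100 -> bus 0x11, devfn 0x00
 */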
static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
{
int rc;
struct pci_bus *child;
if (bus->number == busnr)
return bus;
child = pci_find_bus(pci_domain_nr(bus), busnr);
if (child)
return child;
child = pci_add_new_bus(bus, NULL, busnr);
if (!child)
return NULL;
child->subordinate = busnr;
child->dev.parent = bus->bridge;
rc = pci_bus_add_child(child);
if (rc) {
pci_remove_bus(child);
return NULL;
}
return child;
}
static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
{
struct pci_bus *child;
if (bus->number == busnr)
return;
child = pci_find_bus(pci_domain_nr(bus), busnr);
BUG_ON(!child);
if (list_empty(&child->devices))
pci_remove_bus(child);
}
static int virtfn_add(struct pci_dev *dev, int id, int reset)
{
int i;
int rc;
u64 size;
char buf[VIRTFN_ID_LEN];
struct pci_dev *virtfn;
struct resource *res;
struct pci_sriov *iov = dev->sriov;
virtfn = alloc_pci_dev();
if (!virtfn)
return -ENOMEM;
mutex_lock(&iov->dev->sriov->lock);
virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
if (!virtfn->bus) {
kfree(virtfn);
mutex_unlock(&iov->dev->sriov->lock);
return -ENOMEM;
}
virtfn->devfn = virtfn_devfn(dev, id);
virtfn->vendor = dev->vendor;
pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
pci_setup_device(virtfn);
virtfn->dev.parent = dev->dev.parent;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = dev->resource + PCI_IOV_RESOURCES + i;
if (!res->parent)
continue;
virtfn->resource[i].name = pci_name(virtfn);
virtfn->resource[i].flags = res->flags;
size = resource_size(res);
do_div(size, iov->total);
virtfn->resource[i].start = res->start + size * id;
virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
rc = request_resource(res, &virtfn->resource[i]);
BUG_ON(rc);
}
if (reset)
pci_execute_reset_function(virtfn);
pci_device_add(virtfn, virtfn->bus);
mutex_unlock(&iov->dev->sriov->lock);
virtfn->physfn = pci_dev_get(dev);
virtfn->is_virtfn = 1;
rc = pci_bus_add_device(virtfn);
if (rc)
goto failed1;
sprintf(buf, "virtfn%u", id);
rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
if (rc)
goto failed1;
rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
if (rc)
goto failed2;
kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
return 0;
failed2:
sysfs_remove_link(&dev->dev.kobj, buf);
failed1:
pci_dev_put(dev);
mutex_lock(&iov->dev->sriov->lock);
pci_remove_bus_device(virtfn);
virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
mutex_unlock(&iov->dev->sriov->lock);
return rc;
}
static void virtfn_remove(struct pci_dev *dev, int id, int reset)
{
char buf[VIRTFN_ID_LEN];
struct pci_bus *bus;
struct pci_dev *virtfn;
struct pci_sriov *iov = dev->sriov;
bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
if (!bus)
return;
virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
if (!virtfn)
return;
pci_dev_put(virtfn);
if (reset) {
device_release_driver(&virtfn->dev);
pci_execute_reset_function(virtfn);
}
sprintf(buf, "virtfn%u", id);
sysfs_remove_link(&dev->dev.kobj, buf);
sysfs_remove_link(&virtfn->dev.kobj, "physfn");
mutex_lock(&iov->dev->sriov->lock);
pci_remove_bus_device(virtfn);
virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
mutex_unlock(&iov->dev->sriov->lock);
pci_dev_put(dev);
}
static int sriov_migration(struct pci_dev *dev)
{
u16 status;
struct pci_sriov *iov = dev->sriov;
if (!iov->nr_virtfn)
return 0;
if (!(iov->cap & PCI_SRIOV_CAP_VFM))
return 0;
pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status);
if (!(status & PCI_SRIOV_STATUS_VFM))
return 0;
schedule_work(&iov->mtask);
return 1;
}
static void sriov_migration_task(struct work_struct *work)
{
int i;
u8 state;
u16 status;
struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask);
for (i = iov->initial; i < iov->nr_virtfn; i++) {
state = readb(iov->mstate + i);
if (state == PCI_SRIOV_VFM_MI) {
writeb(PCI_SRIOV_VFM_AV, iov->mstate + i);
state = readb(iov->mstate + i);
if (state == PCI_SRIOV_VFM_AV)
virtfn_add(iov->self, i, 1);
} else if (state == PCI_SRIOV_VFM_MO) {
virtfn_remove(iov->self, i, 1);
writeb(PCI_SRIOV_VFM_UA, iov->mstate + i);
state = readb(iov->mstate + i);
if (state == PCI_SRIOV_VFM_AV)
virtfn_add(iov->self, i, 0);
}
}
pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status);
status &= ~PCI_SRIOV_STATUS_VFM;
pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status);
}
static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn)
{
int bir;
u32 table;
resource_size_t pa;
struct pci_sriov *iov = dev->sriov;
if (nr_virtfn <= iov->initial)
return 0;
pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table);
bir = PCI_SRIOV_VFM_BIR(table);
if (bir > PCI_STD_RESOURCE_END)
return -EIO;
table = PCI_SRIOV_VFM_OFFSET(table);
if (table + nr_virtfn > pci_resource_len(dev, bir))
return -EIO;
pa = pci_resource_start(dev, bir) + table;
iov->mstate = ioremap(pa, nr_virtfn);
if (!iov->mstate)
return -ENOMEM;
INIT_WORK(&iov->mtask, sriov_migration_task);
iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR;
pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
return 0;
}
static void sriov_disable_migration(struct pci_dev *dev)
{
struct pci_sriov *iov = dev->sriov;
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR);
pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
cancel_work_sync(&iov->mtask);
iounmap(iov->mstate);
}
static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
{
int rc;
int i, j;
int nres;
u16 offset, stride, initial;
struct resource *res;
struct pci_dev *pdev;
struct pci_sriov *iov = dev->sriov;
if (!nr_virtfn)
return 0;
if (iov->nr_virtfn)
return -EINVAL;
pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
if (initial > iov->total ||
(!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
return -EIO;
if (nr_virtfn < 0 || nr_virtfn > iov->total ||
(!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
return -EINVAL;
pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
if (!offset || (nr_virtfn > 1 && !stride))
return -EIO;
nres = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = dev->resource + PCI_IOV_RESOURCES + i;
if (res->parent)
nres++;
}
if (nres != iov->nres) {
dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
return -ENOMEM;
}
iov->offset = offset;
iov->stride = stride;
if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
return -ENOMEM;
}
if (iov->link != dev->devfn) {
pdev = pci_get_slot(dev->bus, iov->link);
if (!pdev)
return -ENODEV;
pci_dev_put(pdev);
if (!pdev->is_physfn)
return -ENODEV;
rc = sysfs_create_link(&dev->dev.kobj,
&pdev->dev.kobj, "dep_link");
if (rc)
return rc;
}
iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
pci_block_user_cfg_access(dev);
pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
msleep(100);
pci_unblock_user_cfg_access(dev);
iov->initial = initial;
if (nr_virtfn < initial)
initial = nr_virtfn;
for (i = 0; i < initial; i++) {
rc = virtfn_add(dev, i, 0);
if (rc)
goto failed;
}
if (iov->cap & PCI_SRIOV_CAP_VFM) {
rc = sriov_enable_migration(dev, nr_virtfn);
if (rc)
goto failed;
}
kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
iov->nr_virtfn = nr_virtfn;
return 0;
failed:
for (j = 0; j < i; j++)
virtfn_remove(dev, j, 0);
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_block_user_cfg_access(dev);
pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
ssleep(1);
pci_unblock_user_cfg_access(dev);
if (iov->link != dev->devfn)
sysfs_remove_link(&dev->dev.kobj, "dep_link");
return rc;
}
static void sriov_disable(struct pci_dev *dev)
{
int i;
struct pci_sriov *iov = dev->sriov;
if (!iov->nr_virtfn)
return;
if (iov->cap & PCI_SRIOV_CAP_VFM)
sriov_disable_migration(dev);
for (i = 0; i < iov->nr_virtfn; i++)
virtfn_remove(dev, i, 0);
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_block_user_cfg_access(dev);
pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
ssleep(1);
pci_unblock_user_cfg_access(dev);
if (iov->link != dev->devfn)
sysfs_remove_link(&dev->dev.kobj, "dep_link");
iov->nr_virtfn = 0;
}
static int sriov_init(struct pci_dev *dev, int pos)
{
int i;
int rc;
int nres;
u32 pgsz;
u16 ctrl, total, offset, stride;
struct pci_sriov *iov;
struct resource *res;
struct pci_dev *pdev;
if (dev->pcie_type != PCI_EXP_TYPE_RC_END &&
dev->pcie_type != PCI_EXP_TYPE_ENDPOINT)
return -ENODEV;
pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
if (ctrl & PCI_SRIOV_CTRL_VFE) {
pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
ssleep(1);
}
pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
if (!total)
return 0;
ctrl = 0;
list_for_each_entry(pdev, &dev->bus->devices, bus_list)
if (pdev->is_physfn)
goto found;
pdev = NULL;
if (pci_ari_enabled(dev->bus))
ctrl |= PCI_SRIOV_CTRL_ARI;
found:
pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
if (!offset || (total > 1 && !stride))
return -EIO;
pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
pgsz &= ~((1 << i) - 1);
if (!pgsz)
return -EIO;
pgsz &= ~(pgsz - 1);
pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
nres = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = dev->resource + PCI_IOV_RESOURCES + i;
i += __pci_read_base(dev, pci_bar_unknown, res,
pos + PCI_SRIOV_BAR + i * 4);
if (!res->flags)
continue;
if (resource_size(res) & (PAGE_SIZE - 1)) {
rc = -EIO;
goto failed;
}
res->end = res->start + resource_size(res) * total - 1;
nres++;
}
iov = kzalloc(sizeof(*iov), GFP_KERNEL);
if (!iov) {
rc = -ENOMEM;
goto failed;
}
iov->pos = pos;
iov->nres = nres;
iov->ctrl = ctrl;
iov->total = total;
iov->offset = offset;
iov->stride = stride;
iov->pgsz = pgsz;
iov->self = dev;
pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
if (pdev)
iov->dev = pci_dev_get(pdev);
else {
iov->dev = dev;
mutex_init(&iov->lock);
}
dev->sriov = iov;
dev->is_physfn = 1;
return 0;
failed:
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = dev->resource + PCI_IOV_RESOURCES + i;
res->flags = 0;
}
return rc;
}
static void sriov_release(struct pci_dev *dev)
{
BUG_ON(dev->sriov->nr_virtfn);
if (dev == dev->sriov->dev)
mutex_destroy(&dev->sriov->lock);
else
pci_dev_put(dev->sriov->dev);
kfree(dev->sriov);
dev->sriov = NULL;
}
static void sriov_restore_state(struct pci_dev *dev)
{
int i;
u16 ctrl;
struct pci_sriov *iov = dev->sriov;
pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
if (ctrl & PCI_SRIOV_CTRL_VFE)
return;
for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
pci_update_resource(dev, i);
pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
msleep(100);
}
/**
* pci_iov_init - initialize the IOV capability
* @dev: the PCI device
*
* Returns 0 on success, or negative on failure.
*/
int pci_iov_init(struct pci_dev *dev)
{
int pos;
if (!dev->is_pcie)
return -ENODEV;
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
if (pos)
return sriov_init(dev, pos);
return -ENODEV;
}
/**
* pci_iov_release - release resources used by the IOV capability
* @dev: the PCI device
*/
void pci_iov_release(struct pci_dev *dev)
{
if (dev->is_physfn)
sriov_release(dev);
}
/**
* pci_iov_resource_bar - get position of the SR-IOV BAR
* @dev: the PCI device
* @resno: the resource number
* @type: the BAR type to be filled in
*
* Returns position of the BAR encapsulated in the SR-IOV capability.
*/
int pci_iov_resource_bar(struct pci_dev *dev, int resno,
enum pci_bar_type *type)
{
if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
return 0;
BUG_ON(!dev->is_physfn);
*type = pci_bar_unknown;
return dev->sriov->pos + PCI_SRIOV_BAR +
4 * (resno - PCI_IOV_RESOURCES);
}
/**
* pci_restore_iov_state - restore the state of the IOV capability
* @dev: the PCI device
*/
void pci_restore_iov_state(struct pci_dev *dev)
{
if (dev->is_physfn)
sriov_restore_state(dev);
}
/**
* pci_iov_bus_range - find bus range used by Virtual Function
* @bus: the PCI bus
*
* Returns the maximum number of buses (excluding the current one) used by
* Virtual Functions.
*/
int pci_iov_bus_range(struct pci_bus *bus)
{
int max = 0;
u8 busnr;
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
if (!dev->is_physfn)
continue;
busnr = virtfn_bus(dev, dev->sriov->total - 1);
if (busnr > max)
max = busnr;
}
return max ? max - bus->number : 0;
}
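To see why extra buses may be needed: per the SR-IOV spec, a VF's routing ID is the PF's routing ID plus offset plus vf * stride, and the bus number is the high byte of that ID (virtfn_bus(), not shown here, performs this computation). A worked example with hypothetical values:

#include <stdio.h>

int main(void)
{
	unsigned int pf_bus = 5, pf_devfn = 0x00;	/* hypothetical PF at 05:00.0 */
	unsigned int offset = 1, stride = 2, total = 200;
	unsigned int last_rid = (pf_bus << 8) + pf_devfn +
				offset + (total - 1) * stride;

	/* 0x500 + 1 + 398 = 0x68f -> the last VF sits on bus 6, one past the PF */
	printf("VF bus range: %u extra bus(es)\n", (last_rid >> 8) - pf_bus);
	return 0;
}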
/**
* pci_enable_sriov - enable the SR-IOV capability
* @dev: the PCI device
*
* Returns 0 on success, or negative on failure.
*/
int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{
might_sleep();
if (!dev->is_physfn)
return -ENODEV;
return sriov_enable(dev, nr_virtfn);
}
EXPORT_SYMBOL_GPL(pci_enable_sriov);
/**
* pci_disable_sriov - disable the SR-IOV capability
* @dev: the PCI device
*/
void pci_disable_sriov(struct pci_dev *dev)
{
might_sleep();
if (!dev->is_physfn)
return;
sriov_disable(dev);
}
EXPORT_SYMBOL_GPL(pci_disable_sriov);
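As a usage sketch, a Physical Function driver would call this pair from its probe and remove paths; the foo hooks and the VF count below are hypothetical, only pci_enable_sriov()/pci_disable_sriov() come from this file:

/* Hypothetical PF driver hooks; only the pci_*_sriov() calls are real. */
static int foo_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	int err;

	/* ... device-specific setup ... */
	err = pci_enable_sriov(dev, 4);	/* ask for 4 Virtual Functions */
	if (err)
		dev_warn(&dev->dev, "SR-IOV enable failed: %d\n", err);
	return 0;
}

static void foo_remove(struct pci_dev *dev)
{
	pci_disable_sriov(dev);		/* VFs must be gone before release */
	/* ... device-specific teardown ... */
}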
/**
* pci_sriov_migration - notify SR-IOV core of Virtual Function Migration
* @dev: the PCI device
*
* Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not.
*
* The Physical Function driver is responsible for registering an IRQ handler
* using the VF Migration Interrupt Message Number, and for calling this
* function when the interrupt is generated by the hardware.
*/
irqreturn_t pci_sriov_migration(struct pci_dev *dev)
{
if (!dev->is_physfn)
return IRQ_NONE;
return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
}
EXPORT_SYMBOL_GPL(pci_sriov_migration);
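A sketch of the arrangement the comment above describes; the handler name and its registration are hypothetical, only pci_sriov_migration() is from this file:

/* Hypothetical PF driver handler; dev_id is assumed to be the PF pci_dev. */
static irqreturn_t foo_migration_irq(int irq, void *dev_id)
{
	struct pci_dev *pdev = dev_id;

	/* Let the SR-IOV core handle any pending VF migration events. */
	return pci_sriov_migration(pdev);
}
/* The driver registers this with request_irq() on the vector matching the
 * VF Migration Interrupt Message Number advertised by the device. */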
......@@ -27,48 +27,53 @@ static int pci_msi_enable = 1;
/* Arch hooks */
int __attribute__ ((weak))
arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
#ifndef arch_msi_check_device
int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
return 0;
}
#endif
int __attribute__ ((weak))
arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry)
{
return 0;
}
int __attribute__ ((weak))
arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
#ifndef arch_setup_msi_irqs
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
struct msi_desc *entry;
int ret;
/*
* If an architecture wants to support multiple MSI, it needs to
* override arch_setup_msi_irqs()
*/
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
list_for_each_entry(entry, &dev->msi_list, list) {
ret = arch_setup_msi_irq(dev, entry);
if (ret)
if (ret < 0)
return ret;
if (ret > 0)
return -ENOSPC;
}
return 0;
}
#endif
void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq)
{
return;
}
void __attribute__ ((weak))
arch_teardown_msi_irqs(struct pci_dev *dev)
#ifndef arch_teardown_msi_irqs
void arch_teardown_msi_irqs(struct pci_dev *dev)
{
struct msi_desc *entry;
list_for_each_entry(entry, &dev->msi_list, list) {
if (entry->irq != 0)
arch_teardown_msi_irq(entry->irq);
int i, nvec;
if (entry->irq == 0)
continue;
nvec = 1 << entry->msi_attrib.multiple;
for (i = 0; i < nvec; i++)
arch_teardown_msi_irq(entry->irq + i);
}
}
#endif
static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
{
......@@ -111,27 +116,14 @@ static inline __attribute_const__ u32 msi_mask(unsigned x)
return (1 << (1 << x)) - 1;
}
static void msix_flush_writes(struct irq_desc *desc)
static inline __attribute_const__ u32 msi_capable_mask(u16 control)
{
struct msi_desc *entry;
return msi_mask((control >> 1) & 7);
}
entry = get_irq_desc_msi(desc);
BUG_ON(!entry || !entry->dev);
switch (entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
/* nothing to do */
break;
case PCI_CAP_ID_MSIX:
{
int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
readl(entry->mask_base + offset);
break;
}
default:
BUG();
break;
}
static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
{
return msi_mask((control >> 4) & 7);
}
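These two helpers decode the Multiple Message Capable (bits 3:1) and Multiple Message Enable (bits 6:4) fields of the MSI Message Control register: a field value of x means 2^x vectors, so msi_mask(x) yields a mask with the 2^x low bits set. A worked example with an illustrative control value:

#include <assert.h>

/* Same arithmetic as msi_mask()/msi_capable_mask()/msi_enabled_mask(). */
static unsigned msi_mask_demo(unsigned x)
{
	return (1u << (1u << x)) - 1;		/* 2^x low bits set */
}

int main(void)
{
	unsigned short control = 0x0016;	/* illustrative Message Control */

	assert(msi_mask_demo((control >> 1) & 7) == 0xff);	/* 8 capable */
	assert(msi_mask_demo((control >> 4) & 7) == 0x03);	/* 2 enabled */
	return 0;
}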
/*
......@@ -143,49 +135,71 @@ static void msix_flush_writes(struct irq_desc *desc)
* Returns 1 if it succeeded in masking the interrupt and 0 if the device
* doesn't support MSI masking.
*/
static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag)
static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
struct msi_desc *entry;
u32 mask_bits = desc->masked;
entry = get_irq_desc_msi(desc);
BUG_ON(!entry || !entry->dev);
switch (entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
if (entry->msi_attrib.maskbit) {
int pos;
u32 mask_bits;
pos = (long)entry->mask_base;
pci_read_config_dword(entry->dev, pos, &mask_bits);
mask_bits &= ~(mask);
mask_bits |= flag & mask;
pci_write_config_dword(entry->dev, pos, mask_bits);
} else {
return 0;
}
break;
case PCI_CAP_ID_MSIX:
{
int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
writel(flag, entry->mask_base + offset);
readl(entry->mask_base + offset);
break;
}
default:
BUG();
break;
if (!desc->msi_attrib.maskbit)
return;
mask_bits &= ~mask;
mask_bits |= flag;
pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
desc->masked = mask_bits;
}
/*
* This internal function does not flush PCI writes to the device.
* All users must ensure that they read from the device before either
* assuming that the device state is up to date, or returning out of this
* file. This saves a few milliseconds when initialising devices with lots
* of MSI-X interrupts.
*/
static void msix_mask_irq(struct msi_desc *desc, u32 flag)
{
u32 mask_bits = desc->masked;
unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
mask_bits &= ~1;
mask_bits |= flag;
writel(mask_bits, desc->mask_base + offset);
desc->masked = mask_bits;
}
static void msi_set_mask_bit(unsigned irq, u32 flag)
{
struct msi_desc *desc = get_irq_msi(irq);
if (desc->msi_attrib.is_msix) {
msix_mask_irq(desc, flag);
readl(desc->mask_base); /* Flush write to device */
} else {
unsigned offset = irq - desc->dev->irq;
msi_mask_irq(desc, 1 << offset, flag << offset);
}
entry->msi_attrib.masked = !!flag;
return 1;
}
void mask_msi_irq(unsigned int irq)
{
msi_set_mask_bit(irq, 1);
}
void unmask_msi_irq(unsigned int irq)
{
msi_set_mask_bit(irq, 0);
}
void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
struct msi_desc *entry = get_irq_desc_msi(desc);
switch(entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
{
if (entry->msi_attrib.is_msix) {
void __iomem *base = entry->mask_base +
entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
} else {
struct pci_dev *dev = entry->dev;
int pos = entry->msi_attrib.pos;
u16 data;
......@@ -201,21 +215,6 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
}
msg->data = data;
break;
}
case PCI_CAP_ID_MSIX:
{
void __iomem *base;
base = entry->mask_base +
entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
break;
}
default:
BUG();
}
}
......@@ -229,11 +228,25 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
struct msi_desc *entry = get_irq_desc_msi(desc);
switch (entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
{
if (entry->msi_attrib.is_msix) {
void __iomem *base;
base = entry->mask_base +
entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
writel(msg->address_hi,
base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
} else {
struct pci_dev *dev = entry->dev;
int pos = entry->msi_attrib.pos;
u16 msgctl;
pci_read_config_word(dev, msi_control_reg(pos), &msgctl);
msgctl &= ~PCI_MSI_FLAGS_QSIZE;
msgctl |= entry->msi_attrib.multiple << 4;
pci_write_config_word(dev, msi_control_reg(pos), msgctl);
pci_write_config_dword(dev, msi_lower_address_reg(pos),
msg->address_lo);
......@@ -246,23 +259,6 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
pci_write_config_word(dev, msi_data_reg(pos, 0),
msg->data);
}
break;
}
case PCI_CAP_ID_MSIX:
{
void __iomem *base;
base = entry->mask_base +
entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
writel(msg->address_hi,
base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
break;
}
default:
BUG();
}
entry->msg = *msg;
}
......@@ -274,37 +270,18 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
write_msi_msg_desc(desc, msg);
}
void mask_msi_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
msi_set_mask_bits(desc, 1, 1);
msix_flush_writes(desc);
}
void unmask_msi_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
msi_set_mask_bits(desc, 1, 0);
msix_flush_writes(desc);
}
static int msi_free_irqs(struct pci_dev* dev);
static struct msi_desc* alloc_msi_entry(void)
static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
{
struct msi_desc *entry;
entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL);
if (!entry)
struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
if (!desc)
return NULL;
INIT_LIST_HEAD(&entry->list);
entry->irq = 0;
entry->dev = NULL;
INIT_LIST_HEAD(&desc->list);
desc->dev = dev;
return entry;
return desc;
}
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
......@@ -328,15 +305,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
pci_intx_for_msi(dev, 0);
msi_set_enable(dev, 0);
write_msi_msg(dev->irq, &entry->msg);
if (entry->msi_attrib.maskbit) {
struct irq_desc *desc = irq_to_desc(dev->irq);
msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask,
entry->msi_attrib.masked);
}
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
control &= ~PCI_MSI_FLAGS_QSIZE;
control |= PCI_MSI_FLAGS_ENABLE;
control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
}
......@@ -354,9 +327,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
msix_set_enable(dev, 0);
list_for_each_entry(entry, &dev->msi_list, list) {
struct irq_desc *desc = irq_to_desc(entry->irq);
write_msi_msg(entry->irq, &entry->msg);
msi_set_mask_bits(desc, 1, entry->msi_attrib.masked);
msix_mask_irq(entry, entry->masked);
}
BUG_ON(list_empty(&dev->msi_list));
......@@ -378,52 +350,48 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state);
/**
* msi_capability_init - configure device's MSI capability structure
* @dev: pointer to the pci_dev data structure of MSI device function
* @nvec: number of interrupts to allocate
*
* Setup the MSI capability structure of the device function with a single
* MSI irq, regardless of whether the device function is capable of handling
* multiple messages. A return of zero indicates successful setup of entry
* zero with the new MSI irq; non-zero otherwise.
**/
static int msi_capability_init(struct pci_dev *dev)
* Setup the MSI capability structure of the device with the requested
* number of interrupts. A return value of zero indicates the successful
* setup of an entry with the new MSI irq. A negative return value indicates
* an error, and a positive return value indicates the number of interrupts
* which could have been allocated.
*/
static int msi_capability_init(struct pci_dev *dev, int nvec)
{
struct msi_desc *entry;
int pos, ret;
u16 control;
unsigned mask;
msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
pci_read_config_word(dev, msi_control_reg(pos), &control);
/* MSI Entry Initialization */
entry = alloc_msi_entry();
entry = alloc_msi_entry(dev);
if (!entry)
return -ENOMEM;
entry->msi_attrib.type = PCI_CAP_ID_MSI;
entry->msi_attrib.is_msix = 0;
entry->msi_attrib.is_64 = is_64bit_address(control);
entry->msi_attrib.entry_nr = 0;
entry->msi_attrib.maskbit = is_mask_bit_support(control);
entry->msi_attrib.masked = 1;
entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
entry->msi_attrib.pos = pos;
entry->dev = dev;
if (entry->msi_attrib.maskbit) {
unsigned int base, maskbits, temp;
base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
entry->mask_base = (void __iomem *)(long)base;
/* All MSIs are unmasked by default; mask them all */
pci_read_config_dword(dev, base, &maskbits);
temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1);
maskbits |= temp;
pci_write_config_dword(dev, base, maskbits);
entry->msi_attrib.maskbits_mask = temp;
}
entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
/* All MSIs are unmasked by default; mask them all */
if (entry->msi_attrib.maskbit)
pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
mask = msi_capable_mask(control);
msi_mask_irq(entry, mask, mask);
list_add_tail(&entry->list, &dev->msi_list);
/* Configure MSI capability structure */
ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI);
ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
if (ret) {
msi_free_irqs(dev);
return ret;
......@@ -476,26 +444,28 @@ static int msix_capability_init(struct pci_dev *dev,
/* MSI-X Table Initialization */
for (i = 0; i < nvec; i++) {
entry = alloc_msi_entry();
entry = alloc_msi_entry(dev);
if (!entry)
break;
j = entries[i].entry;
entry->msi_attrib.type = PCI_CAP_ID_MSIX;
entry->msi_attrib.is_msix = 1;
entry->msi_attrib.is_64 = 1;
entry->msi_attrib.entry_nr = j;
entry->msi_attrib.maskbit = 1;
entry->msi_attrib.masked = 1;
entry->msi_attrib.default_irq = dev->irq;
entry->msi_attrib.pos = pos;
entry->dev = dev;
entry->mask_base = base;
entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE +
PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
msix_mask_irq(entry, 1);
list_add_tail(&entry->list, &dev->msi_list);
}
ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
if (ret) {
if (ret < 0) {
/* If we had some success, report the number of irqs
 * we succeeded in setting up. */
int avail = 0;
list_for_each_entry(entry, &dev->msi_list, list) {
if (entry->irq != 0) {
......@@ -503,14 +473,13 @@ static int msix_capability_init(struct pci_dev *dev,
}
}
msi_free_irqs(dev);
if (avail != 0)
ret = avail;
}
/* If we had some success, report the number of irqs
 * we succeeded in setting up.
 */
if (avail == 0)
avail = ret;
return avail;
if (ret) {
msi_free_irqs(dev);
return ret;
}
i = 0;
......@@ -575,39 +544,54 @@ static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
}
/**
* pci_enable_msi - configure device's MSI capability structure
* @dev: pointer to the pci_dev data structure of MSI device function
* pci_enable_msi_block - configure device's MSI capability structure
* @dev: device to configure
* @nvec: number of interrupts to configure
*
* Setup the MSI capability structure of the device function with a single
* MSI irq when its driver requests MSI mode for the device. A return of
* zero indicates successful setup of entry zero with the new MSI irq;
* non-zero otherwise.
**/
int pci_enable_msi(struct pci_dev* dev)
* Allocate IRQs for a device with the MSI capability.
* This function returns a negative errno if an error occurs. If it
* is unable to allocate the number of interrupts requested, it returns
* the number of interrupts it might be able to allocate. If it successfully
* allocates at least the number of interrupts requested, it returns 0 and
* updates the @dev's irq member to the lowest new interrupt number; the
* other interrupt numbers allocated to this device are consecutive.
*/
int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
{
int status;
int status, pos, maxvec;
u16 msgctl;
status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
if (!pos)
return -EINVAL;
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
if (nvec > maxvec)
return maxvec;
status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
if (status)
return status;
WARN_ON(!!dev->msi_enabled);
/* Check whether driver already requested for MSI-X irqs */
/* Check whether driver already requested MSI-X irqs */
if (dev->msix_enabled) {
dev_info(&dev->dev, "can't enable MSI "
"(MSI-X already enabled)\n");
return -EINVAL;
}
status = msi_capability_init(dev);
status = msi_capability_init(dev, nvec);
return status;
}
EXPORT_SYMBOL(pci_enable_msi);
EXPORT_SYMBOL(pci_enable_msi_block);
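Given the return convention documented above, a caller can simply retry with the count the function hands back; a minimal sketch, with the initial request size purely illustrative:

/* Hypothetical caller; only pci_enable_msi_block() is from this file. */
static int foo_setup_msi(struct pci_dev *dev)
{
	int nvec = 8;	/* start by asking for 8 vectors */
	int rc;

	do {
		rc = pci_enable_msi_block(dev, nvec);
		if (rc > 0)
			nvec = rc;	/* retry with what the device offers */
	} while (rc > 0);

	if (rc)
		return rc;	/* negative errno: fall back to INTx */
	/* dev->irq .. dev->irq + nvec - 1 are now usable */
	return nvec;
}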
void pci_msi_shutdown(struct pci_dev* dev)
void pci_msi_shutdown(struct pci_dev *dev)
{
struct msi_desc *entry;
struct msi_desc *desc;
u32 mask;
u16 ctrl;
if (!pci_msi_enable || !dev || !dev->msi_enabled)
return;
......@@ -617,19 +601,15 @@ void pci_msi_shutdown(struct pci_dev* dev)
dev->msi_enabled = 0;
BUG_ON(list_empty(&dev->msi_list));
entry = list_entry(dev->msi_list.next, struct msi_desc, list);
/* Return to the pci reset state with MSI irqs unmasked */
if (entry->msi_attrib.maskbit) {
u32 mask = entry->msi_attrib.maskbits_mask;
struct irq_desc *desc = irq_to_desc(dev->irq);
msi_set_mask_bits(desc, mask, ~mask);
}
if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
return;
desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS, &ctrl);
mask = msi_capable_mask(ctrl);
msi_mask_irq(desc, mask, ~mask);
/* Restore dev->irq to its default pin-assertion irq */
dev->irq = entry->msi_attrib.default_irq;
dev->irq = desc->msi_attrib.default_irq;
}
void pci_disable_msi(struct pci_dev* dev)
{
struct msi_desc *entry;
......@@ -640,7 +620,7 @@ void pci_disable_msi(struct pci_dev* dev)
pci_msi_shutdown(dev);
entry = list_entry(dev->msi_list.next, struct msi_desc, list);
if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
if (entry->msi_attrib.is_msix)
return;
msi_free_irqs(dev);
......@@ -652,14 +632,18 @@ static int msi_free_irqs(struct pci_dev* dev)
struct msi_desc *entry, *tmp;
list_for_each_entry(entry, &dev->msi_list, list) {
if (entry->irq)
BUG_ON(irq_has_action(entry->irq));
int i, nvec;
if (!entry->irq)
continue;
nvec = 1 << entry->msi_attrib.multiple;
for (i = 0; i < nvec; i++)
BUG_ON(irq_has_action(entry->irq + i));
}
arch_teardown_msi_irqs(dev);
list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) {
if (entry->msi_attrib.is_msix) {
writel(1, entry->mask_base + entry->msi_attrib.entry_nr
* PCI_MSIX_ENTRY_SIZE
+ PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
......@@ -674,6 +658,23 @@ static int msi_free_irqs(struct pci_dev* dev)
return 0;
}
/**
* pci_msix_table_size - return the number of the device's MSI-X table entries
* @dev: pointer to the pci_dev data structure of MSI-X device function
*/
int pci_msix_table_size(struct pci_dev *dev)
{
int pos;
u16 control;
pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
if (!pos)
return 0;
pci_read_config_word(dev, msi_control_reg(pos), &control);
return multi_msix_capable(control);
}
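A sketch of how a driver might size its MSI-X request against this helper before calling pci_enable_msix(); the function name and array handling are illustrative:

/* Hypothetical MSI-X setup using pci_msix_table_size()/pci_enable_msix(). */
static int foo_setup_msix(struct pci_dev *dev, struct msix_entry *entries,
			  int nvec)
{
	int i, nr_entries = pci_msix_table_size(dev);

	if (!nr_entries)
		return -EINVAL;		/* no MSI-X capability */
	if (nvec > nr_entries)
		nvec = nr_entries;	/* never ask for more than the table holds */

	for (i = 0; i < nvec; i++)
		entries[i].entry = i;	/* use the first nvec table entries */

	/* 0 on success, negative errno, or a positive count if fewer
	 * vectors than requested could be allocated */
	return pci_enable_msix(dev, entries, nvec);
}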
/**
* pci_enable_msix - configure device's MSI-X capability structure
* @dev: pointer to the pci_dev data structure of MSI-X device function
......@@ -691,9 +692,8 @@ static int msi_free_irqs(struct pci_dev* dev)
**/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
int status, pos, nr_entries;
int status, nr_entries;
int i, j;
u16 control;
if (!entries)
return -EINVAL;
......@@ -702,9 +702,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
if (status)
return status;
pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
pci_read_config_word(dev, msi_control_reg(pos), &control);
nr_entries = multi_msix_capable(control);
nr_entries = pci_msix_table_size(dev);
if (nvec > nr_entries)
return -EINVAL;
......
......@@ -20,14 +20,8 @@
#define msi_mask_bits_reg(base, is64bit) \
( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
#define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE
#define multi_msi_capable(control) \
(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
#define multi_msi_enable(control, num) \
control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
#define msi_enable(control, num) multi_msi_enable(control, num); \
control |= PCI_MSI_FLAGS_ENABLE
#define msix_table_offset_reg(base) (base + 0x04)
#define msix_pba_offset_reg(base) (base + 0x08)
......
......@@ -18,221 +18,6 @@
#include <linux/pci-acpi.h>
#include "pci.h"
struct acpi_osc_data {
acpi_handle handle;
u32 support_set;
u32 control_set;
u32 control_query;
int is_queried;
struct list_head sibiling;
};
static LIST_HEAD(acpi_osc_data_list);
struct acpi_osc_args {
u32 capbuf[3];
};
static DEFINE_MUTEX(pci_acpi_lock);
static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle)
{
struct acpi_osc_data *data;
list_for_each_entry(data, &acpi_osc_data_list, sibiling) {
if (data->handle == handle)
return data;
}
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
INIT_LIST_HEAD(&data->sibiling);
data->handle = handle;
list_add_tail(&data->sibiling, &acpi_osc_data_list);
return data;
}
static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
static acpi_status acpi_run_osc(acpi_handle handle,
struct acpi_osc_args *osc_args, u32 *retval)
{
acpi_status status;
struct acpi_object_list input;
union acpi_object in_params[4];
struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
union acpi_object *out_obj;
u32 errors, flags = osc_args->capbuf[OSC_QUERY_TYPE];
/* Setting up input parameters */
input.count = 4;
input.pointer = in_params;
in_params[0].type = ACPI_TYPE_BUFFER;
in_params[0].buffer.length = 16;
in_params[0].buffer.pointer = OSC_UUID;
in_params[1].type = ACPI_TYPE_INTEGER;
in_params[1].integer.value = 1;
in_params[2].type = ACPI_TYPE_INTEGER;
in_params[2].integer.value = 3;
in_params[3].type = ACPI_TYPE_BUFFER;
in_params[3].buffer.length = 12;
in_params[3].buffer.pointer = (u8 *)osc_args->capbuf;
status = acpi_evaluate_object(handle, "_OSC", &input, &output);
if (ACPI_FAILURE(status))
return status;
if (!output.length)
return AE_NULL_OBJECT;
out_obj = output.pointer;
if (out_obj->type != ACPI_TYPE_BUFFER) {
printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n");
status = AE_TYPE;
goto out_kfree;
}
/* Need to ignore bit 0 in the result code */
errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
if (errors) {
if (errors & OSC_REQUEST_ERROR)
printk(KERN_DEBUG "_OSC request fails\n");
if (errors & OSC_INVALID_UUID_ERROR)
printk(KERN_DEBUG "_OSC invalid UUID\n");
if (errors & OSC_INVALID_REVISION_ERROR)
printk(KERN_DEBUG "_OSC invalid revision\n");
if (errors & OSC_CAPABILITIES_MASK_ERROR) {
if (flags & OSC_QUERY_ENABLE)
goto out_success;
printk(KERN_DEBUG "_OSC FW not grant req. control\n");
status = AE_SUPPORT;
goto out_kfree;
}
status = AE_ERROR;
goto out_kfree;
}
out_success:
*retval = *((u32 *)(out_obj->buffer.pointer + 8));
status = AE_OK;
out_kfree:
kfree(output.pointer);
return status;
}
static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data)
{
acpi_status status;
u32 support_set, result;
struct acpi_osc_args osc_args;
/* do _OSC query for all possible controls */
support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS);
osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set;
osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
status = acpi_run_osc(osc_data->handle, &osc_args, &result);
if (ACPI_SUCCESS(status)) {
osc_data->support_set = support_set;
osc_data->control_query = result;
osc_data->is_queried = 1;
}
return status;
}
/*
* pci_acpi_osc_support: Invoke _OSC indicating support for the given feature
* @flags: Bitmask of flags to support
*
* See the ACPI spec for the definition of the flags
*/
int pci_acpi_osc_support(acpi_handle handle, u32 flags)
{
acpi_status status;
acpi_handle tmp;
struct acpi_osc_data *osc_data;
int rc = 0;
status = acpi_get_handle(handle, "_OSC", &tmp);
if (ACPI_FAILURE(status))
return -ENOTTY;
mutex_lock(&pci_acpi_lock);
osc_data = acpi_get_osc_data(handle);
if (!osc_data) {
printk(KERN_ERR "acpi osc data array is full\n");
rc = -ENOMEM;
goto out;
}
__acpi_query_osc(flags, osc_data);
out:
mutex_unlock(&pci_acpi_lock);
return rc;
}
/**
* pci_osc_control_set - commit requested control to Firmware
* @handle: acpi_handle for the target ACPI object
* @flags: driver's requested control bits
*
* Attempt to take over the requested control bits from firmware.
**/
acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
{
acpi_status status;
u32 control_req, control_set, result;
acpi_handle tmp;
struct acpi_osc_data *osc_data;
struct acpi_osc_args osc_args;
status = acpi_get_handle(handle, "_OSC", &tmp);
if (ACPI_FAILURE(status))
return status;
mutex_lock(&pci_acpi_lock);
osc_data = acpi_get_osc_data(handle);
if (!osc_data) {
printk(KERN_ERR "acpi osc data array is full\n");
status = AE_ERROR;
goto out;
}
control_req = (flags & OSC_CONTROL_MASKS);
if (!control_req) {
status = AE_TYPE;
goto out;
}
/* No need to evaluate _OSC if the control was already granted. */
if ((osc_data->control_set & control_req) == control_req)
goto out;
if (!osc_data->is_queried) {
status = __acpi_query_osc(osc_data->support_set, osc_data);
if (ACPI_FAILURE(status))
goto out;
}
if ((osc_data->control_query & control_req) != control_req) {
status = AE_SUPPORT;
goto out;
}
control_set = osc_data->control_set | control_req;
osc_args.capbuf[OSC_QUERY_TYPE] = 0;
osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set;
osc_args.capbuf[OSC_CONTROL_TYPE] = control_set;
status = acpi_run_osc(handle, &osc_args, &result);
if (ACPI_SUCCESS(status))
osc_data->control_set = result;
out:
mutex_unlock(&pci_acpi_lock);
return status;
}
EXPORT_SYMBOL(pci_osc_control_set);
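For reference, this (now removed) helper was invoked roughly as below; compare the aerdrv_acpi.c hunk later in this commit, where the call site switches to acpi_pci_osc_control_set(). The handle acquisition is schematic:

/* Schematic caller; handle comes from acpi_find_root_bridge_handle(). */
acpi_status status;

status = pci_osc_control_set(handle,
			     OSC_PCI_EXPRESS_AER_CONTROL |
			     OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
if (ACPI_FAILURE(status)) {
	/* firmware retained control; fall back to non-native handling */
}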
/*
* _SxD returns the D-state with the highest power
* (lowest D-state number) supported in the S-state "x".
......
......@@ -99,6 +99,52 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
}
static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
/**
* store_remove_id - remove a PCI device ID from this driver
* @driver: target device driver
* @buf: buffer for scanning device ID data
* @count: input size
*
* Removes a dynamic pci device ID from this driver.
*/
static ssize_t
store_remove_id(struct device_driver *driver, const char *buf, size_t count)
{
struct pci_dynid *dynid, *n;
struct pci_driver *pdrv = to_pci_driver(driver);
__u32 vendor, device, subvendor = PCI_ANY_ID,
subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
int fields = 0;
int retval = -ENODEV;
fields = sscanf(buf, "%x %x %x %x %x %x",
&vendor, &device, &subvendor, &subdevice,
&class, &class_mask);
if (fields < 2)
return -EINVAL;
spin_lock(&pdrv->dynids.lock);
list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) {
struct pci_device_id *id = &dynid->id;
if ((id->vendor == vendor) &&
(id->device == device) &&
(subvendor == PCI_ANY_ID || id->subvendor == subvendor) &&
(subdevice == PCI_ANY_ID || id->subdevice == subdevice) &&
!((id->class ^ class) & class_mask)) {
list_del(&dynid->node);
kfree(dynid);
retval = 0;
break;
}
}
spin_unlock(&pdrv->dynids.lock);
if (retval)
return retval;
return count;
}
static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id);
static void
pci_free_dynids(struct pci_driver *drv)
{
......@@ -125,6 +171,20 @@ static void pci_remove_newid_file(struct pci_driver *drv)
{
driver_remove_file(&drv->driver, &driver_attr_new_id);
}
static int
pci_create_removeid_file(struct pci_driver *drv)
{
int error = 0;
if (drv->probe != NULL)
error = driver_create_file(&drv->driver, &driver_attr_remove_id);
return error;
}
static void pci_remove_removeid_file(struct pci_driver *drv)
{
driver_remove_file(&drv->driver, &driver_attr_remove_id);
}
#else /* !CONFIG_HOTPLUG */
static inline void pci_free_dynids(struct pci_driver *drv) {}
static inline int pci_create_newid_file(struct pci_driver *drv)
......@@ -132,6 +192,11 @@ static inline int pci_create_newid_file(struct pci_driver *drv)
return 0;
}
static inline void pci_remove_newid_file(struct pci_driver *drv) {}
static inline int pci_create_removeid_file(struct pci_driver *drv)
{
return 0;
}
static inline void pci_remove_removeid_file(struct pci_driver *drv) {}
#endif
/**
......@@ -899,13 +964,23 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
/* register with core */
error = driver_register(&drv->driver);
if (error)
return error;
goto out;
error = pci_create_newid_file(drv);
if (error)
driver_unregister(&drv->driver);
goto out_newid;
error = pci_create_removeid_file(drv);
if (error)
goto out_removeid;
out:
return error;
out_removeid:
pci_remove_newid_file(drv);
out_newid:
driver_unregister(&drv->driver);
goto out;
}
/**
......@@ -921,6 +996,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
void
pci_unregister_driver(struct pci_driver *drv)
{
pci_remove_removeid_file(drv);
pci_remove_newid_file(drv);
driver_unregister(&drv->driver);
pci_free_dynids(drv);
......@@ -1020,6 +1096,7 @@ struct bus_type pci_bus_type = {
.remove = pci_device_remove,
.shutdown = pci_device_shutdown,
.dev_attrs = pci_dev_attrs,
.bus_attrs = pci_bus_attrs,
.pm = PCI_PM_OPS_PTR,
};
......
......@@ -219,6 +219,83 @@ msi_bus_store(struct device *dev, struct device_attribute *attr,
return count;
}
#ifdef CONFIG_HOTPLUG
static DEFINE_MUTEX(pci_remove_rescan_mutex);
static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf,
size_t count)
{
unsigned long val;
struct pci_bus *b = NULL;
if (strict_strtoul(buf, 0, &val) < 0)
return -EINVAL;
if (val) {
mutex_lock(&pci_remove_rescan_mutex);
while ((b = pci_find_next_bus(b)) != NULL)
pci_rescan_bus(b);
mutex_unlock(&pci_remove_rescan_mutex);
}
return count;
}
struct bus_attribute pci_bus_attrs[] = {
__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, bus_rescan_store),
__ATTR_NULL
};
static ssize_t
dev_rescan_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
unsigned long val;
struct pci_dev *pdev = to_pci_dev(dev);
if (strict_strtoul(buf, 0, &val) < 0)
return -EINVAL;
if (val) {
mutex_lock(&pci_remove_rescan_mutex);
pci_rescan_bus(pdev->bus);
mutex_unlock(&pci_remove_rescan_mutex);
}
return count;
}
static void remove_callback(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
mutex_lock(&pci_remove_rescan_mutex);
pci_remove_bus_device(pdev);
mutex_unlock(&pci_remove_rescan_mutex);
}
static ssize_t
remove_store(struct device *dev, struct device_attribute *dummy,
const char *buf, size_t count)
{
int ret = 0;
unsigned long val;
struct pci_dev *pdev = to_pci_dev(dev);
if (strict_strtoul(buf, 0, &val) < 0)
return -EINVAL;
if (pci_is_root_bus(pdev->bus))
return -EBUSY;
/* An attribute cannot be unregistered by one of its own methods,
* so we have to use this roundabout approach.
*/
if (val)
ret = device_schedule_callback(dev, remove_callback);
if (ret)
count = ret;
return count;
}
#endif
struct device_attribute pci_dev_attrs[] = {
__ATTR_RO(resource),
__ATTR_RO(vendor),
......@@ -237,9 +314,24 @@ struct device_attribute pci_dev_attrs[] = {
__ATTR(broken_parity_status,(S_IRUGO|S_IWUSR),
broken_parity_status_show,broken_parity_status_store),
__ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store),
#ifdef CONFIG_HOTPLUG
__ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store),
__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store),
#endif
__ATTR_NULL,
};
static ssize_t
boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct pci_dev *pdev = to_pci_dev(dev);
return sprintf(buf, "%u\n",
!!(pdev->resource[PCI_ROM_RESOURCE].flags &
IORESOURCE_ROM_SHADOW));
}
struct device_attribute vga_attr = __ATTR_RO(boot_vga);
static ssize_t
pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
......@@ -492,6 +584,19 @@ pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
}
/**
* pci_adjust_legacy_attr - adjustment of legacy file attributes
* @b: bus to create files under
* @mmap_type: I/O port or memory
*
* Stub implementation. Can be overridden by arch if necessary.
*/
void __weak
pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type)
{
return;
}
/**
* pci_create_legacy_files - create legacy I/O port and memory files
* @b: bus to create files under
......@@ -518,6 +623,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_io->read = pci_read_legacy_io;
b->legacy_io->write = pci_write_legacy_io;
b->legacy_io->mmap = pci_mmap_legacy_io;
pci_adjust_legacy_attr(b, pci_mmap_io);
error = device_create_bin_file(&b->dev, b->legacy_io);
if (error)
goto legacy_io_err;
......@@ -528,6 +634,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_mem->size = 1024*1024;
b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
b->legacy_mem->mmap = pci_mmap_legacy_mem;
pci_adjust_legacy_attr(b, pci_mmap_mem);
error = device_create_bin_file(&b->dev, b->legacy_mem);
if (error)
goto legacy_mem_err;
......@@ -719,8 +826,8 @@ static int pci_create_resource_files(struct pci_dev *pdev)
return 0;
}
#else /* !HAVE_PCI_MMAP */
static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; }
static inline void pci_remove_resource_files(struct pci_dev *dev) { return; }
int __weak pci_create_resource_files(struct pci_dev *dev) { return 0; }
void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
#endif /* HAVE_PCI_MMAP */
/**
......@@ -884,18 +991,27 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
pdev->rom_attr = attr;
}
if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) {
retval = device_create_file(&pdev->dev, &vga_attr);
if (retval)
goto err_rom_file;
}
/* add platform-specific attributes */
retval = pcibios_add_platform_entries(pdev);
if (retval)
goto err_rom_file;
goto err_vga_file;
/* add sysfs entries for various capabilities */
retval = pci_create_capabilities_sysfs(pdev);
if (retval)
goto err_rom_file;
goto err_vga_file;
return 0;
err_vga_file:
if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
device_remove_file(&pdev->dev, &vga_attr);
err_rom_file:
if (rom_size) {
sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
......
......@@ -20,6 +20,8 @@
#include <linux/pm_wakeup.h>
#include <linux/interrupt.h>
#include <asm/dma.h> /* isa_dma_bridge_buggy */
#include <linux/device.h>
#include <asm/setup.h>
#include "pci.h"
unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
......@@ -677,6 +679,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state)
EXPORT_SYMBOL(pci_choose_state);
#define PCI_EXP_SAVE_REGS 7
static int pci_save_pcie_state(struct pci_dev *dev)
{
int pos, i = 0;
......@@ -689,7 +693,7 @@ static int pci_save_pcie_state(struct pci_dev *dev)
save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
if (!save_state) {
dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__);
dev_err(&dev->dev, "buffer not found in %s\n", __func__);
return -ENOMEM;
}
cap = (u16 *)&save_state->data[0];
......@@ -698,6 +702,9 @@ static int pci_save_pcie_state(struct pci_dev *dev)
pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]);
return 0;
}
......@@ -718,6 +725,9 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]);
}
......@@ -732,7 +742,7 @@ static int pci_save_pcix_state(struct pci_dev *dev)
save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
if (!save_state) {
dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__);
dev_err(&dev->dev, "buffer not found in %s\n", __func__);
return -ENOMEM;
}
......@@ -805,6 +815,7 @@ pci_restore_state(struct pci_dev *dev)
}
pci_restore_pcix_state(dev);
pci_restore_msi_state(dev);
pci_restore_iov_state(dev);
return 0;
}
......@@ -1401,7 +1412,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
{
int error;
error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16));
error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP,
PCI_EXP_SAVE_REGS * sizeof(u16));
if (error)
dev_err(&dev->dev,
"unable to preallocate PCI Express save buffer\n");
......@@ -1472,7 +1484,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
if (!pin)
return -1;
while (dev->bus->self) {
while (dev->bus->parent) {
pin = pci_swizzle_interrupt_pin(dev, pin);
dev = dev->bus->self;
}
......@@ -1492,7 +1504,7 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp)
{
u8 pin = *pinp;
while (dev->bus->self) {
while (dev->bus->parent) {
pin = pci_swizzle_interrupt_pin(dev, pin);
dev = dev->bus->self;
}
......@@ -2016,18 +2028,24 @@ static int __pcie_flr(struct pci_dev *dev, int probe)
pci_block_user_cfg_access(dev);
/* Wait for Transaction Pending bit clean */
pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
if (!(status & PCI_EXP_DEVSTA_TRPND))
goto transaction_done;
msleep(100);
pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
if (status & PCI_EXP_DEVSTA_TRPND) {
dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
if (!(status & PCI_EXP_DEVSTA_TRPND))
goto transaction_done;
dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
"sleeping for 1 second\n");
ssleep(1);
pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
if (status & PCI_EXP_DEVSTA_TRPND)
dev_info(&dev->dev, "Still busy after 1s; "
ssleep(1);
pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
if (status & PCI_EXP_DEVSTA_TRPND)
dev_info(&dev->dev, "Still busy after 1s; "
"proceeding with reset anyway\n");
}
transaction_done:
pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_BCR_FLR);
mdelay(100);
......@@ -2054,18 +2072,24 @@ static int __pci_af_flr(struct pci_dev *dev, int probe)
pci_block_user_cfg_access(dev);
/* Wait for Transaction Pending bit clean */
pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
if (!(status & PCI_AF_STATUS_TP))
goto transaction_done;
msleep(100);
pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
if (status & PCI_AF_STATUS_TP) {
dev_info(&dev->dev, "Busy after 100ms while trying to"
" reset; sleeping for 1 second\n");
ssleep(1);
pci_read_config_byte(dev,
cappos + PCI_AF_STATUS, &status);
if (status & PCI_AF_STATUS_TP)
dev_info(&dev->dev, "Still busy after 1s; "
"proceeding with reset anyway\n");
}
if (!(status & PCI_AF_STATUS_TP))
goto transaction_done;
dev_info(&dev->dev, "Busy after 100ms while trying to"
" reset; sleeping for 1 second\n");
ssleep(1);
pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
if (status & PCI_AF_STATUS_TP)
dev_info(&dev->dev, "Still busy after 1s; "
"proceeding with reset anyway\n");
transaction_done:
pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
mdelay(100);
......@@ -2334,18 +2358,140 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags)
*/
int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type)
{
int reg;
if (resno < PCI_ROM_RESOURCE) {
*type = pci_bar_unknown;
return PCI_BASE_ADDRESS_0 + 4 * resno;
} else if (resno == PCI_ROM_RESOURCE) {
*type = pci_bar_mem32;
return dev->rom_base_reg;
} else if (resno < PCI_BRIDGE_RESOURCES) {
/* device specific resource */
reg = pci_iov_resource_bar(dev, resno, type);
if (reg)
return reg;
}
dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno);
return 0;
}
#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED;
/**
* pci_specified_resource_alignment - get resource alignment specified by user.
* @dev: the PCI device to get
*
* RETURNS: Resource alignment if it is specified.
* Zero if it is not specified.
*/
resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
{
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
char *p;
spin_lock(&resource_alignment_lock);
p = resource_alignment_param;
while (*p) {
count = 0;
if (sscanf(p, "%d%n", &align_order, &count) == 1 &&
p[count] == '@') {
p += count + 1;
} else {
align_order = -1;
}
if (sscanf(p, "%x:%x:%x.%x%n",
&seg, &bus, &slot, &func, &count) != 4) {
seg = 0;
if (sscanf(p, "%x:%x.%x%n",
&bus, &slot, &func, &count) != 3) {
/* Invalid format */
printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n",
p);
break;
}
}
p += count;
if (seg == pci_domain_nr(dev->bus) &&
bus == dev->bus->number &&
slot == PCI_SLOT(dev->devfn) &&
func == PCI_FUNC(dev->devfn)) {
if (align_order == -1) {
align = PAGE_SIZE;
} else {
align = 1 << align_order;
}
/* Found */
break;
}
if (*p != ';' && *p != ',') {
/* End of param or invalid format */
break;
}
p++;
}
spin_unlock(&resource_alignment_lock);
return align;
}
/**
* pci_is_reassigndev - check whether the specified PCI device is a target device to reassign
* @dev: the PCI device to check
*
* RETURNS: non-zero if the PCI device is a target device to reassign,
* zero otherwise.
*/
int pci_is_reassigndev(struct pci_dev *dev)
{
return (pci_specified_resource_alignment(dev) != 0);
}
ssize_t pci_set_resource_alignment_param(const char *buf, size_t count)
{
if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1)
count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1;
spin_lock(&resource_alignment_lock);
strncpy(resource_alignment_param, buf, count);
resource_alignment_param[count] = '\0';
spin_unlock(&resource_alignment_lock);
return count;
}
ssize_t pci_get_resource_alignment_param(char *buf, size_t size)
{
size_t count;
spin_lock(&resource_alignment_lock);
count = snprintf(buf, size, "%s", resource_alignment_param);
spin_unlock(&resource_alignment_lock);
return count;
}
static ssize_t pci_resource_alignment_show(struct bus_type *bus, char *buf)
{
return pci_get_resource_alignment_param(buf, PAGE_SIZE);
}
static ssize_t pci_resource_alignment_store(struct bus_type *bus,
const char *buf, size_t count)
{
return pci_set_resource_alignment_param(buf, count);
}
BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show,
pci_resource_alignment_store);
static int __init pci_resource_alignment_sysfs_init(void)
{
return bus_create_file(&pci_bus_type,
&bus_attr_resource_alignment);
}
late_initcall(pci_resource_alignment_sysfs_init);
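From the parser above, the parameter accepts a `;`- or `,`-separated list of entries of the form [<order>@][<domain>:]<bus>:<slot>.<func>, where <order> is the log2 of the requested alignment and defaults to PAGE_SIZE when omitted. A hypothetical command line:

pci=resource_alignment=20@0000:00:1c.0;00:1d.0

This requests 1 MB (2^20) alignment for the device at 0000:00:1c.0 and page-size alignment for 00:1d.0 (the device addresses here are purely illustrative).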
static void __devinit pci_no_domains(void)
{
#ifdef CONFIG_PCI_DOMAINS
......@@ -2394,6 +2540,9 @@ static int __init pci_setup(char *str)
pci_cardbus_io_size = memparse(str + 9, &str);
} else if (!strncmp(str, "cbmemsize=", 10)) {
pci_cardbus_mem_size = memparse(str + 10, &str);
} else if (!strncmp(str, "resource_alignment=", 19)) {
pci_set_resource_alignment_param(str + 19,
strlen(str + 19));
} else {
printk(KERN_ERR "PCI: Unknown option `%s'\n",
str);
......
#ifndef DRIVERS_PCI_H
#define DRIVERS_PCI_H
#include <linux/workqueue.h>
#define PCI_CFG_SPACE_SIZE 256
#define PCI_CFG_SPACE_EXP_SIZE 4096
......@@ -135,6 +137,12 @@ extern int pcie_mch_quirk;
extern struct device_attribute pci_dev_attrs[];
extern struct device_attribute dev_attr_cpuaffinity;
extern struct device_attribute dev_attr_cpulistaffinity;
#ifdef CONFIG_HOTPLUG
extern struct bus_attribute pci_bus_attrs[];
#else
#define pci_bus_attrs NULL
#endif
/**
* pci_match_one_device - Tell if a PCI device structure has a matching
......@@ -177,6 +185,7 @@ enum pci_bar_type {
pci_bar_mem64, /* A 64-bit memory BAR */
};
extern int pci_setup_device(struct pci_dev *dev);
extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
struct resource *res, unsigned int reg);
extern int pci_resource_bar(struct pci_dev *dev, int resno,
......@@ -194,4 +203,60 @@ static inline int pci_ari_enabled(struct pci_bus *bus)
return bus->self && bus->self->ari_enabled;
}
#ifdef CONFIG_PCI_QUIRKS
extern int pci_is_reassigndev(struct pci_dev *dev);
resource_size_t pci_specified_resource_alignment(struct pci_dev *dev);
extern void pci_disable_bridge_window(struct pci_dev *dev);
#endif
/* Single Root I/O Virtualization */
struct pci_sriov {
int pos; /* capability position */
int nres; /* number of resources */
u32 cap; /* SR-IOV Capabilities */
u16 ctrl; /* SR-IOV Control */
u16 total; /* total VFs associated with the PF */
u16 initial; /* initial VFs associated with the PF */
u16 nr_virtfn; /* number of VFs available */
u16 offset; /* first VF Routing ID offset */
u16 stride; /* following VF stride */
u32 pgsz; /* page size for BAR alignment */
u8 link; /* Function Dependency Link */
struct pci_dev *dev; /* lowest numbered PF */
struct pci_dev *self; /* this PF */
struct mutex lock; /* lock for VF bus */
struct work_struct mtask; /* VF Migration task */
u8 __iomem *mstate; /* VF Migration State Array */
};
#ifdef CONFIG_PCI_IOV
extern int pci_iov_init(struct pci_dev *dev);
extern void pci_iov_release(struct pci_dev *dev);
extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
enum pci_bar_type *type);
extern void pci_restore_iov_state(struct pci_dev *dev);
extern int pci_iov_bus_range(struct pci_bus *bus);
#else
static inline int pci_iov_init(struct pci_dev *dev)
{
return -ENODEV;
}
static inline void pci_iov_release(struct pci_dev *dev)
{
}
static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno,
enum pci_bar_type *type)
{
return 0;
}
static inline void pci_restore_iov_state(struct pci_dev *dev)
{
}
static inline int pci_iov_bus_range(struct pci_bus *bus)
{
return 0;
}
#endif /* CONFIG_PCI_IOV */
#endif /* DRIVERS_PCI_H */
......@@ -38,30 +38,13 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
static int __devinit aer_probe (struct pcie_device *dev,
const struct pcie_port_service_id *id );
static int __devinit aer_probe (struct pcie_device *dev);
static void aer_remove(struct pcie_device *dev);
static int aer_suspend(struct pcie_device *dev, pm_message_t state)
{return 0;}
static int aer_resume(struct pcie_device *dev) {return 0;}
static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
enum pci_channel_state error);
static void aer_error_resume(struct pci_dev *dev);
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
/*
* PCI Express bus's AER Root service driver data structure
*/
static struct pcie_port_service_id aer_id[] = {
{
.vendor = PCI_ANY_ID,
.device = PCI_ANY_ID,
.port_type = PCIE_RC_PORT,
.service_type = PCIE_PORT_SERVICE_AER,
},
{ /* end: all zeroes */ }
};
static struct pci_error_handlers aer_error_handlers = {
.error_detected = aer_error_detected,
.resume = aer_error_resume,
......@@ -69,14 +52,12 @@ static struct pci_error_handlers aer_error_handlers = {
static struct pcie_port_service_driver aerdriver = {
.name = "aer",
.id_table = &aer_id[0],
.port_type = PCIE_ANY_PORT,
.service = PCIE_PORT_SERVICE_AER,
.probe = aer_probe,
.remove = aer_remove,
.suspend = aer_suspend,
.resume = aer_resume,
.err_handler = &aer_error_handlers,
.reset_link = aer_root_reset,
......@@ -207,8 +188,7 @@ static void aer_remove(struct pcie_device *dev)
*
* Invoked when PCI Express bus loads AER service driver.
**/
static int __devinit aer_probe (struct pcie_device *dev,
const struct pcie_port_service_id *id )
static int __devinit aer_probe (struct pcie_device *dev)
{
int status;
struct aer_rpc *rpc;
......
......@@ -38,7 +38,7 @@ int aer_osc_setup(struct pcie_device *pciedev)
handle = acpi_find_root_bridge_handle(pdev);
if (handle) {
status = pci_osc_control_set(handle,
status = acpi_pci_osc_control_set(handle,
OSC_PCI_EXPRESS_AER_CONTROL |
OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
}
......
......@@ -351,21 +351,21 @@ static int find_aer_service_iter(struct device *device, void *data)
{
struct device_driver *driver;
struct pcie_port_service_driver *service_driver;
struct pcie_device *pcie_dev;
struct find_aer_service_data *result;
result = (struct find_aer_service_data *) data;
if (device->bus == &pcie_port_bus_type) {
pcie_dev = to_pcie_device(device);
if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT)
struct pcie_port_data *port_data;
port_data = pci_get_drvdata(to_pcie_device(device)->port);
if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT)
result->is_downstream = 1;
driver = device->driver;
if (driver) {
service_driver = to_service_driver(driver);
if (service_driver->id_table->service_type ==
PCIE_PORT_SERVICE_AER) {
if (service_driver->service == PCIE_PORT_SERVICE_AER) {
result->aer_driver = service_driver;
return 1;
}
......
......@@ -25,19 +25,21 @@
#define PCIE_CAPABILITIES_REG 0x2
#define PCIE_SLOT_CAPABILITIES_REG 0x14
#define PCIE_PORT_DEVICE_MAXSERVICES 4
#define PCIE_PORT_MSI_VECTOR_MASK 0x1f
/*
* According to the PCI Express Base Specification 2.0, the indices of the MSI-X
* table entries used by port services must not exceed 31
*/
#define PCIE_PORT_MAX_MSIX_ENTRIES 32
#define get_descriptor_id(type, service) (((type - 4) << 4) | service)
struct pcie_port_device_ext {
int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */
};
extern struct bus_type pcie_port_bus_type;
extern int pcie_port_device_probe(struct pci_dev *dev);
extern int pcie_port_device_register(struct pci_dev *dev);
#ifdef CONFIG_PM
extern int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state);
extern int pcie_port_device_resume(struct pci_dev *dev);
extern int pcie_port_device_suspend(struct device *dev);
extern int pcie_port_device_resume(struct device *dev);
#endif
extern void pcie_port_device_remove(struct pci_dev *dev);
extern int __must_check pcie_port_bus_register(void);
......
......@@ -26,20 +26,22 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type);
static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
{
struct pcie_device *pciedev;
struct pcie_port_data *port_data;
struct pcie_port_service_driver *driver;
if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type)
return 0;
pciedev = to_pcie_device(dev);
driver = to_service_driver(drv);
if ( (driver->id_table->vendor != PCI_ANY_ID &&
driver->id_table->vendor != pciedev->id.vendor) ||
(driver->id_table->device != PCI_ANY_ID &&
driver->id_table->device != pciedev->id.device) ||
(driver->id_table->port_type != PCIE_ANY_PORT &&
driver->id_table->port_type != pciedev->id.port_type) ||
driver->id_table->service_type != pciedev->id.service_type )
if (driver->service != pciedev->service)
return 0;
port_data = pci_get_drvdata(pciedev->port);
if (driver->port_type != PCIE_ANY_PORT
&& driver->port_type != port_data->port_type)
return 0;
return 1;
......
......@@ -15,10 +15,9 @@
#include <linux/slab.h>
#include <linux/pcieport_if.h>
#include "../pci.h"
#include "portdrv.h"
extern int pcie_mch_quirk; /* MSI-quirk Indicator */
/**
* release_pcie_device - free PCI Express port service device structure
* @dev: Port service device to release
......@@ -31,26 +30,150 @@ static void release_pcie_device(struct device *dev)
kfree(to_pcie_device(dev));
}
static int is_msi_quirked(struct pci_dev *dev)
/**
* pcie_port_msix_add_entry - add entry to given array of MSI-X entries
* @entries: Array of MSI-X entries
* @new_entry: Index of the entry to add to the array
* @nr_entries: Number of entries already in the array
*
* Return value: Position of the added entry in the array
*/
static int pcie_port_msix_add_entry(
struct msix_entry *entries, int new_entry, int nr_entries)
{
int port_type, quirk = 0;
int j;
for (j = 0; j < nr_entries; j++)
if (entries[j].entry == new_entry)
return j;
entries[j].entry = new_entry;
return j;
}
/**
* pcie_port_enable_msix - try to set up MSI-X as interrupt mode for given port
* @dev: PCI Express port to handle
* @vectors: Array of interrupt vectors to populate
* @mask: Bitmask of port capabilities returned by get_port_device_capability()
*
* Return value: 0 on success, error code on failure
*/
static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask)
{
struct msix_entry *msix_entries;
int idx[PCIE_PORT_DEVICE_MAXSERVICES];
int nr_entries, status, pos, i, nvec;
u16 reg16;
u32 reg32;
pci_read_config_word(dev,
pci_find_capability(dev, PCI_CAP_ID_EXP) +
PCIE_CAPABILITIES_REG, &reg16);
port_type = (reg16 >> 4) & PORT_TYPE_MASK;
switch(port_type) {
case PCIE_RC_PORT:
if (pcie_mch_quirk == 1)
quirk = 1;
break;
case PCIE_SW_UPSTREAM_PORT:
case PCIE_SW_DOWNSTREAM_PORT:
default:
break;
nr_entries = pci_msix_table_size(dev);
if (!nr_entries)
return -EINVAL;
if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES)
nr_entries = PCIE_PORT_MAX_MSIX_ENTRIES;
msix_entries = kzalloc(sizeof(*msix_entries) * nr_entries, GFP_KERNEL);
if (!msix_entries)
return -ENOMEM;
/*
* Allocate as many entries as the port wants, so that we can check
* which of them will be useful. Moreover, if nr_entries is correctly
* equal to the number of entries this port actually uses, we'll happily
* go through without any tricks.
*/
for (i = 0; i < nr_entries; i++)
msix_entries[i].entry = i;
status = pci_enable_msix(dev, msix_entries, nr_entries);
if (status)
goto Exit;
for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
idx[i] = -1;
status = -EIO;
nvec = 0;
if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) {
int entry;
/*
* The code below follows the PCI Express Base Specification 2.0
* stating in Section 6.1.6 that "PME and Hot-Plug Event
* interrupts (when both are implemented) always share the same
* MSI or MSI-X vector, as indicated by the Interrupt Message
* Number field in the PCI Express Capabilities register", where
* according to Section 7.8.2 of the specification "For MSI-X,
* the value in this field indicates which MSI-X Table entry is
* used to generate the interrupt message."
*/
pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, &reg16);
entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK;
if (entry >= nr_entries)
goto Error;
i = pcie_port_msix_add_entry(msix_entries, entry, nvec);
if (i == nvec)
nvec++;
idx[PCIE_PORT_SERVICE_PME_SHIFT] = i;
idx[PCIE_PORT_SERVICE_HP_SHIFT] = i;
}
if (mask & PCIE_PORT_SERVICE_AER) {
int entry;
/*
* The code below follows Section 7.10.10 of the PCI Express
* Base Specification 2.0 stating that bits 31-27 of the Root
* Error Status Register contain a value indicating which of the
* MSI/MSI-X vectors assigned to the port is going to be used
* for AER, where "For MSI-X, the value in this register
* indicates which MSI-X Table entry is used to generate the
* interrupt message."
*/
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32);
entry = reg32 >> 27;
if (entry >= nr_entries)
goto Error;
i = pcie_port_msix_add_entry(msix_entries, entry, nvec);
if (i == nvec)
nvec++;
idx[PCIE_PORT_SERVICE_AER_SHIFT] = i;
}
/*
* If nvec is equal to the allocated number of entries, we can just use
* what we have. Otherwise, the port has some extra entries not for the
* services we know and we need to work around that.
*/
if (nvec == nr_entries) {
status = 0;
} else {
/* Drop the temporary MSI-X setup */
pci_disable_msix(dev);
/* Now allocate the MSI-X vectors for real */
status = pci_enable_msix(dev, msix_entries, nvec);
if (status)
goto Exit;
}
for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
vectors[i] = idx[i] >= 0 ? msix_entries[idx[i]].vector : -1;
Exit:
kfree(msix_entries);
return status;
Error:
pci_disable_msix(dev);
goto Exit;
}
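The two config-space decodes in this function are easy to sanity-check in isolation. A user-space sketch with made-up register values (0x1f is assumed to be the value of PCIE_PORT_MSI_VECTOR_MASK, matching the 5-bit Interrupt Message Number field):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* PCIe Capabilities register: Interrupt Message Number in bits 13:9 */
	uint16_t reg16 = 0x0600;		/* sample value, bits 13:9 == 3 */
	int pme_entry = (reg16 >> 9) & 0x1f;	/* assumed PCIE_PORT_MSI_VECTOR_MASK */

	/* Root Error Status register: AER message number in bits 31:27 */
	uint32_t reg32 = 0x10000000;		/* sample value, bits 31:27 == 2 */
	int aer_entry = reg32 >> 27;

	printf("PME/HP use MSI-X table entry %d, AER uses entry %d\n",
	       pme_entry, aer_entry);
	return 0;
}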
/**
......@@ -64,47 +187,32 @@ static int is_msi_quirked(struct pci_dev *dev)
*/
static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask)
{
struct pcie_port_data *port_data = pci_get_drvdata(dev);
int irq, interrupt_mode = PCIE_PORT_NO_IRQ;
int i;
/* Check MSI quirk */
if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk)
goto Fallback;
/* Try to use MSI-X if supported */
if (!pcie_port_enable_msix(dev, vectors, mask))
return PCIE_PORT_MSIX_MODE;
/* We're not going to use MSI-X, so try MSI and fall back to INTx */
if (!pci_enable_msi(dev))
interrupt_mode = PCIE_PORT_MSI_MODE;
Fallback:
if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin)
interrupt_mode = PCIE_PORT_INTx_MODE;
irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1;
for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
vectors[i] = irq;
vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1;
return interrupt_mode;
}
......@@ -132,13 +240,11 @@ static int get_port_device_capability(struct pci_dev *dev)
pos + PCIE_SLOT_CAPABILITIES_REG, &reg32);
if (reg32 & SLOT_HP_CAPABLE_MASK)
services |= PCIE_PORT_SERVICE_HP;
}
/* AER capable */
if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
services |= PCIE_PORT_SERVICE_AER;
/* VC support */
if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
services |= PCIE_PORT_SERVICE_VC;
......@@ -152,20 +258,17 @@ static int get_port_device_capability(struct pci_dev *dev)
* @service_type: Type of service to associate with the service device
* @irq: Interrupt vector to associate with the service device
*/
static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev,
int service_type, int irq)
{
struct pcie_port_data *port_data = pci_get_drvdata(parent);
struct device *device;
int port_type = port_data->port_type;
dev->port = parent;
dev->irq = irq;
dev->service = service_type;
/* Initialize generic device interface */
device = &dev->device;
......@@ -185,10 +288,9 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev,
* @service_type: Type of service to associate with the service device
* @irq: Interrupt vector to associate with the service device
*/
static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
int service_type, int irq)
{
struct pcie_device *device;
......@@ -196,7 +298,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
if (!device)
return NULL;
pcie_device_init(parent, device, service_type, irq);
return device;
}
......@@ -230,63 +332,90 @@ int pcie_port_device_probe(struct pci_dev *dev)
*/
int pcie_port_device_register(struct pci_dev *dev)
{
struct pcie_port_data *port_data;
int status, capabilities, irq_mode, i, nr_serv;
int vectors[PCIE_PORT_DEVICE_MAXSERVICES];
u16 reg16;
/* Allocate port device extension */
port_data = kzalloc(sizeof(*port_data), GFP_KERNEL);
if (!port_data)
return -ENOMEM;
pci_set_drvdata(dev, port_data);
/* Get port type */
pci_read_config_word(dev,
pci_find_capability(dev, PCI_CAP_ID_EXP) +
PCIE_CAPABILITIES_REG, &reg16);
port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK;
/* Now get port services */
capabilities = get_port_device_capability(dev);
/* Root ports are capable of generating PME too */
if (port_data->port_type == PCIE_RC_PORT)
capabilities |= PCIE_PORT_SERVICE_PME;
irq_mode = assign_interrupt_mode(dev, vectors, capabilities);
if (irq_mode == PCIE_PORT_NO_IRQ) {
/*
* Don't use service devices that require interrupts if there is
* no way to generate them.
*/
if (!(capabilities & PCIE_PORT_SERVICE_VC)) {
status = -ENODEV;
goto Error;
}
capabilities = PCIE_PORT_SERVICE_VC;
}
port_data->port_irq_mode = irq_mode;
status = pci_enable_device(dev);
if (status)
goto Error;
pci_set_master(dev);
/* Allocate child services if any */
for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
for (i = 0, nr_serv = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
struct pcie_device *child;
int service = 1 << i;
if (!(capabilities & service))
continue;
child = alloc_pcie_device(dev, service, vectors[i]);
if (!child)
continue;
status = device_register(&child->device);
if (status) {
kfree(child);
continue;
}
get_device(&child->device);
nr_serv++;
}
if (!nr_serv) {
pci_disable_device(dev);
status = -ENODEV;
goto Error;
}
return 0;
Error:
kfree(port_data);
return status;
}
#ifdef CONFIG_PM
static int suspend_iter(struct device *dev, void *data)
{
struct pcie_port_service_driver *service_driver;
if ((dev->bus == &pcie_port_bus_type) &&
(dev->driver)) {
service_driver = to_service_driver(dev->driver);
if (service_driver->suspend)
service_driver->suspend(to_pcie_device(dev));
}
return 0;
}
......@@ -294,11 +423,10 @@ static int suspend_iter(struct device *dev, void *data)
/**
* pcie_port_device_suspend - suspend port services associated with a PCIe port
* @dev: PCI Express port to handle
*/
int pcie_port_device_suspend(struct device *dev)
{
return device_for_each_child(dev, NULL, suspend_iter);
}
static int resume_iter(struct device *dev, void *data)
......@@ -318,24 +446,17 @@ static int resume_iter(struct device *dev, void *data)
 * pcie_port_device_resume - resume port services associated with a PCIe port
* @dev: PCI Express port to handle
*/
int pcie_port_device_resume(struct device *dev)
{
return device_for_each_child(dev, NULL, resume_iter);
}
#endif /* PM */
static int remove_iter(struct device *dev, void *data)
{
if (dev->bus == &pcie_port_bus_type) {
put_device(dev);
device_unregister(dev);
}
return 0;
}
......@@ -349,25 +470,21 @@ static int remove_iter(struct device *dev, void *data)
*/
void pcie_port_device_remove(struct pci_dev *dev)
{
struct pcie_port_data *port_data = pci_get_drvdata(dev);
device_for_each_child(&dev->dev, NULL, remove_iter);
pci_disable_device(dev);
switch (port_data->port_irq_mode) {
case PCIE_PORT_MSIX_MODE:
pci_disable_msix(dev);
break;
case PCIE_PORT_MSI_MODE:
pci_disable_msi(dev);
break;
}
kfree(port_data);
}
/**
......@@ -392,7 +509,7 @@ static int pcie_port_probe_service(struct device *dev)
return -ENODEV;
pciedev = to_pcie_device(dev);
status = driver->probe(pciedev);
if (!status) {
dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n",
driver->name);
......
......@@ -32,11 +32,6 @@ MODULE_LICENSE("GPL");
/* global data */
static const char device_name[] = "pcieport-driver";
static int pcie_portdrv_restore_config(struct pci_dev *dev)
{
int retval;
......@@ -49,21 +44,21 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev)
}
#ifdef CONFIG_PM
static struct dev_pm_ops pcie_portdrv_pm_ops = {
.suspend = pcie_port_device_suspend,
.resume = pcie_port_device_resume,
.freeze = pcie_port_device_suspend,
.thaw = pcie_port_device_resume,
.poweroff = pcie_port_device_suspend,
.restore = pcie_port_device_resume,
};
#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops)
#else /* !PM */
#define PCIE_PORTDRV_PM_OPS NULL
#endif /* !PM */
/*
* pcie_portdrv_probe - Probe PCI-Express port devices
......@@ -82,20 +77,15 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
if (status)
return status;
if (!dev->irq && dev->pin) {
dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
"check vendor BIOS\n", dev->vendor, dev->device);
}
status = pcie_port_device_register(dev);
if (status)
return status;
pci_save_state(dev);
return 0;
}
......@@ -104,7 +94,6 @@ static void pcie_portdrv_remove (struct pci_dev *dev)
{
pcie_port_device_remove(dev);
pci_disable_device(dev);
}
static int error_detected_iter(struct device *device, void *data)
......@@ -278,10 +267,9 @@ static struct pci_driver pcie_portdriver = {
.probe = pcie_portdrv_probe,
.remove = pcie_portdrv_remove,
.err_handler = &pcie_portdrv_err_handler,
.driver.pm = PCIE_PORTDRV_PM_OPS,
};
static int __init pcie_portdrv_init(void)
......
......@@ -287,7 +287,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
struct resource *res;
int i;
if (!child->parent) /* It's a host bus, nothing to read */
return;
if (dev->transparent) {
......@@ -511,21 +511,21 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
/*
* If we already got to this bus through a different bridge,
* don't re-add it. This can happen with the i450NX chipset.
*
* However, we continue to descend down the hierarchy and
* scan remaining child buses.
*/
child = pci_find_bus(pci_domain_nr(bus), busnr);
if (!child) {
child = pci_add_new_bus(bus, dev, busnr);
if (!child)
goto out;
child->primary = buses & 0xFF;
child->subordinate = (buses >> 16) & 0xFF;
child->bridge_ctl = bctl;
}
cmax = pci_scan_child_bus(child);
if (cmax > max)
max = cmax;
......@@ -674,6 +674,19 @@ static void pci_read_irq(struct pci_dev *dev)
dev->irq = irq;
}
static void set_pcie_port_type(struct pci_dev *pdev)
{
int pos;
u16 reg16;
pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
if (!pos)
return;
pdev->is_pcie = 1;
pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
}
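PCI_EXP_FLAGS_TYPE is the 0x00f0 mask, so the shift by 4 extracts the PCI_EXP_TYPE_* port type from the capability flags word. A minimal user-space check with a sample flags value:

#include <stdio.h>
#include <stdint.h>

#define PCI_EXP_FLAGS_TYPE	0x00f0	/* device/port type, bits 7:4 */
#define PCI_EXP_TYPE_ROOT_PORT	0x4	/* root port */

int main(void)
{
	uint16_t reg16 = 0x0042;	/* sample flags word, type field == 4 */
	int pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;

	printf("pcie_type = %#x (root port? %s)\n", pcie_type,
	       pcie_type == PCI_EXP_TYPE_ROOT_PORT ? "yes" : "no");
	return 0;
}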
#define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED)
/**
......@@ -683,12 +696,33 @@ static void pci_read_irq(struct pci_dev *dev)
* Initialize the device structure with information about the device's
* vendor,class,memory and IO-space addresses,IRQ lines etc.
* Called at initialisation of the PCI subsystem and by CardBus services.
* Returns 0 on success and negative if unknown type of device (not normal,
* bridge or CardBus).
*/
int pci_setup_device(struct pci_dev *dev)
{
u32 class;
u8 hdr_type;
struct pci_slot *slot;
if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
return -EIO;
dev->sysdata = dev->bus->sysdata;
dev->dev.parent = dev->bus->bridge;
dev->dev.bus = &pci_bus_type;
dev->hdr_type = hdr_type & 0x7f;
dev->multifunction = !!(hdr_type & 0x80);
dev->error_state = pci_channel_io_normal;
set_pcie_port_type(dev);
list_for_each_entry(slot, &dev->bus->slots, list)
if (PCI_SLOT(dev->devfn) == slot->number)
dev->slot = slot;
/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
set this higher, assuming the system even supports it. */
dev->dma_mask = 0xffffffff;
dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
dev->bus->number, PCI_SLOT(dev->devfn),
......@@ -703,12 +737,14 @@ static int pci_setup_device(struct pci_dev * dev)
dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
dev->vendor, dev->device, class, dev->hdr_type);
/* need to have dev->class ready */
dev->cfg_size = pci_cfg_space_size(dev);
/* "Unknown power state" */
dev->current_state = PCI_UNKNOWN;
/* Early fixups, before probing the BARs */
pci_fixup_device(pci_fixup_early, dev);
class = dev->class >> 8;
switch (dev->hdr_type) { /* header type */
case PCI_HEADER_TYPE_NORMAL: /* standard header */
......@@ -770,7 +806,7 @@ static int pci_setup_device(struct pci_dev * dev)
default: /* unknown header */
dev_err(&dev->dev, "unknown header type %02x, "
"ignoring device\n", dev->hdr_type);
return -EIO;
bad:
dev_err(&dev->dev, "ignoring class %02x (doesn't match header "
......@@ -785,6 +821,7 @@ static int pci_setup_device(struct pci_dev * dev)
static void pci_release_capabilities(struct pci_dev *dev)
{
pci_vpd_release(dev);
pci_iov_release(dev);
}
/**
......@@ -803,19 +840,6 @@ static void pci_release_dev(struct device *dev)
kfree(pci_dev);
}
/**
* pci_cfg_space_size - get the configuration space size of the PCI device.
* @dev: PCI device
......@@ -847,6 +871,11 @@ int pci_cfg_space_size(struct pci_dev *dev)
{
int pos;
u32 status;
u16 class;
class = dev->class >> 8;
if (class == PCI_CLASS_BRIDGE_HOST)
return pci_cfg_space_size_ext(dev);
pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
if (!pos) {
......@@ -891,9 +920,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
u32 l;
int delay = 1;
if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
......@@ -920,34 +947,16 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
}
}
dev = alloc_pci_dev();
if (!dev)
return NULL;
dev->bus = bus;
dev->devfn = devfn;
dev->vendor = l & 0xffff;
dev->device = (l >> 16) & 0xffff;
if (pci_setup_device(dev)) {
kfree(dev);
return NULL;
}
......@@ -972,6 +981,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
/* Alternative Routing-ID Forwarding */
pci_enable_ari(dev);
/* Single Root I/O Virtualization */
pci_iov_init(dev);
}
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
......@@ -1006,6 +1018,12 @@ struct pci_dev *__ref pci_scan_single_device(struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
dev = pci_get_slot(bus, devfn);
if (dev) {
pci_dev_put(dev);
return dev;
}
dev = pci_scan_device(bus, devfn);
if (!dev)
return NULL;
......@@ -1024,35 +1042,27 @@ EXPORT_SYMBOL(pci_scan_single_device);
* Scan a PCI slot on the specified PCI bus for devices, adding
* discovered devices to the @bus->devices list. New devices
* will not have is_added set.
*
* Returns the number of new devices found.
*/
int pci_scan_slot(struct pci_bus *bus, int devfn)
{
int fn, nr = 0;
struct pci_dev *dev;
dev = pci_scan_single_device(bus, devfn);
if (dev && !dev->is_added) /* new device? */
nr++;
if ((dev && dev->multifunction) ||
(!dev && pcibios_scan_all_fns(bus, devfn))) {
for (fn = 1; fn < 8; fn++) {
dev = pci_scan_single_device(bus, devfn + fn);
if (dev) {
if (!dev->is_added)
nr++;
dev->multifunction = 1;
}
}
}
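Since a devfn packs the slot in bits 7:3 and the function in bits 2:0, scanning functions 1-7 of the same slot is plain devfn arithmetic. A user-space illustration using the standard macros:

#include <stdio.h>

#define PCI_DEVFN(slot, fn)	((((slot) & 0x1f) << 3) | ((fn) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
	int devfn = PCI_DEVFN(3, 0);	/* sample slot 3, function 0 */
	int fn;

	for (fn = 1; fn < 8; fn++)
		printf("function %d of slot %02x is devfn %#04x\n",
		       PCI_FUNC(devfn + fn), PCI_SLOT(devfn + fn), devfn + fn);
	return 0;
}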
......@@ -1074,12 +1084,21 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
for (devfn = 0; devfn < 0x100; devfn += 8)
pci_scan_slot(bus, devfn);
/* Reserve buses for SR-IOV capability. */
max += pci_iov_bus_range(bus);
/*
* After performing arch-dependent fixup of the bus, look behind
* all PCI-to-PCI bridges on this bus.
*/
pr_debug("PCI: Fixups for bus %04x:%02x\n", pci_domain_nr(bus), bus->number);
pcibios_fixup_bus(bus);
if (!bus->is_added) {
pr_debug("PCI: Fixups for bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
pcibios_fixup_bus(bus);
if (pci_is_root_bus(bus))
bus->is_added = 1;
}
for (pass=0; pass < 2; pass++)
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
......@@ -1114,7 +1133,7 @@ struct pci_bus * pci_create_bus(struct device *parent,
if (!b)
return NULL;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev){
kfree(b);
return NULL;
......@@ -1133,7 +1152,6 @@ struct pci_bus * pci_create_bus(struct device *parent,
list_add_tail(&b->node, &pci_root_buses);
up_write(&pci_bus_sem);
dev->parent = parent;
dev->release = pci_release_bus_bridge_dev;
dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus);
......@@ -1193,6 +1211,38 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
EXPORT_SYMBOL(pci_scan_bus_parented);
#ifdef CONFIG_HOTPLUG
/**
* pci_rescan_bus - scan a PCI bus for devices.
* @bus: PCI bus to scan
*
 * Scan a PCI bus and its child buses for new devices, add them,
 * and enable them.
 *
 * Returns the maximum subordinate bus number discovered.
*/
unsigned int __devinit pci_rescan_bus(struct pci_bus *bus)
{
unsigned int max;
struct pci_dev *dev;
max = pci_scan_child_bus(bus);
down_read(&pci_bus_sem);
list_for_each_entry(dev, &bus->devices, bus_list)
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
if (dev->subordinate)
pci_bus_size_bridges(dev->subordinate);
up_read(&pci_bus_sem);
pci_bus_assign_resources(bus);
pci_enable_bridges(bus);
pci_bus_add_devices(bus);
return max;
}
EXPORT_SYMBOL_GPL(pci_rescan_bus);
EXPORT_SYMBOL(pci_add_new_bus);
EXPORT_SYMBOL(pci_scan_slot);
EXPORT_SYMBOL(pci_scan_bridge);
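A sketch of how a hotplug-aware caller might use the new helper; the wrapper name is hypothetical and it assumes CONFIG_HOTPLUG and a valid parent bus:

#include <linux/pci.h>

/* Hypothetical caller: rescan the bus a given device sits on. */
static void example_rescan_parent_bus(struct pci_dev *pdev)
{
	unsigned int max = pci_rescan_bus(pdev->bus);

	dev_info(&pdev->dev, "rescan done, max subordinate bus %02x\n", max);
}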
......
......@@ -24,6 +24,7 @@
#include <linux/kallsyms.h>
#include <linux/dmi.h>
#include <linux/pci-aspm.h>
#include <linux/ioport.h>
#include "pci.h"
int isa_dma_bridge_buggy;
......@@ -34,6 +35,65 @@ int pcie_mch_quirk;
EXPORT_SYMBOL(pcie_mch_quirk);
#ifdef CONFIG_PCI_QUIRKS
/*
* This quirk function disables the device and releases the resources
* specified by the kernel's boot parameter 'pci=resource_alignment='.
* It also rounds the resource size up to the specified alignment.
* Later on, the kernel will assign page-aligned memory resources back
* to the device.
*/
static void __devinit quirk_resource_alignment(struct pci_dev *dev)
{
int i;
struct resource *r;
resource_size_t align, size;
if (!pci_is_reassigndev(dev))
return;
if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
(dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
dev_warn(&dev->dev,
"Can't reassign resources to host bridge.\n");
return;
}
dev_info(&dev->dev, "Disabling device and release resources.\n");
pci_disable_device(dev);
align = pci_specified_resource_alignment(dev);
for (i=0; i < PCI_BRIDGE_RESOURCES; i++) {
r = &dev->resource[i];
if (!(r->flags & IORESOURCE_MEM))
continue;
size = resource_size(r);
if (size < align) {
size = align;
dev_info(&dev->dev,
"Rounding up size of resource #%d to %#llx.\n",
i, (unsigned long long)size);
}
r->end = size - 1;
r->start = 0;
}
/* Need to disable the bridge's resource window
 * so that the kernel can assign a new resource
 * window later on.
 */
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
(dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
r = &dev->resource[i];
if (!(r->flags & IORESOURCE_MEM))
continue;
r->end = resource_size(r) - 1;
r->start = 0;
}
pci_disable_bridge_window(dev);
}
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment);
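The rounding step above only grows a resource to the requested alignment and never shrinks one. The same arithmetic in a stand-alone user-space sketch (alignment and sizes are made up):

#include <stdio.h>

int main(void)
{
	unsigned long long align = 0x100000;	/* sample requested alignment: 1 MiB */
	unsigned long long sizes[] = { 0x4000, 0x100000, 0x180000 };
	int i;

	for (i = 0; i < 3; i++) {
		unsigned long long size = sizes[i];

		if (size < align)
			size = align;	/* round small resources up */
		printf("size %#llx -> start 0, end %#llx\n", sizes[i], size - 1);
	}
	return 0;
}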
/* The Mellanox Tavor device gives false positive parity errors
* Mark this device with a broken_parity_status, to allow
* PCI scanning code to "skip" this now blacklisted device.
......@@ -1126,10 +1186,15 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev)
* its on-board VGA controller */
asus_hides_smbus = 1;
}
else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2)
switch (dev->subsystem_device) {
case 0x00b8: /* Compaq Evo D510 CMT */
case 0x00b9: /* Compaq Evo D510 SFF */
/* Motherboard doesn't have Host bridge
* subvendor/subdevice IDs and on-board VGA
* controller is disabled if an AGP card is
* inserted, therefore checking USB UHCI
* Controller #1 */
asus_hides_smbus = 1;
}
else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC)
......@@ -1154,7 +1219,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_2, asus_hides_smbus_hostbridge);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge);
static void asus_hides_smbus_lpc(struct pci_dev *dev)
......@@ -1664,9 +1729,13 @@ static void __devinit quirk_netmos(struct pci_dev *dev)
* of parallel ports and <S> is the number of serial ports.
*/
switch (dev->device) {
case PCI_DEVICE_ID_NETMOS_9835:
/* Well, this rule doesn't hold for the following 9835 device */
if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM &&
dev->subsystem_device == 0x0299)
return;
case PCI_DEVICE_ID_NETMOS_9735:
case PCI_DEVICE_ID_NETMOS_9745:
case PCI_DEVICE_ID_NETMOS_9845:
case PCI_DEVICE_ID_NETMOS_9855:
if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL &&
......@@ -2078,6 +2147,92 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_NVENET_15,
nvenet_msi_disable);
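/*
 * Returns 0 if @dev has no HT MSI mapping capability, 1 if a mapping
 * capability is present but not enabled, and 2 if an enabled mapping
 * capability is found.
 */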
static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
{
int pos, ttl = 48;
int found = 0;
/* check if there is HT MSI cap or enabled on this device */
pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
while (pos && ttl--) {
u8 flags;
if (found < 1)
found = 1;
if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
&flags) == 0) {
if (flags & HT_MSI_FLAGS_ENABLE) {
if (found < 2) {
found = 2;
break;
}
}
}
pos = pci_find_next_ht_capability(dev, pos,
HT_CAPTYPE_MSI_MAPPING);
}
return found;
}
static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge)
{
struct pci_dev *dev;
int pos;
int i, dev_no;
int found = 0;
dev_no = host_bridge->devfn >> 3;
for (i = dev_no + 1; i < 0x20; i++) {
dev = pci_get_slot(host_bridge->bus, PCI_DEVFN(i, 0));
if (!dev)
continue;
/* found the next host bridge? */
pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
if (pos != 0) {
pci_dev_put(dev);
break;
}
if (ht_check_msi_mapping(dev)) {
found = 1;
pci_dev_put(dev);
break;
}
pci_dev_put(dev);
}
return found;
}
#define PCI_HT_CAP_SLAVE_CTRL0 4 /* link control */
#define PCI_HT_CAP_SLAVE_CTRL1 8 /* link control to */
static int __devinit is_end_of_ht_chain(struct pci_dev *dev)
{
int pos, ctrl_off;
int end = 0;
u16 flags, ctrl;
pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
if (!pos)
goto out;
pci_read_config_word(dev, pos + PCI_CAP_FLAGS, &flags);
ctrl_off = ((flags >> 10) & 1) ?
PCI_HT_CAP_SLAVE_CTRL0 : PCI_HT_CAP_SLAVE_CTRL1;
pci_read_config_word(dev, pos + ctrl_off, &ctrl);
if (ctrl & (1 << 6))
end = 1;
out:
return end;
}
static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
{
struct pci_dev *host_bridge;
......@@ -2102,6 +2257,11 @@ static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
if (!found)
return;
/* don't enable end_device/host_bridge with leaf directly here */
if (host_bridge == dev && is_end_of_ht_chain(host_bridge) &&
host_bridge_with_leaf(host_bridge))
goto out;
/* root did that ! */
if (msi_ht_cap_enabled(host_bridge))
goto out;
......@@ -2132,44 +2292,12 @@ static void __devinit ht_disable_msi_mapping(struct pci_dev *dev)
}
}
static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all)
{
struct pci_dev *host_bridge;
int pos;
int found;
/* check if there is HT MSI cap or enabled on this device */
found = ht_check_msi_mapping(dev);
......@@ -2193,7 +2321,10 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
/* Host bridge is to HT */
if (found == 1) {
/* it is not enabled, try to enable it */
nv_ht_enable_msi_mapping(dev);
if (all)
ht_enable_msi_mapping(dev);
else
nv_ht_enable_msi_mapping(dev);
}
return;
}
......@@ -2205,8 +2336,20 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
/* Host bridge is not to HT, disable HT MSI mapping on this device */
ht_disable_msi_mapping(dev);
}
static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev)
{
return __nv_msi_ht_cap_quirk(dev, 1);
}
static void __devinit nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev)
{
return __nv_msi_ht_cap_quirk(dev, 0);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev)
{
......
......@@ -71,6 +71,9 @@ void pci_remove_bus(struct pci_bus *pci_bus)
down_write(&pci_bus_sem);
list_del(&pci_bus->node);
up_write(&pci_bus_sem);
if (!pci_bus->is_added)
return;
pci_remove_legacy_files(pci_bus);
device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
......@@ -92,6 +95,7 @@ EXPORT_SYMBOL(pci_remove_bus);
*/
void pci_remove_bus_device(struct pci_dev *dev)
{
pci_stop_bus_device(dev);
if (dev->subordinate) {
struct pci_bus *b = dev->subordinate;
......
......@@ -29,7 +29,7 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
if (pdev->is_pcie)
return NULL;
while (1) {
if (!pdev->bus->parent)
break;
pdev = pdev->bus->self;
/* a p2p bridge */
......
......@@ -27,7 +27,7 @@
#include <linux/slab.h>
static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
struct resource *res;
......@@ -144,6 +144,9 @@ static void pci_setup_bridge(struct pci_bus *bus)
struct pci_bus_region region;
u32 l, bu, lu, io_upper16;
if (!pci_is_root_bus(bus) && bus->is_added)
return;
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
......@@ -495,7 +498,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
}
EXPORT_SYMBOL(pci_bus_size_bridges);
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
struct pci_dev *dev;
......
......@@ -120,6 +120,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
return err;
}
#ifdef CONFIG_PCI_QUIRKS
void pci_disable_bridge_window(struct pci_dev *dev)
{
dev_dbg(&dev->dev, "Disabling bridge window.\n");
/* MMIO Base/Limit */
pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
/* Prefetchable MMIO Base/Limit */
pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
}
#endif /* CONFIG_PCI_QUIRKS */
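The magic constant works because PCI_MEMORY_BASE and PCI_MEMORY_LIMIT are adjacent 16-bit registers: one 32-bit write of 0x0000fff0 sets the base to 0xfff0 and the limit to 0x0000, and a base above the limit expresses an empty window. A user-space decode of that value (illustration only):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t v = 0x0000fff0;	/* dword written to PCI_MEMORY_BASE */
	uint16_t base = v & 0xffff;	/* 16-bit memory base register */
	uint16_t limit = v >> 16;	/* 16-bit memory limit register */
	/* upper 12 bits of each register become address bits 31:20 */
	uint32_t start = (uint32_t)(base & 0xfff0) << 16;
	uint32_t end = ((uint32_t)(limit & 0xfff0) << 16) | 0xfffff;

	printf("window %#x-%#x -> %s\n", start, end,
	       start > end ? "disabled (base above limit)" : "enabled");
	return 0;
}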
int pci_assign_resource(struct pci_dev *dev, int resno)
{
struct pci_bus *bus = dev->bus;
......
/*
* drivers/pci/slot.c
* Copyright (C) 2006 Matthew Wilcox <matthew@wil.cx>
* Copyright (C) 2006-2009 Hewlett-Packard Development Company, L.P.
* Alex Chiang <achiang@hp.com>
*/
#include <linux/kobject.h>
......@@ -52,8 +52,8 @@ static void pci_slot_release(struct kobject *kobj)
struct pci_dev *dev;
struct pci_slot *slot = to_pci_slot(kobj);
pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
slot->bus->number, slot->number);
dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n",
slot->number, pci_slot_name(slot));
list_for_each_entry(dev, &slot->bus->devices, bus_list)
if (PCI_SLOT(dev->devfn) == slot->number)
......@@ -248,9 +248,8 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
if (PCI_SLOT(dev->devfn) == slot_nr)
dev->slot = slot;
dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n",
slot_nr, pci_slot_name(slot));
out:
kfree(slot_name);
......@@ -299,9 +298,8 @@ EXPORT_SYMBOL_GPL(pci_renumber_slot);
*/
void pci_destroy_slot(struct pci_slot *slot)
{
pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__,
atomic_read(&slot->kobj.kref.refcount) - 1,
pci_domain_nr(slot->bus), slot->bus->number, slot->number);
dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n",
slot->number, atomic_read(&slot->kobj.kref.refcount) - 1);
down_write(&pci_bus_sem);
kobject_put(&slot->kobj);
......
......@@ -257,6 +257,40 @@ void __init acpi_no_s4_hw_signature(void);
void __init acpi_old_suspend_ordering(void);
void __init acpi_s4_no_nvs(void);
#endif /* CONFIG_PM_SLEEP */
#define OSC_QUERY_TYPE 0
#define OSC_SUPPORT_TYPE 1
#define OSC_CONTROL_TYPE 2
#define OSC_SUPPORT_MASKS 0x1f
/* _OSC DW0 Definition */
#define OSC_QUERY_ENABLE 1
#define OSC_REQUEST_ERROR 2
#define OSC_INVALID_UUID_ERROR 4
#define OSC_INVALID_REVISION_ERROR 8
#define OSC_CAPABILITIES_MASK_ERROR 16
/* _OSC DW1 Definition (OS Support Fields) */
#define OSC_EXT_PCI_CONFIG_SUPPORT 1
#define OSC_ACTIVE_STATE_PWR_SUPPORT 2
#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4
#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8
#define OSC_MSI_SUPPORT 16
/* _OSC DW1 Definition (OS Control Fields) */
#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1
#define OSC_SHPC_NATIVE_HP_CONTROL 2
#define OSC_PCI_EXPRESS_PME_CONTROL 4
#define OSC_PCI_EXPRESS_AER_CONTROL 8
#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16
#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \
OSC_SHPC_NATIVE_HP_CONTROL | \
OSC_PCI_EXPRESS_PME_CONTROL | \
OSC_PCI_EXPRESS_AER_CONTROL | \
OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags);
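A sketch of requesting a subset of the control bits through the new interface; the function name is hypothetical and handle acquisition is elided:

#include <linux/acpi.h>

/* Hypothetical caller: ask the firmware for native hotplug + AER control. */
static acpi_status example_request_osc_control(acpi_handle handle)
{
	u32 flags = OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |
		    OSC_PCI_EXPRESS_AER_CONTROL;

	return acpi_pci_osc_control_set(handle, flags);
}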
#else /* CONFIG_ACPI */
static inline int early_acpi_boot_init(void)
......
......@@ -20,20 +20,23 @@ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
struct msi_desc {
struct {
__u8 is_msix : 1;
__u8 multiple: 3; /* log2 number of messages */
__u8 maskbit : 1; /* mask-pending bit supported ? */
__u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */
__u8 pos; /* Location of the msi capability */
__u16 entry_nr; /* specific enabled entry */
unsigned default_irq; /* default pre-assigned irq */
} msi_attrib;
u32 masked; /* mask bits */
unsigned int irq;
struct list_head list;
union {
void __iomem *mask_base;
u8 mask_pos;
};
struct pci_dev *dev;
/* Last set MSI message */
......
......@@ -10,72 +10,25 @@
#include <linux/acpi.h>
#ifdef CONFIG_ACPI
static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
{
struct pci_bus *pbus = pdev->bus;
/* Find a PCI root bus */
while (pbus->parent)
pbus = pbus->parent;
return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
pbus->number);
}
static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
{
if (pbus->parent)
return DEVICE_ACPI_HANDLE(&(pbus->self->dev));
return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
pbus->number);
}
#else
static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
{ return NULL; }
#endif
......
......@@ -52,6 +52,7 @@
#include <asm/atomic.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/irqreturn.h>
/* Include the ID list */
#include <linux/pci_ids.h>
......@@ -93,6 +94,12 @@ enum {
/* #6: expansion ROM resource */
PCI_ROM_RESOURCE,
/* device specific resources */
#ifdef CONFIG_PCI_IOV
PCI_IOV_RESOURCES,
PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1,
#endif
/* resources assigned to buses behind the bridge */
#define PCI_BRIDGE_RESOURCE_NUM 4
......@@ -180,6 +187,7 @@ struct pci_cap_saved_state {
struct pcie_link_state;
struct pci_vpd;
struct pci_sriov;
/*
* The pci_dev structure is used to describe PCI devices.
......@@ -257,6 +265,8 @@ struct pci_dev {
unsigned int is_managed:1;
unsigned int is_pcie:1;
unsigned int state_saved:1;
unsigned int is_physfn:1;
unsigned int is_virtfn:1;
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
......@@ -270,6 +280,12 @@ struct pci_dev {
struct list_head msi_list;
#endif
struct pci_vpd *vpd;
#ifdef CONFIG_PCI_IOV
union {
struct pci_sriov *sriov; /* SR-IOV capability related */
struct pci_dev *physfn; /* the PF this VF is associated with */
};
#endif
};
extern struct pci_dev *alloc_pci_dev(void);
......@@ -341,6 +357,15 @@ struct pci_bus {
#define pci_bus_b(n) list_entry(n, struct pci_bus, node)
#define to_pci_bus(n) container_of(n, struct pci_bus, dev)
/*
* Returns true if the pci bus is root (behind host-pci bridge),
* false otherwise
*/
static inline bool pci_is_root_bus(struct pci_bus *pbus)
{
return !(pbus->parent);
}
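The predicate gives bus-tree walks a clean termination test, mirroring the pbus->parent loops used elsewhere in this series. A hypothetical helper as illustration:

/* Hypothetical helper: climb from any bus to the root bus of its tree. */
static struct pci_bus *example_find_root(struct pci_bus *bus)
{
	while (!pci_is_root_bus(bus))
		bus = bus->parent;	/* one step toward the host bridge */
	return bus;
}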
#ifdef CONFIG_PCI_MSI
static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
{
......@@ -528,7 +553,7 @@ void pcibios_update_irq(struct pci_dev *, int irq);
/* Generic PCI functions used internally */
extern struct pci_bus *pci_find_bus(int domain, int busnr);
void pci_bus_add_devices(const struct pci_bus *bus);
struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
struct pci_ops *ops, void *sysdata);
static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
......@@ -702,6 +727,9 @@ int pci_back_from_sleep(struct pci_dev *dev);
/* Functions for PCI Hotplug drivers to use */
int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
#ifdef CONFIG_HOTPLUG
unsigned int pci_rescan_bus(struct pci_bus *bus);
#endif
/* Vital product data routines */
ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
......@@ -709,7 +737,7 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void
int pci_vpd_truncate(struct pci_dev *dev, size_t size);
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
void pci_assign_unassigned_resources(void);
......@@ -790,7 +818,7 @@ struct msix_entry {
#ifndef CONFIG_PCI_MSI
static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
{
return -1;
}
......@@ -800,6 +828,10 @@ static inline void pci_msi_shutdown(struct pci_dev *dev)
static inline void pci_disable_msi(struct pci_dev *dev)
{ }
static inline int pci_msix_table_size(struct pci_dev *dev)
{
return 0;
}
static inline int pci_enable_msix(struct pci_dev *dev,
struct msix_entry *entries, int nvec)
{
......@@ -821,9 +853,10 @@ static inline int pci_msi_enabled(void)
return 0;
}
#else
extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
extern void pci_msi_shutdown(struct pci_dev *dev);
extern void pci_disable_msi(struct pci_dev *dev);
extern int pci_msix_table_size(struct pci_dev *dev);
extern int pci_enable_msix(struct pci_dev *dev,
struct msix_entry *entries, int nvec);
extern void pci_msix_shutdown(struct pci_dev *dev);
......@@ -842,6 +875,8 @@ static inline int pcie_aspm_enabled(void)
extern int pcie_aspm_enabled(void);
#endif
#define pci_enable_msi(pdev) pci_enable_msi_block(pdev, 1)
#ifdef CONFIG_HT_IRQ
/* The functions a driver should call */
int ht_create_irq(struct pci_dev *dev, int idx);
......@@ -1195,5 +1230,23 @@ int pci_ext_cfg_avail(struct pci_dev *dev);
void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
#ifdef CONFIG_PCI_IOV
extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
extern void pci_disable_sriov(struct pci_dev *dev);
extern irqreturn_t pci_sriov_migration(struct pci_dev *dev);
#else
static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{
return -ENODEV;
}
static inline void pci_disable_sriov(struct pci_dev *dev)
{
}
static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev)
{
return IRQ_NONE;
}
#endif
#endif /* __KERNEL__ */
#endif /* LINUX_PCI_H */
......@@ -2396,6 +2396,7 @@
#define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c
#define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0
#define PCI_DEVICE_ID_INTEL_82801DB_1 0x24c1
#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2
#define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3
#define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5
#define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6
......
......@@ -235,7 +235,7 @@
#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */
#define PCI_PM_CTRL 4 /* PM control and status register */
#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */
#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
......@@ -375,6 +375,7 @@
#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */
#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */
#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */
#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
#define PCI_EXP_DEVCAP 4 /* Device capabilities */
......@@ -487,6 +488,8 @@
#define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */
#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
#define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */
#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */
#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */
/* Extended Capabilities (PCI-X 2.0 and Express) */
#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff)
......@@ -498,6 +501,7 @@
#define PCI_EXT_CAP_ID_DSN 3
#define PCI_EXT_CAP_ID_PWR 4
#define PCI_EXT_CAP_ID_ARI 14
#define PCI_EXT_CAP_ID_SRIOV 16
/* Advanced Error Reporting */
#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
......@@ -615,4 +619,35 @@
#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */
#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */
/* Single Root I/O Virtualization */
#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */
#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */
#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */
#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */
#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */
#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */
#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */
#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */
#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */
#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */
#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */
#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */
#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */
#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */
#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */
#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */
#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */
#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */
#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */
#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */
#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */
#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/
#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */
#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */
#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */
#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */
#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */
#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */
#endif /* LINUX_PCI_REGS_H */
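First VF Offset and VF Stride define where VFs appear in routing-ID space: VF n sits at the PF's routing ID plus offset + (n - 1) * stride (ignoring bus-number overflow and ARI). A user-space sketch with invented offset/stride values; real ones come from config space at PCI_SRIOV_VF_OFFSET and PCI_SRIOV_VF_STRIDE:

#include <stdio.h>

int main(void)
{
	unsigned int pf_devfn = 0x00;	/* PF at device 00, function 0 */
	unsigned int offset = 1;	/* invented First VF Offset */
	unsigned int stride = 1;	/* invented VF Stride */
	unsigned int n;

	for (n = 1; n <= 4; n++) {
		unsigned int vf = pf_devfn + offset + (n - 1) * stride;

		printf("VF%u -> devfn %02x.%u\n", n, vf >> 3, vf & 7);
	}
	return 0;
}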
......@@ -16,29 +16,30 @@
#define PCIE_ANY_PORT 7
/* Service Type */
#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */
#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT)
#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */
#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT)
#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */
#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT)
#define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */
#define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT)
/* Root/Upstream/Downstream Port's Interrupt Mode */
#define PCIE_PORT_NO_IRQ (-1)
#define PCIE_PORT_INTx_MODE 0
#define PCIE_PORT_MSI_MODE 1
#define PCIE_PORT_MSIX_MODE 2
struct pcie_port_data {
int port_type; /* Type of the port */
int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */
};
struct pcie_device {
int irq; /* Service IRQ/MSI/MSI-X Vector */
struct pci_dev *port; /* Root/Upstream/Downstream Port */
u32 service; /* Port service this device represents */
void *priv_data; /* Service Private Data */
struct device device; /* Generic Device Interface */
};
......@@ -56,10 +57,9 @@ static inline void* get_service_data(struct pcie_device *dev)
struct pcie_port_service_driver {
const char *name;
int (*probe) (struct pcie_device *dev);
void (*remove) (struct pcie_device *dev);
int (*suspend) (struct pcie_device *dev);
int (*resume) (struct pcie_device *dev);
/* Service Error Recovery Handler */
......@@ -68,7 +68,9 @@ struct pcie_port_service_driver {
/* Link Reset Capability - AER service driver specific */
pci_ers_result_t (*reset_link) (struct pci_dev *dev);
int port_type; /* Type of the port this driver can handle */
u32 service; /* Port service this device represents */
struct device_driver driver;
};
#define to_service_driver(d) \
......