Commit 254b6b89 authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 IOMMU & PCI updates

Update for the x86-64 PCI subsystem in 2.5.42.  The main new feature is
PCI IOMMU support through the K8 aperture, which makes it possible to use
more than 4GB of memory with 32-bit PCI devices.  Also some other PCI
changes, mostly merges from i386.
parent 0e97e2a2
/*
* Firmware replacement code.
*
* Work around broken BIOSes that don't set an aperture.
* The IOMMU code needs an aperture even when no AGP is present in the system.
* Map the aperture over some low memory. This is cheaper than doing bounce
* buffering. The memory is lost. This is done at early boot because only
* the bootmem allocator can allocate 32+MB.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
* $Id: aperture.c,v 1.2 2002/09/19 19:25:32 ak Exp $
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mmzone.h>
#include <linux/pci_ids.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/proto.h>
#include <asm/pci-direct.h>
int fallback_aper_order __initdata = 1; /* 64MB */
int fallback_aper_force __initdata = 0;
extern int no_iommu, force_mmu;
/* This code runs before the PCI subsystem is initialized, so just
access the northbridge directly. */
#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
static u32 __init allocate_aperture(void)
{
#ifdef CONFIG_DISCONTIGMEM
pg_data_t *nd0 = NODE_DATA(0);
#else
pg_data_t *nd0 = &contig_page_data;
#endif
u32 aper_size;
void *p;
if (fallback_aper_order > 7)
fallback_aper_order = 7;
aper_size = (32 * 1024 * 1024) << fallback_aper_order;
/*
 * The aperture has to be naturally aligned, it seems. This means a
 * 2GB aperture won't have much chance of succeeding in the lower 4GB of
 * memory. Unfortunately we cannot move it up because that would make
 * the IOMMU useless.
 */
p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0);
if (!p || __pa(p)+aper_size > 0xffffffff) {
printk("Cannot allocate aperture memory hole (%p,%uK)\n",
p, aper_size>>10);
if (p)
free_bootmem((unsigned long)p, aper_size);
return 0;
}
printk("Mapping aperture over %d KB of RAM @ %lx\n",
aper_size >> 10, __pa(p));
return (u32)__pa(p);
}
void __init iommu_hole_init(void)
{
int fix, num;
u32 aper_size, aper_alloc, aper_order;
u64 aper_base;
if (no_iommu)
return;
if (end_pfn < (0xffffffff>>PAGE_SHIFT) && !force_mmu)
return;
printk("Checking aperture...\n");
fix = 0;
for (num = 24; num < 32; num++) {
if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
continue;
aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
aper_size = (32 * 1024 * 1024) << aper_order;
aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
aper_base <<= 25;
printk("CPU %d: aperture @ %Lx size %u KB\n", num-24,
aper_base, aper_size>>10);
if (!aper_base || aper_base + aper_size >= 0xffffffff) {
fix = 1;
break;
}
if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
printk("Aperture pointing to e820 RAM. Ignoring.\n");
fix = 1;
break;
}
}
if (!fix && !fallback_aper_force)
return;
printk("Your BIOS is broken and doesn't leave a aperture memory hole\n");
aper_alloc = allocate_aperture();
if (!aper_alloc)
return;
for (num = 24; num < 32; num++) {
if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
continue;
/* Don't enable translation yet. That is done later.
Assume this BIOS didn't initialise the GART so
just overwrite all previous bits */
write_pci_config(0, num, 3, 0x90, fallback_aper_order<<1);
write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
}
}
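For reference, a standalone sketch (not part of the patch) of the register decoding that iommu_hole_init() performs on the K8 northbridge's function 3: the aperture size order sits in bits [3:1] of config offset 0x90, and the base is the low 15 bits of offset 0x94 in 32MB units. The example register values below are made up.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t reg90 = 0x4;	/* illustrative value: order field = 2 */
	uint32_t reg94 = 0x40;	/* illustrative value: base field = 0x40 */

	uint32_t order = (reg90 >> 1) & 7;		/* bits [3:1] */
	uint32_t size  = (32U * 1024 * 1024) << order;	/* 32MB << order */
	uint64_t base  = ((uint64_t)(reg94 & 0x7fff)) << 25; /* 32MB units */

	/* order 2 -> 128MB aperture; base field 0x40 -> 0x80000000 (2GB) */
	printf("aperture @ %#llx, size %u KB\n",
	       (unsigned long long)base, size >> 10);
	return 0;
}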
 /*
- * Dynamic DMA mapping support.
+ * Dynamic DMA mapping support. Common code
  */
 
 #include <linux/types.h>
@@ -8,24 +8,63 @@
 #include <linux/pci.h>
 #include <asm/io.h>
 
-void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
-			   dma_addr_t *dma_handle)
+dma_addr_t bad_dma_address = -1UL;
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA.  This is the scatter-gather version of the
+ * above pci_map_single interface.  Here the scatter-gather list
+ * elements are each tagged with the appropriate dma address
+ * and length.  They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+	       int nents, int direction)
 {
-	void *ret;
-	int gfp = GFP_ATOMIC;
+	int i;
 
-	gfp |= GFP_DMA;
-	ret = (void *)__get_free_pages(gfp, get_order(size));
+	BUG_ON(direction == PCI_DMA_NONE);
 
-	if (ret != NULL) {
-		memset(ret, 0, size);
-		*dma_handle = virt_to_phys(ret);
+	for (i = 0; i < nents; i++) {
+		struct scatterlist *s = &sg[i];
+		if (s->page) {
+			s->dma_address = pci_map_page(hwdev, s->page, s->offset,
+						      s->length, direction);
+		} else
+			BUG();
+		if (unlikely(s->dma_address == bad_dma_address))
+			goto error;
 	}
-	return ret;
+	return nents;
+
+ error:
+	pci_unmap_sg(hwdev, sg, i, direction);
+	return 0;
 }
 
-void pci_free_consistent(struct pci_dev *hwdev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
+/* Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+void pci_unmap_sg(struct pci_dev *dev, struct scatterlist *sg,
+		  int nents, int dir)
 {
-	free_pages((unsigned long)vaddr, get_order(size));
+	int i;
+
+	for (i = 0; i < nents; i++) {
+		struct scatterlist *s = &sg[i];
+		BUG_ON(s->page == NULL);
+		BUG_ON(s->dma_address == 0);
+		pci_unmap_single(dev, s->dma_address, s->length, dir);
+	}
 }
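A usage sketch (hypothetical driver fragment, not part of the patch) for the scatter-gather API above, honoring the documented rule that the returned count, not nents, bounds the usable address/length pairs; hw_queue_segment() is a placeholder for programming one device descriptor.

static int dev_start_io(struct pci_dev *pdev, struct scatterlist *sg,
			int nents)
{
	int i, count;

	count = pci_map_sg(pdev, sg, nents, PCI_DMA_FROMDEVICE);
	if (count == 0)
		return -ENOMEM;	/* mapping failed; nothing to unmap */

	for (i = 0; i < count; i++) {
		/* one DMA descriptor per mapped segment */
		hw_queue_segment(sg_dma_address(&sg[i]), sg_dma_len(&sg[i]));
	}

	/* ... later, once the device has finished with the buffers ... */
	pci_unmap_sg(pdev, sg, nents, PCI_DMA_FROMDEVICE);
	return 0;
}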
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/string.h>
/*
* Dummy IO MMU functions
*/
extern unsigned long end_pfn;
void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
dma_addr_t *dma_handle)
{
void *ret;
int gfp = GFP_ATOMIC;
if (hwdev == NULL ||
end_pfn > (hwdev->dma_mask>>PAGE_SHIFT) || /* XXX */
(u32)hwdev->dma_mask < 0xffffffff)
gfp |= GFP_DMA;
ret = (void *)__get_free_pages(gfp, get_order(size));
if (ret != NULL) {
memset(ret, 0, size);
*dma_handle = virt_to_bus(ret);
}
return ret;
}
void pci_free_consistent(struct pci_dev *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
free_pages((unsigned long)vaddr, get_order(size));
}
static void __init check_ram(void)
{
if (end_pfn >= 0xffffffff>>PAGE_SHIFT) {
printk(KERN_ERR "WARNING more than 4GB of memory but no IOMMU.\n"
KERN_ERR "WARNING 32bit PCI may malfunction.\n");
/* Could play with highmem_start_page here to trick some subsystems
into bounce buffers. Unfortunately that would require setting
CONFIG_HIGHMEM too.
*/
}
}
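A hypothetical probe fragment showing how a driver would sit on top of the nommu allocator above: declare the device's addressing ability first, then allocate; pci_alloc_consistent() picks GFP_DMA itself when the mask (or end_pfn) requires it. The 4096-byte ring and the device-programming step are placeholders.

static int mydev_probe_dma(struct pci_dev *pdev)
{
	void *ring;
	dma_addr_t ring_bus;

	if (pci_set_dma_mask(pdev, 0xffffffff))	/* 32-bit device */
		return -EIO;

	ring = pci_alloc_consistent(pdev, 4096, &ring_bus);
	if (!ring)
		return -ENOMEM;

	/* ... hand ring_bus to the hardware, use ring from the CPU ... */
	pci_free_consistent(pdev, 4096, ring, ring_bus);
	return 0;
}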
O_TARGET := pci.o
obj-y := x86-64.o
@@ -133,6 +133,10 @@ static int __init pcibios_init(void)
 	pcibios_resource_survey();
 
+#ifdef CONFIG_GART_IOMMU
+	pci_iommu_init();
+#endif
+
 	/* may eventually need to do ACPI sort here. */
 	return 0;
 }
@@ -185,11 +189,11 @@ unsigned int pcibios_assign_all_busses(void)
 	return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
 }
 
-int pcibios_enable_device(struct pci_dev *dev)
+int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	int err;
 
-	if ((err = pcibios_enable_resources(dev)) < 0)
+	if ((err = pcibios_enable_resources(dev, mask)) < 0)
 		return err;
 
 	return pcibios_enable_irq(dev);
@@ -41,19 +41,6 @@ static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
 	}
 }
 
-static void __devinit pci_fixup_ide_trash(struct pci_dev *d)
-{
-	int i;
-
-	/*
-	 * There exist PCI IDE controllers which have utter garbage
-	 * in first four base registers. Ignore that.
-	 */
-	DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name);
-	for(i=0; i<4; i++)
-		d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
-}
-
 struct pci_fixup pcibios_fixups[] = {
 	{ PCI_FIXUP_HEADER,	PCI_ANY_ID,	PCI_ANY_ID,	pci_fixup_ide_bases },
 	{ PCI_FIXUP_HEADER,	PCI_VENDOR_ID_NCR,	PCI_DEVICE_ID_NCR_53C810,	pci_fixup_ncr53c810 },
@@ -29,7 +29,7 @@ extern unsigned int pci_probe;
 extern unsigned int pcibios_max_latency;
 
 void pcibios_resource_survey(void);
-int pcibios_enable_resources(struct pci_dev *);
+int pcibios_enable_resources(struct pci_dev *, int);
 
 /* pci-pc.c */
@@ -243,7 +243,7 @@ void __init pcibios_resource_survey(void)
 	pcibios_assign_resources();
 }
 
-int pcibios_enable_resources(struct pci_dev *dev)
+int pcibios_enable_resources(struct pci_dev *dev, int mask)
 {
 	u16 cmd, old_cmd;
 	int idx;
@@ -252,6 +252,9 @@ int pcibios_enable_resources(struct pci_dev *dev)
 	pci_read_config_word(dev, PCI_COMMAND, &cmd);
 	old_cmd = cmd;
 	for(idx=0; idx<6; idx++) {
+		if (!(mask & (1<<idx)))
+			continue;
+
 		r = &dev->resource[idx];
 		if (!r->start && r->end) {
 			printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
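The new mask argument is one bit per BAR index: bit idx gates dev->resource[idx] in the loop above. A hypothetical caller that only needs, say, BARs 0 and 2 enabled would do:

/* Hypothetical caller fragment, not part of the patch. */
static int enable_bars_0_and_2(struct pci_dev *dev)
{
	int mask = (1 << 0) | (1 << 2);

	return pcibios_enable_device(dev, mask);
}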
#ifndef ASM_PCI_DIRECT_H
#define ASM_PCI_DIRECT_H 1
#include <linux/types.h>
#include <asm/io.h>
/* Direct PCI access. This is used for PCI accesses in early boot before
the PCI subsystem works. */
#define PDprintk(x...)
static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
{
u32 v;
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
v = inl(0xcfc);
PDprintk("%x reading from %x: %x\n", slot, offset, v);
return v;
}
static inline void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
u32 val)
{
PDprintk("%x writing to %x: %x\n", slot, offset, val);
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
outl(val, 0xcfc);
}
#endif
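These helpers implement PCI configuration mechanism #1 (address written to port 0xcf8, data read or written through 0xcfc). A minimal sketch of how early-boot code scans for the K8 northbridges with them, mirroring the loop in iommu_hole_init() above; the function name is illustrative:

#include <asm/pci-direct.h>
#include <linux/pci_ids.h>

/* Find each K8 northbridge (devices 24-31, function 3 on bus 0)
   before the PCI subsystem is up, using direct config reads. */
static int __init count_k8_northbridges(void)
{
	int num, found = 0;

	for (num = 24; num < 32; num++) {
		u32 id = read_pci_config(0, num, 3, 0x00);
		if (id == (PCI_VENDOR_ID_AMD | (0x1103 << 16)))
			found++;
	}
	return found;
}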
@@ -2,9 +2,15 @@
 #define __x8664_PCI_H
 
 #include <linux/config.h>
 #include <asm/io.h>
 
 #ifdef __KERNEL__
 
+#include <linux/mm.h>	/* for struct page */
+
+extern dma_addr_t bad_dma_address;
+
 /* Can be used to override the logic in pci_scan_bus for skipping
    already-configured bus numbers - to be used for buggy BIOSes
    or architectures with incomplete PCI setup by the loader */
@@ -23,6 +29,7 @@ void pcibios_config_init(void);
 struct pci_bus * pcibios_scan_root(int bus);
 extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
 extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
+
 void pcibios_set_master(struct pci_dev *dev);
 void pcibios_penalize_isa_irq(int irq);
 struct irq_routing_table *pcibios_get_irq_routing_table(void);
@@ -30,19 +37,16 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <asm/scatterlist.h>
 #include <linux/string.h>
 #include <asm/io.h>
 #include <asm/page.h>
 
 struct pci_dev;
 
-/* The PCI address space does equal the physical memory
- * address space. The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(1)
+extern int iommu_setup(char *opt);
+extern void pci_iommu_init(void);
 
 /* Allocate and map kernel buffer using consistent mode DMA for a device.
  * hwdev should be valid struct pci_dev pointer for PCI devices,
@@ -65,55 +69,95 @@ extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
 extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
 				void *vaddr, dma_addr_t dma_handle);
+#ifdef CONFIG_GART_IOMMU
+
 /* Map a single buffer of the indicated size for DMA in streaming mode.
  * The 32-bit bus address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory
  * until either pci_unmap_single or pci_dma_sync_single is performed.
  */
+extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+				 size_t size, int direction);
+
+extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr,
+			     size_t size, int direction);
+
+/*
+ * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical
+ * to pci_map_single, but takes a struct page instead of a virtual address
+ */
+#define pci_map_page(dev,page,offset,size,dir) \
+	pci_map_single((dev), page_address(page)+(offset), (size), (dir))
+
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	\
+	dma_addr_t ADDR_NAME;
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)		\
+	__u32 LEN_NAME;
+#define pci_unmap_addr(PTR, ADDR_NAME)		\
+	((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)	\
+	(((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME)		\
+	((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)	\
+	(((PTR)->LEN_NAME) = (VAL))
+
+static inline void pci_dma_sync_single(struct pci_dev *hwdev,
+				       dma_addr_t dma_handle,
+				       size_t size, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+}
+
+static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
+				   struct scatterlist *sg,
+				   int nelems, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+}
+
+#define PCI_DMA_BUS_IS_PHYS	0
+
+#else
 static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
 					size_t size, int direction)
 {
+	dma_addr_t addr;
+
 	if (direction == PCI_DMA_NONE)
-		BUG();
-	flush_write_buffers();
-	return virt_to_phys(ptr);
-}
+		out_of_line_bug();
+	addr = virt_to_bus(ptr);
 
-/* Unmap a single streaming mode DMA translation.  The dma_addr and size
- * must match what was provided for in a previous pci_map_single call.  All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
+	/*
+	 * This is gross, but what should I do.
+	 * Unfortunately drivers do not test the return value of this.
+	 */
+	if ((addr+size) & ~hwdev->dma_mask)
+		out_of_line_bug();
+	return addr;
+}
+
 static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
 				    size_t size, int direction)
 {
 	if (direction == PCI_DMA_NONE)
-		BUG();
+		out_of_line_bug();
 	/* Nothing to do */
 }
 /*
  * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical
  * to pci_map_single, but takes a struct page instead of a virtual address
  */
 static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
 				      unsigned long offset, size_t size, int direction)
 {
+	dma_addr_t addr;
+
 	if (direction == PCI_DMA_NONE)
-		BUG();
-	return (page - mem_map) * PAGE_SIZE + offset;
-}
-
-static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
-				  size_t size, int direction)
-{
-	if (direction == PCI_DMA_NONE)
-		BUG();
-	/* Nothing to do */
-}
+		out_of_line_bug();
+	addr = page_to_pfn(page) * PAGE_SIZE + offset;
+	if ((addr+size) & ~hwdev->dma_mask)
+		out_of_line_bug();
+	return addr;
+}
 
 /* pci_unmap_{page,single} is a nop so... */
@@ -124,52 +168,6 @@ static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
 #define pci_unmap_len(PTR, LEN_NAME)		(0)
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
 
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA.  This is the scatter-gather version of the
- * above pci_map_single interface.  Here the scatter-gather list
- * elements are each tagged with the appropriate dma address
- * and length.  They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
-			     int nents, int direction)
-{
-	int i;
-
-	if (direction == PCI_DMA_NONE)
-		BUG();
-
-	for (i = 0; i < nents; i++) {
-		if (!sg[i].page)
-			BUG();
-		sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
-	}
-	flush_write_buffers();
-	return nents;
-}
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
-				int nents, int direction)
-{
-	if (direction == PCI_DMA_NONE)
-		BUG();
-	/* Nothing to do */
-}
-
 /* Make physical memory consistent for a single
  * streaming mode DMA translation after a transfer.
  *
@@ -184,7 +182,7 @@ static inline void pci_dma_sync_single(struct pci_dev *hwdev,
 				       size_t size, int direction)
 {
 	if (direction == PCI_DMA_NONE)
-		BUG();
+		out_of_line_bug();
 	flush_write_buffers();
 }
@@ -199,10 +197,22 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
 				   int nelems, int direction)
 {
 	if (direction == PCI_DMA_NONE)
-		BUG();
+		out_of_line_bug();
 	flush_write_buffers();
 }
 
+#define PCI_DMA_BUS_IS_PHYS	1
+
+#endif
+
+extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+		      int nents, int direction);
+extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+			 int nents, int direction);
+
+#define pci_unmap_page pci_unmap_single
+
 /* Return whether the given PCI device DMA address mask can
  * be supported properly. For example, if your device can
  * only drive the low 24-bits during PCI bus mastering, then
@@ -234,9 +244,7 @@ pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offse
 static __inline__ struct page *
 pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
-	unsigned long poff = (dma_addr >> PAGE_SHIFT);
-
-	return mem_map + poff;
+	return virt_to_page(__va(dma_addr));
 }
 
 static __inline__ unsigned long
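The DECLARE_PCI_UNMAP_* macros above exist so a driver can carry unmap state at zero cost: they expand to real struct fields under CONFIG_GART_IOMMU and to nothing otherwise. A hypothetical driver fragment (not part of the patch) uses them like this:

struct tx_slot {
	void *buf;
	DECLARE_PCI_UNMAP_ADDR(mapping)
	DECLARE_PCI_UNMAP_LEN(len)
};

static void tx_slot_map(struct pci_dev *pdev, struct tx_slot *slot,
			size_t size)
{
	dma_addr_t h = pci_map_single(pdev, slot->buf, size,
				      PCI_DMA_TODEVICE);

	/* no-ops when the macros compile away in the no-IOMMU case */
	pci_unmap_addr_set(slot, mapping, h);
	pci_unmap_len_set(slot, len, size);
}

static void tx_slot_unmap(struct pci_dev *pdev, struct tx_slot *slot)
{
	pci_unmap_single(pdev, pci_unmap_addr(slot, mapping),
			 pci_unmap_len(slot, len), PCI_DMA_TODEVICE);
}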