Commit ca36c36b authored by David VomLehn's avatar David VomLehn Committed by Ralf Baechle

MIPS: PowerTV: Use O(1) algorithm for phys_to_dma/dma_to_phys

Replace the phys_to_dma()/dma_to_phys() looping algorithm with an O(1)
algorithm. The approach taken is inspired by the sparse memory
implementation: take a certain number of high-order bits off the address,
then use this as an index into a table containing an offset to the desired
address and add it to the original value. There is a table for mapping
physical addresses to DMA addresses and another one for the reverse
mapping. The table sizes depend on how fine-grained the mappings need to
be; coarser granularity leads to smaller tables.  On a processor with
32-bit physical and DMA addresses, with 4 MiB granularity, memory usage is
two 2048-byte arrays. Each 32-byte cache line thus covers 64 MiB of
address space.

Also, renames phys_to_bus() to phys_to_dma() and bus_to_phys() to
dma_to_phys() to align with kernel usage.

[Ralf: Fixed silly build breakage due to stackoverflow warning caused by
huge array on stack.]
Signed-off-by: David VomLehn <dvomlehn@cisco.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/1257/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent 36f217d9
...@@ -65,21 +65,21 @@ static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, ...@@ -65,21 +65,21 @@ static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
size_t size) size_t size)
{ {
if (is_kseg2(addr)) if (is_kseg2(addr))
return phys_to_bus(virt_to_phys_from_pte(addr)); return phys_to_dma(virt_to_phys_from_pte(addr));
else else
return phys_to_bus(virt_to_phys(addr)); return phys_to_dma(virt_to_phys(addr));
} }
static inline dma_addr_t plat_map_dma_mem_page(struct device *dev, static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
struct page *page) struct page *page)
{ {
return phys_to_bus(page_to_phys(page)); return phys_to_dma(page_to_phys(page));
} }
static inline unsigned long plat_dma_addr_to_phys(struct device *dev, static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
dma_addr_t dma_addr) dma_addr_t dma_addr)
{ {
return bus_to_phys(dma_addr); return dma_to_phys(dma_addr);
} }
static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr, static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr,
......
...@@ -10,64 +10,101 @@ ...@@ -10,64 +10,101 @@
#define __ASM_MACH_POWERTV_IOREMAP_H #define __ASM_MACH_POWERTV_IOREMAP_H
#include <linux/types.h> #include <linux/types.h>
#include <linux/log2.h>
#include <linux/compiler.h>
#define LOW_MEM_BOUNDARY_PHYS 0x20000000 #include <asm/pgtable-bits.h>
#define LOW_MEM_BOUNDARY_MASK (~(LOW_MEM_BOUNDARY_PHYS - 1)) #include <asm/addrspace.h>
/* We're going to mess with bits, so get sizes */
#define IOR_BPC 8 /* Bits per char */
#define IOR_PHYS_BITS (IOR_BPC * sizeof(phys_addr_t))
#define IOR_DMA_BITS (IOR_BPC * sizeof(dma_addr_t))
/* /*
* The bus addresses are different than the physical addresses that * Define the granularity of physical/DMA mapping in terms of the number
* the processor sees by an offset. This offset varies by ASIC * of bits that defines the offset within a grain. These will be the
* version. Define a variable to hold the offset and some macros to * least significant bits of the address. The rest of a physical or DMA
* make the conversion simpler. */ * address will be used to index into an appropriate table to find the
extern unsigned long phys_to_bus_offset; * offset to add to the address to yield the corresponding DMA or physical
* address, respectively.
#ifdef CONFIG_HIGHMEM */
#define MEM_GAP_PHYS 0x60000000 #define IOR_LSBITS 22 /* Bits in a grain */
/* /*
* TODO: We will use the hard code for conversion between physical and * Compute the number of most significant address bits after removing those
* bus until the bootloader releases their device tree to us. * used for the offset within a grain and then compute the number of table
* entries for the conversion.
*/ */
#define phys_to_bus(x) (((x) < LOW_MEM_BOUNDARY_PHYS) ? \ #define IOR_PHYS_MSBITS (IOR_PHYS_BITS - IOR_LSBITS)
((x) + phys_to_bus_offset) : (x)) #define IOR_NUM_PHYS_TO_DMA ((phys_addr_t) 1 << IOR_PHYS_MSBITS)
#define bus_to_phys(x) (((x) < MEM_GAP_PHYS_ADDR) ? \
((x) - phys_to_bus_offset) : (x)) #define IOR_DMA_MSBITS (IOR_DMA_BITS - IOR_LSBITS)
#else #define IOR_NUM_DMA_TO_PHYS ((dma_addr_t) 1 << IOR_DMA_MSBITS)
#define phys_to_bus(x) ((x) + phys_to_bus_offset)
#define bus_to_phys(x) ((x) - phys_to_bus_offset)
#endif
/* /*
* Determine whether the address we are given is for an ASIC device * Define data structures used as elements in the arrays for the conversion
* Params: addr Address to check * between physical and DMA addresses. We do some slightly fancy math to
* Returns: Zero if the address is not for ASIC devices, non-zero * compute the width of the offset element of the conversion tables so
* if it is. * that we can have the smallest conversion tables. Next, round up the
* sizes to the next higher power of two, i.e. the offset element will have
* 8, 16, 32, 64, etc. bits. This eliminates the need to mask off any
* bits. Finally, we compute a shift value that puts the most significant
* bits of the offset into the most significant bits of the offset element.
* This makes it more efficient on processors without barrel shifters and
* easier to see the values if the conversion table is dumped in binary.
*/ */
static inline int asic_is_device_addr(phys_t addr) #define _IOR_OFFSET_WIDTH(n) (1 << order_base_2(n))
#define IOR_OFFSET_WIDTH(n) \
(_IOR_OFFSET_WIDTH(n) < 8 ? 8 : _IOR_OFFSET_WIDTH(n))
#define IOR_PHYS_OFFSET_BITS IOR_OFFSET_WIDTH(IOR_PHYS_MSBITS)
#define IOR_PHYS_SHIFT (IOR_PHYS_BITS - IOR_PHYS_OFFSET_BITS)
#define IOR_DMA_OFFSET_BITS IOR_OFFSET_WIDTH(IOR_DMA_MSBITS)
#define IOR_DMA_SHIFT (IOR_DMA_BITS - IOR_DMA_OFFSET_BITS)
struct ior_phys_to_dma {
dma_addr_t offset:IOR_DMA_OFFSET_BITS __packed
__aligned((IOR_DMA_OFFSET_BITS / IOR_BPC));
};
struct ior_dma_to_phys {
dma_addr_t offset:IOR_PHYS_OFFSET_BITS __packed
__aligned((IOR_PHYS_OFFSET_BITS / IOR_BPC));
};
extern struct ior_phys_to_dma _ior_phys_to_dma[IOR_NUM_PHYS_TO_DMA];
extern struct ior_dma_to_phys _ior_dma_to_phys[IOR_NUM_DMA_TO_PHYS];
static inline dma_addr_t _phys_to_dma_offset_raw(phys_addr_t phys)
{ {
return !((phys_t)addr & (phys_t) LOW_MEM_BOUNDARY_MASK); return (dma_addr_t)_ior_phys_to_dma[phys >> IOR_LSBITS].offset;
} }
/* static inline dma_addr_t _dma_to_phys_offset_raw(dma_addr_t dma)
* Determine whether the address we are given is external RAM mappable
* into KSEG1.
* Params: addr Address to check
* Returns: Zero if the address is not for external RAM and
*/
static inline int asic_is_lowmem_ram_addr(phys_t addr)
{ {
/* return (dma_addr_t)_ior_dma_to_phys[dma >> IOR_LSBITS].offset;
* The RAM always starts at the following address in the processor's }
* physical address space
*/
static const phys_t phys_ram_base = 0x10000000;
phys_t bus_ram_base;
bus_ram_base = phys_to_bus_offset + phys_ram_base; /* These are not portable and should not be used in drivers. Drivers should
* be using ioremap() and friends to map physical addresses to virtual
* addresses and dma_map*() and friends to map virtual addresses into DMA
* addresses and back.
*/
static inline dma_addr_t phys_to_dma(phys_addr_t phys)
{
return phys + (_phys_to_dma_offset_raw(phys) << IOR_PHYS_SHIFT);
}
return addr >= bus_ram_base && static inline phys_addr_t dma_to_phys(dma_addr_t dma)
addr < (bus_ram_base + (LOW_MEM_BOUNDARY_PHYS - phys_ram_base)); {
return dma + (_dma_to_phys_offset_raw(dma) << IOR_DMA_SHIFT);
} }
extern void ioremap_add_map(dma_addr_t phys, phys_addr_t alias,
dma_addr_t size);
/* /*
* Allow physical addresses to be fixed up to help peripherals located * Allow physical addresses to be fixed up to help peripherals located
* outside the low 32-bit range -- generic pass-through version. * outside the low 32-bit range -- generic pass-through version.
...@@ -77,10 +114,50 @@ static inline phys_t fixup_bigphys_addr(phys_t phys_addr, phys_t size) ...@@ -77,10 +114,50 @@ static inline phys_t fixup_bigphys_addr(phys_t phys_addr, phys_t size)
return phys_addr; return phys_addr;
} }
static inline void __iomem *plat_ioremap(phys_t offset, unsigned long size, /*
* Handle the special case of addresses the area aliased into the first
* 512 MiB of the processor's physical address space. These turn into either
* kseg0 or kseg1 addresses, depending on flags.
*/
static inline void __iomem *plat_ioremap(phys_t start, unsigned long size,
unsigned long flags) unsigned long flags)
{ {
return NULL; phys_addr_t start_offset;
void __iomem *result = NULL;
/* Start by checking to see whether this is an aliased address */
start_offset = _dma_to_phys_offset_raw(start);
/*
* If:
* o the memory is aliased into the first 512 MiB, and
* o the start and end are in the same RAM bank, and
* o we don't have a zero size or wrap around, and
* o we are supposed to create an uncached mapping,
* handle this as a kseg0 or kseg1 address
*/
if (start_offset != 0) {
phys_addr_t last;
dma_addr_t dma_to_phys_offset;
last = start + size - 1;
dma_to_phys_offset =
_dma_to_phys_offset_raw(last) << IOR_DMA_SHIFT;
if (dma_to_phys_offset == start_offset &&
size != 0 && start <= last) {
phys_t adjusted_start;
adjusted_start = start + start_offset;
if (flags == _CACHE_UNCACHED)
result = (void __iomem *) (unsigned long)
CKSEG1ADDR(adjusted_start);
else
result = (void __iomem *) (unsigned long)
CKSEG0ADDR(adjusted_start);
}
}
return result;
} }
static inline int plat_iounmap(const volatile void __iomem *addr) static inline int plat_iounmap(const volatile void __iomem *addr)
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
# under Linux. # under Linux.
# #
obj-y += init.o memory.o reset.o time.o powertv_setup.o asic/ pci/ obj-y += init.o ioremap.o memory.o powertv_setup.o reset.o time.o \
asic/ pci/
EXTRA_CFLAGS += -Wall -Werror EXTRA_CFLAGS += -Wall -Werror
...@@ -80,8 +80,8 @@ static bool usb_configured; ...@@ -80,8 +80,8 @@ static bool usb_configured;
* Don't recommend to use it directly, it is usually used by kernel internally. * Don't recommend to use it directly, it is usually used by kernel internally.
* Portable code should be using interfaces such as ioremp, dma_map_single, etc. * Portable code should be using interfaces such as ioremp, dma_map_single, etc.
*/ */
unsigned long phys_to_bus_offset; unsigned long phys_to_dma_offset;
EXPORT_SYMBOL(phys_to_bus_offset); EXPORT_SYMBOL(phys_to_dma_offset);
/* /*
* *
...@@ -533,10 +533,10 @@ void __init configure_platform(void) ...@@ -533,10 +533,10 @@ void __init configure_platform(void)
switch (asic) { switch (asic) {
case ASIC_ZEUS: case ASIC_ZEUS:
phys_to_bus_offset = 0x30000000; phys_to_dma_offset = 0x30000000;
break; break;
case ASIC_CALLIOPE: case ASIC_CALLIOPE:
phys_to_bus_offset = 0x10000000; phys_to_dma_offset = 0x10000000;
break; break;
case ASIC_CRONUSLITE: case ASIC_CRONUSLITE:
/* Fall through */ /* Fall through */
...@@ -546,10 +546,10 @@ void __init configure_platform(void) ...@@ -546,10 +546,10 @@ void __init configure_platform(void)
* 0x2XXXXXXX. If 0x10000000 aliases into 0x60000000- * 0x2XXXXXXX. If 0x10000000 aliases into 0x60000000-
* 0x6XXXXXXX, the offset should be 0x50000000, not 0x10000000. * 0x6XXXXXXX, the offset should be 0x50000000, not 0x10000000.
*/ */
phys_to_bus_offset = 0x10000000; phys_to_dma_offset = 0x10000000;
break; break;
default: default:
phys_to_bus_offset = 0x00000000; phys_to_dma_offset = 0x00000000;
break; break;
} }
} }
...@@ -603,7 +603,7 @@ void __init platform_alloc_bootmem(void) ...@@ -603,7 +603,7 @@ void __init platform_alloc_bootmem(void)
int size = gp_resources[i].end - gp_resources[i].start + 1; int size = gp_resources[i].end - gp_resources[i].start + 1;
if ((gp_resources[i].start != 0) && if ((gp_resources[i].start != 0) &&
((gp_resources[i].flags & IORESOURCE_MEM) != 0)) { ((gp_resources[i].flags & IORESOURCE_MEM) != 0)) {
reserve_bootmem(bus_to_phys(gp_resources[i].start), reserve_bootmem(dma_to_phys(gp_resources[i].start),
size, 0); size, 0);
total += gp_resources[i].end - total += gp_resources[i].end -
gp_resources[i].start + 1; gp_resources[i].start + 1;
...@@ -627,7 +627,7 @@ void __init platform_alloc_bootmem(void) ...@@ -627,7 +627,7 @@ void __init platform_alloc_bootmem(void)
else { else {
gp_resources[i].start = gp_resources[i].start =
phys_to_bus(virt_to_phys(mem)); phys_to_dma(virt_to_phys(mem));
gp_resources[i].end = gp_resources[i].end =
gp_resources[i].start + size - 1; gp_resources[i].start + size - 1;
total += size; total += size;
...@@ -691,7 +691,7 @@ static void __init pmem_setup_resource(void) ...@@ -691,7 +691,7 @@ static void __init pmem_setup_resource(void)
if (resource && pmemaddr && pmemlen) { if (resource && pmemaddr && pmemlen) {
/* The address provided by bootloader is in kseg0. Convert to /* The address provided by bootloader is in kseg0. Convert to
* a bus address. */ * a bus address. */
resource->start = phys_to_bus(pmemaddr - 0x80000000); resource->start = phys_to_dma(pmemaddr - 0x80000000);
resource->end = resource->start + pmemlen - 1; resource->end = resource->start + pmemlen - 1;
pr_info("persistent memory: start=0x%x end=0x%x\n", pr_info("persistent memory: start=0x%x end=0x%x\n",
......
/*
* ioremap.c
*
* Support for mapping between dma_addr_t values and phys_addr_t values.
*
* Copyright (C) 2005-2009 Scientific-Atlanta, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Author: David VomLehn <dvomlehn@cisco.com>
*
* Description: Defines the platform resources for the SA settop.
*
* NOTE: The bootloader allocates persistent memory at an address which is
* 16 MiB below the end of the highest address in KSEG0. All fixed
* address memory reservations must avoid this region.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/mach-powertv/ioremap.h>
/*
 * Define the sizes of and masks for grains in physical and DMA space. A
 * grain is (1 << IOR_LSBITS) bytes, and the corresponding mask selects the
 * offset-within-grain (least significant) bits of an address. The values
 * are the same for both address spaces but the types are not.
 */
#define IOR_PHYS_GRAIN ((phys_addr_t) 1 << IOR_LSBITS)
#define IOR_PHYS_GRAIN_MASK (IOR_PHYS_GRAIN - 1)
#define IOR_DMA_GRAIN ((dma_addr_t) 1 << IOR_LSBITS)
#define IOR_DMA_GRAIN_MASK (IOR_DMA_GRAIN - 1)
/*
 * Physical-to-DMA conversion table. Each entry is selected by the bits of
 * a phys_addr_t above the low IOR_LSBITS bits and holds a value that, once
 * shifted left by IOR_PHYS_SHIFT and added to that phys_addr_t value,
 * yields a DMA address.
 */
struct ior_phys_to_dma _ior_phys_to_dma[IOR_NUM_PHYS_TO_DMA];
EXPORT_SYMBOL(_ior_phys_to_dma);
/*
 * DMA-to-physical conversion table. Each entry is selected by the bits of
 * a dma_addr_t above the low IOR_LSBITS bits and holds a value that, once
 * shifted left by IOR_DMA_SHIFT and added to that dma_addr_t value, yields
 * a physical address.
 */
struct ior_dma_to_phys _ior_dma_to_phys[IOR_NUM_DMA_TO_PHYS];
EXPORT_SYMBOL(_ior_dma_to_phys);
/**
 * setup_dma_to_phys - set up conversion from DMA to physical addresses
 * @dma:	DMA address of the start of the section of memory
 * @delta:	Value that, when added to the DMA address, will yield the
 *		physical address
 * @s:		Number of bytes in the section of memory with the given delta
 *		between DMA and physical addresses. Expected to be non-zero;
 *		with @s == 0 the computed end of the range lies below @dma.
 *
 * Stores @delta, shifted right by IOR_DMA_SHIFT, in every _ior_dma_to_phys
 * entry whose grain is touched by the range @dma .. @dma + @s - 1.
 */
static void setup_dma_to_phys(dma_addr_t dma, phys_addr_t delta, dma_addr_t s)
{
	int dma_idx, first_idx, last_idx;
	/* These hold DMA addresses, so they must be dma_addr_t wide */
	dma_addr_t first, last;

	/*
	 * Calculate the first and last indices. Both ends of the range are
	 * rounded down to a grain boundary so that every grain the range
	 * touches is covered.
	 */
	first = dma & ~IOR_DMA_GRAIN_MASK;
	last = (dma + s - 1) & ~IOR_DMA_GRAIN_MASK;

	first_idx = first >> IOR_LSBITS;		/* Convert to indices */
	last_idx = last >> IOR_LSBITS;

	for (dma_idx = first_idx; dma_idx <= last_idx; dma_idx++)
		_ior_dma_to_phys[dma_idx].offset = delta >> IOR_DMA_SHIFT;
}
/**
 * setup_phys_to_dma - set up conversion from physical to DMA addresses
 * @phys:	Physical address of the start of the section of memory
 * @delta:	Value that, when added to the physical address, will yield
 *		the DMA address
 * @s:		Number of bytes in the section of memory with the given delta
 *		between physical and DMA addresses.
 *
 * Stores @delta, shifted right by IOR_PHYS_SHIFT, in every _ior_phys_to_dma
 * entry whose grain is touched by the range @phys .. @phys + @s - 1.
 */
static void setup_phys_to_dma(phys_addr_t phys, dma_addr_t delta, phys_addr_t s)
{
	phys_addr_t range_end = phys + s - 1;
	int idx, end_idx;

	/*
	 * Dropping the low IOR_LSBITS bits rounds each end of the range down
	 * to its grain and turns it into a table index in one step.
	 */
	idx = phys >> IOR_LSBITS;
	end_idx = range_end >> IOR_LSBITS;

	while (idx <= end_idx) {
		_ior_phys_to_dma[idx].offset = delta >> IOR_PHYS_SHIFT;
		idx++;
	}
}
/**
 * ioremap_add_map - add to the physical and DMA address conversion arrays
 * @phys:	Processor's view of the address of the start of the memory chunk
 * @dma:	DMA address of the start of the memory chunk
 * @size:	Size, in bytes, of the chunk of memory
 *
 * Does nothing when @size is zero. If @phys, @dma or @size is not a
 * multiple of the grain size, a critical message is logged but the mapping
 * is still installed.
 *
 * NOTE: It might be obvious, but the assumption is that all @size bytes have
 * the same offset between the physical address and the DMA address.
 */
void ioremap_add_map(phys_addr_t phys, phys_addr_t dma, phys_addr_t size)
{
	if (size == 0)
		return;

	/*
	 * phys_addr_t may be 32 or 64 bits wide depending on configuration,
	 * so widen the grain size explicitly to match the format specifier.
	 */
	if ((dma & IOR_DMA_GRAIN_MASK) != 0 ||
	    (phys & IOR_PHYS_GRAIN_MASK) != 0 ||
	    (size & IOR_PHYS_GRAIN_MASK) != 0)
		pr_crit("Memory allocation must be in chunks of 0x%llx bytes\n",
			(unsigned long long)IOR_PHYS_GRAIN);

	/* Install both directions; each delta is the negation of the other */
	setup_dma_to_phys(dma, phys - dma, size);
	setup_phys_to_dma(phys, dma - phys, size);
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment