Commit e030dbf9 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'ioat-md-accel-for-linus' of git://lost.foo-projects.org/~dwillia2/git/iop

* 'ioat-md-accel-for-linus' of git://lost.foo-projects.org/~dwillia2/git/iop: (28 commits)
  ioatdma: add the unisys "i/oat" pci vendor/device id
  ARM: Add drivers/dma to arch/arm/Kconfig
  iop3xx: surface the iop3xx DMA and AAU units to the iop-adma driver
  iop13xx: surface the iop13xx adma units to the iop-adma driver
  dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines
  md: remove raid5 compute_block and compute_parity5
  md: handle_stripe5 - request io processing in raid5_run_ops
  md: handle_stripe5 - add request/completion logic for async expand ops
  md: handle_stripe5 - add request/completion logic for async read ops
  md: handle_stripe5 - add request/completion logic for async check ops
  md: handle_stripe5 - add request/completion logic for async compute ops
  md: handle_stripe5 - add request/completion logic for async write ops
  md: common infrastructure for running operations with raid5_run_ops
  md: raid5_run_ops - run stripe operations outside sh->lock
  raid5: replace custom debug PRINTKs with standard pr_debug
  raid5: refactor handle_stripe5 and handle_stripe6 (v3)
  async_tx: add the async_tx api
  xor: make 'xor_blocks' a library routine for use with async_tx
  dmaengine: make clients responsible for managing channels
  dmaengine: refactor dmaengine around dma_async_tx_descriptor
  ...
parents 12a22960 3039f073
......@@ -433,6 +433,12 @@ tcp_workaround_signed_windows - BOOLEAN
not receive a window scaling option from them.
Default: 0
tcp_dma_copybreak - INTEGER
Lower limit, in bytes, of the size of socket reads that will be
offloaded to a DMA copy engine, if one is present in the system
and CONFIG_NET_DMA is enabled.
Default: 4096
CIPSOv4 Variables:
cipso_cache_enable - BOOLEAN
......
......@@ -1042,6 +1042,8 @@ source "drivers/mmc/Kconfig"
source "drivers/rtc/Kconfig"
source "drivers/dma/Kconfig"
endmenu
source "fs/Kconfig"
......
......@@ -25,6 +25,7 @@
#include <asm/hardware.h>
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/hardware/iop_adma.h>
#define IOP13XX_UART_XTAL 33334000
#define IOP13XX_SETUP_DEBUG 0
......@@ -236,19 +237,143 @@ static unsigned long iq8134x_probe_flash_size(void)
}
#endif
/* ADMA Channels */
static struct resource iop13xx_adma_0_resources[] = {
[0] = {
.start = IOP13XX_ADMA_PHYS_BASE(0),
.end = IOP13XX_ADMA_UPPER_PA(0),
.flags = IORESOURCE_MEM,
},
[1] = {
.start = IRQ_IOP13XX_ADMA0_EOT,
.end = IRQ_IOP13XX_ADMA0_EOT,
.flags = IORESOURCE_IRQ
},
[2] = {
.start = IRQ_IOP13XX_ADMA0_EOC,
.end = IRQ_IOP13XX_ADMA0_EOC,
.flags = IORESOURCE_IRQ
},
[3] = {
.start = IRQ_IOP13XX_ADMA0_ERR,
.end = IRQ_IOP13XX_ADMA0_ERR,
.flags = IORESOURCE_IRQ
}
};
static struct resource iop13xx_adma_1_resources[] = {
[0] = {
.start = IOP13XX_ADMA_PHYS_BASE(1),
.end = IOP13XX_ADMA_UPPER_PA(1),
.flags = IORESOURCE_MEM,
},
[1] = {
.start = IRQ_IOP13XX_ADMA1_EOT,
.end = IRQ_IOP13XX_ADMA1_EOT,
.flags = IORESOURCE_IRQ
},
[2] = {
.start = IRQ_IOP13XX_ADMA1_EOC,
.end = IRQ_IOP13XX_ADMA1_EOC,
.flags = IORESOURCE_IRQ
},
[3] = {
.start = IRQ_IOP13XX_ADMA1_ERR,
.end = IRQ_IOP13XX_ADMA1_ERR,
.flags = IORESOURCE_IRQ
}
};
static struct resource iop13xx_adma_2_resources[] = {
[0] = {
.start = IOP13XX_ADMA_PHYS_BASE(2),
.end = IOP13XX_ADMA_UPPER_PA(2),
.flags = IORESOURCE_MEM,
},
[1] = {
.start = IRQ_IOP13XX_ADMA2_EOT,
.end = IRQ_IOP13XX_ADMA2_EOT,
.flags = IORESOURCE_IRQ
},
[2] = {
.start = IRQ_IOP13XX_ADMA2_EOC,
.end = IRQ_IOP13XX_ADMA2_EOC,
.flags = IORESOURCE_IRQ
},
[3] = {
.start = IRQ_IOP13XX_ADMA2_ERR,
.end = IRQ_IOP13XX_ADMA2_ERR,
.flags = IORESOURCE_IRQ
}
};
static u64 iop13xx_adma_dmamask = DMA_64BIT_MASK;
static struct iop_adma_platform_data iop13xx_adma_0_data = {
.hw_id = 0,
.pool_size = PAGE_SIZE,
};
static struct iop_adma_platform_data iop13xx_adma_1_data = {
.hw_id = 1,
.pool_size = PAGE_SIZE,
};
static struct iop_adma_platform_data iop13xx_adma_2_data = {
.hw_id = 2,
.pool_size = PAGE_SIZE,
};
/* The ids are fixed up later in iop13xx_platform_init */
static struct platform_device iop13xx_adma_0_channel = {
.name = "iop-adma",
.id = 0,
.num_resources = 4,
.resource = iop13xx_adma_0_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
.coherent_dma_mask = DMA_64BIT_MASK,
.platform_data = (void *) &iop13xx_adma_0_data,
},
};
static struct platform_device iop13xx_adma_1_channel = {
.name = "iop-adma",
.id = 0,
.num_resources = 4,
.resource = iop13xx_adma_1_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
.coherent_dma_mask = DMA_64BIT_MASK,
.platform_data = (void *) &iop13xx_adma_1_data,
},
};
static struct platform_device iop13xx_adma_2_channel = {
.name = "iop-adma",
.id = 0,
.num_resources = 4,
.resource = iop13xx_adma_2_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
.coherent_dma_mask = DMA_64BIT_MASK,
.platform_data = (void *) &iop13xx_adma_2_data,
},
};
void __init iop13xx_map_io(void)
{
/* Initialize the Static Page Table maps */
iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc));
}
static int init_uart = 0;
static int init_i2c = 0;
static int init_uart;
static int init_i2c;
static int init_adma;
void __init iop13xx_platform_init(void)
{
int i;
u32 uart_idx, i2c_idx, plat_idx;
u32 uart_idx, i2c_idx, adma_idx, plat_idx;
struct platform_device *iop13xx_devices[IQ81340_MAX_PLAT_DEVICES];
/* set the bases so we can read the device id */
......@@ -294,6 +419,12 @@ void __init iop13xx_platform_init(void)
}
}
if (init_adma == IOP13XX_INIT_ADMA_DEFAULT) {
init_adma |= IOP13XX_INIT_ADMA_0;
init_adma |= IOP13XX_INIT_ADMA_1;
init_adma |= IOP13XX_INIT_ADMA_2;
}
plat_idx = 0;
uart_idx = 0;
i2c_idx = 0;
......@@ -332,6 +463,56 @@ void __init iop13xx_platform_init(void)
}
}
/* initialize adma channel ids and capabilities */
adma_idx = 0;
for (i = 0; i < IQ81340_NUM_ADMA; i++) {
struct iop_adma_platform_data *plat_data;
if ((init_adma & (1 << i)) && IOP13XX_SETUP_DEBUG)
printk(KERN_INFO
"Adding adma%d to platform device list\n", i);
switch (init_adma & (1 << i)) {
case IOP13XX_INIT_ADMA_0:
iop13xx_adma_0_channel.id = adma_idx++;
iop13xx_devices[plat_idx++] = &iop13xx_adma_0_channel;
plat_data = &iop13xx_adma_0_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
break;
case IOP13XX_INIT_ADMA_1:
iop13xx_adma_1_channel.id = adma_idx++;
iop13xx_devices[plat_idx++] = &iop13xx_adma_1_channel;
plat_data = &iop13xx_adma_1_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
break;
case IOP13XX_INIT_ADMA_2:
iop13xx_adma_2_channel.id = adma_idx++;
iop13xx_devices[plat_idx++] = &iop13xx_adma_2_channel;
plat_data = &iop13xx_adma_2_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
break;
}
}
#ifdef CONFIG_MTD_PHYSMAP
iq8134x_flash_resource.end = iq8134x_flash_resource.start +
iq8134x_probe_flash_size() - 1;
......@@ -399,5 +580,35 @@ static int __init iop13xx_init_i2c_setup(char *str)
return 1;
}
static int __init iop13xx_init_adma_setup(char *str)
{
if (str) {
while (*str != '\0') {
switch (*str) {
case '0':
init_adma |= IOP13XX_INIT_ADMA_0;
break;
case '1':
init_adma |= IOP13XX_INIT_ADMA_1;
break;
case '2':
init_adma |= IOP13XX_INIT_ADMA_2;
break;
case ',':
case '=':
break;
default:
PRINTK("\"iop13xx_init_adma\" malformed"
" at character: \'%c\'", *str);
*(str + 1) = '\0';
init_adma = IOP13XX_INIT_ADMA_DEFAULT;
}
str++;
}
}
return 1;
}
__setup("iop13xx_init_adma", iop13xx_init_adma_setup);
__setup("iop13xx_init_uart", iop13xx_init_uart_setup);
__setup("iop13xx_init_i2c", iop13xx_init_i2c_setup);
......@@ -180,6 +180,8 @@ static void __init glantank_init_machine(void)
platform_device_register(&iop3xx_i2c1_device);
platform_device_register(&glantank_flash_device);
platform_device_register(&glantank_serial_device);
platform_device_register(&iop3xx_dma_0_channel);
platform_device_register(&iop3xx_dma_1_channel);
pm_power_off = glantank_power_off;
}
......
......@@ -298,9 +298,14 @@ static void __init iq31244_init_machine(void)
platform_device_register(&iop3xx_i2c1_device);
platform_device_register(&iq31244_flash_device);
platform_device_register(&iq31244_serial_device);
platform_device_register(&iop3xx_dma_0_channel);
platform_device_register(&iop3xx_dma_1_channel);
if (is_ep80219())
pm_power_off = ep80219_power_off;
if (!is_80219())
platform_device_register(&iop3xx_aau_channel);
}
static int __init force_ep80219_setup(char *str)
......
......@@ -181,6 +181,9 @@ static void __init iq80321_init_machine(void)
platform_device_register(&iop3xx_i2c1_device);
platform_device_register(&iq80321_flash_device);
platform_device_register(&iq80321_serial_device);
platform_device_register(&iop3xx_dma_0_channel);
platform_device_register(&iop3xx_dma_1_channel);
platform_device_register(&iop3xx_aau_channel);
}
MACHINE_START(IQ80321, "Intel IQ80321")
......
......@@ -245,6 +245,8 @@ static void __init n2100_init_machine(void)
platform_device_register(&iop3xx_i2c0_device);
platform_device_register(&n2100_flash_device);
platform_device_register(&n2100_serial_device);
platform_device_register(&iop3xx_dma_0_channel);
platform_device_register(&iop3xx_dma_1_channel);
pm_power_off = n2100_power_off;
......
......@@ -136,6 +136,9 @@ static void __init iq80331_init_machine(void)
platform_device_register(&iop33x_uart0_device);
platform_device_register(&iop33x_uart1_device);
platform_device_register(&iq80331_flash_device);
platform_device_register(&iop3xx_dma_0_channel);
platform_device_register(&iop3xx_dma_1_channel);
platform_device_register(&iop3xx_aau_channel);
}
MACHINE_START(IQ80331, "Intel IQ80331")
......
......@@ -136,6 +136,9 @@ static void __init iq80332_init_machine(void)
platform_device_register(&iop33x_uart0_device);
platform_device_register(&iop33x_uart1_device);
platform_device_register(&iq80332_flash_device);
platform_device_register(&iop3xx_dma_0_channel);
platform_device_register(&iop3xx_dma_1_channel);
platform_device_register(&iop3xx_aau_channel);
}
MACHINE_START(IQ80332, "Intel IQ80332")
......
......@@ -12,6 +12,7 @@ obj-$(CONFIG_ARCH_IOP32X) += setup.o
obj-$(CONFIG_ARCH_IOP32X) += time.o
obj-$(CONFIG_ARCH_IOP32X) += io.o
obj-$(CONFIG_ARCH_IOP32X) += cp6.o
obj-$(CONFIG_ARCH_IOP32X) += adma.o
# IOP33X
obj-$(CONFIG_ARCH_IOP33X) += gpio.o
......@@ -21,6 +22,7 @@ obj-$(CONFIG_ARCH_IOP33X) += setup.o
obj-$(CONFIG_ARCH_IOP33X) += time.o
obj-$(CONFIG_ARCH_IOP33X) += io.o
obj-$(CONFIG_ARCH_IOP33X) += cp6.o
obj-$(CONFIG_ARCH_IOP33X) += adma.o
# IOP13XX
obj-$(CONFIG_ARCH_IOP13XX) += cp6.o
......
/*
* platform device definitions for the iop3xx dma/xor engines
* Copyright © 2006, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/platform_device.h>
#include <asm/hardware/iop3xx.h>
#include <linux/dma-mapping.h>
#include <asm/arch/adma.h>
#include <asm/hardware/iop_adma.h>
#ifdef CONFIG_ARCH_IOP32X
#define IRQ_DMA0_EOT IRQ_IOP32X_DMA0_EOT
#define IRQ_DMA0_EOC IRQ_IOP32X_DMA0_EOC
#define IRQ_DMA0_ERR IRQ_IOP32X_DMA0_ERR
#define IRQ_DMA1_EOT IRQ_IOP32X_DMA1_EOT
#define IRQ_DMA1_EOC IRQ_IOP32X_DMA1_EOC
#define IRQ_DMA1_ERR IRQ_IOP32X_DMA1_ERR
#define IRQ_AA_EOT IRQ_IOP32X_AA_EOT
#define IRQ_AA_EOC IRQ_IOP32X_AA_EOC
#define IRQ_AA_ERR IRQ_IOP32X_AA_ERR
#endif
#ifdef CONFIG_ARCH_IOP33X
#define IRQ_DMA0_EOT IRQ_IOP33X_DMA0_EOT
#define IRQ_DMA0_EOC IRQ_IOP33X_DMA0_EOC
#define IRQ_DMA0_ERR IRQ_IOP33X_DMA0_ERR
#define IRQ_DMA1_EOT IRQ_IOP33X_DMA1_EOT
#define IRQ_DMA1_EOC IRQ_IOP33X_DMA1_EOC
#define IRQ_DMA1_ERR IRQ_IOP33X_DMA1_ERR
#define IRQ_AA_EOT IRQ_IOP33X_AA_EOT
#define IRQ_AA_EOC IRQ_IOP33X_AA_EOC
#define IRQ_AA_ERR IRQ_IOP33X_AA_ERR
#endif
/* AAU and DMA Channels */
static struct resource iop3xx_dma_0_resources[] = {
[0] = {
.start = IOP3XX_DMA_PHYS_BASE(0),
.end = IOP3XX_DMA_UPPER_PA(0),
.flags = IORESOURCE_MEM,
},
[1] = {
.start = IRQ_DMA0_EOT,
.end = IRQ_DMA0_EOT,
.flags = IORESOURCE_IRQ
},
[2] = {
.start = IRQ_DMA0_EOC,
.end = IRQ_DMA0_EOC,
.flags = IORESOURCE_IRQ
},
[3] = {
.start = IRQ_DMA0_ERR,
.end = IRQ_DMA0_ERR,
.flags = IORESOURCE_IRQ
}
};
static struct resource iop3xx_dma_1_resources[] = {
[0] = {
.start = IOP3XX_DMA_PHYS_BASE(1),
.end = IOP3XX_DMA_UPPER_PA(1),
.flags = IORESOURCE_MEM,
},
[1] = {
.start = IRQ_DMA1_EOT,
.end = IRQ_DMA1_EOT,
.flags = IORESOURCE_IRQ
},
[2] = {
.start = IRQ_DMA1_EOC,
.end = IRQ_DMA1_EOC,
.flags = IORESOURCE_IRQ
},
[3] = {
.start = IRQ_DMA1_ERR,
.end = IRQ_DMA1_ERR,
.flags = IORESOURCE_IRQ
}
};
static struct resource iop3xx_aau_resources[] = {
[0] = {
.start = IOP3XX_AAU_PHYS_BASE,
.end = IOP3XX_AAU_UPPER_PA,
.flags = IORESOURCE_MEM,
},
[1] = {
.start = IRQ_AA_EOT,
.end = IRQ_AA_EOT,
.flags = IORESOURCE_IRQ
},
[2] = {
.start = IRQ_AA_EOC,
.end = IRQ_AA_EOC,
.flags = IORESOURCE_IRQ
},
[3] = {
.start = IRQ_AA_ERR,
.end = IRQ_AA_ERR,
.flags = IORESOURCE_IRQ
}
};
static u64 iop3xx_adma_dmamask = DMA_32BIT_MASK;
static struct iop_adma_platform_data iop3xx_dma_0_data = {
.hw_id = DMA0_ID,
.pool_size = PAGE_SIZE,
};
static struct iop_adma_platform_data iop3xx_dma_1_data = {
.hw_id = DMA1_ID,
.pool_size = PAGE_SIZE,
};
static struct iop_adma_platform_data iop3xx_aau_data = {
.hw_id = AAU_ID,
.pool_size = 3 * PAGE_SIZE,
};
struct platform_device iop3xx_dma_0_channel = {
.name = "iop-adma",
.id = 0,
.num_resources = 4,
.resource = iop3xx_dma_0_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
.coherent_dma_mask = DMA_64BIT_MASK,
.platform_data = (void *) &iop3xx_dma_0_data,
},
};
struct platform_device iop3xx_dma_1_channel = {
.name = "iop-adma",
.id = 1,
.num_resources = 4,
.resource = iop3xx_dma_1_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
.coherent_dma_mask = DMA_64BIT_MASK,
.platform_data = (void *) &iop3xx_dma_1_data,
},
};
struct platform_device iop3xx_aau_channel = {
.name = "iop-adma",
.id = 2,
.num_resources = 4,
.resource = iop3xx_aau_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
.coherent_dma_mask = DMA_64BIT_MASK,
.platform_data = (void *) &iop3xx_aau_data,
},
};
static int __init iop3xx_adma_cap_init(void)
{
#ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */
dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
#else
dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
#endif
#ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */
dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
#else
dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
#endif
#ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */
dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
#else
dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
#endif
return 0;
}
arch_initcall(iop3xx_adma_cap_init);
#
# Cryptographic API Configuration
# Generic algorithms support
#
config XOR_BLOCKS
tristate
#
# async_tx api: hardware offloaded memory transfer/transform support
#
source "crypto/async_tx/Kconfig"
#
# Cryptographic API Configuration
#
menu "Cryptographic options"
config CRYPTO
......
......@@ -50,3 +50,9 @@ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
#
# generic algorithms and the async_tx api
#
obj-$(CONFIG_XOR_BLOCKS) += xor.o
obj-$(CONFIG_ASYNC_CORE) += async_tx/
config ASYNC_CORE
tristate
config ASYNC_MEMCPY
tristate
select ASYNC_CORE
config ASYNC_XOR
tristate
select ASYNC_CORE
select XOR_BLOCKS
config ASYNC_MEMSET
tristate
select ASYNC_CORE
obj-$(CONFIG_ASYNC_CORE) += async_tx.o
obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
obj-$(CONFIG_ASYNC_XOR) += async_xor.o
/*
* copy offload engine support
*
* Copyright © 2006, Intel Corporation.
*
* Dan Williams <dan.j.williams@intel.com>
*
* with architecture considerations by:
* Neil Brown <neilb@suse.de>
* Jeff Garzik <jeff@garzik.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/async_tx.h>
/**
* async_memcpy - attempt to copy memory with a dma engine.
* @dest: destination page
* @src: src page
* @offset: offset in pages to start transaction
* @len: length in bytes
* @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
* ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST
* @depend_tx: memcpy depends on the result of this transaction
* @cb_fn: function to call when the memcpy completes
* @cb_param: parameter to pass to the callback routine
*/
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
unsigned int src_offset, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
struct dma_device *device = chan ? chan->device : NULL;
int int_en = cb_fn ? 1 : 0;
struct dma_async_tx_descriptor *tx = device ?
device->device_prep_dma_memcpy(chan, len,
int_en) : NULL;
if (tx) { /* run the memcpy asynchronously */
dma_addr_t addr;
enum dma_data_direction dir;
pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
DMA_NONE : DMA_FROM_DEVICE;
addr = dma_map_page(device->dev, dest, dest_offset, len, dir);
tx->tx_set_dest(addr, tx, 0);
dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
DMA_NONE : DMA_TO_DEVICE;
addr = dma_map_page(device->dev, src, src_offset, len, dir);
tx->tx_set_src(addr, tx, 0);
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
} else { /* run the memcpy synchronously */
void *dest_buf, *src_buf;
pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
/* wait for any prerequisite operations */
if (depend_tx) {
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(depend_tx->ack);
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
__FUNCTION__);
}
if (flags & ASYNC_TX_KMAP_DST)
dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
else
dest_buf = page_address(dest) + dest_offset;
if (flags & ASYNC_TX_KMAP_SRC)
src_buf = kmap_atomic(src, KM_USER0) + src_offset;
else
src_buf = page_address(src) + src_offset;
memcpy(dest_buf, src_buf, len);
if (flags & ASYNC_TX_KMAP_DST)
kunmap_atomic(dest_buf, KM_USER0);
if (flags & ASYNC_TX_KMAP_SRC)
kunmap_atomic(src_buf, KM_USER0);
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_memcpy);
static int __init async_memcpy_init(void)
{
return 0;
}
static void __exit async_memcpy_exit(void)
{
do { } while (0);
}
module_init(async_memcpy_init);
module_exit(async_memcpy_exit);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous memcpy api");
MODULE_LICENSE("GPL");
/*
* memory fill offload engine support
*
* Copyright © 2006, Intel Corporation.
*
* Dan Williams <dan.j.williams@intel.com>
*
* with architecture considerations by:
* Neil Brown <neilb@suse.de>
* Jeff Garzik <jeff@garzik.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/async_tx.h>
/**
* async_memset - attempt to fill memory with a dma engine.
* @dest: destination page
* @val: fill value
* @offset: offset in pages to start transaction
* @len: length in bytes
* @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
* @depend_tx: memset depends on the result of this transaction
* @cb_fn: function to call when the memcpy completes
* @cb_param: parameter to pass to the callback routine
*/
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset,
size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
struct dma_device *device = chan ? chan->device : NULL;
int int_en = cb_fn ? 1 : 0;
struct dma_async_tx_descriptor *tx = device ?
device->device_prep_dma_memset(chan, val, len,
int_en) : NULL;
if (tx) { /* run the memset asynchronously */
dma_addr_t dma_addr;
enum dma_data_direction dir;
pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
DMA_NONE : DMA_FROM_DEVICE;
dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
tx->tx_set_dest(dma_addr, tx, 0);
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
} else { /* run the memset synchronously */
void *dest_buf;
pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
dest_buf = (void *) (((char *) page_address(dest)) + offset);
/* wait for any prerequisite operations */
if (depend_tx) {
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(depend_tx->ack);
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
__FUNCTION__);
}
memset(dest_buf, val, len);
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_memset);
static int __init async_memset_init(void)
{
return 0;
}
static void __exit async_memset_exit(void)
{
do { } while (0);
}
module_init(async_memset_init);
module_exit(async_memset_exit);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous memset api");
MODULE_LICENSE("GPL");
/*
* core routines for the asynchronous memory transfer/transform api
*
* Copyright © 2006, Intel Corporation.
*
* Dan Williams <dan.j.williams@intel.com>
*
* with architecture considerations by:
* Neil Brown <neilb@suse.de>
* Jeff Garzik <jeff@garzik.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/kernel.h>
#include <linux/async_tx.h>
#ifdef CONFIG_DMA_ENGINE
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
struct dma_chan *chan, enum dma_state state);
static struct dma_client async_tx_dma = {
.event_callback = dma_channel_add_remove,
/* .cap_mask == 0 defaults to all channels */
};
/**
* dma_cap_mask_all - enable iteration over all operation types
*/
static dma_cap_mask_t dma_cap_mask_all;
/**
* chan_ref_percpu - tracks channel allocations per core/opertion
*/
struct chan_ref_percpu {
struct dma_chan_ref *ref;
};
static int channel_table_initialized;
static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
/**
* async_tx_lock - protect modification of async_tx_master_list and serialize
* rebalance operations
*/
static spinlock_t async_tx_lock;
static struct list_head
async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list);
/* async_tx_issue_pending_all - start all transactions on all channels */
void async_tx_issue_pending_all(void)
{
struct dma_chan_ref *ref;
rcu_read_lock();
list_for_each_entry_rcu(ref, &async_tx_master_list, node)
ref->chan->device->device_issue_pending(ref->chan);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
/* dma_wait_for_async_tx - spin wait for a transcation to complete
* @tx: transaction to wait on
*/
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
enum dma_status status;
struct dma_async_tx_descriptor *iter;
if (!tx)
return DMA_SUCCESS;
/* poll through the dependency chain, return when tx is complete */
do {
iter = tx;
while (iter->cookie == -EBUSY)
iter = iter->parent;
status = dma_sync_wait(iter->chan, iter->cookie);
} while (status == DMA_IN_PROGRESS || (iter != tx));
return status;
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
/* async_tx_run_dependencies - helper routine for dma drivers to process
* (start) dependent operations on their target channel
* @tx: transaction with dependencies
*/
void
async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
{
struct dma_async_tx_descriptor *dep_tx, *_dep_tx;
struct dma_device *dev;
struct dma_chan *chan;
list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list,
depend_node) {
chan = dep_tx->chan;
dev = chan->device;
/* we can't depend on ourselves */
BUG_ON(chan == tx->chan);
list_del(&dep_tx->depend_node);
tx->tx_submit(dep_tx);
/* we need to poke the engine as client code does not
* know about dependency submission events
*/
dev->device_issue_pending(chan);
}
}
EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
static void
free_dma_chan_ref(struct rcu_head *rcu)
{
struct dma_chan_ref *ref;
ref = container_of(rcu, struct dma_chan_ref, rcu);
kfree(ref);
}
static void
init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
{
INIT_LIST_HEAD(&ref->node);
INIT_RCU_HEAD(&ref->rcu);
ref->chan = chan;
atomic_set(&ref->count, 0);
}
/**
* get_chan_ref_by_cap - returns the nth channel of the given capability
* defaults to returning the channel with the desired capability and the
* lowest reference count if the index can not be satisfied
* @cap: capability to match
* @index: nth channel desired, passing -1 has the effect of forcing the
* default return value
*/
static struct dma_chan_ref *
get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
{
struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
rcu_read_lock();
list_for_each_entry_rcu(ref, &async_tx_master_list, node)
if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
if (!min_ref)
min_ref = ref;
else if (atomic_read(&ref->count) <
atomic_read(&min_ref->count))
min_ref = ref;
if (index-- == 0) {
ret_ref = ref;
break;
}
}
rcu_read_unlock();
if (!ret_ref)
ret_ref = min_ref;
if (ret_ref)
atomic_inc(&ret_ref->count);
return ret_ref;
}
/**
* async_tx_rebalance - redistribute the available channels, optimize
* for cpu isolation in the SMP case, and opertaion isolation in the
* uniprocessor case
*/
static void async_tx_rebalance(void)
{
int cpu, cap, cpu_idx = 0;
unsigned long flags;
if (!channel_table_initialized)
return;
spin_lock_irqsave(&async_tx_lock, flags);
/* undo the last distribution */
for_each_dma_cap_mask(cap, dma_cap_mask_all)
for_each_possible_cpu(cpu) {
struct dma_chan_ref *ref =
per_cpu_ptr(channel_table[cap], cpu)->ref;
if (ref) {
atomic_set(&ref->count, 0);
per_cpu_ptr(channel_table[cap], cpu)->ref =
NULL;
}
}
for_each_dma_cap_mask(cap, dma_cap_mask_all)
for_each_online_cpu(cpu) {
struct dma_chan_ref *new;
if (NR_CPUS > 1)
new = get_chan_ref_by_cap(cap, cpu_idx++);
else
new = get_chan_ref_by_cap(cap, -1);
per_cpu_ptr(channel_table[cap], cpu)->ref = new;
}
spin_unlock_irqrestore(&async_tx_lock, flags);
}
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
struct dma_chan *chan, enum dma_state state)
{
unsigned long found, flags;
struct dma_chan_ref *master_ref, *ref;
enum dma_state_client ack = DMA_DUP; /* default: take no action */
switch (state) {
case DMA_RESOURCE_AVAILABLE:
found = 0;
rcu_read_lock();
list_for_each_entry_rcu(ref, &async_tx_master_list, node)
if (ref->chan == chan) {
found = 1;
break;
}
rcu_read_unlock();
pr_debug("async_tx: dma resource available [%s]\n",
found ? "old" : "new");
if (!found)
ack = DMA_ACK;
else
break;
/* add the channel to the generic management list */
master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
if (master_ref) {
/* keep a reference until async_tx is unloaded */
dma_chan_get(chan);
init_dma_chan_ref(master_ref, chan);
spin_lock_irqsave(&async_tx_lock, flags);
list_add_tail_rcu(&master_ref->node,
&async_tx_master_list);
spin_unlock_irqrestore(&async_tx_lock,
flags);
} else {
printk(KERN_WARNING "async_tx: unable to create"
" new master entry in response to"
" a DMA_RESOURCE_ADDED event"
" (-ENOMEM)\n");
return 0;
}
async_tx_rebalance();
break;
case DMA_RESOURCE_REMOVED:
found = 0;
spin_lock_irqsave(&async_tx_lock, flags);
list_for_each_entry_rcu(ref, &async_tx_master_list, node)
if (ref->chan == chan) {
/* permit backing devices to go away */
dma_chan_put(ref->chan);
list_del_rcu(&ref->node);
call_rcu(&ref->rcu, free_dma_chan_ref);
found = 1;
break;
}
spin_unlock_irqrestore(&async_tx_lock, flags);
pr_debug("async_tx: dma resource removed [%s]\n",
found ? "ours" : "not ours");
if (found)
ack = DMA_ACK;
else
break;
async_tx_rebalance();
break;
case DMA_RESOURCE_SUSPEND:
case DMA_RESOURCE_RESUME:
printk(KERN_WARNING "async_tx: does not support dma channel"
" suspend/resume\n");
break;
default:
BUG();
}
return ack;
}
static int __init
async_tx_init(void)
{
enum dma_transaction_type cap;
spin_lock_init(&async_tx_lock);
bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
/* an interrupt will never be an explicit operation type.
* clearing this bit prevents allocation to a slot in 'channel_table'
*/
clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
for_each_dma_cap_mask(cap, dma_cap_mask_all) {
channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
if (!channel_table[cap])
goto err;
}
channel_table_initialized = 1;
dma_async_client_register(&async_tx_dma);
dma_async_client_chan_request(&async_tx_dma);
printk(KERN_INFO "async_tx: api initialized (async)\n");
return 0;
err:
printk(KERN_ERR "async_tx: initialization failure\n");
while (--cap >= 0)
free_percpu(channel_table[cap]);
return 1;
}
static void __exit async_tx_exit(void)
{
enum dma_transaction_type cap;
channel_table_initialized = 0;
for_each_dma_cap_mask(cap, dma_cap_mask_all)
if (channel_table[cap])
free_percpu(channel_table[cap]);
dma_async_client_unregister(&async_tx_dma);
}
/**
* async_tx_find_channel - find a channel to carry out the operation or let
* the transaction execute synchronously
* @depend_tx: transaction dependency
* @tx_type: transaction type
*/
struct dma_chan *
async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
enum dma_transaction_type tx_type)
{
/* see if we can keep the chain on one channel */
if (depend_tx &&
dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
return depend_tx->chan;
else if (likely(channel_table_initialized)) {
struct dma_chan_ref *ref;
int cpu = get_cpu();
ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
put_cpu();
return ref ? ref->chan : NULL;
} else
return NULL;
}
EXPORT_SYMBOL_GPL(async_tx_find_channel);
#else
static int __init async_tx_init(void)
{
printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
return 0;
}
static void __exit async_tx_exit(void)
{
do { } while (0);
}
#endif
void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
tx->callback = cb_fn;
tx->callback_param = cb_param;
/* set this new tx to run after depend_tx if:
* 1/ a dependency exists (depend_tx is !NULL)
* 2/ the tx can not be submitted to the current channel
*/
if (depend_tx && depend_tx->chan != chan) {
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(depend_tx->ack);
tx->parent = depend_tx;
spin_lock_bh(&depend_tx->lock);
list_add_tail(&tx->depend_node, &depend_tx->depend_list);
if (depend_tx->cookie == 0) {
struct dma_chan *dep_chan = depend_tx->chan;
struct dma_device *dep_dev = dep_chan->device;
dep_dev->device_dependency_added(dep_chan);
}
spin_unlock_bh(&depend_tx->lock);
/* schedule an interrupt to trigger the channel switch */
async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL);
} else {
tx->parent = NULL;
tx->tx_submit(tx);
}
if (flags & ASYNC_TX_ACK)
async_tx_ack(tx);
if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);
/**
* async_trigger_callback - schedules the callback function to be run after
* any dependent operations have been completed.
* @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
* @depend_tx: 'callback' requires the completion of this transaction
* @cb_fn: function to call after depend_tx completes
* @cb_param: parameter to pass to the callback routine
*/
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
struct dma_chan *chan;
struct dma_device *device;
struct dma_async_tx_descriptor *tx;
if (depend_tx) {
chan = depend_tx->chan;
device = chan->device;
/* see if we can schedule an interrupt
* otherwise poll for completion
*/
if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
device = NULL;
tx = device ? device->device_prep_dma_interrupt(chan) : NULL;
} else
tx = NULL;
if (tx) {
pr_debug("%s: (async)\n", __FUNCTION__);
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
} else {
pr_debug("%s: (sync)\n", __FUNCTION__);
/* wait for any prerequisite operations */
if (depend_tx) {
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(depend_tx->ack);
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
__FUNCTION__);
}
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_trigger_callback);
module_init(async_tx_init);
module_exit(async_tx_exit);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");
/*
* xor offload engine api
*
* Copyright © 2006, Intel Corporation.
*
* Dan Williams <dan.j.williams@intel.com>
*
* with architecture considerations by:
* Neil Brown <neilb@suse.de>
* Jeff Garzik <jeff@garzik.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/raid/xor.h>
#include <linux/async_tx.h>
static void
do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
struct dma_chan *chan, struct page *dest, struct page **src_list,
unsigned int offset, unsigned int src_cnt, size_t len,
enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
dma_addr_t dma_addr;
enum dma_data_direction dir;
int i;
pr_debug("%s: len: %zu\n", __FUNCTION__, len);
dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
DMA_NONE : DMA_FROM_DEVICE;
dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
tx->tx_set_dest(dma_addr, tx, 0);
dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
DMA_NONE : DMA_TO_DEVICE;
for (i = 0; i < src_cnt; i++) {
dma_addr = dma_map_page(device->dev, src_list[i],
offset, len, dir);
tx->tx_set_src(dma_addr, tx, i);
}
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
}
static void
do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
unsigned int src_cnt, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
void *_dest;
int i;
pr_debug("%s: len: %zu\n", __FUNCTION__, len);
/* reuse the 'src_list' array to convert to buffer pointers */
for (i = 0; i < src_cnt; i++)
src_list[i] = (struct page *)
(page_address(src_list[i]) + offset);
/* set destination address */
_dest = page_address(dest) + offset;
if (flags & ASYNC_TX_XOR_ZERO_DST)
memset(_dest, 0, len);
xor_blocks(src_cnt, len, _dest,
(void **) src_list);
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
}
/**
* async_xor - attempt to xor a set of blocks with a dma engine.
* xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
* flag must be set to not include dest data in the calculation. The
* assumption with dma eninges is that they only use the destination
* buffer as a source when it is explicity specified in the source list.
* @dest: destination page
* @src_list: array of source pages (if the dest is also a source it must be
* at index zero). The contents of this array may be overwritten.
* @offset: offset in pages to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
* @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
* ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
* @depend_tx: xor depends on the result of this transaction.
* @cb_fn: function to call when the xor completes
* @cb_param: parameter to pass to the callback routine
*/
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
dma_async_tx_callback _cb_fn;
void *_cb_param;
unsigned long local_flags;
int xor_src_cnt;
int i = 0, src_off = 0, int_en;
BUG_ON(src_cnt <= 1);
while (src_cnt) {
local_flags = flags;
if (device) { /* run the xor asynchronously */
xor_src_cnt = min(src_cnt, device->max_xor);
/* if we are submitting additional xors
* only set the callback on the last transaction
*/
if (src_cnt > xor_src_cnt) {
local_flags &= ~ASYNC_TX_ACK;
_cb_fn = NULL;
_cb_param = NULL;
} else {
_cb_fn = cb_fn;
_cb_param = cb_param;
}
int_en = _cb_fn ? 1 : 0;
tx = device->device_prep_dma_xor(
chan, xor_src_cnt, len, int_en);
if (tx) {
do_async_xor(tx, device, chan, dest,
&src_list[src_off], offset, xor_src_cnt, len,
local_flags, depend_tx, _cb_fn,
_cb_param);
} else /* fall through */
goto xor_sync;
} else { /* run the xor synchronously */
xor_sync:
/* in the sync case the dest is an implied source
* (assumes the dest is at the src_off index)
*/
if (flags & ASYNC_TX_XOR_DROP_DST) {
src_cnt--;
src_off++;
}
/* process up to 'MAX_XOR_BLOCKS' sources */
xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
/* if we are submitting additional xors
* only set the callback on the last transaction
*/
if (src_cnt > xor_src_cnt) {
local_flags &= ~ASYNC_TX_ACK;
_cb_fn = NULL;
_cb_param = NULL;
} else {
_cb_fn = cb_fn;
_cb_param = cb_param;
}
/* wait for any prerequisite operations */
if (depend_tx) {
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(depend_tx->ack);
if (dma_wait_for_async_tx(depend_tx) ==
DMA_ERROR)
panic("%s: DMA_ERROR waiting for "
"depend_tx\n",
__FUNCTION__);
}
do_sync_xor(dest, &src_list[src_off], offset,
xor_src_cnt, len, local_flags, depend_tx,
_cb_fn, _cb_param);
}
/* the previous tx is hidden from the client,
* so ack it
*/
if (i && depend_tx)
async_tx_ack(depend_tx);
depend_tx = tx;
if (src_cnt > xor_src_cnt) {
/* drop completed sources */
src_cnt -= xor_src_cnt;
src_off += xor_src_cnt;
/* unconditionally preserve the destination */
flags &= ~ASYNC_TX_XOR_ZERO_DST;
/* use the intermediate result a source, but remember
* it's dropped, because it's implied, in the sync case
*/
src_list[--src_off] = dest;
src_cnt++;
flags |= ASYNC_TX_XOR_DROP_DST;
} else
src_cnt = 0;
i++;
}
return tx;
}
EXPORT_SYMBOL_GPL(async_xor);
static int page_is_zero(struct page *p, unsigned int offset, size_t len)
{
char *a = page_address(p) + offset;
return ((*(u32 *) a) == 0 &&
memcmp(a, a + 4, len - 4) == 0);
}
/**
* async_xor_zero_sum - attempt a xor parity check with a dma engine.
* @dest: destination page used if the xor is performed synchronously
* @src_list: array of source pages. The dest page must be listed as a source
* at index zero. The contents of this array may be overwritten.
* @offset: offset in pages to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
* @result: 0 if sum == 0 else non-zero
* @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
* @depend_tx: xor depends on the result of this transaction.
* @cb_fn: function to call when the xor completes
* @cb_param: parameter to pass to the callback routine
*/
struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list,
unsigned int offset, int src_cnt, size_t len,
u32 *result, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
struct dma_device *device = chan ? chan->device : NULL;
int int_en = cb_fn ? 1 : 0;
struct dma_async_tx_descriptor *tx = device ?
device->device_prep_dma_zero_sum(chan, src_cnt, len, result,
int_en) : NULL;
int i;
BUG_ON(src_cnt <= 1);
if (tx) {
dma_addr_t dma_addr;
enum dma_data_direction dir;
pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
DMA_NONE : DMA_TO_DEVICE;
for (i = 0; i < src_cnt; i++) {
dma_addr = dma_map_page(device->dev, src_list[i],
offset, len, dir);
tx->tx_set_src(dma_addr, tx, i);
}
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
} else {
unsigned long xor_flags = flags;
pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
xor_flags |= ASYNC_TX_XOR_DROP_DST;
xor_flags &= ~ASYNC_TX_ACK;
tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
depend_tx, NULL, NULL);
if (tx) {
if (dma_wait_for_async_tx(tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for tx\n",
__FUNCTION__);
async_tx_ack(tx);
}
*result = page_is_zero(dest, offset, len) ? 0 : 1;
tx = NULL;
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_xor_zero_sum);
static int __init async_xor_init(void)
{
return 0;
}
static void __exit async_xor_exit(void)
{
do { } while (0);
}
module_init(async_xor_init);
module_exit(async_xor_exit);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
MODULE_LICENSE("GPL");
......@@ -26,32 +26,32 @@
static struct xor_block_template *active_template;
void
xor_block(unsigned int count, unsigned int bytes, void **ptr)
xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
{
unsigned long *p0, *p1, *p2, *p3, *p4;
unsigned long *p1, *p2, *p3, *p4;
p0 = (unsigned long *) ptr[0];
p1 = (unsigned long *) ptr[1];
if (count == 2) {
active_template->do_2(bytes, p0, p1);
p1 = (unsigned long *) srcs[0];
if (src_count == 1) {
active_template->do_2(bytes, dest, p1);
return;
}
p2 = (unsigned long *) ptr[2];
if (count == 3) {
active_template->do_3(bytes, p0, p1, p2);
p2 = (unsigned long *) srcs[1];
if (src_count == 2) {
active_template->do_3(bytes, dest, p1, p2);
return;
}
p3 = (unsigned long *) ptr[3];
if (count == 4) {
active_template->do_4(bytes, p0, p1, p2, p3);
p3 = (unsigned long *) srcs[2];
if (src_count == 3) {
active_template->do_4(bytes, dest, p1, p2, p3);
return;
}
p4 = (unsigned long *) ptr[4];
active_template->do_5(bytes, p0, p1, p2, p3, p4);
p4 = (unsigned long *) srcs[3];
active_template->do_5(bytes, dest, p1, p2, p3, p4);
}
EXPORT_SYMBOL(xor_blocks);
/* Set of all registered templates. */
static struct xor_block_template *template_list;
......@@ -78,7 +78,7 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
now = jiffies;
count = 0;
while (jiffies == now) {
mb();
mb(); /* prevent loop optimzation */
tmpl->do_2(BENCH_SIZE, b1, b2);
mb();
count++;
......@@ -91,26 +91,26 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
speed = max * (HZ * BENCH_SIZE / 1024);
tmpl->speed = speed;
printk(" %-10s: %5d.%03d MB/sec\n", tmpl->name,
printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name,
speed / 1000, speed % 1000);
}
static int
calibrate_xor_block(void)
static int __init
calibrate_xor_blocks(void)
{
void *b1, *b2;
struct xor_block_template *f, *fastest;
b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
if (! b1) {
printk("raid5: Yikes! No memory available.\n");
if (!b1) {
printk(KERN_WARNING "xor: Yikes! No memory available.\n");
return -ENOMEM;
}
b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
/*
* If this arch/cpu has a short-circuited selection, don't loop through all
* the possible functions, just test the best one
* If this arch/cpu has a short-circuited selection, don't loop through
* all the possible functions, just test the best one
*/
fastest = NULL;
......@@ -122,11 +122,12 @@ calibrate_xor_block(void)
#define xor_speed(templ) do_xor_speed((templ), b1, b2)
if (fastest) {
printk(KERN_INFO "raid5: automatically using best checksumming function: %s\n",
printk(KERN_INFO "xor: automatically using best "
"checksumming function: %s\n",
fastest->name);
xor_speed(fastest);
} else {
printk(KERN_INFO "raid5: measuring checksumming speed\n");
printk(KERN_INFO "xor: measuring software checksum speed\n");
XOR_TRY_TEMPLATES;
fastest = template_list;
for (f = fastest; f; f = f->next)
......@@ -134,7 +135,7 @@ calibrate_xor_block(void)
fastest = f;
}
printk("raid5: using function: %s (%d.%03d MB/sec)\n",
printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
fastest->name, fastest->speed / 1000, fastest->speed % 1000);
#undef xor_speed
......@@ -147,8 +148,8 @@ calibrate_xor_block(void)
static __exit void xor_exit(void) { }
EXPORT_SYMBOL(xor_block);
MODULE_LICENSE("GPL");
module_init(calibrate_xor_block);
/* when built-in xor.o must initialize before drivers/md/md.o */
core_initcall(calibrate_xor_blocks);
module_exit(xor_exit);
......@@ -8,8 +8,8 @@ menu "DMA Engine support"
config DMA_ENGINE
bool "Support for DMA engines"
---help---
DMA engines offload copy operations from the CPU to dedicated
hardware, allowing the copies to happen asynchronously.
DMA engines offload bulk memory operations from the CPU to dedicated
hardware, allowing the operations to happen asynchronously.
comment "DMA Clients"
......@@ -32,4 +32,12 @@ config INTEL_IOATDMA
---help---
Enable support for the Intel(R) I/OAT DMA engine.
config INTEL_IOP_ADMA
tristate "Intel IOP ADMA support"
depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX)
select ASYNC_CORE
default m
---help---
Enable support for the Intel(R) IOP Series RAID engines.
endmenu
obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
obj-$(CONFIG_NET_DMA) += iovlock.o
obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
......@@ -37,11 +37,11 @@
* Each device has a channels list, which runs unlocked but is never modified
* once the device is registered, it's just setup by the driver.
*
* Each client has a channels list, it's only modified under the client->lock
* and in an RCU callback, so it's safe to read under rcu_read_lock().
* Each client is responsible for keeping track of the channels it uses. See
* the definition of dma_event_callback in dmaengine.h.
*
* Each device has a kref, which is initialized to 1 when the device is
* registered. A kref_put is done for each class_device registered. When the
* registered. A kref_get is done for each class_device registered. When the
* class_device is released, the coresponding kref_put is done in the release
* method. Every time one of the device's channels is allocated to a client,
* a kref_get occurs. When the channel is freed, the coresponding kref_put
......@@ -51,14 +51,17 @@
* references to finish.
*
* Each channel has an open-coded implementation of Rusty Russell's "bigref,"
* with a kref and a per_cpu local_t. A single reference is set when on an
* ADDED event, and removed with a REMOVE event. Net DMA client takes an
* extra reference per outstanding transaction. The relase function does a
* kref_put on the device. -ChrisL
* with a kref and a per_cpu local_t. A dma_chan_get is called when a client
* signals that it wants to use a channel, and dma_chan_put is called when
* a channel is removed or a client using it is unregesitered. A client can
* take extra references per outstanding transaction, as is the case with
* the NET DMA client. The release function does a kref_put on the device.
* -ChrisL, DanW
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/device.h>
#include <linux/dmaengine.h>
#include <linux/hardirq.h>
......@@ -66,6 +69,7 @@
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/jiffies.h>
static DEFINE_MUTEX(dma_list_mutex);
static LIST_HEAD(dma_device_list);
......@@ -100,8 +104,19 @@ static ssize_t show_bytes_transferred(struct class_device *cd, char *buf)
static ssize_t show_in_use(struct class_device *cd, char *buf)
{
struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
int in_use = 0;
if (unlikely(chan->slow_ref) &&
atomic_read(&chan->refcount.refcount) > 1)
in_use = 1;
else {
if (local_read(&(per_cpu_ptr(chan->local,
get_cpu())->refcount)) > 0)
in_use = 1;
put_cpu();
}
return sprintf(buf, "%d\n", (chan->client ? 1 : 0));
return sprintf(buf, "%d\n", in_use);
}
static struct class_device_attribute dma_class_attrs[] = {
......@@ -127,43 +142,72 @@ static struct class dma_devclass = {
/* --- client and device registration --- */
#define dma_chan_satisfies_mask(chan, mask) \
__dma_chan_satisfies_mask((chan), &(mask))
static int
__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want)
{
dma_cap_mask_t has;
bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits,
DMA_TX_TYPE_END);
return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
}
/**
* dma_client_chan_alloc - try to allocate a channel to a client
* dma_client_chan_alloc - try to allocate channels to a client
* @client: &dma_client
*
* Called with dma_list_mutex held.
*/
static struct dma_chan *dma_client_chan_alloc(struct dma_client *client)
static void dma_client_chan_alloc(struct dma_client *client)
{
struct dma_device *device;
struct dma_chan *chan;
unsigned long flags;
int desc; /* allocated descriptor count */
enum dma_state_client ack;
/* Find a channel, any DMA engine will do */
list_for_each_entry(device, &dma_device_list, global_node) {
/* Find a channel */
list_for_each_entry(device, &dma_device_list, global_node)
list_for_each_entry(chan, &device->channels, device_node) {
if (chan->client)
if (!dma_chan_satisfies_mask(chan, client->cap_mask))
continue;
desc = chan->device->device_alloc_chan_resources(chan);
if (desc >= 0) {
ack = client->event_callback(client,
chan,
DMA_RESOURCE_AVAILABLE);
/* we are done once this client rejects
* an available resource
*/
if (ack == DMA_ACK) {
dma_chan_get(chan);
kref_get(&device->refcount);
kref_init(&chan->refcount);
chan->slow_ref = 0;
INIT_RCU_HEAD(&chan->rcu);
chan->client = client;
spin_lock_irqsave(&client->lock, flags);
list_add_tail_rcu(&chan->client_node,
&client->channels);
spin_unlock_irqrestore(&client->lock, flags);
return chan;
} else if (ack == DMA_NAK)
return;
}
}
}
enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
{
enum dma_status status;
unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
dma_async_issue_pending(chan);
do {
status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
printk(KERN_ERR "dma_sync_wait_timeout!\n");
return DMA_ERROR;
}
} while (status == DMA_IN_PROGRESS);
return NULL;
return status;
}
EXPORT_SYMBOL(dma_sync_wait);
/**
* dma_chan_cleanup - release a DMA channel's resources
......@@ -173,7 +217,6 @@ void dma_chan_cleanup(struct kref *kref)
{
struct dma_chan *chan = container_of(kref, struct dma_chan, refcount);
chan->device->device_free_chan_resources(chan);
chan->client = NULL;
kref_put(&chan->device->refcount, dma_async_device_cleanup);
}
EXPORT_SYMBOL(dma_chan_cleanup);
......@@ -189,7 +232,7 @@ static void dma_chan_free_rcu(struct rcu_head *rcu)
kref_put(&chan->refcount, dma_chan_cleanup);
}
static void dma_client_chan_free(struct dma_chan *chan)
static void dma_chan_release(struct dma_chan *chan)
{
atomic_add(0x7FFFFFFF, &chan->refcount.refcount);
chan->slow_ref = 1;
......@@ -197,70 +240,57 @@ static void dma_client_chan_free(struct dma_chan *chan)
}
/**
* dma_chans_rebalance - reallocate channels to clients
*
* When the number of DMA channel in the system changes,
* channels need to be rebalanced among clients.
* dma_chans_notify_available - broadcast available channels to the clients
*/
static void dma_chans_rebalance(void)
static void dma_clients_notify_available(void)
{
struct dma_client *client;
struct dma_chan *chan;
unsigned long flags;
mutex_lock(&dma_list_mutex);
list_for_each_entry(client, &dma_client_list, global_node) {
while (client->chans_desired > client->chan_count) {
chan = dma_client_chan_alloc(client);
if (!chan)
break;
client->chan_count++;
client->event_callback(client,
chan,
DMA_RESOURCE_ADDED);
}
while (client->chans_desired < client->chan_count) {
spin_lock_irqsave(&client->lock, flags);
chan = list_entry(client->channels.next,
struct dma_chan,
client_node);
list_del_rcu(&chan->client_node);
spin_unlock_irqrestore(&client->lock, flags);
client->chan_count--;
client->event_callback(client,
chan,
DMA_RESOURCE_REMOVED);
dma_client_chan_free(chan);
}
}
list_for_each_entry(client, &dma_client_list, global_node)
dma_client_chan_alloc(client);
mutex_unlock(&dma_list_mutex);
}
/**
* dma_async_client_register - allocate and register a &dma_client
* @event_callback: callback for notification of channel addition/removal
* dma_chans_notify_available - tell the clients that a channel is going away
* @chan: channel on its way out
*/
struct dma_client *dma_async_client_register(dma_event_callback event_callback)
static void dma_clients_notify_removed(struct dma_chan *chan)
{
struct dma_client *client;
enum dma_state_client ack;
client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client)
return NULL;
mutex_lock(&dma_list_mutex);
list_for_each_entry(client, &dma_client_list, global_node) {
ack = client->event_callback(client, chan,
DMA_RESOURCE_REMOVED);
INIT_LIST_HEAD(&client->channels);
spin_lock_init(&client->lock);
client->chans_desired = 0;
client->chan_count = 0;
client->event_callback = event_callback;
/* client was holding resources for this channel so
* free it
*/
if (ack == DMA_ACK) {
dma_chan_put(chan);
kref_put(&chan->device->refcount,
dma_async_device_cleanup);
}
}
mutex_unlock(&dma_list_mutex);
}
/**
* dma_async_client_register - register a &dma_client
* @client: ptr to a client structure with valid 'event_callback' and 'cap_mask'
*/
void dma_async_client_register(struct dma_client *client)
{
mutex_lock(&dma_list_mutex);
list_add_tail(&client->global_node, &dma_client_list);
mutex_unlock(&dma_list_mutex);
return client;
}
EXPORT_SYMBOL(dma_async_client_register);
......@@ -272,40 +302,42 @@ EXPORT_SYMBOL(dma_async_client_register);
*/
void dma_async_client_unregister(struct dma_client *client)
{
struct dma_device *device;
struct dma_chan *chan;
enum dma_state_client ack;
if (!client)
return;
rcu_read_lock();
list_for_each_entry_rcu(chan, &client->channels, client_node)
dma_client_chan_free(chan);
rcu_read_unlock();
mutex_lock(&dma_list_mutex);
/* free all channels the client is holding */
list_for_each_entry(device, &dma_device_list, global_node)
list_for_each_entry(chan, &device->channels, device_node) {
ack = client->event_callback(client, chan,
DMA_RESOURCE_REMOVED);
if (ack == DMA_ACK) {
dma_chan_put(chan);
kref_put(&chan->device->refcount,
dma_async_device_cleanup);
}
}
list_del(&client->global_node);
mutex_unlock(&dma_list_mutex);
kfree(client);
dma_chans_rebalance();
}
EXPORT_SYMBOL(dma_async_client_unregister);
/**
* dma_async_client_chan_request - request DMA channels
* @client: &dma_client
* @number: count of DMA channels requested
*
* Clients call dma_async_client_chan_request() to specify how many
* DMA channels they need, 0 to free all currently allocated.
* The resulting allocations/frees are indicated to the client via the
* event callback.
* dma_async_client_chan_request - send all available channels to the
* client that satisfy the capability mask
* @client - requester
*/
void dma_async_client_chan_request(struct dma_client *client,
unsigned int number)
void dma_async_client_chan_request(struct dma_client *client)
{
client->chans_desired = number;
dma_chans_rebalance();
mutex_lock(&dma_list_mutex);
dma_client_chan_alloc(client);
mutex_unlock(&dma_list_mutex);
}
EXPORT_SYMBOL(dma_async_client_chan_request);
......@@ -316,12 +348,31 @@ EXPORT_SYMBOL(dma_async_client_chan_request);
int dma_async_device_register(struct dma_device *device)
{
static int id;
int chancnt = 0;
int chancnt = 0, rc;
struct dma_chan* chan;
if (!device)
return -ENODEV;
/* validate device routines */
BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
!device->device_prep_dma_memcpy);
BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
!device->device_prep_dma_xor);
BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
!device->device_prep_dma_zero_sum);
BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
!device->device_prep_dma_memset);
BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
!device->device_prep_dma_interrupt);
BUG_ON(!device->device_alloc_chan_resources);
BUG_ON(!device->device_free_chan_resources);
BUG_ON(!device->device_dependency_added);
BUG_ON(!device->device_is_tx_complete);
BUG_ON(!device->device_issue_pending);
BUG_ON(!device->dev);
init_completion(&device->done);
kref_init(&device->refcount);
device->dev_id = id++;
......@@ -338,17 +389,38 @@ int dma_async_device_register(struct dma_device *device)
snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d",
device->dev_id, chan->chan_id);
rc = class_device_register(&chan->class_dev);
if (rc) {
chancnt--;
free_percpu(chan->local);
chan->local = NULL;
goto err_out;
}
kref_get(&device->refcount);
class_device_register(&chan->class_dev);
kref_init(&chan->refcount);
chan->slow_ref = 0;
INIT_RCU_HEAD(&chan->rcu);
}
mutex_lock(&dma_list_mutex);
list_add_tail(&device->global_node, &dma_device_list);
mutex_unlock(&dma_list_mutex);
dma_chans_rebalance();
dma_clients_notify_available();
return 0;
err_out:
list_for_each_entry(chan, &device->channels, device_node) {
if (chan->local == NULL)
continue;
kref_put(&device->refcount, dma_async_device_cleanup);
class_device_unregister(&chan->class_dev);
chancnt--;
free_percpu(chan->local);
}
return rc;
}
EXPORT_SYMBOL(dma_async_device_register);
......@@ -371,32 +443,165 @@ static void dma_async_device_cleanup(struct kref *kref)
void dma_async_device_unregister(struct dma_device *device)
{
struct dma_chan *chan;
unsigned long flags;
mutex_lock(&dma_list_mutex);
list_del(&device->global_node);
mutex_unlock(&dma_list_mutex);
list_for_each_entry(chan, &device->channels, device_node) {
if (chan->client) {
spin_lock_irqsave(&chan->client->lock, flags);
list_del(&chan->client_node);
chan->client->chan_count--;
spin_unlock_irqrestore(&chan->client->lock, flags);
chan->client->event_callback(chan->client,
chan,
DMA_RESOURCE_REMOVED);
dma_client_chan_free(chan);
}
dma_clients_notify_removed(chan);
class_device_unregister(&chan->class_dev);
dma_chan_release(chan);
}
dma_chans_rebalance();
kref_put(&device->refcount, dma_async_device_cleanup);
wait_for_completion(&device->done);
}
EXPORT_SYMBOL(dma_async_device_unregister);
/**
* dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
* @chan: DMA channel to offload copy to
* @dest: destination address (virtual)
* @src: source address (virtual)
* @len: length
*
* Both @dest and @src must be mappable to a bus address according to the
* DMA mapping API rules for streaming mappings.
* Both @dest and @src must stay memory resident (kernel memory or locked
* user space pages).
*/
dma_cookie_t
dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
void *src, size_t len)
{
struct dma_device *dev = chan->device;
struct dma_async_tx_descriptor *tx;
dma_addr_t addr;
dma_cookie_t cookie;
int cpu;
tx = dev->device_prep_dma_memcpy(chan, len, 0);
if (!tx)
return -ENOMEM;
tx->ack = 1;
tx->callback = NULL;
addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
tx->tx_set_src(addr, tx, 0);
addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
tx->tx_set_dest(addr, tx, 0);
cookie = tx->tx_submit(tx);
cpu = get_cpu();
per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
per_cpu_ptr(chan->local, cpu)->memcpy_count++;
put_cpu();
return cookie;
}
EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
/**
* dma_async_memcpy_buf_to_pg - offloaded copy from address to page
* @chan: DMA channel to offload copy to
* @page: destination page
* @offset: offset in page to copy to
* @kdata: source address (virtual)
* @len: length
*
* Both @page/@offset and @kdata must be mappable to a bus address according
* to the DMA mapping API rules for streaming mappings.
* Both @page/@offset and @kdata must stay memory resident (kernel memory or
* locked user space pages)
*/
dma_cookie_t
dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
unsigned int offset, void *kdata, size_t len)
{
struct dma_device *dev = chan->device;
struct dma_async_tx_descriptor *tx;
dma_addr_t addr;
dma_cookie_t cookie;
int cpu;
tx = dev->device_prep_dma_memcpy(chan, len, 0);
if (!tx)
return -ENOMEM;
tx->ack = 1;
tx->callback = NULL;
addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
tx->tx_set_src(addr, tx, 0);
addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
tx->tx_set_dest(addr, tx, 0);
cookie = tx->tx_submit(tx);
cpu = get_cpu();
per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
per_cpu_ptr(chan->local, cpu)->memcpy_count++;
put_cpu();
return cookie;
}
EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
/**
* dma_async_memcpy_pg_to_pg - offloaded copy from page to page
* @chan: DMA channel to offload copy to
* @dest_pg: destination page
* @dest_off: offset in page to copy to
* @src_pg: source page
* @src_off: offset in page to copy from
* @len: length
*
* Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
* address according to the DMA mapping API rules for streaming mappings.
* Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
* (kernel memory or locked user space pages).
*/
dma_cookie_t
dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
unsigned int dest_off, struct page *src_pg, unsigned int src_off,
size_t len)
{
struct dma_device *dev = chan->device;
struct dma_async_tx_descriptor *tx;
dma_addr_t addr;
dma_cookie_t cookie;
int cpu;
tx = dev->device_prep_dma_memcpy(chan, len, 0);
if (!tx)
return -ENOMEM;
tx->ack = 1;
tx->callback = NULL;
addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
tx->tx_set_src(addr, tx, 0);
addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE);
tx->tx_set_dest(addr, tx, 0);
cookie = tx->tx_submit(tx);
cpu = get_cpu();
per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
per_cpu_ptr(chan->local, cpu)->memcpy_count++;
put_cpu();
return cookie;
}
EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
struct dma_chan *chan)
{
tx->chan = chan;
spin_lock_init(&tx->lock);
INIT_LIST_HEAD(&tx->depend_node);
INIT_LIST_HEAD(&tx->depend_list);
}
EXPORT_SYMBOL(dma_async_tx_descriptor_init);
static int __init dma_bus_init(void)
{
mutex_init(&dma_list_mutex);
......
......@@ -32,16 +32,17 @@
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include "ioatdma.h"
#include "ioatdma_io.h"
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"
#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
#define to_ioat_device(dev) container_of(dev, struct ioat_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
/* internal functions */
static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
static void ioat_shutdown(struct pci_dev *pdev);
static void __devexit ioat_remove(struct pci_dev *pdev);
static int enumerate_dma_channels(struct ioat_device *device)
......@@ -51,8 +52,8 @@ static int enumerate_dma_channels(struct ioat_device *device)
int i;
struct ioat_dma_chan *ioat_chan;
device->common.chancnt = ioatdma_read8(device, IOAT_CHANCNT_OFFSET);
xfercap_scale = ioatdma_read8(device, IOAT_XFERCAP_OFFSET);
device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
for (i = 0; i < device->common.chancnt; i++) {
......@@ -71,13 +72,79 @@ static int enumerate_dma_channels(struct ioat_device *device)
INIT_LIST_HEAD(&ioat_chan->used_desc);
/* This should be made common somewhere in dmaengine.c */
ioat_chan->common.device = &device->common;
ioat_chan->common.client = NULL;
list_add_tail(&ioat_chan->common.device_node,
&device->common.channels);
}
return device->common.chancnt;
}
static void
ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
{
struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
pci_unmap_addr_set(desc, src, addr);
list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
iter->hw->src_addr = addr;
addr += ioat_chan->xfercap;
}
}
static void
ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
{
struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
pci_unmap_addr_set(desc, dst, addr);
list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
iter->hw->dst_addr = addr;
addr += ioat_chan->xfercap;
}
}
static dma_cookie_t
ioat_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
int append = 0;
dma_cookie_t cookie;
struct ioat_desc_sw *group_start;
group_start = list_entry(desc->async_tx.tx_list.next,
struct ioat_desc_sw, node);
spin_lock_bh(&ioat_chan->desc_lock);
/* cookie incr and addition to used_list must be atomic */
cookie = ioat_chan->common.cookie;
cookie++;
if (cookie < 0)
cookie = 1;
ioat_chan->common.cookie = desc->async_tx.cookie = cookie;
/* write address into NextDescriptor field of last desc in chain */
to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
group_start->async_tx.phys;
list_splice_init(&desc->async_tx.tx_list, ioat_chan->used_desc.prev);
ioat_chan->pending += desc->tx_cnt;
if (ioat_chan->pending >= 4) {
append = 1;
ioat_chan->pending = 0;
}
spin_unlock_bh(&ioat_chan->desc_lock);
if (append)
writeb(IOAT_CHANCMD_APPEND,
ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
return cookie;
}
static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
struct ioat_dma_chan *ioat_chan,
gfp_t flags)
......@@ -99,8 +166,13 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
}
memset(desc, 0, sizeof(*desc));
dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
desc_sw->async_tx.tx_set_src = ioat_set_src;
desc_sw->async_tx.tx_set_dest = ioat_set_dest;
desc_sw->async_tx.tx_submit = ioat_tx_submit;
INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
desc_sw->hw = desc;
desc_sw->phys = phys;
desc_sw->async_tx.phys = phys;
return desc_sw;
}
......@@ -123,7 +195,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
* In-use bit automatically set by reading chanctrl
* If 0, we got it, if 1, someone else did
*/
chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET);
chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE)
return -EBUSY;
......@@ -132,12 +204,12 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
IOAT_CHANCTRL_ERR_INT_EN |
IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
IOAT_CHANCTRL_ERR_COMPLETION_EN;
ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
chanerr = ioatdma_chan_read32(ioat_chan, IOAT_CHANERR_OFFSET);
chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
if (chanerr) {
printk("IOAT: CHANERR = %x, clearing\n", chanerr);
ioatdma_chan_write32(ioat_chan, IOAT_CHANERR_OFFSET, chanerr);
writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
}
/* Allocate descriptors */
......@@ -161,10 +233,10 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
&ioat_chan->completion_addr);
memset(ioat_chan->completion_virt, 0,
sizeof(*ioat_chan->completion_virt));
ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_LOW,
((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF);
ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_HIGH,
((u64) ioat_chan->completion_addr) >> 32);
writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
writel(((u64) ioat_chan->completion_addr) >> 32,
ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
ioat_start_null_desc(ioat_chan);
return i;
......@@ -182,18 +254,20 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
ioat_dma_memcpy_cleanup(ioat_chan);
ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
spin_lock_bh(&ioat_chan->desc_lock);
list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
in_use_descs++;
list_del(&desc->node);
pci_pool_free(ioat_device->dma_pool, desc->hw, desc->phys);
pci_pool_free(ioat_device->dma_pool, desc->hw,
desc->async_tx.phys);
kfree(desc);
}
list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) {
list_del(&desc->node);
pci_pool_free(ioat_device->dma_pool, desc->hw, desc->phys);
pci_pool_free(ioat_device->dma_pool, desc->hw,
desc->async_tx.phys);
kfree(desc);
}
spin_unlock_bh(&ioat_chan->desc_lock);
......@@ -210,50 +284,30 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
ioat_chan->last_completion = ioat_chan->completion_addr = 0;
/* Tell hw the chan is free */
chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET);
chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE;
ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
}
/**
* do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction
* @ioat_chan: IOAT DMA channel handle
* @dest: DMA destination address
* @src: DMA source address
* @len: transaction length in bytes
*/
static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan,
dma_addr_t dest,
dma_addr_t src,
size_t len)
static struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
{
struct ioat_desc_sw *first;
struct ioat_desc_sw *prev;
struct ioat_desc_sw *new;
dma_cookie_t cookie;
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
struct ioat_desc_sw *first, *prev, *new;
LIST_HEAD(new_chain);
u32 copy;
size_t orig_len;
dma_addr_t orig_src, orig_dst;
unsigned int desc_count = 0;
unsigned int append = 0;
if (!ioat_chan || !dest || !src)
return -EFAULT;
int desc_count = 0;
if (!len)
return ioat_chan->common.cookie;
return NULL;
orig_len = len;
orig_src = src;
orig_dst = dest;
first = NULL;
prev = NULL;
spin_lock_bh(&ioat_chan->desc_lock);
while (len) {
if (!list_empty(&ioat_chan->free_desc)) {
new = to_ioat_desc(ioat_chan->free_desc.next);
......@@ -270,141 +324,36 @@ static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan,
new->hw->size = copy;
new->hw->ctl = 0;
new->hw->src_addr = src;
new->hw->dst_addr = dest;
new->cookie = 0;
new->async_tx.cookie = 0;
new->async_tx.ack = 1;
/* chain together the physical address list for the HW */
if (!first)
first = new;
else
prev->hw->next = (u64) new->phys;
prev->hw->next = (u64) new->async_tx.phys;
prev = new;
len -= copy;
dest += copy;
src += copy;
list_add_tail(&new->node, &new_chain);
desc_count++;
}
new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
new->hw->next = 0;
/* cookie incr and addition to used_list must be atomic */
list_splice(&new_chain, &new->async_tx.tx_list);
cookie = ioat_chan->common.cookie;
cookie++;
if (cookie < 0)
cookie = 1;
ioat_chan->common.cookie = new->cookie = cookie;
new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
new->hw->next = 0;
new->tx_cnt = desc_count;
new->async_tx.ack = 0; /* client is in control of this ack */
new->async_tx.cookie = -EBUSY;
pci_unmap_addr_set(new, src, orig_src);
pci_unmap_addr_set(new, dst, orig_dst);
pci_unmap_len_set(new, src_len, orig_len);
pci_unmap_len_set(new, dst_len, orig_len);
/* write address into NextDescriptor field of last desc in chain */
to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->phys;
list_splice_init(&new_chain, ioat_chan->used_desc.prev);
ioat_chan->pending += desc_count;
if (ioat_chan->pending >= 20) {
append = 1;
ioat_chan->pending = 0;
}
spin_unlock_bh(&ioat_chan->desc_lock);
if (append)
ioatdma_chan_write8(ioat_chan,
IOAT_CHANCMD_OFFSET,
IOAT_CHANCMD_APPEND);
return cookie;
return new ? &new->async_tx : NULL;
}
/**
* ioat_dma_memcpy_buf_to_buf - wrapper that takes src & dest bufs
* @chan: IOAT DMA channel handle
* @dest: DMA destination address
* @src: DMA source address
* @len: transaction length in bytes
*/
static dma_cookie_t ioat_dma_memcpy_buf_to_buf(struct dma_chan *chan,
void *dest,
void *src,
size_t len)
{
dma_addr_t dest_addr;
dma_addr_t src_addr;
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
dest_addr = pci_map_single(ioat_chan->device->pdev,
dest, len, PCI_DMA_FROMDEVICE);
src_addr = pci_map_single(ioat_chan->device->pdev,
src, len, PCI_DMA_TODEVICE);
return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
}
/**
* ioat_dma_memcpy_buf_to_pg - wrapper, copying from a buf to a page
* @chan: IOAT DMA channel handle
* @page: pointer to the page to copy to
* @offset: offset into that page
* @src: DMA source address
* @len: transaction length in bytes
*/
static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan,
struct page *page,
unsigned int offset,
void *src,
size_t len)
{
dma_addr_t dest_addr;
dma_addr_t src_addr;
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
dest_addr = pci_map_page(ioat_chan->device->pdev,
page, offset, len, PCI_DMA_FROMDEVICE);
src_addr = pci_map_single(ioat_chan->device->pdev,
src, len, PCI_DMA_TODEVICE);
return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
}
/**
* ioat_dma_memcpy_pg_to_pg - wrapper, copying between two pages
* @chan: IOAT DMA channel handle
* @dest_pg: pointer to the page to copy to
* @dest_off: offset into that page
* @src_pg: pointer to the page to copy from
* @src_off: offset into that page
* @len: transaction length in bytes. This is guaranteed not to make a copy
* across a page boundary.
*/
static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan,
struct page *dest_pg,
unsigned int dest_off,
struct page *src_pg,
unsigned int src_off,
size_t len)
{
dma_addr_t dest_addr;
dma_addr_t src_addr;
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
dest_addr = pci_map_page(ioat_chan->device->pdev,
dest_pg, dest_off, len, PCI_DMA_FROMDEVICE);
src_addr = pci_map_page(ioat_chan->device->pdev,
src_pg, src_off, len, PCI_DMA_TODEVICE);
return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
}
/**
* ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw
......@@ -417,9 +366,8 @@ static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
if (ioat_chan->pending != 0) {
ioat_chan->pending = 0;
ioatdma_chan_write8(ioat_chan,
IOAT_CHANCMD_OFFSET,
IOAT_CHANCMD_APPEND);
writeb(IOAT_CHANCMD_APPEND,
ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}
}
......@@ -449,7 +397,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
printk("IOAT: Channel halted, chanerr = %x\n",
ioatdma_chan_read32(chan, IOAT_CHANERR_OFFSET));
readl(chan->reg_base + IOAT_CHANERR_OFFSET));
/* TODO do something to salvage the situation */
}
......@@ -467,8 +415,8 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
* exceeding xfercap, perhaps. If so, only the last one will
* have a cookie, and require unmapping.
*/
if (desc->cookie) {
cookie = desc->cookie;
if (desc->async_tx.cookie) {
cookie = desc->async_tx.cookie;
/* yes we are unmapping both _page and _single alloc'd
regions with unmap_page. Is this *really* that bad?
......@@ -483,14 +431,19 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
PCI_DMA_TODEVICE);
}
if (desc->phys != phys_complete) {
/* a completed entry, but not the last, so cleanup */
if (desc->async_tx.phys != phys_complete) {
/* a completed entry, but not the last, so cleanup
* if the client is done with the descriptor
*/
if (desc->async_tx.ack) {
list_del(&desc->node);
list_add_tail(&desc->node, &chan->free_desc);
} else
desc->async_tx.cookie = 0;
} else {
/* last used desc. Do not remove, so we can append from
it, but don't look at it next time, either */
desc->cookie = 0;
desc->async_tx.cookie = 0;
/* TODO check status bits? */
break;
......@@ -506,6 +459,17 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
spin_unlock(&chan->cleanup_lock);
}
static void ioat_dma_dependency_added(struct dma_chan *chan)
{
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
spin_lock_bh(&ioat_chan->desc_lock);
if (ioat_chan->pending == 0) {
spin_unlock_bh(&ioat_chan->desc_lock);
ioat_dma_memcpy_cleanup(ioat_chan);
} else
spin_unlock_bh(&ioat_chan->desc_lock);
}
/**
* ioat_dma_is_complete - poll the status of a IOAT DMA transaction
* @chan: IOAT DMA channel handle
......@@ -553,6 +517,8 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
static struct pci_device_id ioat_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
{ PCI_DEVICE(PCI_VENDOR_ID_UNISYS,
PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
{ 0, }
};
......@@ -560,6 +526,7 @@ static struct pci_driver ioat_pci_driver = {
.name = "ioatdma",
.id_table = ioat_pci_tbl,
.probe = ioat_probe,
.shutdown = ioat_shutdown,
.remove = __devexit_p(ioat_remove),
};
......@@ -569,21 +536,21 @@ static irqreturn_t ioat_do_interrupt(int irq, void *data)
unsigned long attnstatus;
u8 intrctrl;
intrctrl = ioatdma_read8(instance, IOAT_INTRCTRL_OFFSET);
intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
return IRQ_NONE;
if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl);
writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
return IRQ_NONE;
}
attnstatus = ioatdma_read32(instance, IOAT_ATTNSTATUS_OFFSET);
attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus);
ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl);
writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
return IRQ_HANDLED;
}
......@@ -607,19 +574,17 @@ static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan)
desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
desc->hw->next = 0;
desc->async_tx.ack = 1;
list_add_tail(&desc->node, &ioat_chan->used_desc);
spin_unlock_bh(&ioat_chan->desc_lock);
#if (BITS_PER_LONG == 64)
ioatdma_chan_write64(ioat_chan, IOAT_CHAINADDR_OFFSET, desc->phys);
#else
ioatdma_chan_write32(ioat_chan,
IOAT_CHAINADDR_OFFSET_LOW,
(u32) desc->phys);
ioatdma_chan_write32(ioat_chan, IOAT_CHAINADDR_OFFSET_HIGH, 0);
#endif
ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_START);
writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
writel(((u64) desc->async_tx.phys) >> 32,
ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);
writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}
/*
......@@ -633,6 +598,8 @@ static int ioat_self_test(struct ioat_device *device)
u8 *src;
u8 *dest;
struct dma_chan *dma_chan;
struct dma_async_tx_descriptor *tx;
dma_addr_t addr;
dma_cookie_t cookie;
int err = 0;
......@@ -658,7 +625,15 @@ static int ioat_self_test(struct ioat_device *device)
goto out;
}
cookie = ioat_dma_memcpy_buf_to_buf(dma_chan, dest, src, IOAT_TEST_SIZE);
tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
async_tx_ack(tx);
addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
DMA_TO_DEVICE);
ioat_set_src(addr, tx, 0);
addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
DMA_FROM_DEVICE);
ioat_set_dest(addr, tx, 0);
cookie = ioat_tx_submit(tx);
ioat_dma_memcpy_issue_pending(dma_chan);
msleep(1);
......@@ -748,19 +723,20 @@ static int __devinit ioat_probe(struct pci_dev *pdev,
device->reg_base = reg_base;
ioatdma_write8(device, IOAT_INTRCTRL_OFFSET, IOAT_INTRCTRL_MASTER_INT_EN);
writeb(IOAT_INTRCTRL_MASTER_INT_EN, device->reg_base + IOAT_INTRCTRL_OFFSET);
pci_set_master(pdev);
INIT_LIST_HEAD(&device->common.channels);
enumerate_dma_channels(device);
dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources;
device->common.device_free_chan_resources = ioat_dma_free_chan_resources;
device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf;
device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg;
device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg;
device->common.device_memcpy_complete = ioat_dma_is_complete;
device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending;
device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
device->common.device_is_tx_complete = ioat_dma_is_complete;
device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
device->common.device_dependency_added = ioat_dma_dependency_added;
device->common.dev = &pdev->dev;
printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n",
device->common.chancnt);
......@@ -787,9 +763,20 @@ static int __devinit ioat_probe(struct pci_dev *pdev,
err_set_dma_mask:
pci_disable_device(pdev);
err_enable_device:
printk(KERN_ERR "Intel(R) I/OAT DMA Engine initialization failed\n");
return err;
}
static void ioat_shutdown(struct pci_dev *pdev)
{
struct ioat_device *device;
device = pci_get_drvdata(pdev);
dma_async_device_unregister(&device->common);
}
static void __devexit ioat_remove(struct pci_dev *pdev)
{
struct ioat_device *device;
......@@ -818,7 +805,7 @@ static void __devexit ioat_remove(struct pci_dev *pdev)
}
/* MODULE API */
MODULE_VERSION("1.7");
MODULE_VERSION("1.9");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Intel Corporation");
......
......@@ -30,9 +30,6 @@
#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
extern struct list_head dma_device_list;
extern struct list_head dma_client_list;
/**
* struct ioat_device - internal representation of a IOAT device
* @pdev: PCI-Express device
......@@ -105,21 +102,20 @@ struct ioat_dma_chan {
/**
* struct ioat_desc_sw - wrapper around hardware descriptor
* @hw: hardware DMA descriptor
* @node:
* @cookie:
* @phys:
* @node: this descriptor will either be on the free list,
* or attached to a transaction list (async_tx.tx_list)
* @tx_cnt: number of descriptors required to complete the transaction
* @async_tx: the generic software descriptor for all engines
*/
struct ioat_desc_sw {
struct ioat_dma_descriptor *hw;
struct list_head node;
dma_cookie_t cookie;
dma_addr_t phys;
int tx_cnt;
DECLARE_PCI_UNMAP_ADDR(src)
DECLARE_PCI_UNMAP_LEN(src_len)
DECLARE_PCI_UNMAP_ADDR(dst)
DECLARE_PCI_UNMAP_LEN(dst_len)
struct dma_async_tx_descriptor async_tx;
};
#endif /* IOATDMA_H */
/*
* Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
#ifndef IOATDMA_IO_H
#define IOATDMA_IO_H
#include <asm/io.h>
/*
* device and per-channel MMIO register read and write functions
* this is a lot of anoying inline functions, but it's typesafe
*/
static inline u8 ioatdma_read8(struct ioat_device *device,
unsigned int offset)
{
return readb(device->reg_base + offset);
}
static inline u16 ioatdma_read16(struct ioat_device *device,
unsigned int offset)
{
return readw(device->reg_base + offset);
}
static inline u32 ioatdma_read32(struct ioat_device *device,
unsigned int offset)
{
return readl(device->reg_base + offset);
}
static inline void ioatdma_write8(struct ioat_device *device,
unsigned int offset, u8 value)
{
writeb(value, device->reg_base + offset);
}
static inline void ioatdma_write16(struct ioat_device *device,
unsigned int offset, u16 value)
{
writew(value, device->reg_base + offset);
}
static inline void ioatdma_write32(struct ioat_device *device,
unsigned int offset, u32 value)
{
writel(value, device->reg_base + offset);
}
static inline u8 ioatdma_chan_read8(struct ioat_dma_chan *chan,
unsigned int offset)
{
return readb(chan->reg_base + offset);
}
static inline u16 ioatdma_chan_read16(struct ioat_dma_chan *chan,
unsigned int offset)
{
return readw(chan->reg_base + offset);
}
static inline u32 ioatdma_chan_read32(struct ioat_dma_chan *chan,
unsigned int offset)
{
return readl(chan->reg_base + offset);
}
static inline void ioatdma_chan_write8(struct ioat_dma_chan *chan,
unsigned int offset, u8 value)
{
writeb(value, chan->reg_base + offset);
}
static inline void ioatdma_chan_write16(struct ioat_dma_chan *chan,
unsigned int offset, u16 value)
{
writew(value, chan->reg_base + offset);
}
static inline void ioatdma_chan_write32(struct ioat_dma_chan *chan,
unsigned int offset, u32 value)
{
writel(value, chan->reg_base + offset);
}
#if (BITS_PER_LONG == 64)
static inline u64 ioatdma_chan_read64(struct ioat_dma_chan *chan,
unsigned int offset)
{
return readq(chan->reg_base + offset);
}
static inline void ioatdma_chan_write64(struct ioat_dma_chan *chan,
unsigned int offset, u64 value)
{
writeq(value, chan->reg_base + offset);
}
#endif
#endif /* IOATDMA_IO_H */
/*
* offload engine driver for the Intel Xscale series of i/o processors
* Copyright © 2006, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
/*
* This driver supports the asynchrounous DMA copy and RAID engines available
* on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/async_tx.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/memory.h>
#include <linux/ioport.h>
#include <asm/arch/adma.h>
#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
#define to_iop_adma_device(dev) \
container_of(dev, struct iop_adma_device, common)
#define tx_to_iop_adma_slot(tx) \
container_of(tx, struct iop_adma_desc_slot, async_tx)
/**
* iop_adma_free_slots - flags descriptor slots for reuse
* @slot: Slot to free
* Caller must hold &iop_chan->lock while calling this function
*/
static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
{
int stride = slot->slots_per_op;
while (stride--) {
slot->slots_per_op = 0;
slot = list_entry(slot->slot_node.next,
struct iop_adma_desc_slot,
slot_node);
}
}
static dma_cookie_t
iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
{
BUG_ON(desc->async_tx.cookie < 0);
spin_lock_bh(&desc->async_tx.lock);
if (desc->async_tx.cookie > 0) {
cookie = desc->async_tx.cookie;
desc->async_tx.cookie = 0;
/* call the callback (must not sleep or submit new
* operations to this channel)
*/
if (desc->async_tx.callback)
desc->async_tx.callback(
desc->async_tx.callback_param);
/* unmap dma addresses
* (unmap_single vs unmap_page?)
*/
if (desc->group_head && desc->unmap_len) {
struct iop_adma_desc_slot *unmap = desc->group_head;
struct device *dev =
&iop_chan->device->pdev->dev;
u32 len = unmap->unmap_len;
u32 src_cnt = unmap->unmap_src_cnt;
dma_addr_t addr = iop_desc_get_dest_addr(unmap,
iop_chan);
dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
while (src_cnt--) {
addr = iop_desc_get_src_addr(unmap,
iop_chan,
src_cnt);
dma_unmap_page(dev, addr, len,
DMA_TO_DEVICE);
}
desc->group_head = NULL;
}
}
/* run dependent operations */
async_tx_run_dependencies(&desc->async_tx);
spin_unlock_bh(&desc->async_tx.lock);
return cookie;
}
static int
iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *iop_chan)
{
/* the client is allowed to attach dependent operations
* until 'ack' is set
*/
if (!desc->async_tx.ack)
return 0;
/* leave the last descriptor in the chain
* so we can append to it
*/
if (desc->chain_node.next == &iop_chan->chain)
return 1;
dev_dbg(iop_chan->device->common.dev,
"\tfree slot: %d slots_per_op: %d\n",
desc->idx, desc->slots_per_op);
list_del(&desc->chain_node);
iop_adma_free_slots(desc);
return 0;
}
static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
{
struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
dma_cookie_t cookie = 0;
u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
int busy = iop_chan_is_busy(iop_chan);
int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
/* free completed slots from the chain starting with
* the oldest descriptor
*/
list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
chain_node) {
pr_debug("\tcookie: %d slot: %d busy: %d "
"this_desc: %#x next_desc: %#x ack: %d\n",
iter->async_tx.cookie, iter->idx, busy,
iter->async_tx.phys, iop_desc_get_next_desc(iter),
iter->async_tx.ack);
prefetch(_iter);
prefetch(&_iter->async_tx);
/* do not advance past the current descriptor loaded into the
* hardware channel, subsequent descriptors are either in
* process or have not been submitted
*/
if (seen_current)
break;
/* stop the search if we reach the current descriptor and the
* channel is busy, or if it appears that the current descriptor
* needs to be re-read (i.e. has been appended to)
*/
if (iter->async_tx.phys == current_desc) {
BUG_ON(seen_current++);
if (busy || iop_desc_get_next_desc(iter))
break;
}
/* detect the start of a group transaction */
if (!slot_cnt && !slots_per_op) {
slot_cnt = iter->slot_cnt;
slots_per_op = iter->slots_per_op;
if (slot_cnt <= slots_per_op) {
slot_cnt = 0;
slots_per_op = 0;
}
}
if (slot_cnt) {
pr_debug("\tgroup++\n");
if (!grp_start)
grp_start = iter;
slot_cnt -= slots_per_op;
}
/* all the members of a group are complete */
if (slots_per_op != 0 && slot_cnt == 0) {
struct iop_adma_desc_slot *grp_iter, *_grp_iter;
int end_of_chain = 0;
pr_debug("\tgroup end\n");
/* collect the total results */
if (grp_start->xor_check_result) {
u32 zero_sum_result = 0;
slot_cnt = grp_start->slot_cnt;
grp_iter = grp_start;
list_for_each_entry_from(grp_iter,
&iop_chan->chain, chain_node) {
zero_sum_result |=
iop_desc_get_zero_result(grp_iter);
pr_debug("\titer%d result: %d\n",
grp_iter->idx, zero_sum_result);
slot_cnt -= slots_per_op;
if (slot_cnt == 0)
break;
}
pr_debug("\tgrp_start->xor_check_result: %p\n",
grp_start->xor_check_result);
*grp_start->xor_check_result = zero_sum_result;
}
/* clean up the group */
slot_cnt = grp_start->slot_cnt;
grp_iter = grp_start;
list_for_each_entry_safe_from(grp_iter, _grp_iter,
&iop_chan->chain, chain_node) {
cookie = iop_adma_run_tx_complete_actions(
grp_iter, iop_chan, cookie);
slot_cnt -= slots_per_op;
end_of_chain = iop_adma_clean_slot(grp_iter,
iop_chan);
if (slot_cnt == 0 || end_of_chain)
break;
}
/* the group should be complete at this point */
BUG_ON(slot_cnt);
slots_per_op = 0;
grp_start = NULL;
if (end_of_chain)
break;
else
continue;
} else if (slots_per_op) /* wait for group completion */
continue;
/* write back zero sum results (single descriptor case) */
if (iter->xor_check_result && iter->async_tx.cookie)
*iter->xor_check_result =
iop_desc_get_zero_result(iter);
cookie = iop_adma_run_tx_complete_actions(
iter, iop_chan, cookie);
if (iop_adma_clean_slot(iter, iop_chan))
break;
}
BUG_ON(!seen_current);
iop_chan_idle(busy, iop_chan);
if (cookie > 0) {
iop_chan->completed_cookie = cookie;
pr_debug("\tcompleted cookie %d\n", cookie);
}
}
static void
iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
{
spin_lock_bh(&iop_chan->lock);
__iop_adma_slot_cleanup(iop_chan);
spin_unlock_bh(&iop_chan->lock);
}
static void iop_adma_tasklet(unsigned long data)
{
struct iop_adma_chan *chan = (struct iop_adma_chan *) data;
__iop_adma_slot_cleanup(chan);
}
static struct iop_adma_desc_slot *
iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
int slots_per_op)
{
struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
struct list_head chain = LIST_HEAD_INIT(chain);
int slots_found, retry = 0;
/* start search from the last allocated descrtiptor
* if a contiguous allocation can not be found start searching
* from the beginning of the list
*/
retry:
slots_found = 0;
if (retry == 0)
iter = iop_chan->last_used;
else
iter = list_entry(&iop_chan->all_slots,
struct iop_adma_desc_slot,
slot_node);
list_for_each_entry_safe_continue(
iter, _iter, &iop_chan->all_slots, slot_node) {
prefetch(_iter);
prefetch(&_iter->async_tx);
if (iter->slots_per_op) {
/* give up after finding the first busy slot
* on the second pass through the list
*/
if (retry)
break;
slots_found = 0;
continue;
}
/* start the allocation if the slot is correctly aligned */
if (!slots_found++) {
if (iop_desc_is_aligned(iter, slots_per_op))
alloc_start = iter;
else {
slots_found = 0;
continue;
}
}
if (slots_found == num_slots) {
struct iop_adma_desc_slot *alloc_tail = NULL;
struct iop_adma_desc_slot *last_used = NULL;
iter = alloc_start;
while (num_slots) {
int i;
dev_dbg(iop_chan->device->common.dev,
"allocated slot: %d "
"(desc %p phys: %#x) slots_per_op %d\n",
iter->idx, iter->hw_desc,
iter->async_tx.phys, slots_per_op);
/* pre-ack all but the last descriptor */
if (num_slots != slots_per_op)
iter->async_tx.ack = 1;
else
iter->async_tx.ack = 0;
list_add_tail(&iter->chain_node, &chain);
alloc_tail = iter;
iter->async_tx.cookie = 0;
iter->slot_cnt = num_slots;
iter->xor_check_result = NULL;
for (i = 0; i < slots_per_op; i++) {
iter->slots_per_op = slots_per_op - i;
last_used = iter;
iter = list_entry(iter->slot_node.next,
struct iop_adma_desc_slot,
slot_node);
}
num_slots -= slots_per_op;
}
alloc_tail->group_head = alloc_start;
alloc_tail->async_tx.cookie = -EBUSY;
list_splice(&chain, &alloc_tail->async_tx.tx_list);
iop_chan->last_used = last_used;
iop_desc_clear_next_desc(alloc_start);
iop_desc_clear_next_desc(alloc_tail);
return alloc_tail;
}
}
if (!retry++)
goto retry;
/* try to free some slots if the allocation fails */
tasklet_schedule(&iop_chan->irq_tasklet);
return NULL;
}
static dma_cookie_t
iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
struct iop_adma_desc_slot *desc)
{
dma_cookie_t cookie = iop_chan->common.cookie;
cookie++;
if (cookie < 0)
cookie = 1;
iop_chan->common.cookie = desc->async_tx.cookie = cookie;
return cookie;
}
static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
{
dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
iop_chan->pending);
if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
iop_chan->pending = 0;
iop_chan_append(iop_chan);
}
}
static dma_cookie_t
iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
struct iop_adma_desc_slot *grp_start, *old_chain_tail;
int slot_cnt;
int slots_per_op;
dma_cookie_t cookie;
grp_start = sw_desc->group_head;
slot_cnt = grp_start->slot_cnt;
slots_per_op = grp_start->slots_per_op;
spin_lock_bh(&iop_chan->lock);
cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
old_chain_tail = list_entry(iop_chan->chain.prev,
struct iop_adma_desc_slot, chain_node);
list_splice_init(&sw_desc->async_tx.tx_list,
&old_chain_tail->chain_node);
/* fix up the hardware chain */
iop_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
/* 1/ don't add pre-chained descriptors
* 2/ dummy read to flush next_desc write
*/
BUG_ON(iop_desc_get_next_desc(sw_desc));
/* increment the pending count by the number of slots
* memcpy operations have a 1:1 (slot:operation) relation
* other operations are heavier and will pop the threshold
* more often.
*/
iop_chan->pending += slot_cnt;
iop_adma_check_threshold(iop_chan);
spin_unlock_bh(&iop_chan->lock);
dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
__FUNCTION__, sw_desc->async_tx.cookie, sw_desc->idx);
return cookie;
}
static void
iop_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
int index)
{
struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
/* to do: support transfers lengths > IOP_ADMA_MAX_BYTE_COUNT */
iop_desc_set_dest_addr(sw_desc->group_head, iop_chan, addr);
}
static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
/* returns the number of allocated descriptors */
static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
{
char *hw_desc;
int idx;
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *slot = NULL;
int init = iop_chan->slots_allocated ? 0 : 1;
struct iop_adma_platform_data *plat_data =
iop_chan->device->pdev->dev.platform_data;
int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
/* Allocate descriptor slots */
do {
idx = iop_chan->slots_allocated;
if (idx == num_descs_in_pool)
break;
slot = kzalloc(sizeof(*slot), GFP_KERNEL);
if (!slot) {
printk(KERN_INFO "IOP ADMA Channel only initialized"
" %d descriptor slots", idx);
break;
}
hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
dma_async_tx_descriptor_init(&slot->async_tx, chan);
slot->async_tx.tx_submit = iop_adma_tx_submit;
slot->async_tx.tx_set_dest = iop_adma_set_dest;
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
INIT_LIST_HEAD(&slot->async_tx.tx_list);
hw_desc = (char *) iop_chan->device->dma_desc_pool;
slot->async_tx.phys =
(dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
slot->idx = idx;
spin_lock_bh(&iop_chan->lock);
iop_chan->slots_allocated++;
list_add_tail(&slot->slot_node, &iop_chan->all_slots);
spin_unlock_bh(&iop_chan->lock);
} while (iop_chan->slots_allocated < num_descs_in_pool);
if (idx && !iop_chan->last_used)
iop_chan->last_used = list_entry(iop_chan->all_slots.next,
struct iop_adma_desc_slot,
slot_node);
dev_dbg(iop_chan->device->common.dev,
"allocated %d descriptor slots last_used: %p\n",
iop_chan->slots_allocated, iop_chan->last_used);
/* initialize the channel and the chain with a null operation */
if (init) {
if (dma_has_cap(DMA_MEMCPY,
iop_chan->device->common.cap_mask))
iop_chan_start_null_memcpy(iop_chan);
else if (dma_has_cap(DMA_XOR,
iop_chan->device->common.cap_mask))
iop_chan_start_null_xor(iop_chan);
else
BUG();
}
return (idx > 0) ? idx : -ENOMEM;
}
static struct dma_async_tx_descriptor *
iop_adma_prep_dma_interrupt(struct dma_chan *chan)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *grp_start;
int slot_cnt, slots_per_op;
dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
iop_desc_init_interrupt(grp_start, iop_chan);
grp_start->unmap_len = 0;
}
spin_unlock_bh(&iop_chan->lock);
return sw_desc ? &sw_desc->async_tx : NULL;
}
static void
iop_adma_memcpy_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
int index)
{
struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
iop_desc_set_memcpy_src_addr(grp_start, addr);
}
static struct dma_async_tx_descriptor *
iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *grp_start;
int slot_cnt, slots_per_op;
if (unlikely(!len))
return NULL;
BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
__FUNCTION__, len);
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
iop_desc_init_memcpy(grp_start, int_en);
iop_desc_set_byte_count(grp_start, iop_chan, len);
sw_desc->unmap_src_cnt = 1;
sw_desc->unmap_len = len;
sw_desc->async_tx.tx_set_src = iop_adma_memcpy_set_src;
}
spin_unlock_bh(&iop_chan->lock);
return sw_desc ? &sw_desc->async_tx : NULL;
}
static struct dma_async_tx_descriptor *
iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len,
int int_en)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *grp_start;
int slot_cnt, slots_per_op;
if (unlikely(!len))
return NULL;
BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
__FUNCTION__, len);
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
iop_desc_init_memset(grp_start, int_en);
iop_desc_set_byte_count(grp_start, iop_chan, len);
iop_desc_set_block_fill_val(grp_start, value);
sw_desc->unmap_src_cnt = 1;
sw_desc->unmap_len = len;
}
spin_unlock_bh(&iop_chan->lock);
return sw_desc ? &sw_desc->async_tx : NULL;
}
static void
iop_adma_xor_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
int index)
{
struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
iop_desc_set_xor_src_addr(grp_start, index, addr);
}
static struct dma_async_tx_descriptor *
iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len,
int int_en)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *grp_start;
int slot_cnt, slots_per_op;
if (unlikely(!len))
return NULL;
BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
dev_dbg(iop_chan->device->common.dev,
"%s src_cnt: %d len: %u int_en: %d\n",
__FUNCTION__, src_cnt, len, int_en);
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
iop_desc_init_xor(grp_start, src_cnt, int_en);
iop_desc_set_byte_count(grp_start, iop_chan, len);
sw_desc->unmap_src_cnt = src_cnt;
sw_desc->unmap_len = len;
sw_desc->async_tx.tx_set_src = iop_adma_xor_set_src;
}
spin_unlock_bh(&iop_chan->lock);
return sw_desc ? &sw_desc->async_tx : NULL;
}
static void
iop_adma_xor_zero_sum_set_src(dma_addr_t addr,
struct dma_async_tx_descriptor *tx,
int index)
{
struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
iop_desc_set_zero_sum_src_addr(grp_start, index, addr);
}
static struct dma_async_tx_descriptor *
iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt,
size_t len, u32 *result, int int_en)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *grp_start;
int slot_cnt, slots_per_op;
if (unlikely(!len))
return NULL;
dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
__FUNCTION__, src_cnt, len);
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
iop_desc_init_zero_sum(grp_start, src_cnt, int_en);
iop_desc_set_zero_sum_byte_count(grp_start, len);
grp_start->xor_check_result = result;
pr_debug("\t%s: grp_start->xor_check_result: %p\n",
__FUNCTION__, grp_start->xor_check_result);
sw_desc->unmap_src_cnt = src_cnt;
sw_desc->unmap_len = len;
sw_desc->async_tx.tx_set_src = iop_adma_xor_zero_sum_set_src;
}
spin_unlock_bh(&iop_chan->lock);
return sw_desc ? &sw_desc->async_tx : NULL;
}
static void iop_adma_dependency_added(struct dma_chan *chan)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
tasklet_schedule(&iop_chan->irq_tasklet);
}
static void iop_adma_free_chan_resources(struct dma_chan *chan)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *iter, *_iter;
int in_use_descs = 0;
iop_adma_slot_cleanup(iop_chan);
spin_lock_bh(&iop_chan->lock);
list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
chain_node) {
in_use_descs++;
list_del(&iter->chain_node);
}
list_for_each_entry_safe_reverse(
iter, _iter, &iop_chan->all_slots, slot_node) {
list_del(&iter->slot_node);
kfree(iter);
iop_chan->slots_allocated--;
}
iop_chan->last_used = NULL;
dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
__FUNCTION__, iop_chan->slots_allocated);
spin_unlock_bh(&iop_chan->lock);
/* one is ok since we left it on there on purpose */
if (in_use_descs > 1)
printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
in_use_descs - 1);
}
/**
* iop_adma_is_complete - poll the status of an ADMA transaction
* @chan: ADMA channel handle
* @cookie: ADMA transaction identifier
*/
static enum dma_status iop_adma_is_complete(struct dma_chan *chan,
dma_cookie_t cookie,
dma_cookie_t *done,
dma_cookie_t *used)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
dma_cookie_t last_used;
dma_cookie_t last_complete;
enum dma_status ret;
last_used = chan->cookie;
last_complete = iop_chan->completed_cookie;
if (done)
*done = last_complete;
if (used)
*used = last_used;
ret = dma_async_is_complete(cookie, last_complete, last_used);
if (ret == DMA_SUCCESS)
return ret;
iop_adma_slot_cleanup(iop_chan);
last_used = chan->cookie;
last_complete = iop_chan->completed_cookie;
if (done)
*done = last_complete;
if (used)
*used = last_used;
return dma_async_is_complete(cookie, last_complete, last_used);
}
static irqreturn_t iop_adma_eot_handler(int irq, void *data)
{
struct iop_adma_chan *chan = data;
dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__);
tasklet_schedule(&chan->irq_tasklet);
iop_adma_device_clear_eot_status(chan);
return IRQ_HANDLED;
}
static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
{
struct iop_adma_chan *chan = data;
dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__);
tasklet_schedule(&chan->irq_tasklet);
iop_adma_device_clear_eoc_status(chan);
return IRQ_HANDLED;
}
static irqreturn_t iop_adma_err_handler(int irq, void *data)
{
struct iop_adma_chan *chan = data;
unsigned long status = iop_chan_get_status(chan);
dev_printk(KERN_ERR, chan->device->common.dev,
"error ( %s%s%s%s%s%s%s)\n",
iop_is_err_int_parity(status, chan) ? "int_parity " : "",
iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
iop_is_err_split_tx(status, chan) ? "split_tx " : "");
iop_adma_device_clear_err_status(chan);
BUG();
return IRQ_HANDLED;
}
static void iop_adma_issue_pending(struct dma_chan *chan)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
if (iop_chan->pending) {
iop_chan->pending = 0;
iop_chan_append(iop_chan);
}
}
/*
* Perform a transaction to verify the HW works.
*/
#define IOP_ADMA_TEST_SIZE 2000
static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
{
int i;
void *src, *dest;
dma_addr_t src_dma, dest_dma;
struct dma_chan *dma_chan;
dma_cookie_t cookie;
struct dma_async_tx_descriptor *tx;
int err = 0;
struct iop_adma_chan *iop_chan;
dev_dbg(device->common.dev, "%s\n", __FUNCTION__);
src = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL);
if (!src)
return -ENOMEM;
dest = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL);
if (!dest) {
kfree(src);
return -ENOMEM;
}
/* Fill in src buffer */
for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
((u8 *) src)[i] = (u8)i;
memset(dest, 0, IOP_ADMA_TEST_SIZE);
/* Start copy, using first DMA channel */
dma_chan = container_of(device->common.channels.next,
struct dma_chan,
device_node);
if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
err = -ENODEV;
goto out;
}
tx = iop_adma_prep_dma_memcpy(dma_chan, IOP_ADMA_TEST_SIZE, 1);
dest_dma = dma_map_single(dma_chan->device->dev, dest,
IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
iop_adma_set_dest(dest_dma, tx, 0);
src_dma = dma_map_single(dma_chan->device->dev, src,
IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
iop_adma_memcpy_set_src(src_dma, tx, 0);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
async_tx_ack(tx);
msleep(1);
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
DMA_SUCCESS) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test copy timed out, disabling\n");
err = -ENODEV;
goto free_resources;
}
iop_chan = to_iop_adma_chan(dma_chan);
dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test copy failed compare, disabling\n");
err = -ENODEV;
goto free_resources;
}
free_resources:
iop_adma_free_chan_resources(dma_chan);
out:
kfree(src);
kfree(dest);
return err;
}
#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
static int __devinit
iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
{
int i, src_idx;
struct page *dest;
struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
dma_addr_t dma_addr, dest_dma;
struct dma_async_tx_descriptor *tx;
struct dma_chan *dma_chan;
dma_cookie_t cookie;
u8 cmp_byte = 0;
u32 cmp_word;
u32 zero_sum_result;
int err = 0;
struct iop_adma_chan *iop_chan;
dev_dbg(device->common.dev, "%s\n", __FUNCTION__);
for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
if (!xor_srcs[src_idx])
while (src_idx--) {
__free_page(xor_srcs[src_idx]);
return -ENOMEM;
}
}
dest = alloc_page(GFP_KERNEL);
if (!dest)
while (src_idx--) {
__free_page(xor_srcs[src_idx]);
return -ENOMEM;
}
/* Fill in src buffers */
for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
u8 *ptr = page_address(xor_srcs[src_idx]);
for (i = 0; i < PAGE_SIZE; i++)
ptr[i] = (1 << src_idx);
}
for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
cmp_byte ^= (u8) (1 << src_idx);
cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
(cmp_byte << 8) | cmp_byte;
memset(page_address(dest), 0, PAGE_SIZE);
dma_chan = container_of(device->common.channels.next,
struct dma_chan,
device_node);
if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
err = -ENODEV;
goto out;
}
/* test xor */
tx = iop_adma_prep_dma_xor(dma_chan, IOP_ADMA_NUM_SRC_TEST,
PAGE_SIZE, 1);
dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
PAGE_SIZE, DMA_FROM_DEVICE);
iop_adma_set_dest(dest_dma, tx, 0);
for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
dma_addr = dma_map_page(dma_chan->device->dev, xor_srcs[i], 0,
PAGE_SIZE, DMA_TO_DEVICE);
iop_adma_xor_set_src(dma_addr, tx, i);
}
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
async_tx_ack(tx);
msleep(8);
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
DMA_SUCCESS) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test xor timed out, disabling\n");
err = -ENODEV;
goto free_resources;
}
iop_chan = to_iop_adma_chan(dma_chan);
dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
PAGE_SIZE, DMA_FROM_DEVICE);
for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
u32 *ptr = page_address(dest);
if (ptr[i] != cmp_word) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test xor failed compare, disabling\n");
err = -ENODEV;
goto free_resources;
}
}
dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
PAGE_SIZE, DMA_TO_DEVICE);
/* skip zero sum if the capability is not present */
if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
goto free_resources;
/* zero sum the sources with the destintation page */
for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
zero_sum_srcs[i] = xor_srcs[i];
zero_sum_srcs[i] = dest;
zero_sum_result = 1;
tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
PAGE_SIZE, &zero_sum_result, 1);
for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
0, PAGE_SIZE, DMA_TO_DEVICE);
iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
}
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
async_tx_ack(tx);
msleep(8);
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test zero sum timed out, disabling\n");
err = -ENODEV;
goto free_resources;
}
if (zero_sum_result != 0) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test zero sum failed compare, disabling\n");
err = -ENODEV;
goto free_resources;
}
/* test memset */
tx = iop_adma_prep_dma_memset(dma_chan, 0, PAGE_SIZE, 1);
dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
PAGE_SIZE, DMA_FROM_DEVICE);
iop_adma_set_dest(dma_addr, tx, 0);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
async_tx_ack(tx);
msleep(8);
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test memset timed out, disabling\n");
err = -ENODEV;
goto free_resources;
}
for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
u32 *ptr = page_address(dest);
if (ptr[i]) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test memset failed compare, disabling\n");
err = -ENODEV;
goto free_resources;
}
}
/* test for non-zero parity sum */
zero_sum_result = 0;
tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
PAGE_SIZE, &zero_sum_result, 1);
for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
0, PAGE_SIZE, DMA_TO_DEVICE);
iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
}
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
async_tx_ack(tx);
msleep(8);
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test non-zero sum timed out, disabling\n");
err = -ENODEV;
goto free_resources;
}
if (zero_sum_result != 1) {
dev_printk(KERN_ERR, dma_chan->device->dev,
"Self-test non-zero sum failed compare, disabling\n");
err = -ENODEV;
goto free_resources;
}
free_resources:
iop_adma_free_chan_resources(dma_chan);
out:
src_idx = IOP_ADMA_NUM_SRC_TEST;
while (src_idx--)
__free_page(xor_srcs[src_idx]);
__free_page(dest);
return err;
}
static int __devexit iop_adma_remove(struct platform_device *dev)
{
struct iop_adma_device *device = platform_get_drvdata(dev);
struct dma_chan *chan, *_chan;
struct iop_adma_chan *iop_chan;
int i;
struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
dma_async_device_unregister(&device->common);
for (i = 0; i < 3; i++) {
unsigned int irq;
irq = platform_get_irq(dev, i);
free_irq(irq, device);
}
dma_free_coherent(&dev->dev, plat_data->pool_size,
device->dma_desc_pool_virt, device->dma_desc_pool);
do {
struct resource *res;
res = platform_get_resource(dev, IORESOURCE_MEM, 0);
release_mem_region(res->start, res->end - res->start);
} while (0);
list_for_each_entry_safe(chan, _chan, &device->common.channels,
device_node) {
iop_chan = to_iop_adma_chan(chan);
list_del(&chan->device_node);
kfree(iop_chan);
}
kfree(device);
return 0;
}
static int __devinit iop_adma_probe(struct platform_device *pdev)
{
struct resource *res;
int ret = 0, i;
struct iop_adma_device *adev;
struct iop_adma_chan *iop_chan;
struct dma_device *dma_dev;
struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -ENODEV;
if (!devm_request_mem_region(&pdev->dev, res->start,
res->end - res->start, pdev->name))
return -EBUSY;
adev = kzalloc(sizeof(*adev), GFP_KERNEL);
if (!adev)
return -ENOMEM;
dma_dev = &adev->common;
/* allocate coherent memory for hardware descriptors
* note: writecombine gives slightly better performance, but
* requires that we explicitly flush the writes
*/
if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
plat_data->pool_size,
&adev->dma_desc_pool,
GFP_KERNEL)) == NULL) {
ret = -ENOMEM;
goto err_free_adev;
}
dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n",
__FUNCTION__, adev->dma_desc_pool_virt,
(void *) adev->dma_desc_pool);
adev->id = plat_data->hw_id;
/* discover transaction capabilites from the platform data */
dma_dev->cap_mask = plat_data->cap_mask;
adev->pdev = pdev;
platform_set_drvdata(pdev, adev);
INIT_LIST_HEAD(&dma_dev->channels);
/* set base routines */
dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
dma_dev->device_is_tx_complete = iop_adma_is_complete;
dma_dev->device_issue_pending = iop_adma_issue_pending;
dma_dev->device_dependency_added = iop_adma_dependency_added;
dma_dev->dev = &pdev->dev;
/* set prep routines based on capability */
if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset;
if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
dma_dev->max_xor = iop_adma_get_max_xor();
dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
}
if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
dma_dev->device_prep_dma_zero_sum =
iop_adma_prep_dma_zero_sum;
if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
dma_dev->device_prep_dma_interrupt =
iop_adma_prep_dma_interrupt;
iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
if (!iop_chan) {
ret = -ENOMEM;
goto err_free_dma;
}
iop_chan->device = adev;
iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
res->end - res->start);
if (!iop_chan->mmr_base) {
ret = -ENOMEM;
goto err_free_iop_chan;
}
tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
iop_chan);
/* clear errors before enabling interrupts */
iop_adma_device_clear_err_status(iop_chan);
for (i = 0; i < 3; i++) {
irq_handler_t handler[] = { iop_adma_eot_handler,
iop_adma_eoc_handler,
iop_adma_err_handler };
int irq = platform_get_irq(pdev, i);
if (irq < 0) {
ret = -ENXIO;
goto err_free_iop_chan;
} else {
ret = devm_request_irq(&pdev->dev, irq,
handler[i], 0, pdev->name, iop_chan);
if (ret)
goto err_free_iop_chan;
}
}
spin_lock_init(&iop_chan->lock);
init_timer(&iop_chan->cleanup_watchdog);
iop_chan->cleanup_watchdog.data = (unsigned long) iop_chan;
iop_chan->cleanup_watchdog.function = iop_adma_tasklet;
INIT_LIST_HEAD(&iop_chan->chain);
INIT_LIST_HEAD(&iop_chan->all_slots);
INIT_RCU_HEAD(&iop_chan->common.rcu);
iop_chan->common.device = dma_dev;
list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
ret = iop_adma_memcpy_self_test(adev);
dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
if (ret)
goto err_free_iop_chan;
}
if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
ret = iop_adma_xor_zero_sum_self_test(adev);
dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
if (ret)
goto err_free_iop_chan;
}
dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
"( %s%s%s%s%s%s%s%s%s%s)\n",
dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
dma_async_device_register(dma_dev);
goto out;
err_free_iop_chan:
kfree(iop_chan);
err_free_dma:
dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
adev->dma_desc_pool_virt, adev->dma_desc_pool);
err_free_adev:
kfree(adev);
out:
return ret;
}
static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
{
struct iop_adma_desc_slot *sw_desc, *grp_start;
dma_cookie_t cookie;
int slot_cnt, slots_per_op;
dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
sw_desc->async_tx.ack = 1;
iop_desc_init_memcpy(grp_start, 0);
iop_desc_set_byte_count(grp_start, iop_chan, 0);
iop_desc_set_dest_addr(grp_start, iop_chan, 0);
iop_desc_set_memcpy_src_addr(grp_start, 0);
cookie = iop_chan->common.cookie;
cookie++;
if (cookie <= 1)
cookie = 2;
/* initialize the completed cookie to be less than
* the most recently used cookie
*/
iop_chan->completed_cookie = cookie - 1;
iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
/* channel should not be busy */
BUG_ON(iop_chan_is_busy(iop_chan));
/* clear any prior error-status bits */
iop_adma_device_clear_err_status(iop_chan);
/* disable operation */
iop_chan_disable(iop_chan);
/* set the descriptor address */
iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
/* 1/ don't add pre-chained descriptors
* 2/ dummy read to flush next_desc write
*/
BUG_ON(iop_desc_get_next_desc(sw_desc));
/* run the descriptor */
iop_chan_enable(iop_chan);
} else
dev_printk(KERN_ERR, iop_chan->device->common.dev,
"failed to allocate null descriptor\n");
spin_unlock_bh(&iop_chan->lock);
}
static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
{
struct iop_adma_desc_slot *sw_desc, *grp_start;
dma_cookie_t cookie;
int slot_cnt, slots_per_op;
dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
sw_desc->async_tx.ack = 1;
iop_desc_init_null_xor(grp_start, 2, 0);
iop_desc_set_byte_count(grp_start, iop_chan, 0);
iop_desc_set_dest_addr(grp_start, iop_chan, 0);
iop_desc_set_xor_src_addr(grp_start, 0, 0);
iop_desc_set_xor_src_addr(grp_start, 1, 0);
cookie = iop_chan->common.cookie;
cookie++;
if (cookie <= 1)
cookie = 2;
/* initialize the completed cookie to be less than
* the most recently used cookie
*/
iop_chan->completed_cookie = cookie - 1;
iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
/* channel should not be busy */
BUG_ON(iop_chan_is_busy(iop_chan));
/* clear any prior error-status bits */
iop_adma_device_clear_err_status(iop_chan);
/* disable operation */
iop_chan_disable(iop_chan);
/* set the descriptor address */
iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
/* 1/ don't add pre-chained descriptors
* 2/ dummy read to flush next_desc write
*/
BUG_ON(iop_desc_get_next_desc(sw_desc));
/* run the descriptor */
iop_chan_enable(iop_chan);
} else
dev_printk(KERN_ERR, iop_chan->device->common.dev,
"failed to allocate null descriptor\n");
spin_unlock_bh(&iop_chan->lock);
}
static struct platform_driver iop_adma_driver = {
.probe = iop_adma_probe,
.remove = iop_adma_remove,
.driver = {
.owner = THIS_MODULE,
.name = "iop-adma",
},
};
static int __init iop_adma_init (void)
{
/* it's currently unsafe to unload this module */
/* if forced, worst case is that rmmod hangs */
__unsafe(THIS_MODULE);
return platform_driver_register(&iop_adma_driver);
}
static void __exit iop_adma_exit (void)
{
platform_driver_unregister(&iop_adma_driver);
return;
}
module_init(iop_adma_init);
module_exit(iop_adma_exit);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IOP ADMA Engine Driver");
MODULE_LICENSE("GPL");
......@@ -109,6 +109,8 @@ config MD_RAID10
config MD_RAID456
tristate "RAID-4/RAID-5/RAID-6 mode"
depends on BLK_DEV_MD
select ASYNC_MEMCPY
select ASYNC_XOR
---help---
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
......
......@@ -18,7 +18,7 @@ raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
hostprogs-y := mktables
# Note: link order is important. All raid personalities
# and xor.o must come before md.o, as they each initialise
# and must come before md.o, as they each initialise
# themselves, and md.o may use the personalities when it
# auto-initialised.
......@@ -26,7 +26,7 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
obj-$(CONFIG_MD_RAID0) += raid0.o
obj-$(CONFIG_MD_RAID1) += raid1.o
obj-$(CONFIG_MD_RAID10) += raid10.o
obj-$(CONFIG_MD_RAID456) += raid456.o xor.o
obj-$(CONFIG_MD_RAID456) += raid456.o
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_MD_FAULTY) += faulty.o
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
......
......@@ -5814,7 +5814,7 @@ static __exit void md_exit(void)
}
}
module_init(md_init)
subsys_initcall(md_init);
module_exit(md_exit)
static int get_ro(char *buffer, struct kernel_param *kp)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* Copyright(c) 2006, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#ifndef _ADMA_H
#define _ADMA_H
#include <linux/types.h>
#include <linux/io.h>
#include <asm/hardware.h>
#include <asm/hardware/iop_adma.h>
#define ADMA_ACCR(chan) (chan->mmr_base + 0x0)
#define ADMA_ACSR(chan) (chan->mmr_base + 0x4)
#define ADMA_ADAR(chan) (chan->mmr_base + 0x8)
#define ADMA_IIPCR(chan) (chan->mmr_base + 0x18)
#define ADMA_IIPAR(chan) (chan->mmr_base + 0x1c)
#define ADMA_IIPUAR(chan) (chan->mmr_base + 0x20)
#define ADMA_ANDAR(chan) (chan->mmr_base + 0x24)
#define ADMA_ADCR(chan) (chan->mmr_base + 0x28)
#define ADMA_CARMD(chan) (chan->mmr_base + 0x2c)
#define ADMA_ABCR(chan) (chan->mmr_base + 0x30)
#define ADMA_DLADR(chan) (chan->mmr_base + 0x34)
#define ADMA_DUADR(chan) (chan->mmr_base + 0x38)
#define ADMA_SLAR(src, chan) (chan->mmr_base + (0x3c + (src << 3)))
#define ADMA_SUAR(src, chan) (chan->mmr_base + (0x40 + (src << 3)))
struct iop13xx_adma_src {
u32 src_addr;
union {
u32 upper_src_addr;
struct {
unsigned int pq_upper_src_addr:24;
unsigned int pq_dmlt:8;
};
};
};
struct iop13xx_adma_desc_ctrl {
unsigned int int_en:1;
unsigned int xfer_dir:2;
unsigned int src_select:4;
unsigned int zero_result:1;
unsigned int block_fill_en:1;
unsigned int crc_gen_en:1;
unsigned int crc_xfer_dis:1;
unsigned int crc_seed_fetch_dis:1;
unsigned int status_write_back_en:1;
unsigned int endian_swap_en:1;
unsigned int reserved0:2;
unsigned int pq_update_xfer_en:1;
unsigned int dual_xor_en:1;
unsigned int pq_xfer_en:1;
unsigned int p_xfer_dis:1;
unsigned int reserved1:10;
unsigned int relax_order_en:1;
unsigned int no_snoop_en:1;
};
struct iop13xx_adma_byte_count {
unsigned int byte_count:24;
unsigned int host_if:3;
unsigned int reserved:2;
unsigned int zero_result_err_q:1;
unsigned int zero_result_err:1;
unsigned int tx_complete:1;
};
struct iop13xx_adma_desc_hw {
u32 next_desc;
union {
u32 desc_ctrl;
struct iop13xx_adma_desc_ctrl desc_ctrl_field;
};
union {
u32 crc_addr;
u32 block_fill_data;
u32 q_dest_addr;
};
union {
u32 byte_count;
struct iop13xx_adma_byte_count byte_count_field;
};
union {
u32 dest_addr;
u32 p_dest_addr;
};
union {
u32 upper_dest_addr;
u32 pq_upper_dest_addr;
};
struct iop13xx_adma_src src[1];
};
struct iop13xx_adma_desc_dual_xor {
u32 next_desc;
u32 desc_ctrl;
u32 reserved;
u32 byte_count;
u32 h_dest_addr;
u32 h_upper_dest_addr;
u32 src0_addr;
u32 upper_src0_addr;
u32 src1_addr;
u32 upper_src1_addr;
u32 h_src_addr;
u32 h_upper_src_addr;
u32 d_src_addr;
u32 d_upper_src_addr;
u32 d_dest_addr;
u32 d_upper_dest_addr;
};
struct iop13xx_adma_desc_pq_update {
u32 next_desc;
u32 desc_ctrl;
u32 reserved;
u32 byte_count;
u32 p_dest_addr;
u32 p_upper_dest_addr;
u32 src0_addr;
u32 upper_src0_addr;
u32 src1_addr;
u32 upper_src1_addr;
u32 p_src_addr;
u32 p_upper_src_addr;
u32 q_src_addr;
struct {
unsigned int q_upper_src_addr:24;
unsigned int q_dmlt:8;
};
u32 q_dest_addr;
u32 q_upper_dest_addr;
};
static inline int iop_adma_get_max_xor(void)
{
return 16;
}
static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
{
return __raw_readl(ADMA_ADAR(chan));
}
static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
u32 next_desc_addr)
{
__raw_writel(next_desc_addr, ADMA_ANDAR(chan));
}
#define ADMA_STATUS_BUSY (1 << 13)
static inline char iop_chan_is_busy(struct iop_adma_chan *chan)
{
if (__raw_readl(ADMA_ACSR(chan)) &
ADMA_STATUS_BUSY)
return 1;
else
return 0;
}
static inline int
iop_chan_get_desc_align(struct iop_adma_chan *chan, int num_slots)
{
return 1;
}
#define iop_desc_is_aligned(x, y) 1
static inline int
iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
{
*slots_per_op = 1;
return 1;
}
#define iop_chan_interrupt_slot_count(s, c) iop_chan_memcpy_slot_count(0, s)
static inline int
iop_chan_memset_slot_count(size_t len, int *slots_per_op)
{
*slots_per_op = 1;
return 1;
}
static inline int
iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
{
int num_slots;
/* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1
* (1 source => 8 bytes) (1 slot => 32 bytes)
*/
num_slots = 1 + (((src_cnt - 1) << 3) >> 5);
if (((src_cnt - 1) << 3) & 0x1f)
num_slots++;
*slots_per_op = num_slots;
return num_slots;
}
#define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
return hw_desc->dest_addr;
}
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
return hw_desc->byte_count_field.byte_count;
}
static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
int src_idx)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
return hw_desc->src[src_idx].src_addr;
}
static inline u32 iop_desc_get_src_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
return hw_desc->desc_ctrl_field.src_select + 1;
}
static inline void
iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop13xx_adma_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
u_desc_ctrl.field.int_en = int_en;
hw_desc->desc_ctrl = u_desc_ctrl.value;
hw_desc->crc_addr = 0;
}
static inline void
iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop13xx_adma_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
u_desc_ctrl.field.block_fill_en = 1;
u_desc_ctrl.field.int_en = int_en;
hw_desc->desc_ctrl = u_desc_ctrl.value;
hw_desc->crc_addr = 0;
}
/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
static inline void
iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop13xx_adma_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
u_desc_ctrl.field.src_select = src_cnt - 1;
u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
u_desc_ctrl.field.int_en = int_en;
hw_desc->desc_ctrl = u_desc_ctrl.value;
hw_desc->crc_addr = 0;
}
#define iop_desc_init_null_xor(d, s, i) iop_desc_init_xor(d, s, i)
/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
static inline int
iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop13xx_adma_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
u_desc_ctrl.field.src_select = src_cnt - 1;
u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
u_desc_ctrl.field.zero_result = 1;
u_desc_ctrl.field.status_write_back_en = 1;
u_desc_ctrl.field.int_en = int_en;
hw_desc->desc_ctrl = u_desc_ctrl.value;
hw_desc->crc_addr = 0;
return 1;
}
static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
u32 byte_count)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
hw_desc->byte_count = byte_count;
}
static inline void
iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
{
int slots_per_op = desc->slots_per_op;
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
int i = 0;
if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
hw_desc->byte_count = len;
} else {
do {
iter = iop_hw_desc_slot_idx(hw_desc, i);
iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
i += slots_per_op;
} while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
if (len) {
iter = iop_hw_desc_slot_idx(hw_desc, i);
iter->byte_count = len;
}
}
}
static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
dma_addr_t addr)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
hw_desc->dest_addr = addr;
hw_desc->upper_dest_addr = 0;
}
static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
dma_addr_t addr)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
hw_desc->src[0].src_addr = addr;
hw_desc->src[0].upper_src_addr = 0;
}
static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
int src_idx, dma_addr_t addr)
{
int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
int i = 0;
do {
iter = iop_hw_desc_slot_idx(hw_desc, i);
iter->src[src_idx].src_addr = addr;
iter->src[src_idx].upper_src_addr = 0;
slot_cnt -= slots_per_op;
if (slot_cnt) {
i += slots_per_op;
addr += IOP_ADMA_XOR_MAX_BYTE_COUNT;
}
} while (slot_cnt);
}
static inline void
iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
iop_desc_init_memcpy(desc, 1);
iop_desc_set_byte_count(desc, chan, 0);
iop_desc_set_dest_addr(desc, chan, 0);
iop_desc_set_memcpy_src_addr(desc, 0);
}
#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
u32 next_desc_addr)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
BUG_ON(hw_desc->next_desc);
hw_desc->next_desc = next_desc_addr;
}
static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
return hw_desc->next_desc;
}
static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
hw_desc->next_desc = 0;
}
static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
u32 val)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
hw_desc->block_fill_data = val;
}
static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
if (desc_ctrl.pq_xfer_en)
return byte_count.zero_result_err_q;
else
return byte_count.zero_result_err;
}
static inline void iop_chan_append(struct iop_adma_chan *chan)
{
u32 adma_accr;
adma_accr = __raw_readl(ADMA_ACCR(chan));
adma_accr |= 0x2;
__raw_writel(adma_accr, ADMA_ACCR(chan));
}
static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan)
{
do { } while (0);
}
static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
{
return __raw_readl(ADMA_ACSR(chan));
}
static inline void iop_chan_disable(struct iop_adma_chan *chan)
{
u32 adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan));
adma_chan_ctrl &= ~0x1;
__raw_writel(adma_chan_ctrl, ADMA_ACCR(chan));
}
static inline void iop_chan_enable(struct iop_adma_chan *chan)
{
u32 adma_chan_ctrl;
adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan));
adma_chan_ctrl |= 0x1;
__raw_writel(adma_chan_ctrl, ADMA_ACCR(chan));
}
static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
{
u32 status = __raw_readl(ADMA_ACSR(chan));
status &= (1 << 12);
__raw_writel(status, ADMA_ACSR(chan));
}
static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
{
u32 status = __raw_readl(ADMA_ACSR(chan));
status &= (1 << 11);
__raw_writel(status, ADMA_ACSR(chan));
}
static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
{
u32 status = __raw_readl(ADMA_ACSR(chan));
status &= (1 << 9) | (1 << 5) | (1 << 4) | (1 << 3);
__raw_writel(status, ADMA_ACSR(chan));
}
static inline int
iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
{
return test_bit(9, &status);
}
static inline int
iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
{
return test_bit(5, &status);
}
static inline int
iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
{
return test_bit(4, &status);
}
static inline int
iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
{
return test_bit(3, &status);
}
static inline int
iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
{
return 0;
}
static inline int
iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
{
return 0;
}
static inline int
iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
{
return 0;
}
#endif /* _ADMA_H */
......@@ -166,12 +166,22 @@ static inline int iop13xx_cpu_id(void)
#define IOP13XX_INIT_I2C_1 (1 << 1)
#define IOP13XX_INIT_I2C_2 (1 << 2)
/* ADMA selection flags */
/* INIT_ADMA_DEFAULT = Rely on CONFIG_IOP13XX_ADMA* */
#define IOP13XX_INIT_ADMA_DEFAULT (0)
#define IOP13XX_INIT_ADMA_0 (1 << 0)
#define IOP13XX_INIT_ADMA_1 (1 << 1)
#define IOP13XX_INIT_ADMA_2 (1 << 2)
/* Platform devices */
#define IQ81340_NUM_UART 2
#define IQ81340_NUM_I2C 3
#define IQ81340_NUM_PHYS_MAP_FLASH 1
#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART +\
IQ81340_NUM_I2C +\
IQ81340_NUM_PHYS_MAP_FLASH)
#define IQ81340_NUM_ADMA 3
#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART + \
IQ81340_NUM_I2C + \
IQ81340_NUM_PHYS_MAP_FLASH + \
IQ81340_NUM_ADMA)
/*========================== PMMR offsets for key registers ============*/
#define IOP13XX_ATU0_PMMR_OFFSET 0x00048000
......@@ -444,22 +454,6 @@ static inline int iop13xx_cpu_id(void)
/*==============================ADMA UNITS===============================*/
#define IOP13XX_ADMA_PHYS_BASE(chan) IOP13XX_REG_ADDR32_PHYS((chan << 9))
#define IOP13XX_ADMA_UPPER_PA(chan) (IOP13XX_ADMA_PHYS_BASE(chan) + 0xc0)
#define IOP13XX_ADMA_OFFSET(chan, ofs) IOP13XX_REG_ADDR32((chan << 9) + (ofs))
#define IOP13XX_ADMA_ACCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x0)
#define IOP13XX_ADMA_ACSR(chan) IOP13XX_ADMA_OFFSET(chan, 0x4)
#define IOP13XX_ADMA_ADAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x8)
#define IOP13XX_ADMA_IIPCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x18)
#define IOP13XX_ADMA_IIPAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x1c)
#define IOP13XX_ADMA_IIPUAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x20)
#define IOP13XX_ADMA_ANDAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x24)
#define IOP13XX_ADMA_ADCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x28)
#define IOP13XX_ADMA_CARMD(chan) IOP13XX_ADMA_OFFSET(chan, 0x2c)
#define IOP13XX_ADMA_ABCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x30)
#define IOP13XX_ADMA_DLADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x34)
#define IOP13XX_ADMA_DUADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x38)
#define IOP13XX_ADMA_SLAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x3c + (src <<3))
#define IOP13XX_ADMA_SUAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x40 + (src <<3))
/*==============================XSI BRIDGE===============================*/
#define IOP13XX_XBG_BECSR IOP13XX_REG_ADDR32(0x178c)
......
#ifndef IOP32X_ADMA_H
#define IOP32X_ADMA_H
#include <asm/hardware/iop3xx-adma.h>
#endif
#ifndef IOP33X_ADMA_H
#define IOP33X_ADMA_H
#include <asm/hardware/iop3xx-adma.h>
#endif
/*
* Copyright © 2006, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#ifndef _ADMA_H
#define _ADMA_H
#include <linux/types.h>
#include <linux/io.h>
#include <asm/hardware.h>
#include <asm/hardware/iop_adma.h>
/* Memory copy units */
#define DMA_CCR(chan) (chan->mmr_base + 0x0)
#define DMA_CSR(chan) (chan->mmr_base + 0x4)
#define DMA_DAR(chan) (chan->mmr_base + 0xc)
#define DMA_NDAR(chan) (chan->mmr_base + 0x10)
#define DMA_PADR(chan) (chan->mmr_base + 0x14)
#define DMA_PUADR(chan) (chan->mmr_base + 0x18)
#define DMA_LADR(chan) (chan->mmr_base + 0x1c)
#define DMA_BCR(chan) (chan->mmr_base + 0x20)
#define DMA_DCR(chan) (chan->mmr_base + 0x24)
/* Application accelerator unit */
#define AAU_ACR(chan) (chan->mmr_base + 0x0)
#define AAU_ASR(chan) (chan->mmr_base + 0x4)
#define AAU_ADAR(chan) (chan->mmr_base + 0x8)
#define AAU_ANDAR(chan) (chan->mmr_base + 0xc)
#define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2)))
#define AAU_DAR(chan) (chan->mmr_base + 0x20)
#define AAU_ABCR(chan) (chan->mmr_base + 0x24)
#define AAU_ADCR(chan) (chan->mmr_base + 0x28)
#define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2)))
#define AAU_EDCR0_IDX 8
#define AAU_EDCR1_IDX 17
#define AAU_EDCR2_IDX 26
#define DMA0_ID 0
#define DMA1_ID 1
#define AAU_ID 2
struct iop3xx_aau_desc_ctrl {
unsigned int int_en:1;
unsigned int blk1_cmd_ctrl:3;
unsigned int blk2_cmd_ctrl:3;
unsigned int blk3_cmd_ctrl:3;
unsigned int blk4_cmd_ctrl:3;
unsigned int blk5_cmd_ctrl:3;
unsigned int blk6_cmd_ctrl:3;
unsigned int blk7_cmd_ctrl:3;
unsigned int blk8_cmd_ctrl:3;
unsigned int blk_ctrl:2;
unsigned int dual_xor_en:1;
unsigned int tx_complete:1;
unsigned int zero_result_err:1;
unsigned int zero_result_en:1;
unsigned int dest_write_en:1;
};
struct iop3xx_aau_e_desc_ctrl {
unsigned int reserved:1;
unsigned int blk1_cmd_ctrl:3;
unsigned int blk2_cmd_ctrl:3;
unsigned int blk3_cmd_ctrl:3;
unsigned int blk4_cmd_ctrl:3;
unsigned int blk5_cmd_ctrl:3;
unsigned int blk6_cmd_ctrl:3;
unsigned int blk7_cmd_ctrl:3;
unsigned int blk8_cmd_ctrl:3;
unsigned int reserved2:7;
};
struct iop3xx_dma_desc_ctrl {
unsigned int pci_transaction:4;
unsigned int int_en:1;
unsigned int dac_cycle_en:1;
unsigned int mem_to_mem_en:1;
unsigned int crc_data_tx_en:1;
unsigned int crc_gen_en:1;
unsigned int crc_seed_dis:1;
unsigned int reserved:21;
unsigned int crc_tx_complete:1;
};
struct iop3xx_desc_dma {
u32 next_desc;
union {
u32 pci_src_addr;
u32 pci_dest_addr;
u32 src_addr;
};
union {
u32 upper_pci_src_addr;
u32 upper_pci_dest_addr;
};
union {
u32 local_pci_src_addr;
u32 local_pci_dest_addr;
u32 dest_addr;
};
u32 byte_count;
union {
u32 desc_ctrl;
struct iop3xx_dma_desc_ctrl desc_ctrl_field;
};
u32 crc_addr;
};
struct iop3xx_desc_aau {
u32 next_desc;
u32 src[4];
u32 dest_addr;
u32 byte_count;
union {
u32 desc_ctrl;
struct iop3xx_aau_desc_ctrl desc_ctrl_field;
};
union {
u32 src_addr;
u32 e_desc_ctrl;
struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
} src_edc[31];
};
struct iop3xx_aau_gfmr {
unsigned int gfmr1:8;
unsigned int gfmr2:8;
unsigned int gfmr3:8;
unsigned int gfmr4:8;
};
struct iop3xx_desc_pq_xor {
u32 next_desc;
u32 src[3];
union {
u32 data_mult1;
struct iop3xx_aau_gfmr data_mult1_field;
};
u32 dest_addr;
u32 byte_count;
union {
u32 desc_ctrl;
struct iop3xx_aau_desc_ctrl desc_ctrl_field;
};
union {
u32 src_addr;
u32 e_desc_ctrl;
struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
u32 data_multiplier;
struct iop3xx_aau_gfmr data_mult_field;
u32 reserved;
} src_edc_gfmr[19];
};
struct iop3xx_desc_dual_xor {
u32 next_desc;
u32 src0_addr;
u32 src1_addr;
u32 h_src_addr;
u32 d_src_addr;
u32 h_dest_addr;
u32 byte_count;
union {
u32 desc_ctrl;
struct iop3xx_aau_desc_ctrl desc_ctrl_field;
};
u32 d_dest_addr;
};
union iop3xx_desc {
struct iop3xx_desc_aau *aau;
struct iop3xx_desc_dma *dma;
struct iop3xx_desc_pq_xor *pq_xor;
struct iop3xx_desc_dual_xor *dual_xor;
void *ptr;
};
static inline int iop_adma_get_max_xor(void)
{
return 32;
}
static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
{
int id = chan->device->id;
switch (id) {
case DMA0_ID:
case DMA1_ID:
return __raw_readl(DMA_DAR(chan));
case AAU_ID:
return __raw_readl(AAU_ADAR(chan));
default:
BUG();
}
return 0;
}
static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
u32 next_desc_addr)
{
int id = chan->device->id;
switch (id) {
case DMA0_ID:
case DMA1_ID:
__raw_writel(next_desc_addr, DMA_NDAR(chan));
break;
case AAU_ID:
__raw_writel(next_desc_addr, AAU_ANDAR(chan));
break;
}
}
#define IOP_ADMA_STATUS_BUSY (1 << 10)
#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024)
#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)
#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
static inline int iop_chan_is_busy(struct iop_adma_chan *chan)
{
u32 status = __raw_readl(DMA_CSR(chan));
return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0;
}
static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc,
int num_slots)
{
/* num_slots will only ever be 1, 2, 4, or 8 */
return (desc->idx & (num_slots - 1)) ? 0 : 1;
}
/* to do: support large (i.e. > hw max) buffer sizes */
static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
{
*slots_per_op = 1;
return 1;
}
/* to do: support large (i.e. > hw max) buffer sizes */
static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
{
*slots_per_op = 1;
return 1;
}
static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
int *slots_per_op)
{
const static int slot_count_table[] = { 0,
1, 1, 1, 1, /* 01 - 04 */
2, 2, 2, 2, /* 05 - 08 */
4, 4, 4, 4, /* 09 - 12 */
4, 4, 4, 4, /* 13 - 16 */
8, 8, 8, 8, /* 17 - 20 */
8, 8, 8, 8, /* 21 - 24 */
8, 8, 8, 8, /* 25 - 28 */
8, 8, 8, 8, /* 29 - 32 */
};
*slots_per_op = slot_count_table[src_cnt];
return *slots_per_op;
}
static inline int
iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan)
{
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
return iop_chan_memcpy_slot_count(0, slots_per_op);
case AAU_ID:
return iop3xx_aau_xor_slot_count(0, 2, slots_per_op);
default:
BUG();
}
return 0;
}
static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
int *slots_per_op)
{
int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT)
return slot_cnt;
len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) {
len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
slot_cnt += *slots_per_op;
}
if (len)
slot_cnt += *slots_per_op;
return slot_cnt;
}
/* zero sum on iop3xx is limited to 1k at a time so it requires multiple
* descriptors
*/
static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
int *slots_per_op)
{
int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT)
return slot_cnt;
len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
slot_cnt += *slots_per_op;
}
if (len)
slot_cnt += *slots_per_op;
return slot_cnt;
}
static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
return hw_desc.dma->dest_addr;
case AAU_ID:
return hw_desc.aau->dest_addr;
default:
BUG();
}
return 0;
}
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
return hw_desc.dma->byte_count;
case AAU_ID:
return hw_desc.aau->byte_count;
default:
BUG();
}
return 0;
}
/* translate the src_idx to a descriptor word index */
static inline int __desc_idx(int src_idx)
{
const static int desc_idx_table[] = { 0, 0, 0, 0,
0, 1, 2, 3,
5, 6, 7, 8,
9, 10, 11, 12,
14, 15, 16, 17,
18, 19, 20, 21,
23, 24, 25, 26,
27, 28, 29, 30,
};
return desc_idx_table[src_idx];
}
static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
int src_idx)
{
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
return hw_desc.dma->src_addr;
case AAU_ID:
break;
default:
BUG();
}
if (src_idx < 4)
return hw_desc.aau->src[src_idx];
else
return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr;
}
static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
int src_idx, dma_addr_t addr)
{
if (src_idx < 4)
hw_desc->src[src_idx] = addr;
else
hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr;
}
static inline void
iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
{
struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop3xx_dma_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
u_desc_ctrl.field.mem_to_mem_en = 1;
u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
u_desc_ctrl.field.int_en = int_en;
hw_desc->desc_ctrl = u_desc_ctrl.value;
hw_desc->upper_pci_src_addr = 0;
hw_desc->crc_addr = 0;
}
static inline void
iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop3xx_aau_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
u_desc_ctrl.field.dest_write_en = 1;
u_desc_ctrl.field.int_en = int_en;
hw_desc->desc_ctrl = u_desc_ctrl.value;
}
static inline u32
iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en)
{
int i, shift;
u32 edcr;
union {
u32 value;
struct iop3xx_aau_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
switch (src_cnt) {
case 25 ... 32:
u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
edcr = 0;
shift = 1;
for (i = 24; i < src_cnt; i++) {
edcr |= (1 << shift);
shift += 3;
}
hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr;
src_cnt = 24;
/* fall through */
case 17 ... 24:
if (!u_desc_ctrl.field.blk_ctrl) {
hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
}
edcr = 0;
shift = 1;
for (i = 16; i < src_cnt; i++) {
edcr |= (1 << shift);
shift += 3;
}
hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr;
src_cnt = 16;
/* fall through */
case 9 ... 16:
if (!u_desc_ctrl.field.blk_ctrl)
u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
edcr = 0;
shift = 1;
for (i = 8; i < src_cnt; i++) {
edcr |= (1 << shift);
shift += 3;
}
hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr;
src_cnt = 8;
/* fall through */
case 2 ... 8:
shift = 1;
for (i = 0; i < src_cnt; i++) {
u_desc_ctrl.value |= (1 << shift);
shift += 3;
}
if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
}
u_desc_ctrl.field.dest_write_en = 1;
u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
u_desc_ctrl.field.int_en = int_en;
hw_desc->desc_ctrl = u_desc_ctrl.value;
return u_desc_ctrl.value;
}
static inline void
iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
{
iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en);
}
/* return the number of operations */
static inline int
iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
{
int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
union {
u32 value;
struct iop3xx_aau_desc_ctrl field;
} u_desc_ctrl;
int i, j;
hw_desc = desc->hw_desc;
for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
i += slots_per_op, j++) {
iter = iop_hw_desc_slot_idx(hw_desc, i);
u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en);
u_desc_ctrl.field.dest_write_en = 0;
u_desc_ctrl.field.zero_result_en = 1;
u_desc_ctrl.field.int_en = int_en;
iter->desc_ctrl = u_desc_ctrl.value;
/* for the subsequent descriptors preserve the store queue
* and chain them together
*/
if (i) {
prev_hw_desc =
iop_hw_desc_slot_idx(hw_desc, i - slots_per_op);
prev_hw_desc->next_desc =
(u32) (desc->async_tx.phys + (i << 5));
}
}
return j;
}
static inline void
iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop3xx_aau_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
switch (src_cnt) {
case 25 ... 32:
u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
/* fall through */
case 17 ... 24:
if (!u_desc_ctrl.field.blk_ctrl) {
hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
}
hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0;
/* fall through */
case 9 ... 16:
if (!u_desc_ctrl.field.blk_ctrl)
u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0;
/* fall through */
case 1 ... 8:
if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
}
u_desc_ctrl.field.dest_write_en = 0;
u_desc_ctrl.field.int_en = int_en;
hw_desc->desc_ctrl = u_desc_ctrl.value;
}
static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
u32 byte_count)
{
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
hw_desc.dma->byte_count = byte_count;
break;
case AAU_ID:
hw_desc.aau->byte_count = byte_count;
break;
default:
BUG();
}
}
static inline void
iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
iop_desc_init_memcpy(desc, 1);
hw_desc.dma->byte_count = 0;
hw_desc.dma->dest_addr = 0;
hw_desc.dma->src_addr = 0;
break;
case AAU_ID:
iop_desc_init_null_xor(desc, 2, 1);
hw_desc.aau->byte_count = 0;
hw_desc.aau->dest_addr = 0;
hw_desc.aau->src[0] = 0;
hw_desc.aau->src[1] = 0;
break;
default:
BUG();
}
}
static inline void
iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
{
int slots_per_op = desc->slots_per_op;
struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
int i = 0;
if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
hw_desc->byte_count = len;
} else {
do {
iter = iop_hw_desc_slot_idx(hw_desc, i);
iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
i += slots_per_op;
} while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
if (len) {
iter = iop_hw_desc_slot_idx(hw_desc, i);
iter->byte_count = len;
}
}
}
static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
dma_addr_t addr)
{
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
hw_desc.dma->dest_addr = addr;
break;
case AAU_ID:
hw_desc.aau->dest_addr = addr;
break;
default:
BUG();
}
}
static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
dma_addr_t addr)
{
struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
hw_desc->src_addr = addr;
}
static inline void
iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
dma_addr_t addr)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
int i;
for (i = 0; (slot_cnt -= slots_per_op) >= 0;
i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
iter = iop_hw_desc_slot_idx(hw_desc, i);
iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
}
}
static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
int src_idx, dma_addr_t addr)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
int i;
for (i = 0; (slot_cnt -= slots_per_op) >= 0;
i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) {
iter = iop_hw_desc_slot_idx(hw_desc, i);
iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
}
}
static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
u32 next_desc_addr)
{
/* hw_desc->next_desc is the same location for all channels */
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
BUG_ON(hw_desc.dma->next_desc);
hw_desc.dma->next_desc = next_desc_addr;
}
static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
{
/* hw_desc->next_desc is the same location for all channels */
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
return hw_desc.dma->next_desc;
}
static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
{
/* hw_desc->next_desc is the same location for all channels */
union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
hw_desc.dma->next_desc = 0;
}
static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
u32 val)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
hw_desc->src[0] = val;
}
static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
BUG_ON(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
return desc_ctrl.zero_result_err;
}
static inline void iop_chan_append(struct iop_adma_chan *chan)
{
u32 dma_chan_ctrl;
/* workaround dropped interrupts on 3xx */
mod_timer(&chan->cleanup_watchdog, jiffies + msecs_to_jiffies(3));
dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
dma_chan_ctrl |= 0x2;
__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
}
static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan)
{
if (!busy)
del_timer(&chan->cleanup_watchdog);
}
static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
{
return __raw_readl(DMA_CSR(chan));
}
static inline void iop_chan_disable(struct iop_adma_chan *chan)
{
u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
dma_chan_ctrl &= ~1;
__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
}
static inline void iop_chan_enable(struct iop_adma_chan *chan)
{
u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
dma_chan_ctrl |= 1;
__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
}
static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
{
u32 status = __raw_readl(DMA_CSR(chan));
status &= (1 << 9);
__raw_writel(status, DMA_CSR(chan));
}
static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
{
u32 status = __raw_readl(DMA_CSR(chan));
status &= (1 << 8);
__raw_writel(status, DMA_CSR(chan));
}
static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
{
u32 status = __raw_readl(DMA_CSR(chan));
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1);
break;
case AAU_ID:
status &= (1 << 5);
break;
default:
BUG();
}
__raw_writel(status, DMA_CSR(chan));
}
static inline int
iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
{
return 0;
}
static inline int
iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
{
return 0;
}
static inline int
iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
{
return 0;
}
static inline int
iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
{
return test_bit(5, &status);
}
static inline int
iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
{
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
return test_bit(2, &status);
default:
return 0;
}
}
static inline int
iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
{
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
return test_bit(3, &status);
default:
return 0;
}
}
static inline int
iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
{
switch (chan->device->id) {
case DMA0_ID:
case DMA1_ID:
return test_bit(1, &status);
default:
return 0;
}
}
#endif /* _ADMA_H */
......@@ -144,24 +144,9 @@ extern int init_atu;
#define IOP3XX_IAR (volatile u32 *)IOP3XX_REG_ADDR(0x0380)
/* DMA Controller */
#define IOP3XX_DMA0_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0400)
#define IOP3XX_DMA0_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0404)
#define IOP3XX_DMA0_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x040c)
#define IOP3XX_DMA0_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0410)
#define IOP3XX_DMA0_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0414)
#define IOP3XX_DMA0_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0418)
#define IOP3XX_DMA0_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x041c)
#define IOP3XX_DMA0_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0420)
#define IOP3XX_DMA0_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0424)
#define IOP3XX_DMA1_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0440)
#define IOP3XX_DMA1_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0444)
#define IOP3XX_DMA1_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x044c)
#define IOP3XX_DMA1_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0450)
#define IOP3XX_DMA1_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0454)
#define IOP3XX_DMA1_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0458)
#define IOP3XX_DMA1_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x045c)
#define IOP3XX_DMA1_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0460)
#define IOP3XX_DMA1_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0464)
#define IOP3XX_DMA_PHYS_BASE(chan) (IOP3XX_PERIPHERAL_PHYS_BASE + \
(0x400 + (chan << 6)))
#define IOP3XX_DMA_UPPER_PA(chan) (IOP3XX_DMA_PHYS_BASE(chan) + 0x27)
/* Peripheral bus interface */
#define IOP3XX_PBCR (volatile u32 *)IOP3XX_REG_ADDR(0x0680)
......@@ -210,48 +195,8 @@ extern int init_atu;
#define IOP_TMR_RATIO_1_1 0x00
/* Application accelerator unit */
#define IOP3XX_AAU_ACR (volatile u32 *)IOP3XX_REG_ADDR(0x0800)
#define IOP3XX_AAU_ASR (volatile u32 *)IOP3XX_REG_ADDR(0x0804)
#define IOP3XX_AAU_ADAR (volatile u32 *)IOP3XX_REG_ADDR(0x0808)
#define IOP3XX_AAU_ANDAR (volatile u32 *)IOP3XX_REG_ADDR(0x080c)
#define IOP3XX_AAU_SAR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0810)
#define IOP3XX_AAU_SAR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0814)
#define IOP3XX_AAU_SAR3 (volatile u32 *)IOP3XX_REG_ADDR(0x0818)
#define IOP3XX_AAU_SAR4 (volatile u32 *)IOP3XX_REG_ADDR(0x081c)
#define IOP3XX_AAU_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x0820)
#define IOP3XX_AAU_ABCR (volatile u32 *)IOP3XX_REG_ADDR(0x0824)
#define IOP3XX_AAU_ADCR (volatile u32 *)IOP3XX_REG_ADDR(0x0828)
#define IOP3XX_AAU_SAR5 (volatile u32 *)IOP3XX_REG_ADDR(0x082c)
#define IOP3XX_AAU_SAR6 (volatile u32 *)IOP3XX_REG_ADDR(0x0830)
#define IOP3XX_AAU_SAR7 (volatile u32 *)IOP3XX_REG_ADDR(0x0834)
#define IOP3XX_AAU_SAR8 (volatile u32 *)IOP3XX_REG_ADDR(0x0838)
#define IOP3XX_AAU_EDCR0 (volatile u32 *)IOP3XX_REG_ADDR(0x083c)
#define IOP3XX_AAU_SAR9 (volatile u32 *)IOP3XX_REG_ADDR(0x0840)
#define IOP3XX_AAU_SAR10 (volatile u32 *)IOP3XX_REG_ADDR(0x0844)
#define IOP3XX_AAU_SAR11 (volatile u32 *)IOP3XX_REG_ADDR(0x0848)
#define IOP3XX_AAU_SAR12 (volatile u32 *)IOP3XX_REG_ADDR(0x084c)
#define IOP3XX_AAU_SAR13 (volatile u32 *)IOP3XX_REG_ADDR(0x0850)
#define IOP3XX_AAU_SAR14 (volatile u32 *)IOP3XX_REG_ADDR(0x0854)
#define IOP3XX_AAU_SAR15 (volatile u32 *)IOP3XX_REG_ADDR(0x0858)
#define IOP3XX_AAU_SAR16 (volatile u32 *)IOP3XX_REG_ADDR(0x085c)
#define IOP3XX_AAU_EDCR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0860)
#define IOP3XX_AAU_SAR17 (volatile u32 *)IOP3XX_REG_ADDR(0x0864)
#define IOP3XX_AAU_SAR18 (volatile u32 *)IOP3XX_REG_ADDR(0x0868)
#define IOP3XX_AAU_SAR19 (volatile u32 *)IOP3XX_REG_ADDR(0x086c)
#define IOP3XX_AAU_SAR20 (volatile u32 *)IOP3XX_REG_ADDR(0x0870)
#define IOP3XX_AAU_SAR21 (volatile u32 *)IOP3XX_REG_ADDR(0x0874)
#define IOP3XX_AAU_SAR22 (volatile u32 *)IOP3XX_REG_ADDR(0x0878)
#define IOP3XX_AAU_SAR23 (volatile u32 *)IOP3XX_REG_ADDR(0x087c)
#define IOP3XX_AAU_SAR24 (volatile u32 *)IOP3XX_REG_ADDR(0x0880)
#define IOP3XX_AAU_EDCR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0884)
#define IOP3XX_AAU_SAR25 (volatile u32 *)IOP3XX_REG_ADDR(0x0888)
#define IOP3XX_AAU_SAR26 (volatile u32 *)IOP3XX_REG_ADDR(0x088c)
#define IOP3XX_AAU_SAR27 (volatile u32 *)IOP3XX_REG_ADDR(0x0890)
#define IOP3XX_AAU_SAR28 (volatile u32 *)IOP3XX_REG_ADDR(0x0894)
#define IOP3XX_AAU_SAR29 (volatile u32 *)IOP3XX_REG_ADDR(0x0898)
#define IOP3XX_AAU_SAR30 (volatile u32 *)IOP3XX_REG_ADDR(0x089c)
#define IOP3XX_AAU_SAR31 (volatile u32 *)IOP3XX_REG_ADDR(0x08a0)
#define IOP3XX_AAU_SAR32 (volatile u32 *)IOP3XX_REG_ADDR(0x08a4)
#define IOP3XX_AAU_PHYS_BASE (IOP3XX_PERIPHERAL_PHYS_BASE + 0x800)
#define IOP3XX_AAU_UPPER_PA (IOP3XX_AAU_PHYS_BASE + 0xa7)
/* I2C bus interface unit */
#define IOP3XX_ICR0 (volatile u32 *)IOP3XX_REG_ADDR(0x1680)
......@@ -329,6 +274,9 @@ static inline void write_tisr(u32 val)
asm volatile("mcr p6, 0, %0, c6, c1, 0" : : "r" (val));
}
extern struct platform_device iop3xx_dma_0_channel;
extern struct platform_device iop3xx_dma_1_channel;
extern struct platform_device iop3xx_aau_channel;
extern struct platform_device iop3xx_i2c0_device;
extern struct platform_device iop3xx_i2c1_device;
......
/*
* Copyright © 2006, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#ifndef IOP_ADMA_H
#define IOP_ADMA_H
#include <linux/types.h>
#include <linux/dmaengine.h>
#include <linux/interrupt.h>
#define IOP_ADMA_SLOT_SIZE 32
#define IOP_ADMA_THRESHOLD 4
/**
* struct iop_adma_device - internal representation of an ADMA device
* @pdev: Platform device
* @id: HW ADMA Device selector
* @dma_desc_pool: base of DMA descriptor region (DMA address)
* @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
* @common: embedded struct dma_device
*/
struct iop_adma_device {
struct platform_device *pdev;
int id;
dma_addr_t dma_desc_pool;
void *dma_desc_pool_virt;
struct dma_device common;
};
/**
* struct iop_adma_chan - internal representation of an ADMA device
* @pending: allows batching of hardware operations
* @completed_cookie: identifier for the most recently completed operation
* @lock: serializes enqueue/dequeue operations to the slot pool
* @mmr_base: memory mapped register base
* @chain: device chain view of the descriptors
* @device: parent device
* @common: common dmaengine channel object members
* @last_used: place holder for allocation to continue from where it left off
* @all_slots: complete domain of slots usable by the channel
* @cleanup_watchdog: workaround missed interrupts on iop3xx
* @slots_allocated: records the actual size of the descriptor slot pool
* @irq_tasklet: bottom half where iop_adma_slot_cleanup runs
*/
struct iop_adma_chan {
int pending;
dma_cookie_t completed_cookie;
spinlock_t lock; /* protects the descriptor slot pool */
void __iomem *mmr_base;
struct list_head chain;
struct iop_adma_device *device;
struct dma_chan common;
struct iop_adma_desc_slot *last_used;
struct list_head all_slots;
struct timer_list cleanup_watchdog;
int slots_allocated;
struct tasklet_struct irq_tasklet;
};
/**
* struct iop_adma_desc_slot - IOP-ADMA software descriptor
* @slot_node: node on the iop_adma_chan.all_slots list
* @chain_node: node on the op_adma_chan.chain list
* @hw_desc: virtual address of the hardware descriptor chain
* @phys: hardware address of the hardware descriptor chain
* @group_head: first operation in a transaction
* @slot_cnt: total slots used in an transaction (group of operations)
* @slots_per_op: number of slots per operation
* @idx: pool index
* @unmap_src_cnt: number of xor sources
* @unmap_len: transaction bytecount
* @async_tx: support for the async_tx api
* @group_list: list of slots that make up a multi-descriptor transaction
* for example transfer lengths larger than the supported hw max
* @xor_check_result: result of zero sum
* @crc32_result: result crc calculation
*/
struct iop_adma_desc_slot {
struct list_head slot_node;
struct list_head chain_node;
void *hw_desc;
struct iop_adma_desc_slot *group_head;
u16 slot_cnt;
u16 slots_per_op;
u16 idx;
u16 unmap_src_cnt;
size_t unmap_len;
struct dma_async_tx_descriptor async_tx;
union {
u32 *xor_check_result;
u32 *crc32_result;
};
};
struct iop_adma_platform_data {
int hw_id;
dma_cap_mask_t cap_mask;
size_t pool_size;
};
#define to_iop_sw_desc(addr_hw_desc) \
container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc)
#define iop_hw_desc_slot_idx(hw_desc, idx) \
( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) )
#endif
/*
* Copyright © 2006, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#ifndef _ASYNC_TX_H_
#define _ASYNC_TX_H_
#include <linux/dmaengine.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
/**
* dma_chan_ref - object used to manage dma channels received from the
* dmaengine core.
* @chan - the channel being tracked
* @node - node for the channel to be placed on async_tx_master_list
* @rcu - for list_del_rcu
* @count - number of times this channel is listed in the pool
* (for channels with multiple capabiities)
*/
struct dma_chan_ref {
struct dma_chan *chan;
struct list_head node;
struct rcu_head rcu;
atomic_t count;
};
/**
* async_tx_flags - modifiers for the async_* calls
* @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where the
* the destination address is not a source. The asynchronous case handles this
* implicitly, the synchronous case needs to zero the destination block.
* @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is
* also one of the source addresses. In the synchronous case the destination
* address is an implied source, whereas the asynchronous case it must be listed
* as a source. The destination address must be the first address in the source
* array.
* @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations
* @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
* dependency chain
* @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
* @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously
* take an atomic mapping (KM_USER0) on the source page(s)
* @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously
* take an atomic mapping (KM_USER0) on the dest page(s)
*/
enum async_tx_flags {
ASYNC_TX_XOR_ZERO_DST = (1 << 0),
ASYNC_TX_XOR_DROP_DST = (1 << 1),
ASYNC_TX_ASSUME_COHERENT = (1 << 2),
ASYNC_TX_ACK = (1 << 3),
ASYNC_TX_DEP_ACK = (1 << 4),
ASYNC_TX_KMAP_SRC = (1 << 5),
ASYNC_TX_KMAP_DST = (1 << 6),
};
#ifdef CONFIG_DMA_ENGINE
void async_tx_issue_pending_all(void);
enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx);
struct dma_chan *
async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
enum dma_transaction_type tx_type);
#else
static inline void async_tx_issue_pending_all(void)
{
do { } while (0);
}
static inline enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
return DMA_SUCCESS;
}
static inline void
async_tx_run_dependencies(struct dma_async_tx_descriptor *tx,
struct dma_chan *host_chan)
{
do { } while (0);
}
static inline struct dma_chan *
async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
enum dma_transaction_type tx_type)
{
return NULL;
}
#endif
/**
* async_tx_sync_epilog - actions to take if an operation is run synchronously
* @flags: async_tx flags
* @depend_tx: transaction depends on depend_tx
* @cb_fn: function to call when the transaction completes
* @cb_fn_param: parameter to pass to the callback routine
*/
static inline void
async_tx_sync_epilog(unsigned long flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param)
{
if (cb_fn)
cb_fn(cb_fn_param);
if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
async_tx_ack(depend_tx);
}
void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list,
unsigned int offset, int src_cnt, size_t len,
u32 *result, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
unsigned int src_offset, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset,
size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
#endif /* _ASYNC_TX_H_ */
......@@ -21,28 +21,39 @@
#ifndef DMAENGINE_H
#define DMAENGINE_H
#ifdef CONFIG_DMA_ENGINE
#include <linux/device.h>
#include <linux/uio.h>
#include <linux/kref.h>
#include <linux/completion.h>
#include <linux/rcupdate.h>
#include <linux/dma-mapping.h>
/**
* enum dma_event - resource PNP/power managment events
* enum dma_state - resource PNP/power managment state
* @DMA_RESOURCE_SUSPEND: DMA device going into low power state
* @DMA_RESOURCE_RESUME: DMA device returning to full power
* @DMA_RESOURCE_ADDED: DMA device added to the system
* @DMA_RESOURCE_AVAILABLE: DMA device available to the system
* @DMA_RESOURCE_REMOVED: DMA device removed from the system
*/
enum dma_event {
enum dma_state {
DMA_RESOURCE_SUSPEND,
DMA_RESOURCE_RESUME,
DMA_RESOURCE_ADDED,
DMA_RESOURCE_AVAILABLE,
DMA_RESOURCE_REMOVED,
};
/**
* enum dma_state_client - state of the channel in the client
* @DMA_ACK: client would like to use, or was using this channel
* @DMA_DUP: client has already seen this channel, or is not using this channel
* @DMA_NAK: client does not want to see any more channels
*/
enum dma_state_client {
DMA_ACK,
DMA_DUP,
DMA_NAK,
};
/**
* typedef dma_cookie_t - an opaque DMA cookie
*
......@@ -64,6 +75,31 @@ enum dma_status {
DMA_ERROR,
};
/**
* enum dma_transaction_type - DMA transaction types/indexes
*/
enum dma_transaction_type {
DMA_MEMCPY,
DMA_XOR,
DMA_PQ_XOR,
DMA_DUAL_XOR,
DMA_PQ_UPDATE,
DMA_ZERO_SUM,
DMA_PQ_ZERO_SUM,
DMA_MEMSET,
DMA_MEMCPY_CRC32C,
DMA_INTERRUPT,
};
/* last transaction type for creation of the capabilities mask */
#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1)
/**
* dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
* See linux/cpumask.h
*/
typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
/**
* struct dma_chan_percpu - the per-CPU part of struct dma_chan
* @refcount: local_t used for open-coded "bigref" counting
......@@ -80,7 +116,6 @@ struct dma_chan_percpu {
/**
* struct dma_chan - devices supply DMA channels, clients use them
* @client: ptr to the client user of this chan, will be %NULL when unused
* @device: ptr to the dma device who supplies this channel, always !%NULL
* @cookie: last cookie value returned to client
* @chan_id: channel ID for sysfs
......@@ -88,12 +123,10 @@ struct dma_chan_percpu {
* @refcount: kref, used in "bigref" slow-mode
* @slow_ref: indicates that the DMA channel is free
* @rcu: the DMA channel's RCU head
* @client_node: used to add this to the client chan list
* @device_node: used to add this to the device chan list
* @local: per-cpu pointer to a struct dma_chan_percpu
*/
struct dma_chan {
struct dma_client *client;
struct dma_device *device;
dma_cookie_t cookie;
......@@ -105,11 +138,11 @@ struct dma_chan {
int slow_ref;
struct rcu_head rcu;
struct list_head client_node;
struct list_head device_node;
struct dma_chan_percpu *local;
};
void dma_chan_cleanup(struct kref *kref);
static inline void dma_chan_get(struct dma_chan *chan)
......@@ -134,27 +167,75 @@ static inline void dma_chan_put(struct dma_chan *chan)
/*
* typedef dma_event_callback - function pointer to a DMA event callback
* For each channel added to the system this routine is called for each client.
* If the client would like to use the channel it returns '1' to signal (ack)
* the dmaengine core to take out a reference on the channel and its
* corresponding device. A client must not 'ack' an available channel more
* than once. When a channel is removed all clients are notified. If a client
* is using the channel it must 'ack' the removal. A client must not 'ack' a
* removed channel more than once.
* @client - 'this' pointer for the client context
* @chan - channel to be acted upon
* @state - available or removed
*/
typedef void (*dma_event_callback) (struct dma_client *client,
struct dma_chan *chan, enum dma_event event);
struct dma_client;
typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
struct dma_chan *chan, enum dma_state state);
/**
* struct dma_client - info on the entity making use of DMA services
* @event_callback: func ptr to call when something happens
* @chan_count: number of chans allocated
* @chans_desired: number of chans requested. Can be +/- chan_count
* @lock: protects access to the channels list
* @channels: the list of DMA channels allocated
* @cap_mask: only return channels that satisfy the requested capabilities
* a value of zero corresponds to any capability
* @global_node: list_head for global dma_client_list
*/
struct dma_client {
dma_event_callback event_callback;
unsigned int chan_count;
unsigned int chans_desired;
dma_cap_mask_t cap_mask;
struct list_head global_node;
};
typedef void (*dma_async_tx_callback)(void *dma_async_param);
/**
* struct dma_async_tx_descriptor - async transaction descriptor
* ---dma generic offload fields---
* @cookie: tracking cookie for this transaction, set to -EBUSY if
* this tx is sitting on a dependency list
* @ack: the descriptor can not be reused until the client acknowledges
* receipt, i.e. has has a chance to establish any dependency chains
* @phys: physical address of the descriptor
* @tx_list: driver common field for operations that require multiple
* descriptors
* @chan: target channel for this operation
* @tx_submit: set the prepared descriptor(s) to be executed by the engine
* @tx_set_dest: set a destination address in a hardware descriptor
* @tx_set_src: set a source address in a hardware descriptor
* @callback: routine to call after this operation is complete
* @callback_param: general parameter to pass to the callback routine
* ---async_tx api specific fields---
* @depend_list: at completion this list of transactions are submitted
* @depend_node: allow this transaction to be executed after another
* transaction has completed, possibly on another channel
* @parent: pointer to the next level up in the dependency chain
* @lock: protect the dependency list
*/
struct dma_async_tx_descriptor {
dma_cookie_t cookie;
int ack;
dma_addr_t phys;
struct list_head tx_list;
struct dma_chan *chan;
dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
void (*tx_set_dest)(dma_addr_t addr,
struct dma_async_tx_descriptor *tx, int index);
void (*tx_set_src)(dma_addr_t addr,
struct dma_async_tx_descriptor *tx, int index);
dma_async_tx_callback callback;
void *callback_param;
struct list_head depend_list;
struct list_head depend_node;
struct dma_async_tx_descriptor *parent;
spinlock_t lock;
struct list_head channels;
struct list_head global_node;
};
/**
......@@ -162,141 +243,130 @@ struct dma_client {
* @chancnt: how many DMA channels are supported
* @channels: the list of struct dma_chan
* @global_node: list_head for global dma_device_list
* @cap_mask: one or more dma_capability flags
* @max_xor: maximum number of xor sources, 0 if no capability
* @refcount: reference count
* @done: IO completion struct
* @dev_id: unique device ID
* @dev: struct device reference for dma mapping api
* @device_alloc_chan_resources: allocate resources and return the
* number of allocated descriptors
* @device_free_chan_resources: release DMA channel's resources
* @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer
* @device_memcpy_buf_to_pg: memcpy buf pointer to struct page
* @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset
* @device_memcpy_complete: poll the status of an IOAT DMA transaction
* @device_memcpy_issue_pending: push appended descriptors to hardware
* @device_prep_dma_memcpy: prepares a memcpy operation
* @device_prep_dma_xor: prepares a xor operation
* @device_prep_dma_zero_sum: prepares a zero_sum operation
* @device_prep_dma_memset: prepares a memset operation
* @device_prep_dma_interrupt: prepares an end of chain interrupt operation
* @device_dependency_added: async_tx notifies the channel about new deps
* @device_issue_pending: push pending transactions to hardware
*/
struct dma_device {
unsigned int chancnt;
struct list_head channels;
struct list_head global_node;
dma_cap_mask_t cap_mask;
int max_xor;
struct kref refcount;
struct completion done;
int dev_id;
struct device *dev;
int (*device_alloc_chan_resources)(struct dma_chan *chan);
void (*device_free_chan_resources)(struct dma_chan *chan);
dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan,
void *dest, void *src, size_t len);
dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan,
struct page *page, unsigned int offset, void *kdata,
size_t len);
dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan,
struct page *dest_pg, unsigned int dest_off,
struct page *src_pg, unsigned int src_off, size_t len);
enum dma_status (*device_memcpy_complete)(struct dma_chan *chan,
struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
struct dma_chan *chan, size_t len, int int_en);
struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
struct dma_chan *chan, unsigned int src_cnt, size_t len,
int int_en);
struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
struct dma_chan *chan, unsigned int src_cnt, size_t len,
u32 *result, int int_en);
struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
struct dma_chan *chan, int value, size_t len, int int_en);
struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
struct dma_chan *chan);
void (*device_dependency_added)(struct dma_chan *chan);
enum dma_status (*device_is_tx_complete)(struct dma_chan *chan,
dma_cookie_t cookie, dma_cookie_t *last,
dma_cookie_t *used);
void (*device_memcpy_issue_pending)(struct dma_chan *chan);
void (*device_issue_pending)(struct dma_chan *chan);
};
/* --- public DMA engine API --- */
struct dma_client *dma_async_client_register(dma_event_callback event_callback);
void dma_async_client_register(struct dma_client *client);
void dma_async_client_unregister(struct dma_client *client);
void dma_async_client_chan_request(struct dma_client *client,
unsigned int number);
void dma_async_client_chan_request(struct dma_client *client);
dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
void *dest, void *src, size_t len);
dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
struct page *page, unsigned int offset, void *kdata, size_t len);
dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
unsigned int src_off, size_t len);
void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
struct dma_chan *chan);
/**
* dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
* @chan: DMA channel to offload copy to
* @dest: destination address (virtual)
* @src: source address (virtual)
* @len: length
*
* Both @dest and @src must be mappable to a bus address according to the
* DMA mapping API rules for streaming mappings.
* Both @dest and @src must stay memory resident (kernel memory or locked
* user space pages).
*/
static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
void *dest, void *src, size_t len)
static inline void
async_tx_ack(struct dma_async_tx_descriptor *tx)
{
int cpu = get_cpu();
per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
per_cpu_ptr(chan->local, cpu)->memcpy_count++;
put_cpu();
return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len);
tx->ack = 1;
}
/**
* dma_async_memcpy_buf_to_pg - offloaded copy from address to page
* @chan: DMA channel to offload copy to
* @page: destination page
* @offset: offset in page to copy to
* @kdata: source address (virtual)
* @len: length
*
* Both @page/@offset and @kdata must be mappable to a bus address according
* to the DMA mapping API rules for streaming mappings.
* Both @page/@offset and @kdata must stay memory resident (kernel memory or
* locked user space pages)
*/
static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
struct page *page, unsigned int offset, void *kdata, size_t len)
#define first_dma_cap(mask) __first_dma_cap(&(mask))
static inline int __first_dma_cap(const dma_cap_mask_t *srcp)
{
int cpu = get_cpu();
per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
per_cpu_ptr(chan->local, cpu)->memcpy_count++;
put_cpu();
return min_t(int, DMA_TX_TYPE_END,
find_first_bit(srcp->bits, DMA_TX_TYPE_END));
}
return chan->device->device_memcpy_buf_to_pg(chan, page, offset,
kdata, len);
#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask))
static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp)
{
return min_t(int, DMA_TX_TYPE_END,
find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1));
}
/**
* dma_async_memcpy_pg_to_pg - offloaded copy from page to page
* @chan: DMA channel to offload copy to
* @dest_pg: destination page
* @dest_off: offset in page to copy to
* @src_pg: source page
* @src_off: offset in page to copy from
* @len: length
*
* Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
* address according to the DMA mapping API rules for streaming mappings.
* Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
* (kernel memory or locked user space pages).
*/
static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
unsigned int src_off, size_t len)
#define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask))
static inline void
__dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
{
int cpu = get_cpu();
per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
per_cpu_ptr(chan->local, cpu)->memcpy_count++;
put_cpu();
set_bit(tx_type, dstp->bits);
}
return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off,
src_pg, src_off, len);
#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask))
static inline int
__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
{
return test_bit(tx_type, srcp->bits);
}
#define for_each_dma_cap_mask(cap, mask) \
for ((cap) = first_dma_cap(mask); \
(cap) < DMA_TX_TYPE_END; \
(cap) = next_dma_cap((cap), (mask)))
/**
* dma_async_memcpy_issue_pending - flush pending copies to HW
* dma_async_issue_pending - flush pending transactions to HW
* @chan: target DMA channel
*
* This allows drivers to push copies to HW in batches,
* reducing MMIO writes where possible.
*/
static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan)
static inline void dma_async_issue_pending(struct dma_chan *chan)
{
return chan->device->device_memcpy_issue_pending(chan);
return chan->device->device_issue_pending(chan);
}
#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan)
/**
* dma_async_memcpy_complete - poll for transaction completion
* dma_async_is_tx_complete - poll for transaction completion
* @chan: DMA channel
* @cookie: transaction identifier to check status of
* @last: returns last completed cookie, can be NULL
......@@ -306,12 +376,15 @@ static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan)
* internal state and can be used with dma_async_is_complete() to check
* the status of multiple cookies without re-checking hardware state.
*/
static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan,
static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
{
return chan->device->device_memcpy_complete(chan, cookie, last, used);
return chan->device->device_is_tx_complete(chan, cookie, last, used);
}
#define dma_async_memcpy_complete(chan, cookie, last, used)\
dma_async_is_tx_complete(chan, cookie, last, used)
/**
* dma_async_is_complete - test a cookie against chan state
* @cookie: transaction identifier to test status of
......@@ -334,6 +407,7 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
return DMA_IN_PROGRESS;
}
enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
/* --- DMA device --- */
......@@ -362,5 +436,4 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
struct dma_pinned_list *pinned_list, struct page *page,
unsigned int offset, size_t len);
#endif /* CONFIG_DMA_ENGINE */
#endif /* DMAENGINE_H */
......@@ -479,6 +479,9 @@
#define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361
#define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL 0x252
#define PCI_VENDOR_ID_UNISYS 0x1018
#define PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR 0x001C
#define PCI_VENDOR_ID_COMPEX2 0x101a /* pci.ids says "AT&T GIS (NCR)" */
#define PCI_DEVICE_ID_COMPEX2_100VG 0x0005
......
......@@ -116,13 +116,46 @@
* attach a request to an active stripe (add_stripe_bh())
* lockdev attach-buffer unlockdev
* handle a stripe (handle_stripe())
* lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io
* lockstripe clrSTRIPE_HANDLE ...
* (lockdev check-buffers unlockdev) ..
* change-state ..
* record io/ops needed unlockstripe schedule io/ops
* release an active stripe (release_stripe())
* lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
*
* The refcount counts each thread that have activated the stripe,
* plus raid5d if it is handling it, plus one for each active request
* on a cached buffer.
* on a cached buffer, and plus one if the stripe is undergoing stripe
* operations.
*
* Stripe operations are performed outside the stripe lock,
* the stripe operations are:
* -copying data between the stripe cache and user application buffers
* -computing blocks to save a disk access, or to recover a missing block
* -updating the parity on a write operation (reconstruct write and
* read-modify-write)
* -checking parity correctness
* -running i/o to disk
* These operations are carried out by raid5_run_ops which uses the async_tx
* api to (optionally) offload operations to dedicated hardware engines.
* When requesting an operation handle_stripe sets the pending bit for the
* operation and increments the count. raid5_run_ops is then run whenever
* the count is non-zero.
* There are some critical dependencies between the operations that prevent some
* from being requested while another is in flight.
* 1/ Parity check operations destroy the in cache version of the parity block,
* so we prevent parity dependent operations like writes and compute_blocks
* from starting while a check is in progress. Some dma engines can perform
* the check without damaging the parity block, in these cases the parity
* block is re-marked up to date (assuming the check was successful) and is
* not re-read from disk.
* 2/ When a write operation is requested we immediately lock the affected
* blocks, and mark them as not up to date. This causes new read requests
* to be held off, as well as parity checks and compute block operations.
* 3/ Once a compute block operation has been requested handle_stripe treats
* that block as if it is up to date. raid5_run_ops guaruntees that any
* operation that is dependent on the compute block result is initiated after
* the compute block completes.
*/
struct stripe_head {
......@@ -136,15 +169,46 @@ struct stripe_head {
spinlock_t lock;
int bm_seq; /* sequence number for bitmap flushes */
int disks; /* disks in stripe */
/* stripe_operations
* @pending - pending ops flags (set for request->issue->complete)
* @ack - submitted ops flags (set for issue->complete)
* @complete - completed ops flags (set for complete)
* @target - STRIPE_OP_COMPUTE_BLK target
* @count - raid5_runs_ops is set to run when this is non-zero
*/
struct stripe_operations {
unsigned long pending;
unsigned long ack;
unsigned long complete;
int target;
int count;
u32 zero_sum_result;
} ops;
struct r5dev {
struct bio req;
struct bio_vec vec;
struct page *page;
struct bio *toread, *towrite, *written;
struct bio *toread, *read, *towrite, *written;
sector_t sector; /* sector of this page */
unsigned long flags;
} dev[1]; /* allocated with extra space depending of RAID geometry */
};
/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
* for handle_stripe. It is only valid under spin_lock(sh->lock);
*/
struct stripe_head_state {
int syncing, expanding, expanded;
int locked, uptodate, to_read, to_write, failed, written;
int to_fill, compute, req_compute, non_overwrite;
int failed_num;
};
/* r6_state - extra state data only relevant to r6 */
struct r6_state {
int p_failed, q_failed, qd_idx, failed_num[2];
};
/* Flags */
#define R5_UPTODATE 0 /* page contains current data */
#define R5_LOCKED 1 /* IO has been submitted on "req" */
......@@ -158,6 +222,15 @@ struct stripe_head {
#define R5_ReWrite 9 /* have tried to over-write the readerror */
#define R5_Expanded 10 /* This block now has post-expand data */
#define R5_Wantcompute 11 /* compute_block in progress treat as
* uptodate
*/
#define R5_Wantfill 12 /* dev->toread contains a bio that needs
* filling
*/
#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from
* other "towrites"
*/
/*
* Write method
*/
......@@ -179,6 +252,24 @@ struct stripe_head {
#define STRIPE_EXPANDING 9
#define STRIPE_EXPAND_SOURCE 10
#define STRIPE_EXPAND_READY 11
/*
* Operations flags (in issue order)
*/
#define STRIPE_OP_BIOFILL 0
#define STRIPE_OP_COMPUTE_BLK 1
#define STRIPE_OP_PREXOR 2
#define STRIPE_OP_BIODRAIN 3
#define STRIPE_OP_POSTXOR 4
#define STRIPE_OP_CHECK 5
#define STRIPE_OP_IO 6
/* modifiers to the base operations
* STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back
* STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check
*/
#define STRIPE_OP_MOD_REPAIR_PD 7
#define STRIPE_OP_MOD_DMA_CHECK 8
/*
* Plugging:
*
......
......@@ -3,9 +3,10 @@
#include <linux/raid/md.h>
#define MAX_XOR_BLOCKS 5
#define MAX_XOR_BLOCKS 4
extern void xor_block(unsigned int count, unsigned int bytes, void **ptr);
extern void xor_blocks(unsigned int count, unsigned int bytes,
void *dest, void **srcs);
struct xor_block_template {
struct xor_block_template *next;
......
......@@ -151,9 +151,22 @@ static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */
static struct list_head ptype_all __read_mostly; /* Taps */
#ifdef CONFIG_NET_DMA
static struct dma_client *net_dma_client;
static unsigned int net_dma_count;
static spinlock_t net_dma_event_lock;
struct net_dma {
struct dma_client client;
spinlock_t lock;
cpumask_t channel_mask;
struct dma_chan *channels[NR_CPUS];
};
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
enum dma_state state);
static struct net_dma net_dma = {
.client = {
.event_callback = netdev_dma_event,
},
};
#endif
/*
......@@ -2022,12 +2035,13 @@ static void net_rx_action(struct softirq_action *h)
* There may not be any more sk_buffs coming right now, so push
* any pending DMA copies to hardware
*/
if (net_dma_client) {
struct dma_chan *chan;
rcu_read_lock();
list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
if (!cpus_empty(net_dma.channel_mask)) {
int chan_idx;
for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
struct dma_chan *chan = net_dma.channels[chan_idx];
if (chan)
dma_async_memcpy_issue_pending(chan);
rcu_read_unlock();
}
}
#endif
return;
......@@ -3775,12 +3789,13 @@ static int dev_cpu_callback(struct notifier_block *nfb,
* This is called when the number of channels allocated to the net_dma_client
* changes. The net_dma_client tries to have one DMA channel per CPU.
*/
static void net_dma_rebalance(void)
static void net_dma_rebalance(struct net_dma *net_dma)
{
unsigned int cpu, i, n;
unsigned int cpu, i, n, chan_idx;
struct dma_chan *chan;
if (net_dma_count == 0) {
if (cpus_empty(net_dma->channel_mask)) {
for_each_online_cpu(cpu)
rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
return;
......@@ -3789,10 +3804,12 @@ static void net_dma_rebalance(void)
i = 0;
cpu = first_cpu(cpu_online_map);
rcu_read_lock();
list_for_each_entry(chan, &net_dma_client->channels, client_node) {
n = ((num_online_cpus() / net_dma_count)
+ (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
chan = net_dma->channels[chan_idx];
n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
+ (i < (num_online_cpus() %
cpus_weight(net_dma->channel_mask)) ? 1 : 0));
while(n) {
per_cpu(softnet_data, cpu).net_dma = chan;
......@@ -3801,7 +3818,6 @@ static void net_dma_rebalance(void)
}
i++;
}
rcu_read_unlock();
}
/**
......@@ -3810,23 +3826,53 @@ static void net_dma_rebalance(void)
* @chan: DMA channel for the event
* @event: event type
*/
static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
enum dma_event event)
{
spin_lock(&net_dma_event_lock);
switch (event) {
case DMA_RESOURCE_ADDED:
net_dma_count++;
net_dma_rebalance();
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
enum dma_state state)
{
int i, found = 0, pos = -1;
struct net_dma *net_dma =
container_of(client, struct net_dma, client);
enum dma_state_client ack = DMA_DUP; /* default: take no action */
spin_lock(&net_dma->lock);
switch (state) {
case DMA_RESOURCE_AVAILABLE:
for (i = 0; i < NR_CPUS; i++)
if (net_dma->channels[i] == chan) {
found = 1;
break;
} else if (net_dma->channels[i] == NULL && pos < 0)
pos = i;
if (!found && pos >= 0) {
ack = DMA_ACK;
net_dma->channels[pos] = chan;
cpu_set(pos, net_dma->channel_mask);
net_dma_rebalance(net_dma);
}
break;
case DMA_RESOURCE_REMOVED:
net_dma_count--;
net_dma_rebalance();
for (i = 0; i < NR_CPUS; i++)
if (net_dma->channels[i] == chan) {
found = 1;
pos = i;
break;
}
if (found) {
ack = DMA_ACK;
cpu_clear(pos, net_dma->channel_mask);
net_dma->channels[i] = NULL;
net_dma_rebalance(net_dma);
}
break;
default:
break;
}
spin_unlock(&net_dma_event_lock);
spin_unlock(&net_dma->lock);
return ack;
}
/**
......@@ -3834,12 +3880,10 @@ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
*/
static int __init netdev_dma_register(void)
{
spin_lock_init(&net_dma_event_lock);
net_dma_client = dma_async_client_register(netdev_dma_event);
if (net_dma_client == NULL)
return -ENOMEM;
dma_async_client_chan_request(net_dma_client, num_online_cpus());
spin_lock_init(&net_dma.lock);
dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
dma_async_client_register(&net_dma.client);
dma_async_client_chan_request(&net_dma.client);
return 0;
}
......
......@@ -1116,6 +1116,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
long timeo;
struct task_struct *user_recv = NULL;
int copied_early = 0;
struct sk_buff *skb;
lock_sock(sk);
......@@ -1142,16 +1143,26 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
#ifdef CONFIG_NET_DMA
tp->ucopy.dma_chan = NULL;
preempt_disable();
if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
!sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) {
skb = skb_peek_tail(&sk->sk_receive_queue);
{
int available = 0;
if (skb)
available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
if ((available < target) &&
(len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
!sysctl_tcp_low_latency &&
__get_cpu_var(softnet_data).net_dma) {
preempt_enable_no_resched();
tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
} else
tp->ucopy.pinned_list =
dma_pin_iovec_pages(msg->msg_iov, len);
} else {
preempt_enable_no_resched();
}
}
#endif
do {
struct sk_buff *skb;
u32 offset;
/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
......@@ -1439,7 +1450,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
#ifdef CONFIG_NET_DMA
if (tp->ucopy.dma_chan) {
struct sk_buff *skb;
dma_cookie_t done, used;
dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment