Commit 5047887c authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (34 commits)
  powerpc: Wireup new syscalls
  Move update_mmu_cache() declaration from tlbflush.h to pgtable.h
  powerpc/pseries: Remove kmalloc call in handling writes to lparcfg
  powerpc/pseries: Update arch vector to indicate support for CMO
  ibmvfc: Add support for collaborative memory overcommit
  ibmvscsi: driver enablement for CMO
  ibmveth: enable driver for CMO
  ibmveth: Automatically enable larger rx buffer pools for larger mtu
  powerpc/pseries: Verify CMO memory entitlement updates with virtual I/O
  powerpc/pseries: vio bus support for CMO
  powerpc/pseries: iommu enablement for CMO
  powerpc/pseries: Add CMO paging statistics
  powerpc/pseries: Add collaborative memory manager
  powerpc/pseries: Utilities to set firmware page state
  powerpc/pseries: Enable CMO feature during platform setup
  powerpc/pseries: Split retrieval of processor entitlement data into a helper routine
  powerpc/pseries: Add memory entitlement capabilities to /proc/ppc64/lparcfg
  powerpc/pseries: Split processor entitlement retrieval and gathering to helper routines
  powerpc/pseries: Remove extraneous error reporting for hcall failures in lparcfg
  powerpc: Fix compile error with binutils 2.15
  ...

Fixed up conflict in arch/powerpc/platforms/52xx/Kconfig manually.
parents 996abf05 973b7d83
......@@ -59,6 +59,7 @@ Table of Contents
p) Freescale Synchronous Serial Interface
q) USB EHCI controllers
r) MDIO on GPIOs
s) SPI busses
VII - Marvell Discovery mv64[345]6x System Controller chips
1) The /system-controller node
......@@ -1883,6 +1884,62 @@ platforms are moved over to use the flattened-device-tree model.
&qe_pio_c 6>;
};
s) SPI (Serial Peripheral Interface) busses
SPI busses can be described with a node for the SPI master device
and a set of child nodes for each SPI slave on the bus. For this
discussion, it is assumed that the system's SPI controller is in
SPI master mode. This binding does not describe SPI controllers
in slave mode.
The SPI master node requires the following properties:
- #address-cells - number of cells required to define a chip select
address on the SPI bus.
- #size-cells - should be zero.
- compatible - name of SPI bus controller following generic names
recommended practice.
No other properties are required in the SPI bus node. It is assumed
that a driver for an SPI bus device will understand that it is an SPI bus.
However, the binding does not attempt to define the specific method for
assigning chip select numbers. Since SPI chip select configuration is
flexible and non-standardized, it is left out of this binding with the
assumption that board specific platform code will be used to manage
chip selects. Individual drivers can define additional properties to
support describing the chip select layout.
SPI slave nodes must be children of the SPI master node and can
contain the following properties.
- reg - (required) chip select address of device.
- compatible - (required) name of SPI device following generic names
recommended practice
- spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz
- spi-cpol - (optional) Empty property indicating device requires
inverse clock polarity (CPOL) mode
- spi-cpha - (optional) Empty property indicating device requires
shifted clock phase (CPHA) mode
SPI example for an MPC5200 SPI bus:
spi@f00 {
#address-cells = <1>;
#size-cells = <0>;
compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
reg = <0xf00 0x20>;
interrupts = <2 13 0 2 14 0>;
interrupt-parent = <&mpc5200_pic>;
ethernet-switch@0 {
compatible = "micrel,ks8995m";
spi-max-frequency = <1000000>;
reg = <0>;
};
codec@1 {
compatible = "ti,tlv320aic26";
spi-max-frequency = <100000>;
reg = <1>;
};
};
VII - Marvell Discovery mv64[345]6x System Controller chips
===========================================================
......
......@@ -23,6 +23,9 @@
struct cpu_spec* cur_cpu_spec = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
/* The platform string corresponding to the real PVR */
const char *powerpc_base_platform;
/* NOTE:
* Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
* the responsibility of the appropriate CPU save/restore functions to
......@@ -1652,6 +1655,14 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
} else
*t = *s;
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
/*
* Set the base platform string once; assumes
* we're called with real pvr first.
*/
if (powerpc_base_platform == NULL)
powerpc_base_platform = t->platform;
#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
/* ppc64 and booke expect identify_cpu to also call
* setup_cpu for that processor. I will consolidate
......
......@@ -148,7 +148,7 @@ transfer_to_handler:
/* Check to see if the dbcr0 register is set up to debug. Use the
internal debug mode bit to do this. */
lwz r12,THREAD_DBCR0(r12)
andis. r12,r12,DBCR0_IDM@h
andis. r12,r12,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
li r12,-1 /* clear all pending debug events */
......@@ -292,7 +292,7 @@ syscall_exit_cont:
/* If the process has its own DBCR0 value, load it up. The internal
debug mode bit tells us that dbcr0 should be loaded. */
lwz r0,THREAD+THREAD_DBCR0(r2)
andis. r10,r0,DBCR0_IDM@h
andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
bnel- load_dbcr0
#endif
#ifdef CONFIG_44x
......@@ -720,7 +720,7 @@ restore_user:
/* Check whether this process has its own DBCR0 value. The internal
debug mode bit tells us that dbcr0 should be loaded. */
lwz r0,THREAD+THREAD_DBCR0(r2)
andis. r10,r0,DBCR0_IDM@h
andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
bnel- load_dbcr0
#endif
......
......@@ -49,6 +49,8 @@ static int novmerge = 1;
static int protect4gb = 1;
static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
static inline unsigned long iommu_num_pages(unsigned long vaddr,
unsigned long slen)
{
......@@ -191,6 +193,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
{
unsigned long entry, flags;
dma_addr_t ret = DMA_ERROR_CODE;
int build_fail;
spin_lock_irqsave(&(tbl->it_lock), flags);
......@@ -205,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
/* Put the TCEs in the HW table */
ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK,
build_fail = ppc_md.tce_build(tbl, entry, npages,
(unsigned long)page & IOMMU_PAGE_MASK,
direction, attrs);
/* ppc_md.tce_build() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
* DMA_ERROR_CODE. For all other errors the functionality is
* not altered.
*/
if (unlikely(build_fail)) {
__iommu_free(tbl, ret, npages);
spin_unlock_irqrestore(&(tbl->it_lock), flags);
return DMA_ERROR_CODE;
}
/* Flush/invalidate TLB caches if necessary */
if (ppc_md.tce_flush)
......@@ -276,7 +291,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
dma_addr_t dma_next = 0, dma_addr;
unsigned long flags;
struct scatterlist *s, *outs, *segstart;
int outcount, incount, i;
int outcount, incount, i, build_fail = 0;
unsigned int align;
unsigned long handle;
unsigned int max_seg_size;
......@@ -337,8 +352,11 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages, entry, dma_addr);
/* Insert into HW table */
ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK,
build_fail = ppc_md.tce_build(tbl, entry, npages,
vaddr & IOMMU_PAGE_MASK,
direction, attrs);
if(unlikely(build_fail))
goto failure;
/* If we are in an open segment, try merging */
if (segstart != s) {
......
......@@ -34,8 +34,9 @@
#include <asm/time.h>
#include <asm/prom.h>
#include <asm/vdso_datapage.h>
#include <asm/vio.h>
#define MODULE_VERS "1.7"
#define MODULE_VERS "1.8"
#define MODULE_NAME "lparcfg"
/* #define LPARCFG_DEBUG */
......@@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
/*
* Methods used to fetch LPAR data when running on a pSeries platform.
*/
static void log_plpar_hcall_return(unsigned long rc, char *tag)
/**
* h_get_mpp
* H_GET_MPP hcall returns info in 7 parms
*/
int h_get_mpp(struct hvcall_mpp_data *mpp_data)
{
switch(rc) {
case 0:
return;
case H_HARDWARE:
printk(KERN_INFO "plpar-hcall (%s) "
"Hardware fault\n", tag);
return;
case H_FUNCTION:
printk(KERN_INFO "plpar-hcall (%s) "
"Function not allowed\n", tag);
return;
case H_AUTHORITY:
printk(KERN_INFO "plpar-hcall (%s) "
"Not authorized to this function\n", tag);
return;
case H_PARAMETER:
printk(KERN_INFO "plpar-hcall (%s) "
"Bad parameter(s)\n",tag);
return;
default:
printk(KERN_INFO "plpar-hcall (%s) "
"Unexpected rc(0x%lx)\n", tag, rc);
}
int rc;
unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
rc = plpar_hcall9(H_GET_MPP, retbuf);
mpp_data->entitled_mem = retbuf[0];
mpp_data->mapped_mem = retbuf[1];
mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
mpp_data->pool_num = retbuf[2] & 0xffff;
mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
mpp_data->pool_size = retbuf[4];
mpp_data->loan_request = retbuf[5];
mpp_data->backing_mem = retbuf[6];
return rc;
}
EXPORT_SYMBOL(h_get_mpp);
struct hvcall_ppp_data {
u64 entitlement;
u64 unallocated_entitlement;
u16 group_num;
u16 pool_num;
u8 capped;
u8 weight;
u8 unallocated_weight;
u16 active_procs_in_pool;
u16 active_system_procs;
};
/*
* H_GET_PPP hcall returns info in 4 parms.
......@@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag)
* XXXX - Active processors in Physical Processor Pool.
* XXXX - Processors active on platform.
*/
static unsigned int h_get_ppp(unsigned long *entitled,
unsigned long *unallocated,
unsigned long *aggregation,
unsigned long *resource)
static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
{
unsigned long rc;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
rc = plpar_hcall(H_GET_PPP, retbuf);
*entitled = retbuf[0];
*unallocated = retbuf[1];
*aggregation = retbuf[2];
*resource = retbuf[3];
ppp_data->entitlement = retbuf[0];
ppp_data->unallocated_entitlement = retbuf[1];
log_plpar_hcall_return(rc, "H_GET_PPP");
ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
ppp_data->pool_num = retbuf[2] & 0xffff;
ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
ppp_data->active_system_procs = retbuf[3] & 0xffff;
return rc;
}
static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
static unsigned h_pic(unsigned long *pool_idle_time,
unsigned long *num_procs)
{
unsigned long rc;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
......@@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
*pool_idle_time = retbuf[0];
*num_procs = retbuf[1];
if (rc != H_AUTHORITY)
log_plpar_hcall_return(rc, "H_PIC");
return rc;
}
/*
* parse_ppp_data
* Parse out the data returned from h_get_ppp and h_pic
*/
static void parse_ppp_data(struct seq_file *m)
{
struct hvcall_ppp_data ppp_data;
int rc;
rc = h_get_ppp(&ppp_data);
if (rc)
return;
seq_printf(m, "partition_entitled_capacity=%ld\n",
ppp_data.entitlement);
seq_printf(m, "group=%d\n", ppp_data.group_num);
seq_printf(m, "system_active_processors=%d\n",
ppp_data.active_system_procs);
/* pool related entries are apropriate for shared configs */
if (lppaca[0].shared_proc) {
unsigned long pool_idle_time, pool_procs;
seq_printf(m, "pool=%d\n", ppp_data.pool_num);
/* report pool_capacity in percentage */
seq_printf(m, "pool_capacity=%d\n",
ppp_data.active_procs_in_pool * 100);
h_pic(&pool_idle_time, &pool_procs);
seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
}
seq_printf(m, "unallocated_capacity_weight=%d\n",
ppp_data.unallocated_weight);
seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
seq_printf(m, "capped=%d\n", ppp_data.capped);
seq_printf(m, "unallocated_capacity=%ld\n",
ppp_data.unallocated_entitlement);
}
/**
* parse_mpp_data
* Parse out data returned from h_get_mpp
*/
static void parse_mpp_data(struct seq_file *m)
{
struct hvcall_mpp_data mpp_data;
int rc;
rc = h_get_mpp(&mpp_data);
if (rc)
return;
seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
if (mpp_data.mapped_mem != -1)
seq_printf(m, "mapped_entitled_memory=%ld\n",
mpp_data.mapped_mem);
seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
mpp_data.unallocated_mem_weight);
seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
mpp_data.unallocated_entitlement);
if (mpp_data.pool_size != -1)
seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
mpp_data.pool_size);
seq_printf(m, "entitled_memory_loan_request=%ld\n",
mpp_data.loan_request);
seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
}
#define SPLPAR_CHARACTERISTICS_TOKEN 20
......@@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void)
return count;
}
static void pseries_cmo_data(struct seq_file *m)
{
int cpu;
unsigned long cmo_faults = 0;
unsigned long cmo_fault_time = 0;
if (!firmware_has_feature(FW_FEATURE_CMO))
return;
for_each_possible_cpu(cpu) {
cmo_faults += lppaca[cpu].cmo_faults;
cmo_fault_time += lppaca[cpu].cmo_fault_time;
}
seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
seq_printf(m, "cmo_fault_time_usec=%lu\n",
cmo_fault_time / tb_ticks_per_usec);
}
static int pseries_lparcfg_data(struct seq_file *m, void *v)
{
int partition_potential_processors;
......@@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
partition_active_processors = lparcfg_count_active_processors();
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
unsigned long h_entitled, h_unallocated;
unsigned long h_aggregation, h_resource;
unsigned long pool_idle_time, pool_procs;
unsigned long purr;
h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
&h_resource);
seq_printf(m, "R4=0x%lx\n", h_entitled);
seq_printf(m, "R5=0x%lx\n", h_unallocated);
seq_printf(m, "R6=0x%lx\n", h_aggregation);
seq_printf(m, "R7=0x%lx\n", h_resource);
purr = get_purr();
/* this call handles the ibm,get-system-parameter contents */
parse_system_parameter_string(m);
parse_ppp_data(m);
parse_mpp_data(m);
pseries_cmo_data(m);
seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
seq_printf(m, "system_active_processors=%ld\n",
(h_resource >> 0 * 8) & 0xffff);
/* pool related entries are apropriate for shared configs */
if (lppaca[0].shared_proc) {
h_pic(&pool_idle_time, &pool_procs);
seq_printf(m, "pool=%ld\n",
(h_aggregation >> 0 * 8) & 0xffff);
/* report pool_capacity in percentage */
seq_printf(m, "pool_capacity=%ld\n",
((h_resource >> 2 * 8) & 0xffff) * 100);
seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
}
seq_printf(m, "unallocated_capacity_weight=%ld\n",
(h_resource >> 4 * 8) & 0xFF);
seq_printf(m, "capacity_weight=%ld\n",
(h_resource >> 5 * 8) & 0xFF);
seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
seq_printf(m, "purr=%ld\n", purr);
seq_printf(m, "purr=%ld\n", get_purr());
} else { /* non SPLPAR case */
seq_printf(m, "system_active_processors=%d\n",
......@@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
return 0;
}
static ssize_t update_ppp(u64 *entitlement, u8 *weight)
{
struct hvcall_ppp_data ppp_data;
u8 new_weight;
u64 new_entitled;
ssize_t retval;
/* Get our current parameters */
retval = h_get_ppp(&ppp_data);
if (retval)
return retval;
if (entitlement) {
new_weight = ppp_data.weight;
new_entitled = *entitlement;
} else if (weight) {
new_weight = *weight;
new_entitled = ppp_data.entitlement;
} else
return -EINVAL;
pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
__FUNCTION__, ppp_data.entitlement, ppp_data.weight);
pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
__FUNCTION__, new_entitled, new_weight);
retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
return retval;
}
/**
* update_mpp
*
* Update the memory entitlement and weight for the partition. Caller must
* specify either a new entitlement or weight, not both, to be updated
* since the h_set_mpp call takes both entitlement and weight as parameters.
*/
static ssize_t update_mpp(u64 *entitlement, u8 *weight)
{
struct hvcall_mpp_data mpp_data;
u64 new_entitled;
u8 new_weight;
ssize_t rc;
if (entitlement) {
/* Check with vio to ensure the new memory entitlement
* can be handled.
*/
rc = vio_cmo_entitlement_update(*entitlement);
if (rc)
return rc;
}
rc = h_get_mpp(&mpp_data);
if (rc)
return rc;
if (entitlement) {
new_weight = mpp_data.mem_weight;
new_entitled = *entitlement;
} else if (weight) {
new_weight = *weight;
new_entitled = mpp_data.entitled_mem;
} else
return -EINVAL;
pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
__FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight);
pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
__FUNCTION__, new_entitled, new_weight);
rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
return rc;
}
/*
* Interface for changing system parameters (variable capacity weight
* and entitled capacity). Format of input is "param_name=value";
......@@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
static ssize_t lparcfg_write(struct file *file, const char __user * buf,
size_t count, loff_t * off)
{
char *kbuf;
int kbuf_sz = 64;
char kbuf[kbuf_sz];
char *tmp;
u64 new_entitled, *new_entitled_ptr = &new_entitled;
u8 new_weight, *new_weight_ptr = &new_weight;
unsigned long current_entitled; /* parameters for h_get_ppp */
unsigned long dummy;
unsigned long resource;
u8 current_weight;
ssize_t retval = -ENOMEM;
ssize_t retval;
if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
firmware_has_feature(FW_FEATURE_ISERIES))
return -EINVAL;
kbuf = kmalloc(count, GFP_KERNEL);
if (!kbuf)
goto out;
if (count > kbuf_sz)
return -EINVAL;
retval = -EFAULT;
if (copy_from_user(kbuf, buf, count))
goto out;
return -EFAULT;
retval = -EINVAL;
kbuf[count - 1] = '\0';
tmp = strchr(kbuf, '=');
if (!tmp)
goto out;
return -EINVAL;
*tmp++ = '\0';
......@@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
char *endp;
*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
if (endp == tmp)
goto out;
new_weight_ptr = &current_weight;
return -EINVAL;
retval = update_ppp(new_entitled_ptr, NULL);
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
if (endp == tmp)
goto out;
new_entitled_ptr = &current_entitled;
} else
goto out;
/* Get our current parameters */
retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
if (retval) {
retval = -EIO;
goto out;
}
current_weight = (resource >> 5 * 8) & 0xFF;
return -EINVAL;
pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
__func__, current_entitled, current_weight);
retval = update_ppp(NULL, new_weight_ptr);
} else if (!strcmp(kbuf, "entitled_memory")) {
char *endp;
*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
if (endp == tmp)
return -EINVAL;
pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
__func__, *new_entitled_ptr, *new_weight_ptr);
retval = update_mpp(new_entitled_ptr, NULL);
} else if (!strcmp(kbuf, "entitled_memory_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
if (endp == tmp)
return -EINVAL;
retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
*new_weight_ptr);
retval = update_mpp(NULL, new_weight_ptr);
} else
return -EINVAL;
if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
retval = count;
......@@ -506,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
retval = -EIO;
}
out:
kfree(kbuf);
return retval;
}
......
......@@ -47,6 +47,8 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#endif
#include <linux/kprobes.h>
#include <linux/kdebug.h>
extern unsigned long _get_SP(void);
......@@ -239,6 +241,35 @@ void discard_lazy_cpu_state(void)
}
#endif /* CONFIG_SMP */
void do_dabr(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
siginfo_t info;
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
return;
if (debugger_dabr_match(regs))
return;
/* Clear the DAC and struct entries. One shot trigger */
#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
| DBCR0_IDM));
#endif
/* Clear the DABR */
set_dabr(0);
/* Deliver the signal to userspace */
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_HWBKPT;
info.si_addr = (void __user *)address;
force_sig_info(SIGTRAP, &info, current);
}
static DEFINE_PER_CPU(unsigned long, current_dabr);
int set_dabr(unsigned long dabr)
......@@ -254,6 +285,11 @@ int set_dabr(unsigned long dabr)
#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
mtspr(SPRN_DABR, dabr);
#endif
#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
mtspr(SPRN_DAC1, dabr);
#endif
return 0;
}
......@@ -337,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev,
if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
set_dabr(new->thread.dabr);
#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
/* If new thread DAC (HW breakpoint) is the same then leave it */
if (new->thread.dabr)
set_dabr(new->thread.dabr);
#endif
new_thread = &new->thread;
old_thread = &current->thread;
......@@ -525,6 +567,10 @@ void flush_thread(void)
if (current->thread.dabr) {
current->thread.dabr = 0;
set_dabr(0);
#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
#endif
}
}
......
......@@ -643,6 +643,11 @@ static void __init early_cmdline_parse(void)
#else
#define OV5_MSI 0x00
#endif /* CONFIG_PCI_MSI */
#ifdef CONFIG_PPC_SMLPAR
#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */
#else
#define OV5_CMO 0x00
#endif
/*
* The architecture vector has an array of PVR mask/value pairs,
......@@ -687,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = {
0, /* don't halt */
/* option vector 5: PAPR/OF options */
3 - 2, /* length */
5 - 2, /* length */
0, /* don't ignore, don't halt */
OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
OV5_DONATE_DEDICATE_CPU | OV5_MSI,
0,
OV5_CMO,
};
/* Old method - ELF header with PT_NOTE sections */
......
......@@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task)
if (regs != NULL) {
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC;
task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
regs->msr |= MSR_DE;
#else
regs->msr |= MSR_SE;
......@@ -716,9 +716,16 @@ void user_disable_single_step(struct task_struct *task)
{
struct pt_regs *regs = task->thread.regs;
#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
/* If DAC then do not single step, skip */
if (task->thread.dabr)
return;
#endif
if (regs != NULL) {
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
task->thread.dbcr0 = 0;
task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM);
regs->msr &= ~MSR_DE;
#else
regs->msr &= ~MSR_SE;
......@@ -727,22 +734,75 @@ void user_disable_single_step(struct task_struct *task)
clear_tsk_thread_flag(task, TIF_SINGLESTEP);
}
static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
unsigned long data)
{
/* We only support one DABR and no IABRS at the moment */
/* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
* For embedded processors we support one DAC and no IAC's at the
* moment.
*/
if (addr > 0)
return -EINVAL;
/* The bottom 3 bits are flags */
if ((data & ~0x7UL) >= TASK_SIZE)
return -EIO;
/* Ensure translation is on */
#ifdef CONFIG_PPC64
/* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
* It was assumed, on previous implementations, that 3 bits were
* passed together with the data address, fitting the design of the
* DABR register, as follows:
*
* bit 0: Read flag
* bit 1: Write flag
* bit 2: Breakpoint translation
*
* Thus, we use them here as so.
*/
/* Ensure breakpoint translation bit is set */
if (data && !(data & DABR_TRANSLATION))
return -EIO;
/* Move contents to the DABR register */
task->thread.dabr = data;
#endif
#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
/* As described above, it was assumed 3 bits were passed with the data
* address, but we will assume only the mode bits will be passed
* as to not cause alignment restrictions for DAC-based processors.
*/
/* DAC's hold the whole address without any mode flags */
task->thread.dabr = data & ~0x3UL;
if (task->thread.dabr == 0) {
task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM);
task->thread.regs->msr &= ~MSR_DE;
return 0;
}
/* Read or Write bits must be set */
if (!(data & 0x3UL))
return -EINVAL;
/* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
register */
task->thread.dbcr0 = DBCR0_IDM;
/* Check for write and read flags and set DBCR0
accordingly */
if (data & 0x1UL)
task->thread.dbcr0 |= DBSR_DAC1R;
if (data & 0x2UL)
task->thread.dbcr0 |= DBSR_DAC1W;
task->thread.regs->msr |= MSR_DE;
#endif
return 0;
}
......
......@@ -145,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
* user space. The DABR will have been cleared if it
* triggered inside the kernel.
*/
if (current->thread.dabr)
if (current->thread.dabr) {
set_dabr(current->thread.dabr);
#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
mtspr(SPRN_DBCR0, current->thread.dbcr0);
#endif
}
if (is32) {
if (ka.sa.sa_flags & SA_SIGINFO)
......
......@@ -529,7 +529,8 @@ static void register_nodes(void)
#endif
/* Only valid if CPU is present. */
static ssize_t show_physical_id(struct sys_device *dev, char *buf)
static ssize_t show_physical_id(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct cpu *cpu = container_of(dev, struct cpu, sysdev);
......
......@@ -1067,6 +1067,22 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
}
_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
} else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
regs->msr &= ~MSR_DE;
if (user_mode(regs)) {
current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W |
DBCR0_IDM);
} else {
/* Disable DAC interupts */
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R |
DBSR_DAC1W | DBCR0_IDM));
/* Clear the DAC event */
mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W));
}
/* Setup and send the trap to the handler */
do_dabr(regs, mfspr(SPRN_DAC1), debug_status);
}
}
#endif /* CONFIG_4xx || CONFIG_BOOKE */
......
/*
* IBM PowerPC Virtual I/O Infrastructure Support.
*
* Copyright (c) 2003-2005 IBM Corp.
* Copyright (c) 2003,2008 IBM Corp.
* Dave Engebretsen engebret@us.ibm.com
* Santiago Leon santil@us.ibm.com
* Hollis Blanchard <hollisb@us.ibm.com>
* Stephen Rothwell
* Robert Jennings <rcjenn@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
......@@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */
.dev.bus = &vio_bus_type,
};
#ifdef CONFIG_PPC_SMLPAR
/**
* vio_cmo_pool - A pool of IO memory for CMO use
*
* @size: The size of the pool in bytes
* @free: The amount of free memory in the pool
*/
struct vio_cmo_pool {
size_t size;
size_t free;
};
/* How many ms to delay queued balance work */
#define VIO_CMO_BALANCE_DELAY 100
/* Portion out IO memory to CMO devices by this chunk size */
#define VIO_CMO_BALANCE_CHUNK 131072
/**
* vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
*
* @vio_dev: struct vio_dev pointer
* @list: pointer to other devices on bus that are being tracked
*/
struct vio_cmo_dev_entry {
struct vio_dev *viodev;
struct list_head list;
};
/**
* vio_cmo - VIO bus accounting structure for CMO entitlement
*
* @lock: spinlock for entire structure
* @balance_q: work queue for balancing system entitlement
* @device_list: list of CMO-enabled devices requiring entitlement
* @entitled: total system entitlement in bytes
* @reserve: pool of memory from which devices reserve entitlement, incl. spare
* @excess: pool of excess entitlement not needed for device reserves or spare
* @spare: IO memory for device hotplug functionality
* @min: minimum necessary for system operation
* @desired: desired memory for system operation
* @curr: bytes currently allocated
* @high: high water mark for IO data usage
*/
struct vio_cmo {
spinlock_t lock;
struct delayed_work balance_q;
struct list_head device_list;
size_t entitled;
struct vio_cmo_pool reserve;
struct vio_cmo_pool excess;
size_t spare;
size_t min;
size_t desired;
size_t curr;
size_t high;
} vio_cmo;
/**
* vio_cmo_OF_devices - Count the number of OF devices that have DMA windows
*/
static int vio_cmo_num_OF_devs(void)
{
struct device_node *node_vroot;
int count = 0;
/*
* Count the number of vdevice entries with an
* ibm,my-dma-window OF property
*/
node_vroot = of_find_node_by_name(NULL, "vdevice");
if (node_vroot) {
struct device_node *of_node;
struct property *prop;
for_each_child_of_node(node_vroot, of_node) {
prop = of_find_property(of_node, "ibm,my-dma-window",
NULL);
if (prop)
count++;
}
}
of_node_put(node_vroot);
return count;
}
/**
* vio_cmo_alloc - allocate IO memory for CMO-enable devices
*
* @viodev: VIO device requesting IO memory
* @size: size of allocation requested
*
* Allocations come from memory reserved for the devices and any excess
* IO memory available to all devices. The spare pool used to service
* hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
* made available.
*
* Return codes:
* 0 for successful allocation and -ENOMEM for a failure
*/
static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
{
unsigned long flags;
size_t reserve_free = 0;
size_t excess_free = 0;
int ret = -ENOMEM;
spin_lock_irqsave(&vio_cmo.lock, flags);
/* Determine the amount of free entitlement available in reserve */
if (viodev->cmo.entitled > viodev->cmo.allocated)
reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
/* If spare is not fulfilled, the excess pool can not be used. */
if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
excess_free = vio_cmo.excess.free;
/* The request can be satisfied */
if ((reserve_free + excess_free) >= size) {
vio_cmo.curr += size;
if (vio_cmo.curr > vio_cmo.high)
vio_cmo.high = vio_cmo.curr;
viodev->cmo.allocated += size;
size -= min(reserve_free, size);
vio_cmo.excess.free -= size;
ret = 0;
}
spin_unlock_irqrestore(&vio_cmo.lock, flags);
return ret;
}
/**
* vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
* @viodev: VIO device freeing IO memory
* @size: size of deallocation
*
* IO memory is freed by the device back to the correct memory pools.
* The spare pool is replenished first from either memory pool, then
* the reserve pool is used to reduce device entitlement, the excess
* pool is used to increase the reserve pool toward the desired entitlement
* target, and then the remaining memory is returned to the pools.
*
*/
static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
{
unsigned long flags;
size_t spare_needed = 0;
size_t excess_freed = 0;
size_t reserve_freed = size;
size_t tmp;
int balance = 0;
spin_lock_irqsave(&vio_cmo.lock, flags);
vio_cmo.curr -= size;
/* Amount of memory freed from the excess pool */
if (viodev->cmo.allocated > viodev->cmo.entitled) {
excess_freed = min(reserve_freed, (viodev->cmo.allocated -
viodev->cmo.entitled));
reserve_freed -= excess_freed;
}
/* Remove allocation from device */
viodev->cmo.allocated -= (reserve_freed + excess_freed);
/* Spare is a subset of the reserve pool, replenish it first. */
spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
/*
* Replenish the spare in the reserve pool from the excess pool.
* This moves entitlement into the reserve pool.
*/
if (spare_needed && excess_freed) {
tmp = min(excess_freed, spare_needed);
vio_cmo.excess.size -= tmp;
vio_cmo.reserve.size += tmp;
vio_cmo.spare += tmp;
excess_freed -= tmp;
spare_needed -= tmp;
balance = 1;
}
/*
* Replenish the spare in the reserve pool from the reserve pool.
* This removes entitlement from the device down to VIO_CMO_MIN_ENT,
* if needed, and gives it to the spare pool. The amount of used
* memory in this pool does not change.
*/
if (spare_needed && reserve_freed) {
tmp = min(spare_needed, min(reserve_freed,
(viodev->cmo.entitled -
VIO_CMO_MIN_ENT)));
vio_cmo.spare += tmp;
viodev->cmo.entitled -= tmp;
reserve_freed -= tmp;
spare_needed -= tmp;
balance = 1;
}
/*
* Increase the reserve pool until the desired allocation is met.
* Move an allocation freed from the excess pool into the reserve
* pool and schedule a balance operation.
*/
if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
vio_cmo.excess.size -= tmp;
vio_cmo.reserve.size += tmp;
excess_freed -= tmp;
balance = 1;
}
/* Return memory from the excess pool to that pool */
if (excess_freed)
vio_cmo.excess.free += excess_freed;
if (balance)
schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
spin_unlock_irqrestore(&vio_cmo.lock, flags);
}
/**
* vio_cmo_entitlement_update - Manage system entitlement changes
*
* @new_entitlement: new system entitlement to attempt to accommodate
*
* Increases in entitlement will be used to fulfill the spare entitlement
* and the rest is given to the excess pool. Decreases, if they are
* possible, come from the excess pool and from unused device entitlement
*
* Returns: 0 on success, -ENOMEM when change can not be made
*/
int vio_cmo_entitlement_update(size_t new_entitlement)
{
struct vio_dev *viodev;
struct vio_cmo_dev_entry *dev_ent;
unsigned long flags;
size_t avail, delta, tmp;
spin_lock_irqsave(&vio_cmo.lock, flags);
/* Entitlement increases */
if (new_entitlement > vio_cmo.entitled) {
delta = new_entitlement - vio_cmo.entitled;
/* Fulfill spare allocation */
if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
vio_cmo.spare += tmp;
vio_cmo.reserve.size += tmp;
delta -= tmp;
}
/* Remaining new allocation goes to the excess pool */
vio_cmo.entitled += delta;
vio_cmo.excess.size += delta;
vio_cmo.excess.free += delta;
goto out;
}
/* Entitlement decreases */
delta = vio_cmo.entitled - new_entitlement;
avail = vio_cmo.excess.free;
/*
* Need to check how much unused entitlement each device can
* sacrifice to fulfill entitlement change.
*/
list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
if (avail >= delta)
break;
viodev = dev_ent->viodev;
if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
(viodev->cmo.entitled > VIO_CMO_MIN_ENT))
avail += viodev->cmo.entitled -
max_t(size_t, viodev->cmo.allocated,
VIO_CMO_MIN_ENT);
}
if (delta <= avail) {
vio_cmo.entitled -= delta;
/* Take entitlement from the excess pool first */
tmp = min(vio_cmo.excess.free, delta);
vio_cmo.excess.size -= tmp;
vio_cmo.excess.free -= tmp;
delta -= tmp;
/*
* Remove all but VIO_CMO_MIN_ENT bytes from devices
* until entitlement change is served
*/
list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
if (!delta)
break;
viodev = dev_ent->viodev;
tmp = 0;
if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
(viodev->cmo.entitled > VIO_CMO_MIN_ENT))
tmp = viodev->cmo.entitled -
max_t(size_t, viodev->cmo.allocated,
VIO_CMO_MIN_ENT);
viodev->cmo.entitled -= min(tmp, delta);
delta -= min(tmp, delta);
}
} else {
spin_unlock_irqrestore(&vio_cmo.lock, flags);
return -ENOMEM;
}
out:
schedule_delayed_work(&vio_cmo.balance_q, 0);
spin_unlock_irqrestore(&vio_cmo.lock, flags);
return 0;
}
/**
* vio_cmo_balance - Balance entitlement among devices
*
* @work: work queue structure for this operation
*
* Any system entitlement above the minimum needed for devices, or
* already allocated to devices, can be distributed to the devices.
* The list of devices is iterated through to recalculate the desired
* entitlement level and to determine how much entitlement above the
* minimum entitlement is allocated to devices.
*
* Small chunks of the available entitlement are given to devices until
* their requirements are fulfilled or there is no entitlement left to give.
* Upon completion sizes of the reserve and excess pools are calculated.
*
* The system minimum entitlement level is also recalculated here.
* Entitlement will be reserved for devices even after vio_bus_remove to
* accommodate reloading the driver. The OF tree is walked to count the
* number of devices present and this will remove entitlement for devices
* that have actually left the system after having vio_bus_remove called.
*/
static void vio_cmo_balance(struct work_struct *work)
{
struct vio_cmo *cmo;
struct vio_dev *viodev;
struct vio_cmo_dev_entry *dev_ent;
unsigned long flags;
size_t avail = 0, level, chunk, need;
int devcount = 0, fulfilled;
cmo = container_of(work, struct vio_cmo, balance_q.work);
spin_lock_irqsave(&vio_cmo.lock, flags);
/* Calculate minimum entitlement and fulfill spare */
cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
BUG_ON(cmo->min > cmo->entitled);
cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
cmo->min += cmo->spare;
cmo->desired = cmo->min;
/*
* Determine how much entitlement is available and reset device
* entitlements
*/
avail = cmo->entitled - cmo->spare;
list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
viodev = dev_ent->viodev;
devcount++;
viodev->cmo.entitled = VIO_CMO_MIN_ENT;
cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
}
/*
* Having provided each device with the minimum entitlement, loop
* over the devices portioning out the remaining entitlement
* until there is nothing left.
*/
level = VIO_CMO_MIN_ENT;
while (avail) {
fulfilled = 0;
list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
viodev = dev_ent->viodev;
if (viodev->cmo.desired <= level) {
fulfilled++;
continue;
}
/*
* Give the device up to VIO_CMO_BALANCE_CHUNK
* bytes of entitlement, but do not exceed the
* desired level of entitlement for the device.
*/
chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
chunk = min(chunk, (viodev->cmo.desired -
viodev->cmo.entitled));
viodev->cmo.entitled += chunk;
/*
* If the memory for this entitlement increase was
* already allocated to the device it does not come
* from the available pool being portioned out.
*/
need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
max(viodev->cmo.allocated, level);
avail -= need;
}
if (fulfilled == devcount)
break;
level += VIO_CMO_BALANCE_CHUNK;
}
/* Calculate new reserve and excess pool sizes */
cmo->reserve.size = cmo->min;
cmo->excess.free = 0;
cmo->excess.size = 0;
need = 0;
list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
viodev = dev_ent->viodev;
/* Calculated reserve size above the minimum entitlement */
if (viodev->cmo.entitled)
cmo->reserve.size += (viodev->cmo.entitled -
VIO_CMO_MIN_ENT);
/* Calculated used excess entitlement */
if (viodev->cmo.allocated > viodev->cmo.entitled)
need += viodev->cmo.allocated - viodev->cmo.entitled;
}
cmo->excess.size = cmo->entitled - cmo->reserve.size;
cmo->excess.free = cmo->excess.size - need;
cancel_delayed_work(container_of(work, struct delayed_work, work));
spin_unlock_irqrestore(&vio_cmo.lock, flags);
}
static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag)
{
struct vio_dev *viodev = to_vio_dev(dev);
void *ret;
if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
atomic_inc(&viodev->cmo.allocs_failed);
return NULL;
}
ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag);
if (unlikely(ret == NULL)) {
vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
atomic_inc(&viodev->cmo.allocs_failed);
}
return ret;
}
static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
struct vio_dev *viodev = to_vio_dev(dev);
dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);
vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
}
static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
size_t size,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
dma_addr_t ret = DMA_ERROR_CODE;
if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
atomic_inc(&viodev->cmo.allocs_failed);
return ret;
}
ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
if (unlikely(dma_mapping_error(ret))) {
vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
atomic_inc(&viodev->cmo.allocs_failed);
}
return ret;
}
static void vio_dma_iommu_unmap_single(struct device *dev,
dma_addr_t dma_handle, size_t size,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
}
static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
struct scatterlist *sgl;
int ret, count = 0;
size_t alloc_size = 0;
for (sgl = sglist; count < nelems; count++, sgl++)
alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
if (vio_cmo_alloc(viodev, alloc_size)) {
atomic_inc(&viodev->cmo.allocs_failed);
return 0;
}
ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
if (unlikely(!ret)) {
vio_cmo_dealloc(viodev, alloc_size);
atomic_inc(&viodev->cmo.allocs_failed);
}
for (sgl = sglist, count = 0; count < ret; count++, sgl++)
alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
if (alloc_size)
vio_cmo_dealloc(viodev, alloc_size);
return ret;
}
static void vio_dma_iommu_unmap_sg(struct device *dev,
struct scatterlist *sglist, int nelems,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
struct scatterlist *sgl;
size_t alloc_size = 0;
int count = 0;
for (sgl = sglist; count < nelems; count++, sgl++)
alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
vio_cmo_dealloc(viodev, alloc_size);
}
struct dma_mapping_ops vio_dma_mapping_ops = {
.alloc_coherent = vio_dma_iommu_alloc_coherent,
.free_coherent = vio_dma_iommu_free_coherent,
.map_single = vio_dma_iommu_map_single,
.unmap_single = vio_dma_iommu_unmap_single,
.map_sg = vio_dma_iommu_map_sg,
.unmap_sg = vio_dma_iommu_unmap_sg,
};
/**
* vio_cmo_set_dev_desired - Set desired entitlement for a device
*
* @viodev: struct vio_dev for device to alter
* @new_desired: new desired entitlement level in bytes
*
* For use by devices to request a change to their entitlement at runtime or
* through sysfs. The desired entitlement level is changed and a balancing
* of system resources is scheduled to run in the future.
*/
void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
{
unsigned long flags;
struct vio_cmo_dev_entry *dev_ent;
int found = 0;
if (!firmware_has_feature(FW_FEATURE_CMO))
return;
spin_lock_irqsave(&vio_cmo.lock, flags);
if (desired < VIO_CMO_MIN_ENT)
desired = VIO_CMO_MIN_ENT;
/*
* Changes will not be made for devices not in the device list.
* If it is not in the device list, then no driver is loaded
* for the device and it can not receive entitlement.
*/
list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
if (viodev == dev_ent->viodev) {
found = 1;
break;
}
if (!found)
return;
/* Increase/decrease in desired device entitlement */
if (desired >= viodev->cmo.desired) {
/* Just bump the bus and device values prior to a balance*/
vio_cmo.desired += desired - viodev->cmo.desired;
viodev->cmo.desired = desired;
} else {
/* Decrease bus and device values for desired entitlement */
vio_cmo.desired -= viodev->cmo.desired - desired;
viodev->cmo.desired = desired;
/*
* If less entitlement is desired than current entitlement, move
* any reserve memory in the change region to the excess pool.
*/
if (viodev->cmo.entitled > desired) {
vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
vio_cmo.excess.size += viodev->cmo.entitled - desired;
/*
* If entitlement moving from the reserve pool to the
* excess pool is currently unused, add to the excess
* free counter.
*/
if (viodev->cmo.allocated < viodev->cmo.entitled)
vio_cmo.excess.free += viodev->cmo.entitled -
max(viodev->cmo.allocated, desired);
viodev->cmo.entitled = desired;
}
}
schedule_delayed_work(&vio_cmo.balance_q, 0);
spin_unlock_irqrestore(&vio_cmo.lock, flags);
}
/**
* vio_cmo_bus_probe - Handle CMO specific bus probe activities
*
* @viodev - Pointer to struct vio_dev for device
*
* Determine the devices IO memory entitlement needs, attempting
* to satisfy the system minimum entitlement at first and scheduling
* a balance operation to take care of the rest at a later time.
*
* Returns: 0 on success, -EINVAL when device doesn't support CMO, and
* -ENOMEM when entitlement is not available for device or
* device entry.
*
*/
static int vio_cmo_bus_probe(struct vio_dev *viodev)
{
struct vio_cmo_dev_entry *dev_ent;
struct device *dev = &viodev->dev;
struct vio_driver *viodrv = to_vio_driver(dev->driver);
unsigned long flags;
size_t size;
/*
* Check to see that device has a DMA window and configure
* entitlement for the device.
*/
if (of_get_property(viodev->dev.archdata.of_node,
"ibm,my-dma-window", NULL)) {
/* Check that the driver is CMO enabled and get desired DMA */
if (!viodrv->get_desired_dma) {
dev_err(dev, "%s: device driver does not support CMO\n",
__func__);
return -EINVAL;
}
viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
viodev->cmo.desired = VIO_CMO_MIN_ENT;
size = VIO_CMO_MIN_ENT;
dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
GFP_KERNEL);
if (!dev_ent)
return -ENOMEM;
dev_ent->viodev = viodev;
spin_lock_irqsave(&vio_cmo.lock, flags);
list_add(&dev_ent->list, &vio_cmo.device_list);
} else {
viodev->cmo.desired = 0;
size = 0;
spin_lock_irqsave(&vio_cmo.lock, flags);
}
/*
* If the needs for vio_cmo.min have not changed since they
* were last set, the number of devices in the OF tree has
* been constant and the IO memory for this is already in
* the reserve pool.
*/
if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
VIO_CMO_MIN_ENT)) {
/* Updated desired entitlement if device requires it */
if (size)
vio_cmo.desired += (viodev->cmo.desired -
VIO_CMO_MIN_ENT);
} else {
size_t tmp;
tmp = vio_cmo.spare + vio_cmo.excess.free;
if (tmp < size) {
dev_err(dev, "%s: insufficient free "
"entitlement to add device. "
"Need %lu, have %lu\n", __func__,
size, (vio_cmo.spare + tmp));
spin_unlock_irqrestore(&vio_cmo.lock, flags);
return -ENOMEM;
}
/* Use excess pool first to fulfill request */
tmp = min(size, vio_cmo.excess.free);
vio_cmo.excess.free -= tmp;
vio_cmo.excess.size -= tmp;
vio_cmo.reserve.size += tmp;
/* Use spare if excess pool was insufficient */
vio_cmo.spare -= size - tmp;
/* Update bus accounting */
vio_cmo.min += size;
vio_cmo.desired += viodev->cmo.desired;
}
spin_unlock_irqrestore(&vio_cmo.lock, flags);
return 0;
}
/**
* vio_cmo_bus_remove - Handle CMO specific bus removal activities
*
* @viodev - Pointer to struct vio_dev for device
*
* Remove the device from the cmo device list. The minimum entitlement
* will be reserved for the device as long as it is in the system. The
* rest of the entitlement the device had been allocated will be returned
* to the system.
*/
static void vio_cmo_bus_remove(struct vio_dev *viodev)
{
struct vio_cmo_dev_entry *dev_ent;
unsigned long flags;
size_t tmp;
spin_lock_irqsave(&vio_cmo.lock, flags);
if (viodev->cmo.allocated) {
dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
"allocated after remove operation.\n",
__func__, viodev->cmo.allocated);
BUG();
}
/*
* Remove the device from the device list being maintained for
* CMO enabled devices.
*/
list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
if (viodev == dev_ent->viodev) {
list_del(&dev_ent->list);
kfree(dev_ent);
break;
}
/*
* Devices may not require any entitlement and they do not need
* to be processed. Otherwise, return the device's entitlement
* back to the pools.
*/
if (viodev->cmo.entitled) {
/*
* This device has not yet left the OF tree, it's
* minimum entitlement remains in vio_cmo.min and
* vio_cmo.desired
*/
vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
/*
* Save min allocation for device in reserve as long
* as it exists in OF tree as determined by later
* balance operation
*/
viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
/* Replenish spare from freed reserve pool */
if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
vio_cmo.spare));
vio_cmo.spare += tmp;
viodev->cmo.entitled -= tmp;
}
/* Remaining reserve goes to excess pool */
vio_cmo.excess.size += viodev->cmo.entitled;
vio_cmo.excess.free += viodev->cmo.entitled;
vio_cmo.reserve.size -= viodev->cmo.entitled;
/*
* Until the device is removed it will keep a
* minimum entitlement; this will guarantee that
* a module unload/load will result in a success.
*/
viodev->cmo.entitled = VIO_CMO_MIN_ENT;
viodev->cmo.desired = VIO_CMO_MIN_ENT;
atomic_set(&viodev->cmo.allocs_failed, 0);
}
spin_unlock_irqrestore(&vio_cmo.lock, flags);
}
static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
{
vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
}
/**
* vio_cmo_bus_init - CMO entitlement initialization at bus init time
*
* Set up the reserve and excess entitlement pools based on available
* system entitlement and the number of devices in the OF tree that
* require entitlement in the reserve pool.
*/
static void vio_cmo_bus_init(void)
{
struct hvcall_mpp_data mpp_data;
int err;
memset(&vio_cmo, 0, sizeof(struct vio_cmo));
spin_lock_init(&vio_cmo.lock);
INIT_LIST_HEAD(&vio_cmo.device_list);
INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
/* Get current system entitlement */
err = h_get_mpp(&mpp_data);
/*
* On failure, continue with entitlement set to 0, will panic()
* later when spare is reserved.
*/
if (err != H_SUCCESS) {
printk(KERN_ERR "%s: unable to determine system IO "\
"entitlement. (%d)\n", __func__, err);
vio_cmo.entitled = 0;
} else {
vio_cmo.entitled = mpp_data.entitled_mem;
}
/* Set reservation and check against entitlement */
vio_cmo.spare = VIO_CMO_MIN_ENT;
vio_cmo.reserve.size = vio_cmo.spare;
vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
VIO_CMO_MIN_ENT);
if (vio_cmo.reserve.size > vio_cmo.entitled) {
printk(KERN_ERR "%s: insufficient system entitlement\n",
__func__);
panic("%s: Insufficient system entitlement", __func__);
}
/* Set the remaining accounting variables */
vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
vio_cmo.excess.free = vio_cmo.excess.size;
vio_cmo.min = vio_cmo.reserve.size;
vio_cmo.desired = vio_cmo.reserve.size;
}
/* sysfs device functions and data structures for CMO */
#define viodev_cmo_rd_attr(name) \
static ssize_t viodev_cmo_##name##_show(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
}
static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct vio_dev *viodev = to_vio_dev(dev);
return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
}
static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct vio_dev *viodev = to_vio_dev(dev);
atomic_set(&viodev->cmo.allocs_failed, 0);
return count;
}
static ssize_t viodev_cmo_desired_set(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct vio_dev *viodev = to_vio_dev(dev);
size_t new_desired;
int ret;
ret = strict_strtoul(buf, 10, &new_desired);
if (ret)
return ret;
vio_cmo_set_dev_desired(viodev, new_desired);
return count;
}
viodev_cmo_rd_attr(desired);
viodev_cmo_rd_attr(entitled);
viodev_cmo_rd_attr(allocated);
static ssize_t name_show(struct device *, struct device_attribute *, char *);
static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
static struct device_attribute vio_cmo_dev_attrs[] = {
__ATTR_RO(name),
__ATTR_RO(devspec),
__ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
viodev_cmo_desired_show, viodev_cmo_desired_set),
__ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
__ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
__ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
__ATTR_NULL
};
/* sysfs bus functions and data structures for CMO */
#define viobus_cmo_rd_attr(name) \
static ssize_t \
viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \
{ \
return sprintf(buf, "%lu\n", vio_cmo.name); \
}
#define viobus_cmo_pool_rd_attr(name, var) \
static ssize_t \
viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \
{ \
return sprintf(buf, "%lu\n", vio_cmo.name.var); \
}
static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
size_t count)
{
unsigned long flags;
spin_lock_irqsave(&vio_cmo.lock, flags);
vio_cmo.high = vio_cmo.curr;
spin_unlock_irqrestore(&vio_cmo.lock, flags);
return count;
}
viobus_cmo_rd_attr(entitled);
viobus_cmo_pool_rd_attr(reserve, size);
viobus_cmo_pool_rd_attr(excess, size);
viobus_cmo_pool_rd_attr(excess, free);
viobus_cmo_rd_attr(spare);
viobus_cmo_rd_attr(min);
viobus_cmo_rd_attr(desired);
viobus_cmo_rd_attr(curr);
viobus_cmo_rd_attr(high);
static struct bus_attribute vio_cmo_bus_attrs[] = {
__ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
__ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
__ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
__ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
__ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL),
__ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL),
__ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
__ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL),
__ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
viobus_cmo_high_show, viobus_cmo_high_reset),
__ATTR_NULL
};
static void vio_cmo_sysfs_init(void)
{
vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
}
#else /* CONFIG_PPC_SMLPAR */
/* Dummy functions for iSeries platform */
int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
static void vio_cmo_bus_init() {}
static void vio_cmo_sysfs_init() { }
#endif /* CONFIG_PPC_SMLPAR */
EXPORT_SYMBOL(vio_cmo_entitlement_update);
EXPORT_SYMBOL(vio_cmo_set_dev_desired);
static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
{
const unsigned char *dma_window;
......@@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev)
return error;
id = vio_match_device(viodrv->id_table, viodev);
if (id)
if (id) {
memset(&viodev->cmo, 0, sizeof(viodev->cmo));
if (firmware_has_feature(FW_FEATURE_CMO)) {
error = vio_cmo_bus_probe(viodev);
if (error)
return error;
}
error = viodrv->probe(viodev, id);
if (error)
vio_cmo_bus_remove(viodev);
}
return error;
}
......@@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev)
{
struct vio_dev *viodev = to_vio_dev(dev);
struct vio_driver *viodrv = to_vio_driver(dev->driver);
struct device *devptr;
int ret = 1;
/*
* Hold a reference to the device after the remove function is called
* to allow for CMO accounting cleanup for the device.
*/
devptr = get_device(dev);
if (viodrv->remove)
return viodrv->remove(viodev);
ret = viodrv->remove(viodev);
if (!ret && firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_bus_remove(viodev);
/* driver can't remove */
return 1;
put_device(devptr);
return ret;
}
/**
......@@ -215,6 +1226,10 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
viodev->unit_address = *unit_address;
}
viodev->dev.archdata.of_node = of_node_get(of_node);
if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_set_dma_ops(viodev);
else
viodev->dev.archdata.dma_ops = &dma_iommu_ops;
viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
......@@ -245,6 +1260,9 @@ static int __init vio_bus_init(void)
int err;
struct device_node *node_vroot;
if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_sysfs_init();
err = bus_register(&vio_bus_type);
if (err) {
printk(KERN_ERR "failed to register VIO bus\n");
......@@ -262,6 +1280,9 @@ static int __init vio_bus_init(void)
return err;
}
if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_bus_init();
node_vroot = of_find_node_by_name(NULL, "vdevice");
if (node_vroot) {
struct device_node *of_node;
......
......@@ -85,7 +85,7 @@ SECTIONS
/* The dummy segment contents for the bug workaround mentioned above
near PHDRS. */
.dummy : {
.dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
LONG(0xf177)
} :kernel :dummy
......
......@@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs)
return 0;
}
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
static void do_dabr(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
siginfo_t info;
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
return;
if (debugger_dabr_match(regs))
return;
/* Clear the DABR */
set_dabr(0);
/* Deliver the signal to userspace */
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_HWBKPT;
info.si_addr = (void __user *)address;
force_sig_info(SIGTRAP, &info, current);
}
#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
* for a data fault, SRR1 for an instruction fault. For 400-family processors
......
config PPC_MPC52xx
bool "52xx-based boards"
depends on PPC_MULTIPLATFORM && PPC32
select FSL_SOC
select PPC_CLOCK
select PPC_PCI_CHOICE
......@@ -49,5 +48,6 @@ config PPC_MPC5200_GPIO
bool "MPC5200 GPIO support"
depends on PPC_MPC52xx
select ARCH_REQUIRE_GPIOLIB
select GENERIC_GPIO
help
Enable gpiolib support for mpc5200 based boards
......@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
}
}
static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
......@@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
index, npages, direction, base_pte);
return 0;
}
static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
......@@ -1150,12 +1151,23 @@ static int iommu_fixed_disabled;
static int __init setup_iommu_fixed(char *str)
{
struct device_node *pciep;
if (strcmp(str, "off") == 0)
iommu_fixed_disabled = 1;
else if (strcmp(str, "weak") == 0)
/* If we can find a pcie-endpoint in the device tree assume that
* we're on a triblade or a CAB so by default the fixed mapping
* should be set to be weakly ordered; but only if the boot
* option WASN'T set for strong ordering
*/
pciep = of_find_node_by_type(NULL, "pcie-endpoint");
if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
iommu_fixed_is_weak = 1;
of_node_put(pciep);
return 1;
}
__setup("iommu_fixed=", setup_iommu_fixed);
......
......@@ -312,10 +312,27 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
*/
node = cpu_to_node(raw_smp_processor_id());
for (n = 0; n < MAX_NUMNODES; n++, node++) {
int available_spus;
node = (node < MAX_NUMNODES) ? node : 0;
if (!node_allowed(ctx, node))
continue;
available_spus = 0;
mutex_lock(&cbe_spu_info[node].list_mutex);
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
if (spu->ctx && spu->ctx->gang
&& spu->ctx->aff_offset == 0)
available_spus -=
(spu->ctx->gang->contexts - 1);
else
available_spus++;
}
if (available_spus < ctx->gang->contexts) {
mutex_unlock(&cbe_spu_info[node].list_mutex);
continue;
}
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
if ((!mem_aff || spu->has_mem_affinity) &&
sched_spu(spu)) {
......@@ -389,6 +406,9 @@ static int has_affinity(struct spu_context *ctx)
if (list_empty(&ctx->aff_list))
return 0;
if (atomic_read(&ctx->gang->aff_sched_count) == 0)
ctx->gang->aff_ref_spu = NULL;
if (!gang->aff_ref_spu) {
if (!(gang->aff_flags & AFF_MERGED))
aff_merge_remaining_ctxs(gang);
......@@ -416,14 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
if (spu->ctx->flags & SPU_CREATE_NOSCHED)
atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
if (ctx->gang){
mutex_lock(&ctx->gang->aff_mutex);
if (has_affinity(ctx)) {
if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
ctx->gang->aff_ref_spu = NULL;
}
mutex_unlock(&ctx->gang->aff_mutex);
}
if (ctx->gang)
atomic_dec_if_positive(&ctx->gang->aff_sched_count);
spu_switch_notify(spu, NULL);
spu_unmap_mappings(ctx);
......@@ -562,10 +576,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
goto found;
mutex_unlock(&cbe_spu_info[node].list_mutex);
mutex_lock(&ctx->gang->aff_mutex);
if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
ctx->gang->aff_ref_spu = NULL;
mutex_unlock(&ctx->gang->aff_mutex);
atomic_dec(&ctx->gang->aff_sched_count);
goto not_found;
}
mutex_unlock(&ctx->gang->aff_mutex);
......
......@@ -196,8 +196,7 @@ static int __init sputrace_init(void)
struct proc_dir_entry *entry;
int i, error = -ENOMEM;
sputrace_log = kcalloc(sizeof(struct sputrace),
bufsize, GFP_KERNEL);
sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
if (!sputrace_log)
goto out;
......
......@@ -41,7 +41,7 @@
#include <asm/iseries/hv_call_event.h>
#include <asm/iseries/iommu.h>
static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
......@@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
index++;
uaddr += TCE_PAGE_SIZE;
}
return 0;
}
static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
......
......@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
static struct iommu_table iommu_table_iobmap;
static int iommu_table_iobmap_inited;
static void iobmap_build(struct iommu_table *tbl, long index,
static int iobmap_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
......@@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index,
uaddr += IOBMAP_PAGE_SIZE;
bus_addr += IOBMAP_PAGE_SIZE;
}
return 0;
}
......
......@@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG
depends on PPC_PSERIES && PPC_EARLY_DEBUG
bool "Enable extra debug logging in platforms/pseries"
default y
config PPC_SMLPAR
bool "Support for shared-memory logical partitions"
depends on PPC_PSERIES
select LPARCFG
default n
help
Select this option to enable shared memory partition support.
With this option a system running in an LPAR can be given more
memory than physically available and will allow firmware to
balance memory across many LPARs.
config CMM
tristate "Collaborative memory management"
depends on PPC_SMLPAR
default y
help
Select this option, if you want to enable the kernel interface
to reduce the memory size of the system. This is accomplished
by allocating pages of memory and put them "on hold". This only
makes sense for a system running in an LPAR where the unused pages
will be reused for other LPARs. The interface allows firmware to
balance memory across many LPARs.
......@@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
obj-$(CONFIG_HVCS) += hvcserver.o
obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
obj-$(CONFIG_CMM) += cmm.o
/*
* Collaborative memory management interface.
*
* Copyright (C) 2008 IBM Corporation
* Author(s): Brian King (brking@linux.vnet.ibm.com),
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/oom.h>
#include <linux/sched.h>
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/sysdev.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/mmu.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include "plpar_wrappers.h"
#define CMM_DRIVER_VERSION "1.0.0"
#define CMM_DEFAULT_DELAY 1
#define CMM_DEBUG 0
#define CMM_DISABLE 0
#define CMM_OOM_KB 1024
#define CMM_MIN_MEM_MB 256
#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
static unsigned int delay = CMM_DEFAULT_DELAY;
static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
static struct sys_device cmm_sysdev;
MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
MODULE_LICENSE("GPL");
MODULE_VERSION(CMM_DRIVER_VERSION);
module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
"[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
"[Default=" __stringify(CMM_OOM_KB) "]");
module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
"[Default=" __stringify(CMM_MIN_MEM_MB) "]");
module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
"[Default=" __stringify(CMM_DEBUG) "]");
#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
struct cmm_page_array {
struct cmm_page_array *next;
unsigned long index;
unsigned long page[CMM_NR_PAGES];
};
static unsigned long loaned_pages;
static unsigned long loaned_pages_target;
static unsigned long oom_freed_pages;
static struct cmm_page_array *cmm_page_list;
static DEFINE_SPINLOCK(cmm_lock);
static struct task_struct *cmm_thread_ptr;
/**
* cmm_alloc_pages - Allocate pages and mark them as loaned
* @nr: number of pages to allocate
*
* Return value:
* number of pages requested to be allocated which were not
**/
static long cmm_alloc_pages(long nr)
{
struct cmm_page_array *pa, *npa;
unsigned long addr;
long rc;
cmm_dbg("Begin request for %ld pages\n", nr);
while (nr) {
addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
__GFP_NORETRY | __GFP_NOMEMALLOC);
if (!addr)
break;
spin_lock(&cmm_lock);
pa = cmm_page_list;
if (!pa || pa->index >= CMM_NR_PAGES) {
/* Need a new page for the page list. */
spin_unlock(&cmm_lock);
npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
__GFP_NORETRY | __GFP_NOMEMALLOC);
if (!npa) {
pr_info("%s: Can not allocate new page list\n", __FUNCTION__);
free_page(addr);
break;
}
spin_lock(&cmm_lock);
pa = cmm_page_list;
if (!pa || pa->index >= CMM_NR_PAGES) {
npa->next = pa;
npa->index = 0;
pa = npa;
cmm_page_list = pa;
} else
free_page((unsigned long) npa);
}
if ((rc = plpar_page_set_loaned(__pa(addr)))) {
pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, rc);
spin_unlock(&cmm_lock);
free_page(addr);
break;
}
pa->page[pa->index++] = addr;
loaned_pages++;
totalram_pages--;
spin_unlock(&cmm_lock);
nr--;
}
cmm_dbg("End request with %ld pages unfulfilled\n", nr);
return nr;
}
/**
* cmm_free_pages - Free pages and mark them as active
* @nr: number of pages to free
*
* Return value:
* number of pages requested to be freed which were not
**/
static long cmm_free_pages(long nr)
{
struct cmm_page_array *pa;
unsigned long addr;
cmm_dbg("Begin free of %ld pages.\n", nr);
spin_lock(&cmm_lock);
pa = cmm_page_list;
while (nr) {
if (!pa || pa->index <= 0)
break;
addr = pa->page[--pa->index];
if (pa->index == 0) {
pa = pa->next;
free_page((unsigned long) cmm_page_list);
cmm_page_list = pa;
}
plpar_page_set_active(__pa(addr));
free_page(addr);
loaned_pages--;
nr--;
totalram_pages++;
}
spin_unlock(&cmm_lock);
cmm_dbg("End request with %ld pages unfulfilled\n", nr);
return nr;
}
/**
* cmm_oom_notify - OOM notifier
* @self: notifier block struct
* @dummy: not used
* @parm: returned - number of pages freed
*
* Return value:
* NOTIFY_OK
**/
static int cmm_oom_notify(struct notifier_block *self,
unsigned long dummy, void *parm)
{
unsigned long *freed = parm;
long nr = KB2PAGES(oom_kb);
cmm_dbg("OOM processing started\n");
nr = cmm_free_pages(nr);
loaned_pages_target = loaned_pages;
*freed += KB2PAGES(oom_kb) - nr;
oom_freed_pages += KB2PAGES(oom_kb) - nr;
cmm_dbg("OOM processing complete\n");
return NOTIFY_OK;
}
/**
* cmm_get_mpp - Read memory performance parameters
*
* Makes hcall to query the current page loan request from the hypervisor.
*
* Return value:
* nothing
**/
static void cmm_get_mpp(void)
{
int rc;
struct hvcall_mpp_data mpp_data;
unsigned long active_pages_target;
signed long page_loan_request;
rc = h_get_mpp(&mpp_data);
if (rc != H_SUCCESS)
return;
page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
loaned_pages_target = page_loan_request + loaned_pages;
if (loaned_pages_target > oom_freed_pages)
loaned_pages_target -= oom_freed_pages;
else
loaned_pages_target = 0;
active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
loaned_pages_target = totalram_pages + loaned_pages -
((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
page_loan_request, loaned_pages, loaned_pages_target,
oom_freed_pages, totalram_pages);
}
static struct notifier_block cmm_oom_nb = {
.notifier_call = cmm_oom_notify
};
/**
* cmm_thread - CMM task thread
* @dummy: not used
*
* Return value:
* 0
**/
static int cmm_thread(void *dummy)
{
unsigned long timeleft;
while (1) {
timeleft = msleep_interruptible(delay * 1000);
if (kthread_should_stop() || timeleft) {
loaned_pages_target = loaned_pages;
break;
}
cmm_get_mpp();
if (loaned_pages_target > loaned_pages) {
if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
loaned_pages_target = loaned_pages;
} else if (loaned_pages_target < loaned_pages)
cmm_free_pages(loaned_pages - loaned_pages_target);
}
return 0;
}
#define CMM_SHOW(name, format, args...) \
static ssize_t show_##name(struct sys_device *dev, char *buf) \
{ \
return sprintf(buf, format, ##args); \
} \
static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
static ssize_t show_oom_pages(struct sys_device *dev, char *buf)
{
return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
}
static ssize_t store_oom_pages(struct sys_device *dev,
const char *buf, size_t count)
{
unsigned long val = simple_strtoul (buf, NULL, 10);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (val != 0)
return -EBADMSG;
oom_freed_pages = 0;
return count;
}
static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
show_oom_pages, store_oom_pages);
static struct sysdev_attribute *cmm_attrs[] = {
&attr_loaned_kb,
&attr_loaned_target_kb,
&attr_oom_freed_kb,
};
static struct sysdev_class cmm_sysdev_class = {
.name = "cmm",
};
/**
* cmm_sysfs_register - Register with sysfs
*
* Return value:
* 0 on success / other on failure
**/
static int cmm_sysfs_register(struct sys_device *sysdev)
{
int i, rc;
if ((rc = sysdev_class_register(&cmm_sysdev_class)))
return rc;
sysdev->id = 0;
sysdev->cls = &cmm_sysdev_class;
if ((rc = sysdev_register(sysdev)))
goto class_unregister;
for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
goto fail;
}
return 0;
fail:
while (--i >= 0)
sysdev_remove_file(sysdev, cmm_attrs[i]);
sysdev_unregister(sysdev);
class_unregister:
sysdev_class_unregister(&cmm_sysdev_class);
return rc;
}
/**
* cmm_unregister_sysfs - Unregister from sysfs
*
**/
static void cmm_unregister_sysfs(struct sys_device *sysdev)
{
int i;
for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
sysdev_remove_file(sysdev, cmm_attrs[i]);
sysdev_unregister(sysdev);
sysdev_class_unregister(&cmm_sysdev_class);
}
/**
* cmm_init - Module initialization
*
* Return value:
* 0 on success / other on failure
**/
static int cmm_init(void)
{
int rc = -ENOMEM;
if (!firmware_has_feature(FW_FEATURE_CMO))
return -EOPNOTSUPP;
if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
return rc;
if ((rc = cmm_sysfs_register(&cmm_sysdev)))
goto out_oom_notifier;
if (cmm_disabled)
return rc;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (IS_ERR(cmm_thread_ptr)) {
rc = PTR_ERR(cmm_thread_ptr);
goto out_unregister_sysfs;
}
return rc;
out_unregister_sysfs:
cmm_unregister_sysfs(&cmm_sysdev);
out_oom_notifier:
unregister_oom_notifier(&cmm_oom_nb);
return rc;
}
/**
* cmm_exit - Module exit
*
* Return value:
* nothing
**/
static void cmm_exit(void)
{
if (cmm_thread_ptr)
kthread_stop(cmm_thread_ptr);
unregister_oom_notifier(&cmm_oom_nb);
cmm_free_pages(loaned_pages);
cmm_unregister_sysfs(&cmm_sysdev);
}
/**
* cmm_set_disable - Disable/Enable CMM
*
* Return value:
* 0 on success / other on failure
**/
static int cmm_set_disable(const char *val, struct kernel_param *kp)
{
int disable = simple_strtoul(val, NULL, 10);
if (disable != 0 && disable != 1)
return -EINVAL;
if (disable && !cmm_disabled) {
if (cmm_thread_ptr)
kthread_stop(cmm_thread_ptr);
cmm_thread_ptr = NULL;
cmm_free_pages(loaned_pages);
} else if (!disable && cmm_disabled) {
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (IS_ERR(cmm_thread_ptr))
return PTR_ERR(cmm_thread_ptr);
}
cmm_disabled = disable;
return 0;
}
module_param_call(disable, cmm_set_disable, param_get_uint,
&cmm_disabled, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
"[Default=" __stringify(CMM_DISABLE) "]");
module_init(cmm_init);
module_exit(cmm_exit);
......@@ -48,7 +48,7 @@
#include "plpar_wrappers.h"
static void tce_build_pSeries(struct iommu_table *tbl, long index,
static int tce_build_pSeries(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
......@@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index,
uaddr += TCE_PAGE_SIZE;
tcep++;
}
return 0;
}
......@@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
return *tcep;
}
static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
static void tce_free_pSeriesLP(struct iommu_table*, long, long);
static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
u64 rc;
u64 rc = 0;
u64 proto_tce, tce;
u64 rpn;
int ret = 0;
long tcenum_start = tcenum, npages_start = npages;
rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
proto_tce = TCE_PCI_READ;
......@@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
tce_free_pSeriesLP(tbl, tcenum_start,
(npages_start - (npages + 1)));
break;
}
if (rc && printk_ratelimit()) {
printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
......@@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcenum++;
rpn++;
}
return ret;
}
static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
u64 rc;
u64 rc = 0;
u64 proto_tce;
u64 *tcep;
u64 rpn;
long l, limit;
long tcenum_start = tcenum, npages_start = npages;
int ret = 0;
if (npages == 1) {
tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
return;
}
tcep = __get_cpu_var(tce_page);
......@@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcep = (u64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
return;
}
__get_cpu_var(tce_page) = tcep;
}
......@@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcenum += limit;
} while (npages > 0 && !rc);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
tce_freemulti_pSeriesLP(tbl, tcenum_start,
(npages_start - (npages + limit)));
return ret;
}
if (rc && printk_ratelimit()) {
printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
......@@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
printk("\ttce[0] val = 0x%lx\n", tcep[0]);
show_stack(current, (unsigned long *)__get_SP());
}
return ret;
}
static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
......
......@@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
return vpa_call(0x3, cpu, vpa);
}
static inline long plpar_page_set_loaned(unsigned long vpa)
{
return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0);
}
static inline long plpar_page_set_active(unsigned long vpa)
{
return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0);
}
extern void vpa_init(int cpu);
static inline long plpar_pte_enter(unsigned long flags,
......
......@@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr)
H_DABRX_KERNEL | H_DABRX_USER);
}
#define CMO_CHARACTERISTICS_TOKEN 44
#define CMO_MAXLENGTH 1026
/**
* fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
* handle that here. (Stolen from parse_system_parameter_string)
*/
void pSeries_cmo_feature_init(void)
{
char *ptr, *key, *value, *end;
int call_status;
int PrPSP = -1;
int SecPSP = -1;
pr_debug(" -> fw_cmo_feature_init()\n");
spin_lock(&rtas_data_buf_lock);
memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
NULL,
CMO_CHARACTERISTICS_TOKEN,
__pa(rtas_data_buf),
RTAS_DATA_BUF_SIZE);
if (call_status != 0) {
spin_unlock(&rtas_data_buf_lock);
pr_debug("CMO not available\n");
pr_debug(" <- fw_cmo_feature_init()\n");
return;
}
end = rtas_data_buf + CMO_MAXLENGTH - 2;
ptr = rtas_data_buf + 2; /* step over strlen value */
key = value = ptr;
while (*ptr && (ptr <= end)) {
/* Separate the key and value by replacing '=' with '\0' and
* point the value at the string after the '='
*/
if (ptr[0] == '=') {
ptr[0] = '\0';
value = ptr + 1;
} else if (ptr[0] == '\0' || ptr[0] == ',') {
/* Terminate the string containing the key/value pair */
ptr[0] = '\0';
if (key == value) {
pr_debug("Malformed key/value pair\n");
/* Never found a '=', end processing */
break;
}
if (0 == strcmp(key, "PrPSP"))
PrPSP = simple_strtol(value, NULL, 10);
else if (0 == strcmp(key, "SecPSP"))
SecPSP = simple_strtol(value, NULL, 10);
value = key = ptr + 1;
}
ptr++;
}
if (PrPSP != -1 || SecPSP != -1) {
pr_info("CMO enabled\n");
pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
powerpc_firmware_features |= FW_FEATURE_CMO;
} else
pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
spin_unlock(&rtas_data_buf_lock);
pr_debug(" <- fw_cmo_feature_init()\n");
}
/*
* Early initialization. Relocation is on but do not reference unbolted pages
*/
......@@ -329,6 +399,7 @@ static void __init pSeries_init_early(void)
else if (firmware_has_feature(FW_FEATURE_XDABR))
ppc_md.set_dabr = pseries_set_xdabr;
pSeries_cmo_feature_init();
iommu_init_early_pSeries();
pr_debug(" <- pSeries_init_early()\n");
......
......@@ -147,7 +147,7 @@ static void dart_flush(struct iommu_table *tbl)
}
}
static void dart_build(struct iommu_table *tbl, long index,
static int dart_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
......@@ -184,6 +184,7 @@ static void dart_build(struct iommu_table *tbl, long index,
} else {
dart_dirty = 1;
}
return 0;
}
......
......@@ -33,6 +33,7 @@
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ioport.h>
......@@ -52,7 +53,9 @@
#include <asm/hvcall.h>
#include <asm/atomic.h>
#include <asm/vio.h>
#include <asm/iommu.h>
#include <asm/uaccess.h>
#include <asm/firmware.h>
#include <linux/seq_file.h>
#include "ibmveth.h"
......@@ -94,8 +97,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter);
static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter);
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
static struct kobj_type ktype_veth_pool;
#ifdef CONFIG_PROC_FS
#define IBMVETH_PROC_DIR "ibmveth"
static struct proc_dir_entry *ibmveth_proc_dir;
......@@ -226,17 +231,17 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
u32 i;
u32 count = pool->size - atomic_read(&pool->available);
u32 buffers_added = 0;
mb();
for(i = 0; i < count; ++i) {
struct sk_buff *skb;
unsigned int free_index, index;
u64 correlator;
union ibmveth_buf_desc desc;
unsigned long lpar_rc;
dma_addr_t dma_addr;
mb();
for(i = 0; i < count; ++i) {
union ibmveth_buf_desc desc;
skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
if(!skb) {
......@@ -255,6 +260,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
pool->buff_size, DMA_FROM_DEVICE);
if (dma_mapping_error(dma_addr))
goto failure;
pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
pool->dma_addr[index] = dma_addr;
pool->skbuff[index] = skb;
......@@ -267,24 +275,31 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
if(lpar_rc != H_SUCCESS) {
if (lpar_rc != H_SUCCESS)
goto failure;
else {
buffers_added++;
adapter->replenish_add_buff_success++;
}
}
mb();
atomic_add(buffers_added, &(pool->available));
return;
failure:
pool->free_map[free_index] = index;
pool->skbuff[index] = NULL;
if (pool->consumer_index == 0)
pool->consumer_index = pool->size - 1;
else
pool->consumer_index--;
if (!dma_mapping_error(dma_addr))
dma_unmap_single(&adapter->vdev->dev,
pool->dma_addr[index], pool->buff_size,
DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
adapter->replenish_add_buff_failure++;
break;
} else {
buffers_added++;
adapter->replenish_add_buff_success++;
}
}
mb();
atomic_add(buffers_added, &(pool->available));
......@@ -297,7 +312,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
adapter->replenish_task_cycles++;
for(i = 0; i < IbmVethNumBufferPools; i++)
for (i = (IbmVethNumBufferPools - 1); i >= 0; i--)
if(adapter->rx_buff_pool[i].active)
ibmveth_replenish_buffer_pool(adapter,
&adapter->rx_buff_pool[i]);
......@@ -472,6 +487,18 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
if (adapter->rx_buff_pool[i].active)
ibmveth_free_buffer_pool(adapter,
&adapter->rx_buff_pool[i]);
if (adapter->bounce_buffer != NULL) {
if (!dma_mapping_error(adapter->bounce_buffer_dma)) {
dma_unmap_single(&adapter->vdev->dev,
adapter->bounce_buffer_dma,
adapter->netdev->mtu + IBMVETH_BUFF_OH,
DMA_BIDIRECTIONAL);
adapter->bounce_buffer_dma = DMA_ERROR_CODE;
}
kfree(adapter->bounce_buffer);
adapter->bounce_buffer = NULL;
}
}
static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
......@@ -607,6 +634,24 @@ static int ibmveth_open(struct net_device *netdev)
return rc;
}
adapter->bounce_buffer =
kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
if (!adapter->bounce_buffer) {
ibmveth_error_printk("unable to allocate bounce buffer\n");
ibmveth_cleanup(adapter);
napi_disable(&adapter->napi);
return -ENOMEM;
}
adapter->bounce_buffer_dma =
dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
if (dma_mapping_error(adapter->bounce_buffer_dma)) {
ibmveth_error_printk("unable to map bounce buffer\n");
ibmveth_cleanup(adapter);
napi_disable(&adapter->napi);
return -ENOMEM;
}
ibmveth_debug_printk("initial replenish cycle\n");
ibmveth_interrupt(netdev->irq, netdev);
......@@ -853,9 +898,11 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
unsigned int tx_packets = 0;
unsigned int tx_send_failed = 0;
unsigned int tx_map_failed = 0;
int used_bounce = 0;
unsigned long data_dma_addr;
desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len;
desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data,
data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
skb->len, DMA_TO_DEVICE);
if (skb->ip_summed == CHECKSUM_PARTIAL &&
......@@ -875,12 +922,16 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
buf[1] = 0;
}
if (dma_mapping_error(desc.fields.address)) {
if (dma_mapping_error(data_dma_addr)) {
if (!firmware_has_feature(FW_FEATURE_CMO))
ibmveth_error_printk("tx: unable to map xmit buffer\n");
skb_copy_from_linear_data(skb, adapter->bounce_buffer,
skb->len);
desc.fields.address = adapter->bounce_buffer_dma;
tx_map_failed++;
tx_dropped++;
goto out;
}
used_bounce = 1;
} else
desc.fields.address = data_dma_addr;
/* send the frame. Arbitrarily set retrycount to 1024 */
correlator = 0;
......@@ -904,7 +955,8 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
netdev->trans_start = jiffies;
}
dma_unmap_single(&adapter->vdev->dev, desc.fields.address,
if (!used_bounce)
dma_unmap_single(&adapter->vdev->dev, data_dma_addr,
skb->len, DMA_TO_DEVICE);
out: spin_lock_irqsave(&adapter->stats_lock, flags);
......@@ -1053,9 +1105,9 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
{
struct ibmveth_adapter *adapter = dev->priv;
struct vio_dev *viodev = adapter->vdev;
int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
int reinit = 0;
int i, rc;
int i;
if (new_mtu < IBMVETH_MAX_MTU)
return -EINVAL;
......@@ -1067,23 +1119,34 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
if (i == IbmVethNumBufferPools)
return -EINVAL;
/* Deactivate all the buffer pools so that the next loop can activate
only the buffer pools necessary to hold the new MTU */
for (i = 0; i < IbmVethNumBufferPools; i++)
if (adapter->rx_buff_pool[i].active) {
ibmveth_free_buffer_pool(adapter,
&adapter->rx_buff_pool[i]);
adapter->rx_buff_pool[i].active = 0;
}
/* Look for an active buffer pool that can hold the new MTU */
for(i = 0; i<IbmVethNumBufferPools; i++) {
if (!adapter->rx_buff_pool[i].active) {
adapter->rx_buff_pool[i].active = 1;
reinit = 1;
}
if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
if (reinit && netif_running(adapter->netdev)) {
if (netif_running(adapter->netdev)) {
adapter->pool_config = 1;
ibmveth_close(adapter->netdev);
adapter->pool_config = 0;
dev->mtu = new_mtu;
if ((rc = ibmveth_open(adapter->netdev)))
return rc;
} else
vio_cmo_set_dev_desired(viodev,
ibmveth_get_desired_dma
(viodev));
return ibmveth_open(adapter->netdev);
}
dev->mtu = new_mtu;
vio_cmo_set_dev_desired(viodev,
ibmveth_get_desired_dma
(viodev));
return 0;
}
}
......@@ -1098,6 +1161,46 @@ static void ibmveth_poll_controller(struct net_device *dev)
}
#endif
/**
* ibmveth_get_desired_dma - Calculate IO memory desired by the driver
*
* @vdev: struct vio_dev for the device whose desired IO mem is to be returned
*
* Return value:
* Number of bytes of IO data the driver will need to perform well.
*/
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
{
struct net_device *netdev = dev_get_drvdata(&vdev->dev);
struct ibmveth_adapter *adapter;
unsigned long ret;
int i;
int rxqentries = 1;
/* netdev inits at probe time along with the structures we need below*/
if (netdev == NULL)
return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT);
adapter = netdev_priv(netdev);
ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
ret += IOMMU_PAGE_ALIGN(netdev->mtu);
for (i = 0; i < IbmVethNumBufferPools; i++) {
/* add the size of the active receive buffers */
if (adapter->rx_buff_pool[i].active)
ret +=
adapter->rx_buff_pool[i].size *
IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
buff_size);
rxqentries += adapter->rx_buff_pool[i].size;
}
/* add the size of the receive queue entries */
ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry));
return ret;
}
static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
{
int rc, i;
......@@ -1242,6 +1345,8 @@ static int __devexit ibmveth_remove(struct vio_dev *dev)
ibmveth_proc_unregister_adapter(adapter);
free_netdev(netdev);
dev_set_drvdata(&dev->dev, NULL);
return 0;
}
......@@ -1402,14 +1507,15 @@ const char * buf, size_t count)
return -EPERM;
}
pool->active = 0;
if (netif_running(netdev)) {
adapter->pool_config = 1;
ibmveth_close(netdev);
pool->active = 0;
adapter->pool_config = 0;
if ((rc = ibmveth_open(netdev)))
return rc;
}
pool->active = 0;
}
} else if (attr == &veth_num_attr) {
if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
......@@ -1485,6 +1591,7 @@ static struct vio_driver ibmveth_driver = {
.id_table = ibmveth_device_table,
.probe = ibmveth_probe,
.remove = ibmveth_remove,
.get_desired_dma = ibmveth_get_desired_dma,
.driver = {
.name = ibmveth_driver_name,
.owner = THIS_MODULE,
......
......@@ -93,9 +93,12 @@ static inline long h_illan_attributes(unsigned long unit_address,
plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac)
#define IbmVethNumBufferPools 5
#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */
#define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
#define IBMVETH_MAX_MTU 68
#define IBMVETH_MAX_POOL_COUNT 4096
#define IBMVETH_BUFF_LIST_SIZE 4096
#define IBMVETH_FILT_LIST_SIZE 4096
#define IBMVETH_MAX_BUF_SIZE (1024 * 128)
static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
......@@ -143,6 +146,8 @@ struct ibmveth_adapter {
struct ibmveth_rx_q rx_queue;
int pool_config;
int rx_csum;
void *bounce_buffer;
dma_addr_t bounce_buffer_dma;
/* adapter specific stats */
u64 replenish_task_cycles;
......
......@@ -91,8 +91,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap,
}
info.irq = irq_of_parse_and_map(node, 0);
if (info.irq == NO_IRQ)
info.irq = -1;
if (of_find_i2c_driver(node, &info) < 0) {
irq_dispose_mapping(info.irq);
......
......@@ -3819,6 +3819,20 @@ static int ibmvfc_remove(struct vio_dev *vdev)
return 0;
}
/**
* ibmvfc_get_desired_dma - Calculate DMA resources needed by the driver
* @vdev: vio device struct
*
* Return value:
* Number of bytes the driver will need to DMA map at the same time in
* order to perform well.
*/
static unsigned long ibmvfc_get_desired_dma(struct vio_dev *vdev)
{
unsigned long pool_dma = max_requests * sizeof(union ibmvfc_iu);
return pool_dma + ((512 * 1024) * driver_template.cmd_per_lun);
}
static struct vio_device_id ibmvfc_device_table[] __devinitdata = {
{"fcp", "IBM,vfc-client"},
{ "", "" }
......@@ -3829,6 +3843,7 @@ static struct vio_driver ibmvfc_driver = {
.id_table = ibmvfc_device_table,
.probe = ibmvfc_probe,
.remove = ibmvfc_remove,
.get_desired_dma = ibmvfc_get_desired_dma,
.driver = {
.name = IBMVFC_NAME,
.owner = THIS_MODULE,
......
......@@ -72,6 +72,7 @@
#include <linux/delay.h>
#include <asm/firmware.h>
#include <asm/vio.h>
#include <asm/firmware.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
......@@ -426,8 +427,10 @@ static int map_sg_data(struct scsi_cmnd *cmd,
SG_ALL * sizeof(struct srp_direct_buf),
&evt_struct->ext_list_token, 0);
if (!evt_struct->ext_list) {
if (!firmware_has_feature(FW_FEATURE_CMO))
sdev_printk(KERN_ERR, cmd->device,
"Can't allocate memory for indirect table\n");
"Can't allocate memory "
"for indirect table\n");
return 0;
}
}
......@@ -743,7 +746,9 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
srp_cmd->lun = ((u64) lun) << 48;
if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n");
if (!firmware_has_feature(FW_FEATURE_CMO))
sdev_printk(KERN_ERR, cmnd->device,
"couldn't convert cmd to srp_cmd\n");
free_event_struct(&hostdata->pool, evt_struct);
return SCSI_MLQUEUE_HOST_BUSY;
}
......@@ -855,7 +860,10 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
DMA_BIDIRECTIONAL);
if (dma_mapping_error(req->buffer)) {
dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n");
if (!firmware_has_feature(FW_FEATURE_CMO))
dev_err(hostdata->dev,
"Unable to map request_buffer for "
"adapter_info!\n");
free_event_struct(&hostdata->pool, evt_struct);
return;
}
......@@ -1400,7 +1408,9 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
DMA_BIDIRECTIONAL);
if (dma_mapping_error(host_config->buffer)) {
dev_err(hostdata->dev, "dma_mapping error getting host config\n");
if (!firmware_has_feature(FW_FEATURE_CMO))
dev_err(hostdata->dev,
"dma_mapping error getting host config\n");
free_event_struct(&hostdata->pool, evt_struct);
return -1;
}
......@@ -1604,7 +1614,7 @@ static struct scsi_host_template driver_template = {
.eh_host_reset_handler = ibmvscsi_eh_host_reset_handler,
.slave_configure = ibmvscsi_slave_configure,
.change_queue_depth = ibmvscsi_change_queue_depth,
.cmd_per_lun = 16,
.cmd_per_lun = IBMVSCSI_CMDS_PER_LUN_DEFAULT,
.can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
.this_id = -1,
.sg_tablesize = SG_ALL,
......@@ -1612,6 +1622,26 @@ static struct scsi_host_template driver_template = {
.shost_attrs = ibmvscsi_attrs,
};
/**
* ibmvscsi_get_desired_dma - Calculate IO memory desired by the driver
*
* @vdev: struct vio_dev for the device whose desired IO mem is to be returned
*
* Return value:
* Number of bytes of IO data the driver will need to perform well.
*/
static unsigned long ibmvscsi_get_desired_dma(struct vio_dev *vdev)
{
/* iu_storage data allocated in initialize_event_pool */
unsigned long desired_io = max_requests * sizeof(union viosrp_iu);
/* add io space for sg data */
desired_io += (IBMVSCSI_MAX_SECTORS_DEFAULT *
IBMVSCSI_CMDS_PER_LUN_DEFAULT);
return desired_io;
}
/**
* Called by bus code for each adapter
*/
......@@ -1641,7 +1671,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
hostdata->host = host;
hostdata->dev = dev;
atomic_set(&hostdata->request_limit, -1);
hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */
hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests);
if (rc != 0 && rc != H_RESOURCE) {
......@@ -1735,6 +1765,7 @@ static struct vio_driver ibmvscsi_driver = {
.id_table = ibmvscsi_device_table,
.probe = ibmvscsi_probe,
.remove = ibmvscsi_remove,
.get_desired_dma = ibmvscsi_get_desired_dma,
.driver = {
.name = "ibmvscsi",
.owner = THIS_MODULE,
......
......@@ -45,6 +45,8 @@ struct Scsi_Host;
#define MAX_INDIRECT_BUFS 10
#define IBMVSCSI_MAX_REQUESTS_DEFAULT 100
#define IBMVSCSI_CMDS_PER_LUN_DEFAULT 16
#define IBMVSCSI_MAX_SECTORS_DEFAULT 256 /* 32 * 8 = default max I/O 32 pages */
#define IBMVSCSI_MAX_CMDS_PER_LUN 64
/* ------------------------------------------------------------
......
......@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
#ifndef ELF_BASE_PLATFORM
/*
* AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
* If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
* will be copied to the user stack in the same manner as AT_PLATFORM.
*/
#define ELF_BASE_PLATFORM NULL
#endif
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
......@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
elf_addr_t __user *envp;
elf_addr_t __user *sp;
elf_addr_t __user *u_platform;
elf_addr_t __user *u_base_platform;
const char *k_platform = ELF_PLATFORM;
const char *k_base_platform = ELF_BASE_PLATFORM;
int items;
elf_addr_t *elf_info;
int ei_index = 0;
......@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
return -EFAULT;
}
/*
* If this architecture has a "base" platform capability
* string, copy it to userspace.
*/
u_base_platform = NULL;
if (k_base_platform) {
size_t len = strlen(k_base_platform) + 1;
u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
if (__copy_to_user(u_base_platform, k_base_platform, len))
return -EFAULT;
}
/* Create the ELF interpreter info */
elf_info = (elf_addr_t *)current->mm->saved_auxv;
/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
......@@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
NEW_AUX_ENT(AT_PLATFORM,
(elf_addr_t)(unsigned long)u_platform);
}
if (k_base_platform) {
NEW_AUX_ENT(AT_BASE_PLATFORM,
(elf_addr_t)(unsigned long)u_base_platform);
}
if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
}
......
......@@ -127,6 +127,8 @@ extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
extern void do_feature_fixups(unsigned long value, void *fixup_start,
void *fixup_end);
extern const char *powerpc_base_platform;
#endif /* __ASSEMBLY__ */
/* CPU kernel features */
......
......@@ -217,6 +217,14 @@ typedef elf_vrregset_t elf_fpxregset_t;
#define ELF_PLATFORM (cur_cpu_spec->platform)
/* While ELF_PLATFORM indicates the ISA supported by the platform, it
* may not accurately reflect the underlying behavior of the hardware
* (as in the case of running in Power5+ compatibility mode on a
* Power6 machine). ELF_BASE_PLATFORM allows ld.so to load libraries
* that are tuned for the real hardware.
*/
#define ELF_BASE_PLATFORM (powerpc_base_platform)
#ifdef __powerpc64__
# define ELF_PLAT_INIT(_r, load_addr) do { \
_r->gpr[2] = load_addr; \
......
......@@ -46,6 +46,7 @@
#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000)
#define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000)
#define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000002000000)
#define FW_FEATURE_CMO ASM_CONST(0x0000000004000000)
#ifndef __ASSEMBLY__
......@@ -58,7 +59,7 @@ enum {
FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE |
FW_FEATURE_SPLPAR | FW_FEATURE_LPAR,
FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
......
......@@ -92,6 +92,11 @@
#define H_EXACT (1UL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */
#define H_R_XLATE (1UL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */
#define H_READ_4 (1UL<<(63-26)) /* Return 4 PTEs */
#define H_PAGE_STATE_CHANGE (1UL<<(63-28))
#define H_PAGE_UNUSED ((1UL<<(63-29)) | (1UL<<(63-30)))
#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1UL<<(63-31)))
#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE
#define H_AVPN (1UL<<(63-32)) /* An avpn is provided as a sanity test */
#define H_ANDCOND (1UL<<(63-33))
#define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */
......@@ -210,7 +215,9 @@
#define H_JOIN 0x298
#define H_VASI_STATE 0x2A4
#define H_ENABLE_CRQ 0x2B0
#define MAX_HCALL_OPCODE H_ENABLE_CRQ
#define H_SET_MPP 0x2D0
#define H_GET_MPP 0x2D4
#define MAX_HCALL_OPCODE H_GET_MPP
#ifndef __ASSEMBLY__
......@@ -270,6 +277,20 @@ struct hcall_stats {
};
#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
struct hvcall_mpp_data {
unsigned long entitled_mem;
unsigned long mapped_mem;
unsigned short group_num;
unsigned short pool_num;
unsigned char mem_weight;
unsigned char unallocated_mem_weight;
unsigned long unallocated_entitlement; /* value in bytes */
unsigned long pool_size;
signed long loan_request;
unsigned long backing_mem;
};
int h_get_mpp(struct hvcall_mpp_data *);
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
......@@ -125,7 +125,10 @@ struct lppaca {
// NOTE: This value will ALWAYS be zero for dedicated processors and
// will NEVER be zero for shared processors (ie, initialized to a 1).
volatile u32 yield_count; // PLIC increments each dispatchx00-x03
u8 reserved6[124]; // Reserved x04-x7F
u32 reserved6;
volatile u64 cmo_faults; // CMO page fault count x08-x0F
volatile u64 cmo_fault_time; // CMO page fault time x10-x17
u8 reserved7[104]; // Reserved x18-x7F
//=============================================================================
// CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
......
......@@ -76,7 +76,7 @@ struct machdep_calls {
* destroyed as well */
void (*hpte_clear_all)(void);
void (*tce_build)(struct iommu_table * tbl,
int (*tce_build)(struct iommu_table *tbl,
long index,
long npages,
unsigned long uaddr,
......
......@@ -60,10 +60,12 @@
#define MPC52xx_PSC_RXTX_FIFO_ALARM 0x0002
#define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001
/* PSC interrupt mask bits */
/* PSC interrupt status/mask bits */
#define MPC52xx_PSC_IMR_TXRDY 0x0100
#define MPC52xx_PSC_IMR_RXRDY 0x0200
#define MPC52xx_PSC_IMR_DB 0x0400
#define MPC52xx_PSC_IMR_TXEMP 0x0800
#define MPC52xx_PSC_IMR_ORERR 0x1000
#define MPC52xx_PSC_IMR_IPC 0x8000
/* PSC input port change bit */
......@@ -92,6 +94,34 @@
#define MPC52xx_PSC_RFNUM_MASK 0x01ff
#define MPC52xx_PSC_SICR_DTS1 (1 << 29)
#define MPC52xx_PSC_SICR_SHDR (1 << 28)
#define MPC52xx_PSC_SICR_SIM_MASK (0xf << 24)
#define MPC52xx_PSC_SICR_SIM_UART (0x0 << 24)
#define MPC52xx_PSC_SICR_SIM_UART_DCD (0x8 << 24)
#define MPC52xx_PSC_SICR_SIM_CODEC_8 (0x1 << 24)
#define MPC52xx_PSC_SICR_SIM_CODEC_16 (0x2 << 24)
#define MPC52xx_PSC_SICR_SIM_AC97 (0x3 << 24)
#define MPC52xx_PSC_SICR_SIM_SIR (0x8 << 24)
#define MPC52xx_PSC_SICR_SIM_SIR_DCD (0xc << 24)
#define MPC52xx_PSC_SICR_SIM_MIR (0x5 << 24)
#define MPC52xx_PSC_SICR_SIM_FIR (0x6 << 24)
#define MPC52xx_PSC_SICR_SIM_CODEC_24 (0x7 << 24)
#define MPC52xx_PSC_SICR_SIM_CODEC_32 (0xf << 24)
#define MPC52xx_PSC_SICR_GENCLK (1 << 23)
#define MPC52xx_PSC_SICR_I2S (1 << 22)
#define MPC52xx_PSC_SICR_CLKPOL (1 << 21)
#define MPC52xx_PSC_SICR_SYNCPOL (1 << 20)
#define MPC52xx_PSC_SICR_CELLSLAVE (1 << 19)
#define MPC52xx_PSC_SICR_CELL2XCLK (1 << 18)
#define MPC52xx_PSC_SICR_ESAI (1 << 17)
#define MPC52xx_PSC_SICR_ENAC97 (1 << 16)
#define MPC52xx_PSC_SICR_SPI (1 << 15)
#define MPC52xx_PSC_SICR_MSTR (1 << 14)
#define MPC52xx_PSC_SICR_CPOL (1 << 13)
#define MPC52xx_PSC_SICR_CPHA (1 << 12)
#define MPC52xx_PSC_SICR_USEEOF (1 << 11)
#define MPC52xx_PSC_SICR_DISABLEEOF (1 << 10)
/* Structure of the hardware registers */
struct mpc52xx_psc {
......@@ -132,8 +162,12 @@ struct mpc52xx_psc {
u8 reserved5[3];
u8 ctlr; /* PSC + 0x1c */
u8 reserved6[3];
u16 ccr; /* PSC + 0x20 */
u8 reserved7[14];
/* BitClkDiv field of CCR is byte swapped in
* the hardware for mpc5200/b compatibility */
u32 ccr; /* PSC + 0x20 */
u32 ac97_slots; /* PSC + 0x24 */
u32 ac97_cmd; /* PSC + 0x28 */
u32 ac97_data; /* PSC + 0x2c */
u8 ivr; /* PSC + 0x30 */
u8 reserved8[3];
u8 ip; /* PSC + 0x34 */
......
......@@ -38,6 +38,19 @@ extern void paging_init(void);
remap_pfn_range(vma, vaddr, pfn, size, prot)
#include <asm-generic/pgtable.h>
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
* We use it to ensure coherency between the i-cache and d-cache
* for the page which has just been mapped in.
* On machines which use an MMU hash table, we use this to put a
* corresponding HPTE into the hash table ahead of time, instead of
* waiting for the inevitable extra hash-table miss exception.
*/
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
......
......@@ -31,6 +31,7 @@ asmlinkage int sys_vfork(unsigned long p1, unsigned long p2,
unsigned long p3, unsigned long p4, unsigned long p5,
unsigned long p6, struct pt_regs *regs);
asmlinkage long sys_pipe(int __user *fildes);
asmlinkage long sys_pipe2(int __user *fildes, int flags);
asmlinkage long sys_rt_sigaction(int sig,
const struct sigaction __user *act,
struct sigaction __user *oact, size_t sigsetsize);
......
......@@ -316,3 +316,9 @@ COMPAT_SYS(fallocate)
SYSCALL(subpage_prot)
COMPAT_SYS_SPU(timerfd_settime)
COMPAT_SYS_SPU(timerfd_gettime)
COMPAT_SYS_SPU(signalfd4)
SYSCALL_SPU(eventfd2)
SYSCALL_SPU(epoll_create1)
SYSCALL_SPU(dup3)
SYSCALL_SPU(pipe2)
SYSCALL(inotify_init1)
......@@ -110,6 +110,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
#endif
extern int set_dabr(unsigned long dabr);
extern void do_dabr(struct pt_regs *regs, unsigned long address,
unsigned long error_code);
extern void print_backtrace(unsigned long *);
extern void show_regs(struct pt_regs * regs);
extern void flush_instruction_cache(void);
......
......@@ -162,16 +162,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
#endif
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
* We use it to ensure coherency between the i-cache and d-cache
* for the page which has just been mapped in.
* On machines which use an MMU hash table, we use this to put a
* corresponding HPTE into the hash table ahead of time, instead of
* waiting for the inevitable extra hash-table miss exception.
*/
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
#endif /*__KERNEL__ */
#endif /* _ASM_POWERPC_TLBFLUSH_H */
......@@ -335,10 +335,16 @@
#define __NR_subpage_prot 310
#define __NR_timerfd_settime 311
#define __NR_timerfd_gettime 312
#define __NR_signalfd4 313
#define __NR_eventfd2 314
#define __NR_epoll_create1 315
#define __NR_dup3 316
#define __NR_pipe2 317
#define __NR_inotify_init1 318
#ifdef __KERNEL__
#define __NR_syscalls 313
#define __NR_syscalls 319
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
......
......@@ -39,16 +39,32 @@
#define VIO_IRQ_DISABLE 0UL
#define VIO_IRQ_ENABLE 1UL
/*
* VIO CMO minimum entitlement for all devices and spare entitlement
*/
#define VIO_CMO_MIN_ENT 1562624
struct iommu_table;
/*
* The vio_dev structure is used to describe virtual I/O devices.
/**
* vio_dev - This structure is used to describe virtual I/O devices.
*
* @desired: set from return of driver's get_desired_dma() function
* @entitled: bytes of IO data that has been reserved for this device.
* @allocated: bytes of IO data currently in use by the device.
* @allocs_failed: number of DMA failures due to insufficient entitlement.
*/
struct vio_dev {
const char *name;
const char *type;
uint32_t unit_address;
unsigned int irq;
struct {
size_t desired;
size_t entitled;
size_t allocated;
atomic_t allocs_failed;
} cmo;
struct device dev;
};
......@@ -56,12 +72,19 @@ struct vio_driver {
const struct vio_device_id *id_table;
int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
int (*remove)(struct vio_dev *dev);
/* A driver must have a get_desired_dma() function to
* be loaded in a CMO environment if it uses DMA.
*/
unsigned long (*get_desired_dma)(struct vio_dev *dev);
struct device_driver driver;
};
extern int vio_register_driver(struct vio_driver *drv);
extern void vio_unregister_driver(struct vio_driver *drv);
extern int vio_cmo_entitlement_update(size_t);
extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired);
extern void __devinit vio_unregister_device(struct vio_dev *dev);
struct device_node;
......
......@@ -26,9 +26,13 @@
#define AT_SECURE 23 /* secure mode boolean */
#define AT_BASE_PLATFORM 24 /* string identifying real platform, may
* differ from AT_PLATFORM. */
#define AT_EXECFN 31 /* filename of program */
#ifdef __KERNEL__
#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */
#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
/* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment