Commit fea891b3 authored by David Mosberger's avatar David Mosberger

Merge tiger.hpl.hp.com:/data1/bk/vanilla/linux-2.5

into tiger.hpl.hp.com:/data1/bk/lia64/to-linus-2.5
parents c43626f4 6d9cf7b6
......@@ -400,7 +400,7 @@ CONFIG_ITANIUM
Select your IA64 processor type. The default is Intel Itanium.
CONFIG_MCKINLEY
Select this to configure for a McKinley processor.
Select this to configure for an Itanium 2 (McKinley) processor.
CONFIG_IA64_GENERIC
This selects the system type of your hardware. A "generic" kernel
......
......@@ -69,13 +69,6 @@ ifdef CONFIG_IA64_SGI_SN
$(CORE_FILES)
endif
ifdef CONFIG_IA64_SOFTSDV
SUBDIRS := arch/$(ARCH)/dig \
$(SUBDIRS)
CORE_FILES := arch/$(ARCH)/dig/dig.a \
$(CORE_FILES)
endif
ifdef CONFIG_IA64_DIG
SUBDIRS := arch/$(ARCH)/dig \
$(SUBDIRS)
......
......@@ -16,7 +16,7 @@ define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
choice 'IA-64 processor type' \
"Itanium CONFIG_ITANIUM \
McKinley CONFIG_MCKINLEY" Itanium
Itanium-2 CONFIG_MCKINLEY" Itanium
choice 'IA-64 system type' \
"generic CONFIG_IA64_GENERIC \
......@@ -26,11 +26,18 @@ choice 'IA-64 system type' \
SGI-SN1 CONFIG_IA64_SGI_SN1 \
SGI-SN2 CONFIG_IA64_SGI_SN2" generic
choice 'Kernel page size' \
if [ "$CONFIG_ITANIUM" = "y" ]; then
choice 'Kernel page size' \
"4KB CONFIG_IA64_PAGE_SIZE_4KB \
8KB CONFIG_IA64_PAGE_SIZE_8KB \
16KB CONFIG_IA64_PAGE_SIZE_16KB" 16KB
else
choice 'Kernel page size' \
"4KB CONFIG_IA64_PAGE_SIZE_4KB \
8KB CONFIG_IA64_PAGE_SIZE_8KB \
16KB CONFIG_IA64_PAGE_SIZE_16KB \
64KB CONFIG_IA64_PAGE_SIZE_64KB" 16KB
endif
if [ "$CONFIG_IA64_HP_SIM" = "n" ]; then
define_bool CONFIG_ACPI y
......
......@@ -12,17 +12,3 @@ export-objs := sba_iommu.o
obj-y := sba_iommu.o
include $(TOPDIR)/Rules.make
#
# ia64/platform/hp/common/Makefile
#
# Copyright (C) 2002 Hewlett Packard
# Copyright (C) Alex Williamson (alex_williamson@hp.com)
#
O_TARGET := common.o
export-objs := sba_iommu.o
obj-y := sba_iommu.o
include $(TOPDIR)/Rules.make
......@@ -1389,6 +1389,12 @@ sba_dma_address (struct scatterlist *sg)
return ((unsigned long)sba_sg_iova(sg));
}
int
sba_dma_supported (struct pci_dev *dev, u64 mask)
{
return 1;
}
/**************************************************************
*
* Initialization and claim
......@@ -1858,5 +1864,6 @@ EXPORT_SYMBOL(sba_unmap_single);
EXPORT_SYMBOL(sba_map_sg);
EXPORT_SYMBOL(sba_unmap_sg);
EXPORT_SYMBOL(sba_dma_address);
EXPORT_SYMBOL(sba_dma_supported);
EXPORT_SYMBOL(sba_alloc_consistent);
EXPORT_SYMBOL(sba_free_consistent);
......@@ -7,9 +7,9 @@
* case means sys_sim.c console (goes via the simulator). The code hereafter
* is completely leveraged from the serial.c driver.
*
* Copyright (C) 1999-2000 Hewlett-Packard Co
* Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 02/04/00 D. Mosberger Merged in serial.c bug fixes in rs_close().
* 02/25/00 D. Mosberger Synced up with 2.3.99pre-5 version of serial.c.
......@@ -24,7 +24,7 @@
#include <linux/major.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/malloc.h>
#include <linux/slab.h>
#include <linux/console.h>
#include <linux/module.h>
#include <linux/serial.h>
......
......@@ -11,16 +11,3 @@ obj-y := hpzx1_misc.o
obj-$(CONFIG_IA64_GENERIC) += hpzx1_machvec.o
include $(TOPDIR)/Rules.make
#
# ia64/platform/hp/zx1/Makefile
#
# Copyright (C) 2002 Hewlett Packard
# Copyright (C) Alex Williamson (alex_williamson@hp.com)
#
O_TARGET := zx1.o
obj-y := hpzx1_misc.o
obj-$(CONFIG_IA64_GENERIC) += hpzx1_machvec.o
include $(TOPDIR)/Rules.make
......@@ -198,7 +198,6 @@ extern acpi_status acpi_get_crs(acpi_handle, acpi_buffer *);
extern acpi_resource *acpi_get_crs_next(acpi_buffer *, int *);
extern acpi_resource_data *acpi_get_crs_type(acpi_buffer *, int *, int);
extern void acpi_dispose_crs(acpi_buffer *);
extern acpi_status acpi_cf_evaluate_method(acpi_handle, UINT8 *, NATIVE_UINT *);
static acpi_status
hp_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
......@@ -388,407 +387,7 @@ hpzx1_acpi_dev_init(void)
}
extern void sba_init(void);
void
hpzx1_pci_fixup (int phase)
{
if (phase == 0)
hpzx1_acpi_dev_init();
iosapic_pci_fixup(phase);
if (phase == 1)
sba_init();
}
/*
* Misc. support for HP zx1 chipset support
*
* Copyright (C) 2002 Hewlett-Packard Co
* Copyright (C) 2002 Alex Williamson <alex_williamson@hp.com>
* Copyright (C) 2002 Bjorn Helgaas <bjorn_helgaas@hp.com>
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/acpi.h>
#include <asm/iosapic.h>
#include <asm/efi.h>
#include "../drivers/acpi/include/platform/acgcc.h"
#include "../drivers/acpi/include/actypes.h"
#include "../drivers/acpi/include/acexcep.h"
#include "../drivers/acpi/include/acpixf.h"
#include "../drivers/acpi/include/actbl.h"
#include "../drivers/acpi/include/acconfig.h"
#include "../drivers/acpi/include/acmacros.h"
#include "../drivers/acpi/include/aclocal.h"
#include "../drivers/acpi/include/acobject.h"
#include "../drivers/acpi/include/acstruct.h"
#include "../drivers/acpi/include/acnamesp.h"
#include "../drivers/acpi/include/acutils.h"
#define PFX "hpzx1: "
struct fake_pci_dev {
struct fake_pci_dev *next;
unsigned char bus;
unsigned int devfn;
int sizing; // in middle of BAR sizing operation?
unsigned long csr_base;
unsigned int csr_size;
unsigned long mapped_csrs; // ioremapped
};
static struct fake_pci_dev *fake_pci_head, **fake_pci_tail = &fake_pci_head;
static struct pci_ops orig_pci_ops;
static inline struct fake_pci_dev *
fake_pci_find_slot(unsigned char bus, unsigned int devfn)
{
struct fake_pci_dev *dev;
for (dev = fake_pci_head; dev; dev = dev->next)
if (dev->bus == bus && dev->devfn == devfn)
return dev;
return NULL;
}
static struct fake_pci_dev *
alloc_fake_pci_dev(void)
{
struct fake_pci_dev *dev;
dev = kmalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return NULL;
memset(dev, 0, sizeof(*dev));
*fake_pci_tail = dev;
fake_pci_tail = &dev->next;
return dev;
}
#define HP_CFG_RD(sz, bits, name) \
static int hp_cfg_read##sz (struct pci_dev *dev, int where, u##bits *value) \
{ \
struct fake_pci_dev *fake_dev; \
if (!(fake_dev = fake_pci_find_slot(dev->bus->number, dev->devfn))) \
return orig_pci_ops.name(dev, where, value); \
\
switch (where) { \
case PCI_COMMAND: \
*value = read##sz(fake_dev->mapped_csrs + where); \
*value |= PCI_COMMAND_MEMORY; /* SBA omits this */ \
break; \
case PCI_BASE_ADDRESS_0: \
if (fake_dev->sizing) \
*value = ~(fake_dev->csr_size - 1); \
else \
*value = (fake_dev->csr_base & \
PCI_BASE_ADDRESS_MEM_MASK) | \
PCI_BASE_ADDRESS_SPACE_MEMORY; \
fake_dev->sizing = 0; \
break; \
default: \
*value = read##sz(fake_dev->mapped_csrs + where); \
break; \
} \
return PCIBIOS_SUCCESSFUL; \
}
#define HP_CFG_WR(sz, bits, name) \
static int hp_cfg_write##sz (struct pci_dev *dev, int where, u##bits value) \
{ \
struct fake_pci_dev *fake_dev; \
if (!(fake_dev = fake_pci_find_slot(dev->bus->number, dev->devfn))) \
return orig_pci_ops.name(dev, where, value); \
\
switch (where) { \
case PCI_BASE_ADDRESS_0: \
if (value == ~0) \
fake_dev->sizing = 1; \
break; \
default: \
write##sz(value, fake_dev->mapped_csrs + where); \
break; \
} \
return PCIBIOS_SUCCESSFUL; \
}
HP_CFG_RD(b, 8, read_byte)
HP_CFG_RD(w, 16, read_word)
HP_CFG_RD(l, 32, read_dword)
HP_CFG_WR(b, 8, write_byte)
HP_CFG_WR(w, 16, write_word)
HP_CFG_WR(l, 32, write_dword)
static struct pci_ops hp_pci_conf = {
hp_cfg_readb,
hp_cfg_readw,
hp_cfg_readl,
hp_cfg_writeb,
hp_cfg_writew,
hp_cfg_writel,
};
/*
* Assume we'll never have a physical slot higher than 0x10, so we can
* use slots above that for "fake" PCI devices to represent things
* that only show up in the ACPI namespace.
*/
#define HP_MAX_SLOT 0x10
static struct fake_pci_dev *
hpzx1_fake_pci_dev(unsigned long addr, unsigned int bus, unsigned int size)
{
struct fake_pci_dev *dev;
int slot;
// Note: lspci thinks 0x1f is invalid
for (slot = 0x1e; slot > HP_MAX_SLOT; slot--) {
if (!fake_pci_find_slot(bus, PCI_DEVFN(slot, 0)))
break;
}
if (slot == HP_MAX_SLOT) {
printk(KERN_ERR PFX
"no slot space for device (0x%p) on bus 0x%02x\n",
(void *) addr, bus);
return NULL;
}
dev = alloc_fake_pci_dev();
if (!dev) {
printk(KERN_ERR PFX
"no memory for device (0x%p) on bus 0x%02x\n",
(void *) addr, bus);
return NULL;
}
dev->bus = bus;
dev->devfn = PCI_DEVFN(slot, 0);
dev->csr_base = addr;
dev->csr_size = size;
/*
* Drivers should ioremap what they need, but we have to do
* it here, too, so PCI config accesses work.
*/
dev->mapped_csrs = (unsigned long) ioremap(dev->csr_base, dev->csr_size);
return dev;
}
typedef struct {
u8 guid_id;
u8 guid[16];
u8 csr_base[8];
u8 csr_length[8];
} acpi_hp_vendor_long;
#define HP_CCSR_LENGTH 0x21
#define HP_CCSR_TYPE 0x2
#define HP_CCSR_GUID \
((efi_guid_t) { 0x69e9adf9, 0x924f, 0xab5f, { 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad }})
extern acpi_status acpi_get_crs(acpi_handle, acpi_buffer *);
extern acpi_resource *acpi_get_crs_next(acpi_buffer *, int *);
extern acpi_resource_data *acpi_get_crs_type(acpi_buffer *, int *, int);
extern void acpi_dispose_crs(acpi_buffer *);
extern acpi_status acpi_cf_evaluate_method(acpi_handle, UINT8 *, NATIVE_UINT *);
static acpi_status
hp_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
{
int i, offset = 0;
acpi_status status;
acpi_buffer buf;
acpi_resource_vendor *res;
acpi_hp_vendor_long *hp_res;
efi_guid_t vendor_guid;
*csr_base = 0;
*csr_length = 0;
status = acpi_get_crs(obj, &buf);
if (status != AE_OK) {
printk(KERN_ERR PFX "Unable to get _CRS data on object\n");
return status;
}
res = (acpi_resource_vendor *)acpi_get_crs_type(&buf, &offset, ACPI_RSTYPE_VENDOR);
if (!res) {
printk(KERN_ERR PFX "Failed to find config space for device\n");
acpi_dispose_crs(&buf);
return AE_NOT_FOUND;
}
hp_res = (acpi_hp_vendor_long *)(res->reserved);
if (res->length != HP_CCSR_LENGTH || hp_res->guid_id != HP_CCSR_TYPE) {
printk(KERN_ERR PFX "Unknown Vendor data\n");
acpi_dispose_crs(&buf);
return AE_TYPE; /* Revisit error? */
}
memcpy(&vendor_guid, hp_res->guid, sizeof(efi_guid_t));
if (efi_guidcmp(vendor_guid, HP_CCSR_GUID) != 0) {
printk(KERN_ERR PFX "Vendor GUID does not match\n");
acpi_dispose_crs(&buf);
return AE_TYPE; /* Revisit error? */
}
for (i = 0 ; i < 8 ; i++) {
*csr_base |= ((u64)(hp_res->csr_base[i]) << (i * 8));
*csr_length |= ((u64)(hp_res->csr_length[i]) << (i * 8));
}
acpi_dispose_crs(&buf);
return AE_OK;
}
static acpi_status
hpzx1_sba_probe(acpi_handle obj, u32 depth, void *context, void **ret)
{
u64 csr_base = 0, csr_length = 0;
char *name = context;
struct fake_pci_dev *dev;
acpi_status status;
status = hp_csr_space(obj, &csr_base, &csr_length);
if (status != AE_OK)
return status;
/*
* Only SBA shows up in ACPI namespace, so its CSR space
* includes both SBA and IOC. Make SBA and IOC show up
* separately in PCI space.
*/
if ((dev = hpzx1_fake_pci_dev(csr_base, 0, 0x1000)))
printk(KERN_INFO PFX "%s SBA at 0x%lx; pci dev %02x:%02x.%d\n",
name, csr_base, dev->bus,
PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
if ((dev = hpzx1_fake_pci_dev(csr_base + 0x1000, 0, 0x1000)))
printk(KERN_INFO PFX "%s IOC at 0x%lx; pci dev %02x:%02x.%d\n",
name, csr_base + 0x1000, dev->bus,
PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
return AE_OK;
}
static acpi_status
hpzx1_lba_probe(acpi_handle obj, u32 depth, void *context, void **ret)
{
acpi_status status;
u64 csr_base = 0, csr_length = 0;
char *name = context;
NATIVE_UINT busnum = 0;
struct fake_pci_dev *dev;
status = hp_csr_space(obj, &csr_base, &csr_length);
if (status != AE_OK)
return status;
status = acpi_cf_evaluate_method(obj, METHOD_NAME__BBN, &busnum);
if (ACPI_FAILURE(status)) {
printk(KERN_ERR PFX "evaluate _BBN fail=0x%x\n", status);
busnum = 0; // no _BBN; stick it on bus 0
}
if ((dev = hpzx1_fake_pci_dev(csr_base, busnum, csr_length)))
printk(KERN_INFO PFX "%s LBA at 0x%lx, _BBN 0x%02x; "
"pci dev %02x:%02x.%d\n",
name, csr_base, busnum, dev->bus,
PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
return AE_OK;
}
static void
hpzx1_acpi_dev_init(void)
{
extern struct pci_ops pci_conf;
/*
* Make fake PCI devices for the following hardware in the
* ACPI namespace. This makes it more convenient for drivers
* because they can claim these devices based on PCI
* information, rather than needing to know about ACPI. The
* 64-bit "HPA" space for this hardware is available as BAR
* 0/1.
*
* HWP0001: Single IOC SBA w/o IOC in namespace
* HWP0002: LBA device
* HWP0003: AGP LBA device
*/
acpi_get_devices("HWP0001", hpzx1_sba_probe, "HWP0001", NULL);
#ifdef CONFIG_IA64_HP_PROTO
if (fake_pci_tail != &fake_pci_head) {
#endif
acpi_get_devices("HWP0002", hpzx1_lba_probe, "HWP0002", NULL);
acpi_get_devices("HWP0003", hpzx1_lba_probe, "HWP0003", NULL);
#ifdef CONFIG_IA64_HP_PROTO
}
#define ZX1_FUNC_ID_VALUE (PCI_DEVICE_ID_HP_ZX1_SBA << 16) | PCI_VENDOR_ID_HP
/*
* Early protos don't have bridges in the ACPI namespace, so
* if we didn't find anything, add the things we know are
* there.
*/
if (fake_pci_tail == &fake_pci_head) {
u64 hpa, csr_base;
struct fake_pci_dev *dev;
csr_base = 0xfed00000UL;
hpa = (u64) ioremap(csr_base, 0x1000);
if (__raw_readl(hpa) == ZX1_FUNC_ID_VALUE) {
if ((dev = hpzx1_fake_pci_dev(csr_base, 0, 0x1000)))
printk(KERN_INFO PFX "HWP0001 SBA at 0x%lx; "
"pci dev %02x:%02x.%d\n", csr_base,
dev->bus, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
if ((dev = hpzx1_fake_pci_dev(csr_base + 0x1000, 0,
0x1000)))
printk(KERN_INFO PFX "HWP0001 IOC at 0x%lx; "
"pci dev %02x:%02x.%d\n",
csr_base + 0x1000,
dev->bus, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
csr_base = 0xfed24000UL;
iounmap(hpa);
hpa = (u64) ioremap(csr_base, 0x1000);
if ((dev = hpzx1_fake_pci_dev(csr_base, 0x40, 0x1000)))
printk(KERN_INFO PFX "HWP0003 AGP LBA at "
"0x%lx; pci dev %02x:%02x.%d\n",
csr_base,
dev->bus, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
}
iounmap(hpa);
}
#endif
if (fake_pci_tail == &fake_pci_head)
return;
/*
* Replace PCI ops, but only if we made fake devices.
*/
orig_pci_ops = pci_conf;
pci_conf = hp_pci_conf;
}
extern void sba_init(void);
void
hpzx1_pci_fixup (int phase)
{
......
......@@ -660,10 +660,10 @@ acpi_get_prt (struct pci_vector_struct **vectors, int *count)
list_for_each(node, &acpi_prts.entries) {
entry = (struct acpi_prt_entry *)node;
vector[i].bus = (u16) entry->id.bus;
vector[i].pci_id = (u32) entry->id.dev << 16 | 0xffff;
vector[i].pin = (u8) entry->id.pin;
vector[i].irq = (u8) entry->source.index;
vector[i].bus = entry->id.bus;
vector[i].pci_id = ((u32) entry->id.dev) << 16 | 0xffff;
vector[i].pin = entry->id.pin;
vector[i].irq = entry->source.index;
i++;
}
*count = acpi_prts.count;
......
......@@ -5,7 +5,7 @@
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999-2001 Hewlett-Packard Co.
* Copyright (C) 1999-2002 Hewlett-Packard Co.
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
*
......@@ -212,8 +212,8 @@ efi_map_pal_code (void)
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
u64 efi_desc_size;
int pal_code_count=0;
u64 mask, flags;
int pal_code_count = 0;
u64 mask, psr;
u64 vaddr;
efi_map_start = __va(ia64_boot_param->efi_memmap);
......@@ -266,10 +266,10 @@ efi_map_pal_code (void)
/*
* Cannot write to CRx with PSR.ic=1
*/
ia64_clear_ic(flags);
psr = ia64_clear_ic();
ia64_itr(0x1, IA64_TR_PALCODE, vaddr & mask,
pte_val(pfn_pte(md->phys_addr >> PAGE_SHIFT, PAGE_KERNEL)), IA64_GRANULE_SHIFT);
local_irq_restore(flags);
ia64_set_psr(psr);
ia64_srlz_i();
}
}
......@@ -485,7 +485,7 @@ efi_get_iobase (void)
}
u32
efi_mem_type (u64 phys_addr)
efi_mem_type (unsigned long phys_addr)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
......@@ -506,7 +506,7 @@ efi_mem_type (u64 phys_addr)
}
u64
efi_mem_attributes (u64 phys_addr)
efi_mem_attributes (unsigned long phys_addr)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
......
......@@ -53,23 +53,21 @@ GLOBAL_ENTRY(efi_call_phys)
mov loc4=ar.rsc // save RSE configuration
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
;;
ld8 gp=[in0] // load EFI function's global pointer
mov out0=in1
mov out1=in2
movl r16=PSR_BITS_TO_CLEAR
mov loc3=psr // save processor status word
movl r17=PSR_BITS_TO_SET
;;
mov out2=in3
or loc3=loc3,r17
mov b6=r2
;;
andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
mov out3=in4
br.call.sptk.many rp=ia64_switch_mode
.ret0: mov out4=in5
mov out0=in1
mov out1=in2
mov out2=in3
mov out3=in4
mov out5=in6
mov out6=in7
br.call.sptk.many rp=b6 // call the EFI function
......
......@@ -13,7 +13,7 @@
#include <asm/unistd.h>
#include <asm/page.h>
.section .text.gate,"ax"
.section .text.gate, "ax"
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET)
......@@ -108,7 +108,7 @@ back_from_setup_rbs:
dep r8=0,r8,38,26 // clear EC0, CPL0 and reserved bits
adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
;;
.spillsp ar.pfs, CFM_OFF
.spillsp ar.pfs, CFM_OFF+SIGCONTEXT_OFF
st8 [base0]=r8 // save CFM0
adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
;;
......
......@@ -24,6 +24,7 @@
* /proc/irq/#/smp_affinity
* 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
* 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
* 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping error
*/
/*
* Here is what the interrupt logic between a PCI device and the CPU looks like:
......@@ -112,7 +113,7 @@ find_iosapic (unsigned int irq)
int i;
for (i = 0; i < num_iosapic; i++) {
if ((irq - iosapic_lists[i].base_irq) < iosapic_lists[i].max_pin)
if ((unsigned) (irq - iosapic_lists[i].base_irq) <= iosapic_lists[i].max_pin)
return i;
}
......@@ -138,7 +139,7 @@ iosapic_irq_to_vector (int irq)
* Map PCI pin to the corresponding IA-64 interrupt vector. If no such mapping exists,
* return -1.
*/
static int
int
pci_pin_to_vector (int bus, int slot, int pci_pin)
{
struct pci_vector_struct *r;
......
......@@ -1197,7 +1197,7 @@ static void register_irq_proc (unsigned int irq)
{
char name [MAX_NAMELEN];
if (!root_irq_dir || (irq_desc(irq)->handler == &no_irq_type))
if (!root_irq_dir || (irq_desc(irq)->handler == &no_irq_type) || irq_dir[irq])
return;
memset(name, 0, MAX_NAMELEN);
......
......@@ -216,7 +216,7 @@ GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
mov out3 = in3 // copy arg3
;;
mov loc3 = psr // save psr
;;
;;
mov loc4=ar.rsc // save RSE configuration
dep.z loc2=loc2,0,61 // convert pal entry point to physical
;;
......
......@@ -23,6 +23,7 @@
#include <linux/vmalloc.h>
#include <linux/wrapper.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <asm/bitops.h>
#include <asm/errno.h>
......@@ -38,11 +39,11 @@
#ifdef CONFIG_PERFMON
/*
* For PMUs which rely on the debug registers for some features, you
* must enable the following flag to activate the support for
* For PMUs which rely on the debug registers for some features, you must
* you must enable the following flag to activate the support for
* accessing the registers via the perfmonctl() interface.
*/
#ifdef CONFIG_ITANIUM
#if defined(CONFIG_ITANIUM) || defined(CONFIG_MCKINLEY)
#define PFM_PMU_USES_DBR 1
#endif
......@@ -68,26 +69,27 @@
#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_soft_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
#define PFM_FL_INHERIT_MASK (PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)
/* i assume unsigned */
#define PMC_IS_IMPL(i) (i<pmu_conf.num_pmcs && pmu_conf.impl_regs[i>>6] & (1UL<< (i) %64))
#define PMD_IS_IMPL(i) (i<pmu_conf.num_pmds && pmu_conf.impl_regs[4+(i>>6)] & (1UL<<(i) % 64))
#define PMD_IS_COUNTING(i) (i >=0 && i < 256 && pmu_conf.counter_pmds[i>>6] & (1UL <<(i) % 64))
#define PMC_IS_COUNTING(i) PMD_IS_COUNTING(i)
/* XXX: these three assume that register i is implemented */
#define PMD_IS_COUNTING(i) (pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) (pmu_conf.pmc_desc[i].type == PFM_REG_COUNTING)
#define PMC_IS_MONITOR(c) (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR)
/* k assume unsigned */
#define IBR_IS_IMPL(k) (k<pmu_conf.num_ibrs)
#define DBR_IS_IMPL(k) (k<pmu_conf.num_dbrs)
#define PMC_IS_BTB(a) (((pfm_monitor_t *)(a))->pmc_es == PMU_BTB_EVENT)
#define LSHIFT(x) (1UL<<(x))
#define PMM(x) LSHIFT(x)
#define PMC_IS_MONITOR(c) ((pmu_conf.monitor_pmcs[0] & PMM((c))) != 0)
#define CTX_IS_ENABLED(c) ((c)->ctx_flags.state == PFM_CTX_ENABLED)
#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
#define CTX_INHERIT_MODE(c) ((c)->ctx_fl_inherit)
#define CTX_HAS_SMPL(c) ((c)->ctx_psb != NULL)
#define CTX_USED_PMD(ctx,n) (ctx)->ctx_used_pmds[(n)>>6] |= 1UL<< ((n) % 64)
/* XXX: does not support more than 64 PMDs */
#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
......@@ -109,12 +111,18 @@
*/
#define DBprintk(a) \
do { \
if (pfm_debug_mode >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
if (pfm_sysctl.debug >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
} while (0)
#define DBprintk_ovfl(a) \
do { \
if (pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
} while (0)
/*
* These are some helpful architected PMC and IBR/DBR register layouts
* Architected PMC structure
*/
typedef struct {
unsigned long pmc_plm:4; /* privilege level mask */
......@@ -139,41 +147,40 @@ typedef struct {
typedef struct _pfm_smpl_buffer_desc {
spinlock_t psb_lock; /* protection lock */
unsigned long psb_refcnt; /* how many users for the buffer */
int psb_flags; /* bitvector of flags */
int psb_flags; /* bitvector of flags (not yet used) */
void *psb_addr; /* points to location of first entry */
unsigned long psb_entries; /* maximum number of entries */
unsigned long psb_size; /* aligned size of buffer */
unsigned long psb_index; /* next free entry slot XXX: must use the one in buffer */
unsigned long psb_entry_size; /* size of each entry including entry header */
perfmon_smpl_hdr_t *psb_hdr; /* points to sampling buffer header */
struct _pfm_smpl_buffer_desc *psb_next; /* next psb, used for rvfreeing of psb_hdr */
} pfm_smpl_buffer_desc_t;
/*
* psb_flags
*/
#define PSB_HAS_VMA 0x1 /* a virtual mapping for the buffer exists */
#define LOCK_PSB(p) spin_lock(&(p)->psb_lock)
#define UNLOCK_PSB(p) spin_unlock(&(p)->psb_lock)
#define PFM_PSB_VMA 0x1 /* a VMA is describing the buffer */
/*
* This structure is initialized at boot time and contains
* a description of the PMU main characteristic as indicated
* by PAL
* The possible type of a PMU register
*/
typedef struct {
unsigned long pfm_is_disabled; /* indicates if perfmon is working properly */
unsigned long perf_ovfl_val; /* overflow value for generic counters */
unsigned long max_counters; /* upper limit on counter pair (PMC/PMD) */
unsigned long num_pmcs ; /* highest PMC implemented (may have holes) */
unsigned long num_pmds; /* highest PMD implemented (may have holes) */
unsigned long impl_regs[16]; /* buffer used to hold implememted PMC/PMD mask */
unsigned long num_ibrs; /* number of instruction debug registers */
unsigned long num_dbrs; /* number of data debug registers */
unsigned long monitor_pmcs[4]; /* which pmc are controlling monitors */
unsigned long counter_pmds[4]; /* which pmd are used as counters */
} pmu_config_t;
typedef enum {
PFM_REG_NOTIMPL, /* not implemented */
PFM_REG_NONE, /* end marker */
PFM_REG_MONITOR, /* a PMC with a pmc.pm field only */
PFM_REG_COUNTING,/* a PMC with a pmc.pm AND pmc.oi, a PMD used as a counter */
PFM_REG_CONTROL, /* PMU control register */
PFM_REG_CONFIG, /* refine configuration */
PFM_REG_BUFFER /* PMD used as buffer */
} pfm_pmu_reg_type_t;
/*
* 64-bit software counter structure
......@@ -221,9 +228,11 @@ typedef struct pfm_context {
struct semaphore ctx_restart_sem; /* use for blocking notification mode */
unsigned long ctx_used_pmds[4]; /* bitmask of used PMD (speedup ctxsw) */
unsigned long ctx_saved_pmcs[4]; /* bitmask of PMC to save on ctxsw */
unsigned long ctx_reload_pmcs[4]; /* bitmask of PMC to reload on ctxsw (SMP) */
unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */
unsigned long ctx_reload_pmds[4]; /* bitmask of PMD to reload on ctxsw */
unsigned long ctx_used_pmcs[4]; /* bitmask PMC used by context */
unsigned long ctx_reload_pmcs[4]; /* bitmask of PMC to reload on ctxsw */
unsigned long ctx_used_ibrs[4]; /* bitmask of used IBR (speedup ctxsw) */
unsigned long ctx_used_dbrs[4]; /* bitmask of used DBR (speedup ctxsw) */
......@@ -235,6 +244,7 @@ typedef struct pfm_context {
unsigned long ctx_cpu; /* cpu to which perfmon is applied (system wide) */
atomic_t ctx_saving_in_progress; /* flag indicating actual save in progress */
atomic_t ctx_is_busy; /* context accessed by overflow handler */
atomic_t ctx_last_cpu; /* CPU id of current or last CPU used */
} pfm_context_t;
......@@ -250,15 +260,53 @@ typedef struct pfm_context {
* mostly used to synchronize between system wide and per-process
*/
typedef struct {
spinlock_t pfs_lock; /* lock the structure */
spinlock_t pfs_lock; /* lock the structure */
unsigned long pfs_task_sessions; /* number of per task sessions */
unsigned long pfs_sys_sessions; /* number of per system wide sessions */
unsigned long pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
unsigned long pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
unsigned long pfs_task_sessions; /* number of per task sessions */
unsigned long pfs_sys_sessions; /* number of per system wide sessions */
unsigned long pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
unsigned long pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
} pfm_session_t;
/*
* information about a PMC or PMD.
* dep_pmd[]: a bitmask of dependent PMD registers
* dep_pmc[]: a bitmask of dependent PMC registers
*/
typedef struct {
pfm_pmu_reg_type_t type;
int pm_pos;
int (*read_check)(struct task_struct *task, unsigned int cnum, unsigned long *val);
int (*write_check)(struct task_struct *task, unsigned int cnum, unsigned long *val);
unsigned long dep_pmd[4];
unsigned long dep_pmc[4];
} pfm_reg_desc_t;
/* assume cnum is a valid monitor */
#define PMC_PM(cnum, val) (((val) >> (pmu_conf.pmc_desc[cnum].pm_pos)) & 0x1)
#define PMC_WR_FUNC(cnum) (pmu_conf.pmc_desc[cnum].write_check)
#define PMD_WR_FUNC(cnum) (pmu_conf.pmd_desc[cnum].write_check)
#define PMD_RD_FUNC(cnum) (pmu_conf.pmd_desc[cnum].read_check)
/*
* This structure is initialized at boot time and contains
* a description of the PMU main characteristic as indicated
* by PAL along with a list of inter-registers dependencies and configurations.
*/
typedef struct {
unsigned long pfm_is_disabled; /* indicates if perfmon is working properly */
unsigned long perf_ovfl_val; /* overflow value for generic counters */
unsigned long max_counters; /* upper limit on counter pair (PMC/PMD) */
unsigned long num_pmcs ; /* highest PMC implemented (may have holes) */
unsigned long num_pmds; /* highest PMD implemented (may have holes) */
unsigned long impl_regs[16]; /* buffer used to hold implememted PMC/PMD mask */
unsigned long num_ibrs; /* number of instruction debug registers */
unsigned long num_dbrs; /* number of data debug registers */
pfm_reg_desc_t *pmc_desc; /* detailed PMC register descriptions */
pfm_reg_desc_t *pmd_desc; /* detailed PMD register descriptions */
} pmu_config_t;
/*
* structure used to pass argument to/from remote CPU
* using IPI to check and possibly save the PMU context on SMP systems.
......@@ -301,22 +349,52 @@ typedef struct {
#define PFM_CMD_NARG(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg)
#define PFM_CMD_ARG_SIZE(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize)
typedef struct {
int debug; /* turn on/off debugging via syslog */
int debug_ovfl; /* turn on/off debug printk in overflow handler */
int fastctxsw; /* turn on/off fast (unsecure) ctxsw */
} pfm_sysctl_t;
typedef struct {
unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
unsigned long pfm_recorded_samples_count;
unsigned long pfm_full_smpl_buffer_count; /* how many times the sampling buffer was full */
} pfm_stats_t;
/*
* perfmon internal variables
*/
static pmu_config_t pmu_conf; /* PMU configuration */
static int pfm_debug_mode; /* 0= nodebug, >0= debug output on */
static pfm_session_t pfm_sessions; /* global sessions information */
static struct proc_dir_entry *perfmon_dir; /* for debug only */
static unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
static unsigned long pfm_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
static unsigned long pfm_recorded_samples_count;
static pfm_stats_t pfm_stats;
int __per_cpu_data pfm_syst_wide;
static int __per_cpu_data pfm_dcr_pp;
/* sysctl() controls */
static pfm_sysctl_t pfm_sysctl;
static ctl_table pfm_ctl_table[]={
{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
{ 0, },
};
static ctl_table pfm_sysctl_dir[] = {
{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
{0,},
};
static ctl_table pfm_sysctl_root[] = {
{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
{0,},
};
static struct ctl_table_header *pfm_sysctl_header;
static unsigned long reset_pmcs[IA64_NUM_PMC_REGS]; /* contains PAL reset values for PMCS */
static void pfm_vm_close(struct vm_area_struct * area);
static struct vm_operations_struct pfm_vm_ops={
close: pfm_vm_close
};
......@@ -339,6 +417,14 @@ static void pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx
#endif
static void pfm_lazy_save_regs (struct task_struct *ta);
#if defined(CONFIG_ITANIUM)
#include "perfmon_itanium.h"
#elif defined(CONFIG_MCKINLEY)
#include "perfmon_mckinley.h"
#else
#include "perfmon_generic.h"
#endif
static inline unsigned long
pfm_read_soft_counter(pfm_context_t *ctx, int i)
{
......@@ -353,7 +439,7 @@ pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
* writing to unimplemented part is ignore, so we do not need to
* mask off top part
*/
ia64_set_pmd(i, val);
ia64_set_pmd(i, val & pmu_conf.perf_ovfl_val);
}
/*
......@@ -388,7 +474,8 @@ pfm_get_stamp(void)
}
/* Here we want the physical address of the memory.
* This is used when initializing the contents of the area.
* This is used when initializing the contents of the
* area and marking the pages as reserved.
*/
static inline unsigned long
pfm_kvirt_to_pa(unsigned long adr)
......@@ -398,7 +485,6 @@ pfm_kvirt_to_pa(unsigned long adr)
return pa;
}
static void *
pfm_rvmalloc(unsigned long size)
{
......@@ -473,7 +559,7 @@ pfm_vm_close(struct vm_area_struct *vma)
*
* This function cannot remove the buffer from here, because exit_mmap() must first
* complete. Given that there is no other vma related callback in the generic code,
* we have created on own with the linked list of sampling buffer to free which
* we have created our own with the linked list of sampling buffers to free. The list
* is part of the thread structure. In release_thread() we check if the list is
* empty. If not we call into perfmon to free the buffer and psb. That is the only
* way to ensure a safe deallocation of the sampling buffer which works when
......@@ -489,16 +575,15 @@ pfm_vm_close(struct vm_area_struct *vma)
psb->psb_next = current->thread.pfm_smpl_buf_list;
current->thread.pfm_smpl_buf_list = psb;
DBprintk(("psb for [%d] smpl @%p size %ld inserted into list\n",
current->pid, psb->psb_hdr, psb->psb_size));
DBprintk(("[%d] add smpl @%p size %lu to smpl_buf_list psb_flags=0x%x\n",
current->pid, psb->psb_hdr, psb->psb_size, psb->psb_flags));
}
DBprintk(("psb vma flag cleared for [%d] smpl @%p size %ld inserted into list\n",
current->pid, psb->psb_hdr, psb->psb_size));
DBprintk(("[%d] clearing psb_flags=0x%x smpl @%p size %lu\n",
current->pid, psb->psb_flags, psb->psb_hdr, psb->psb_size));
/*
* indicate to pfm_context_exit() that the vma has been removed.
* decrement the number vma for the buffer
*/
psb->psb_flags &= ~PFM_PSB_VMA;
psb->psb_flags &= ~PSB_HAS_VMA;
UNLOCK_PSB(psb);
}
......@@ -521,7 +606,7 @@ pfm_remove_smpl_mapping(struct task_struct *task)
printk("perfmon: invalid context mm=%p\n", task->mm);
return -1;
}
psb = ctx->ctx_psb;
psb = ctx->ctx_psb;
down_write(&task->mm->mmap_sem);
......@@ -532,14 +617,9 @@ pfm_remove_smpl_mapping(struct task_struct *task)
printk("perfmon: pid %d unable to unmap sampling buffer @0x%lx size=%ld\n",
task->pid, ctx->ctx_smpl_vaddr, psb->psb_size);
}
DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d\n",
task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r));
/*
* make sure we suppress all traces of this buffer
* (important for pfm_inherit)
*/
ctx->ctx_smpl_vaddr = 0;
DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d refcnt=%lu psb_flags=0x%x\n",
task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r, psb->psb_refcnt, psb->psb_flags));
return 0;
}
......@@ -572,7 +652,7 @@ pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long ad
while (size > 0) {
page = pfm_kvirt_to_pa(buf);
if (remap_page_range(vma, addr, page, PAGE_SIZE, PAGE_SHARED)) return -ENOMEM;
if (remap_page_range(vma, addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM;
addr += PAGE_SIZE;
buf += PAGE_SIZE;
......@@ -611,17 +691,25 @@ pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned lo
void *smpl_buf;
pfm_smpl_buffer_desc_t *psb;
regcount = pfm_smpl_entry_size(which_pmds, 1);
/* note that regcount might be 0, in this case only the header for each
* entry will be recorded.
*/
regcount = pfm_smpl_entry_size(which_pmds, 1);
if ((sizeof(perfmon_smpl_hdr_t)+ entries*sizeof(perfmon_smpl_entry_t)) <= entries) {
DBprintk(("requested entries %lu is too big\n", entries));
return -EINVAL;
}
/*
* 1 buffer hdr and for each entry a header + regcount PMDs to save
*/
size = PAGE_ALIGN( sizeof(perfmon_smpl_hdr_t)
+ entries * (sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64)));
DBprintk(("sampling buffer size=%lu bytes\n", size));
/*
* check requested size to avoid Denial-of-service attacks
* XXX: may have to refine this test
......@@ -661,8 +749,13 @@ pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned lo
}
/*
* partially initialize the vma for the sampling buffer
*
* The VM_DONTCOPY flag is very important as it ensures that the mapping
* will never be inherited for any child process (via fork()) which is always
* what we want.
*/
vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
vma->vm_mm = mm;
vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED|VM_DONTCOPY;
vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
vma->vm_ops = &pfm_vm_ops; /* necesarry to get the close() callback */
vma->vm_pgoff = 0;
......@@ -680,8 +773,8 @@ pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned lo
psb->psb_size = size; /* aligned size */
psb->psb_index = 0;
psb->psb_entries = entries;
psb->psb_flags = PFM_PSB_VMA; /* remember that there is a vma describing the buffer */
psb->psb_refcnt = 1;
psb->psb_flags = PSB_HAS_VMA;
spin_lock_init(&psb->psb_lock);
......@@ -691,9 +784,9 @@ pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned lo
*/
psb->psb_entry_size = sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64);
DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p\n",
DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p refcnt=%lu psb_flags=0x%x\n",
(void *)psb,psb->psb_entry_size, (void *)psb->psb_hdr,
(void *)psb->psb_addr));
(void *)psb->psb_addr, psb->psb_refcnt, psb->psb_flags));
/* initialize some of the fields of user visible buffer header */
psb->psb_hdr->hdr_version = PFM_SMPL_VERSION;
......@@ -785,6 +878,11 @@ pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
}
ctx_flags = pfx->ctx_flags;
if ((ctx_flags & PFM_FL_INHERIT_MASK) == (PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)) {
DBprintk(("invalid inherit mask 0x%x\n",ctx_flags & PFM_FL_INHERIT_MASK));
return -EINVAL;
}
if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
DBprintk(("cpu_mask=0x%lx\n", pfx->ctx_cpu_mask));
/*
......@@ -823,7 +921,15 @@ pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
* must provide a target for the signal in blocking mode even when
* no counter is configured with PFM_FL_REG_OVFL_NOTIFY
*/
if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == 0) return -EINVAL;
if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == 0) {
DBprintk(("must have notify_pid when blocking for [%d]\n", task->pid));
return -EINVAL;
}
if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == task->pid) {
DBprintk(("cannot notify self when blocking for [%d]\n", task->pid));
return -EINVAL;
}
}
/* probably more to add here */
......@@ -831,7 +937,7 @@ pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
}
static int
pfm_create_context(struct task_struct *task, pfm_context_t *ctx, void *req, int count,
pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int count,
struct pt_regs *regs)
{
pfarg_context_t tmp;
......@@ -956,7 +1062,7 @@ pfm_create_context(struct task_struct *task, pfm_context_t *ctx, void *req, int
}
if (tmp.ctx_smpl_entries) {
DBprintk(("sampling entries=%ld\n",tmp.ctx_smpl_entries));
DBprintk(("sampling entries=%lu\n",tmp.ctx_smpl_entries));
ret = pfm_smpl_buffer_alloc(ctx, tmp.ctx_smpl_regs,
tmp.ctx_smpl_entries, &uaddr);
......@@ -982,20 +1088,12 @@ pfm_create_context(struct task_struct *task, pfm_context_t *ctx, void *req, int
atomic_set(&ctx->ctx_last_cpu,-1); /* SMP only, means no CPU */
/*
* Keep track of the pmds we want to sample
* XXX: may be we don't need to save/restore the DEAR/IEAR pmds
* but we do need the BTB for sure. This is because of a hardware
* buffer of 1 only for non-BTB pmds.
*
* We ignore the unimplemented pmds specified by the user
*/
ctx->ctx_used_pmds[0] = tmp.ctx_smpl_regs[0] & pmu_conf.impl_regs[4];
ctx->ctx_saved_pmcs[0] = 1; /* always save/restore PMC[0] */
/* may be redudant with memset() but at least it's easier to remember */
atomic_set(&ctx->ctx_saving_in_progress, 0);
atomic_set(&ctx->ctx_is_busy, 0);
sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */
if (copy_to_user(req, &tmp, sizeof(tmp))) {
ret = -EFAULT;
goto buffer_error;
......@@ -1097,21 +1195,22 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
current->pid,
flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val));
}
ia64_srlz_d();
/* just in case ! */
ctx->ctx_ovfl_regs[0] = 0UL;
}
static int
pfm_write_pmcs(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
struct thread_struct *th = &ta->thread;
struct thread_struct *th = &task->thread;
pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
unsigned int cnum;
int i;
int ret = 0, reg_retval = 0;
/* we don't quite support this right now */
if (ta != current) return -EINVAL;
if (task != current) return -EINVAL;
if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
......@@ -1140,30 +1239,30 @@ pfm_write_pmcs(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
* - per-task : user monitor
* any other configuration is rejected.
*/
if (PMC_IS_MONITOR(cnum)) {
pfm_monitor_t *p = (pfm_monitor_t *)&tmp.reg_value;
if (PMC_IS_MONITOR(cnum) || PMC_IS_COUNTING(cnum)) {
DBprintk(("pmc[%u].pm=%ld\n", cnum, PMC_PM(cnum, tmp.reg_value)));
DBprintk(("pmc[%u].pm = %d\n", cnum, p->pmc_pm));
if (ctx->ctx_fl_system ^ p->pmc_pm) {
//if ((ctx->ctx_fl_system == 1 && p->pmc_pm == 0)
// ||(ctx->ctx_fl_system == 0 && p->pmc_pm == 1)) {
if (ctx->ctx_fl_system ^ PMC_PM(cnum, tmp.reg_value)) {
DBprintk(("pmc_pm=%ld fl_system=%d\n", PMC_PM(cnum, tmp.reg_value), ctx->ctx_fl_system));
ret = -EINVAL;
goto abort_mission;
}
/*
* enforce generation of overflow interrupt. Necessary on all
* CPUs which do not implement 64-bit hardware counters.
*/
p->pmc_oi = 1;
}
if (PMC_IS_COUNTING(cnum)) {
pfm_monitor_t *p = (pfm_monitor_t *)&tmp.reg_value;
/*
* enforce generation of overflow interrupt. Necessary on all
* CPUs.
*/
p->pmc_oi = 1;
if (tmp.reg_flags & PFM_REGFL_OVFL_NOTIFY) {
/*
* must have a target for the signal
*/
if (ctx->ctx_notify_task == NULL) {
DBprintk(("no notify_task && PFM_REGFL_OVFL_NOTIFY\n"));
ret = -EINVAL;
goto abort_mission;
}
......@@ -1177,14 +1276,11 @@ pfm_write_pmcs(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
ctx->ctx_soft_pmds[cnum].reset_pmds[1] = tmp.reg_reset_pmds[1];
ctx->ctx_soft_pmds[cnum].reset_pmds[2] = tmp.reg_reset_pmds[2];
ctx->ctx_soft_pmds[cnum].reset_pmds[3] = tmp.reg_reset_pmds[3];
/*
* needed in case the user does not initialize the equivalent
* PMD. Clearing is done in reset_pmu() so there is no possible
* leak here.
*/
CTX_USED_PMD(ctx, cnum);
}
/*
* execute write checker, if any
*/
if (PMC_WR_FUNC(cnum)) ret = PMC_WR_FUNC(cnum)(task, cnum, &tmp.reg_value);
abort_mission:
if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;
......@@ -1204,14 +1300,21 @@ pfm_write_pmcs(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
*/
if (ret != 0) {
DBprintk(("[%d] pmc[%u]=0x%lx error %d\n",
ta->pid, cnum, tmp.reg_value, reg_retval));
task->pid, cnum, tmp.reg_value, reg_retval));
break;
}
/*
* We can proceed with this register!
*/
/*
* Needed in case the user does not initialize the equivalent
* PMD. Clearing is done in reset_pmu() so there is no possible
* leak here.
*/
CTX_USED_PMD(ctx, pmu_conf.pmc_desc[cnum].dep_pmd[0]);
/*
* keep copy the pmc, used for register reload
*/
......@@ -1219,17 +1322,17 @@ pfm_write_pmcs(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
ia64_set_pmc(cnum, tmp.reg_value);
DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x save_pmcs=0%lx reload_pmcs=0x%lx\n",
ta->pid, cnum, tmp.reg_value,
DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x used_pmds=0x%lx\n",
task->pid, cnum, tmp.reg_value,
ctx->ctx_soft_pmds[cnum].flags,
ctx->ctx_saved_pmcs[0], ctx->ctx_reload_pmcs[0]));
ctx->ctx_used_pmds[0]));
}
return ret;
}
static int
pfm_write_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
unsigned int cnum;
......@@ -1237,7 +1340,7 @@ pfm_write_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
int ret = 0, reg_retval = 0;
/* we don't quite support this right now */
if (ta != current) return -EINVAL;
if (task != current) return -EINVAL;
/*
* Cannot do anything before PMU is enabled
......@@ -1252,7 +1355,6 @@ pfm_write_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
cnum = tmp.reg_num;
if (!PMD_IS_IMPL(cnum)) {
ret = -EINVAL;
goto abort_mission;
......@@ -1266,6 +1368,10 @@ pfm_write_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
}
/*
* execute write checker, if any
*/
if (PMD_WR_FUNC(cnum)) ret = PMD_WR_FUNC(cnum)(task, cnum, &tmp.reg_value);
abort_mission:
if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;
......@@ -1282,21 +1388,22 @@ pfm_write_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
*/
if (ret != 0) {
DBprintk(("[%d] pmc[%u]=0x%lx error %d\n",
ta->pid, cnum, tmp.reg_value, reg_retval));
task->pid, cnum, tmp.reg_value, reg_retval));
break;
}
/* keep track of what we use */
CTX_USED_PMD(ctx, cnum);
CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]);
/* writes to unimplemented part is ignored, so this is safe */
ia64_set_pmd(cnum, tmp.reg_value);
ia64_set_pmd(cnum, tmp.reg_value & pmu_conf.perf_ovfl_val);
/* to go away */
ia64_srlz_d();
DBprintk(("[%d] pmd[%u]: soft_pmd=0x%lx short_reset=0x%lx "
"long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx\n",
ta->pid, cnum,
task->pid, cnum,
ctx->ctx_soft_pmds[cnum].val,
ctx->ctx_soft_pmds[cnum].short_reset,
ctx->ctx_soft_pmds[cnum].long_reset,
......@@ -1309,12 +1416,13 @@ pfm_write_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
}
static int
pfm_read_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
struct thread_struct *th = &ta->thread;
struct thread_struct *th = &task->thread;
unsigned long val=0;
pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
int i;
unsigned int cnum;
int i, ret = 0;
if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
......@@ -1327,14 +1435,25 @@ pfm_read_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
/* XXX: ctx locking may be required here */
DBprintk(("ctx_last_cpu=%d for [%d]\n", atomic_read(&ctx->ctx_last_cpu), ta->pid));
DBprintk(("ctx_last_cpu=%d for [%d]\n", atomic_read(&ctx->ctx_last_cpu), task->pid));
for (i = 0; i < count; i++, req++) {
unsigned long reg_val = ~0UL, ctx_val = ~0UL;
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
if (!PMD_IS_IMPL(tmp.reg_num)) goto abort_mission;
cnum = tmp.reg_num;
if (!PMD_IS_IMPL(cnum)) goto abort_mission;
/*
* we can only read the register that we use. That includes
* the one we explicitely initialize AND the one we want included
* in the sampling buffer (smpl_regs).
*
* Having this restriction allows optimization in the ctxsw routine
* without compromising security (leaks)
*/
if (!CTX_IS_USED_PMD(ctx, cnum)) goto abort_mission;
/*
* If the task is not the current one, then we check if the
......@@ -1343,8 +1462,8 @@ pfm_read_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
*/
if (atomic_read(&ctx->ctx_last_cpu) == smp_processor_id()){
ia64_srlz_d();
val = reg_val = ia64_get_pmd(tmp.reg_num);
DBprintk(("reading pmd[%u]=0x%lx from hw\n", tmp.reg_num, val));
val = reg_val = ia64_get_pmd(cnum);
DBprintk(("reading pmd[%u]=0x%lx from hw\n", cnum, val));
} else {
#ifdef CONFIG_SMP
int cpu;
......@@ -1360,30 +1479,38 @@ pfm_read_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
*/
cpu = atomic_read(&ctx->ctx_last_cpu);
if (cpu != -1) {
DBprintk(("must fetch on CPU%d for [%d]\n", cpu, ta->pid));
pfm_fetch_regs(cpu, ta, ctx);
DBprintk(("must fetch on CPU%d for [%d]\n", cpu, task->pid));
pfm_fetch_regs(cpu, task, ctx);
}
#endif
/* context has been saved */
val = reg_val = th->pmd[tmp.reg_num];
val = reg_val = th->pmd[cnum];
}
if (PMD_IS_COUNTING(tmp.reg_num)) {
if (PMD_IS_COUNTING(cnum)) {
/*
* XXX: need to check for overflow
*/
val &= pmu_conf.perf_ovfl_val;
val += ctx_val = ctx->ctx_soft_pmds[tmp.reg_num].val;
val += ctx_val = ctx->ctx_soft_pmds[cnum].val;
} else {
val = reg_val = ia64_get_pmd(tmp.reg_num);
val = reg_val = ia64_get_pmd(cnum);
}
PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
tmp.reg_value = val;
DBprintk(("read pmd[%u] soft_pmd=0x%lx reg=0x%lx pmc=0x%lx\n",
tmp.reg_num, ctx_val, reg_val,
ia64_get_pmc(tmp.reg_num)));
/*
* execute read checker, if any
*/
if (PMD_RD_FUNC(cnum)) {
ret = PMD_RD_FUNC(cnum)(task, cnum, &tmp.reg_value);
}
PFM_REG_RETFLAG_SET(tmp.reg_flags, ret);
DBprintk(("read pmd[%u] ret=%d soft_pmd=0x%lx reg=0x%lx pmc=0x%lx\n",
cnum, ret, ctx_val, reg_val,
ia64_get_pmc(cnum)));
if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
}
......@@ -1391,7 +1518,7 @@ pfm_read_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count,
abort_mission:
PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
/*
* XXX: if this fails, we stick we the original failure, flag not updated!
* XXX: if this fails, we stick with the original failure, flag not updated!
*/
copy_to_user(req, &tmp, sizeof(tmp));
return -EINVAL;
......@@ -1459,6 +1586,7 @@ pfm_use_debug_registers(struct task_struct *task)
* perfmormance monitoring, so we only decrement the number
* of "ptraced" debug register users to keep the count up to date
*/
int
pfm_release_debug_registers(struct task_struct *task)
{
......@@ -1505,13 +1633,6 @@ pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
*/
if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
#if 0
if (ctx->ctx_fl_frozen==0) {
printk("task %d without pmu_frozen set\n", task->pid);
return -EINVAL;
}
#endif
if (task == current) {
DBprintk(("restarting self %d frozen=%d \n", current->pid, ctx->ctx_fl_frozen));
......@@ -1554,7 +1675,6 @@ pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
up(sem);
} else {
task->thread.pfm_ovfl_block_reset = 1;
set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
}
#if 0
/*
......@@ -1629,25 +1749,35 @@ pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
current->pid,
ctx->ctx_fl_system, PMU_OWNER(),
current));
/* simply stop monitoring but not the PMU */
if (ctx->ctx_fl_system) {
__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
/* disable dcr pp */
ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
/* stop monitoring */
__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
ia64_srlz_i();
#ifdef CONFIG_SMP
local_cpu_data->pfm_dcr_pp = 0;
this_cpu(pfm_dcr_pp) = 0;
#else
pfm_tasklist_toggle_pp(0);
#endif
ia64_psr(regs)->pp = 0;
} else {
/* stop monitoring */
__asm__ __volatile__ ("rum psr.up;;"::: "memory");
ia64_srlz_i();
/*
* clear user level psr.up
*/
ia64_psr(regs)->up = 0;
}
return 0;
......@@ -1674,7 +1804,7 @@ pfm_disable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
ia64_psr(regs)->up = 0;
}
/*
* goes back to default behavior
* goes back to default behavior: no user level control
* no need to change live psr.sp because useless at the kernel level
*/
ia64_psr(regs)->sp = 1;
......@@ -1686,10 +1816,8 @@ pfm_disable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
return 0;
}
static int
pfm_destroy_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
pfm_context_destroy(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
struct pt_regs *regs)
{
/* we don't quite support this right now */
......@@ -1715,15 +1843,14 @@ pfm_destroy_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int
ia64_psr(regs)->up = 0;
}
/* restore security level */
ia64_psr(regs)->sp = 1;
skipped_stop:
/*
* remove sampling buffer mapping, if any
*/
if (ctx->ctx_smpl_vaddr) pfm_remove_smpl_mapping(task);
if (ctx->ctx_smpl_vaddr) {
pfm_remove_smpl_mapping(task);
ctx->ctx_smpl_vaddr = 0UL;
}
/* now free context and related state */
pfm_context_exit(task);
......@@ -1734,7 +1861,7 @@ pfm_destroy_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int
* does nothing at the moment
*/
static int
pfm_unprotect_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
pfm_context_unprotect(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
struct pt_regs *regs)
{
return 0;
......@@ -1764,9 +1891,9 @@ pfm_debug(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
{
unsigned int mode = *(unsigned int *)arg;
pfm_debug_mode = mode == 0 ? 0 : 1;
pfm_sysctl.debug = mode == 0 ? 0 : 1;
printk("perfmon debugging %s\n", pfm_debug_mode ? "on" : "off");
printk("perfmon debugging %s\n", pfm_sysctl.debug ? "on" : "off");
return 0;
}
......@@ -1855,15 +1982,17 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
memset(task->thread.ibr, 0, sizeof(task->thread.ibr));
/*
* clear hardware registers to make sure we don't leak
* information and pick up stale state
* clear hardware registers to make sure we don't
* pick up stale state
*/
for (i=0; i < pmu_conf.num_ibrs; i++) {
ia64_set_ibr(i, 0UL);
}
ia64_srlz_i();
for (i=0; i < pmu_conf.num_dbrs; i++) {
ia64_set_dbr(i, 0UL);
}
ia64_srlz_d();
}
}
......@@ -1924,6 +2053,7 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
CTX_USED_IBR(ctx, rnum);
ia64_set_ibr(rnum, dbreg.val);
ia64_srlz_i();
thread->ibr[rnum] = dbreg.val;
......@@ -1932,6 +2062,7 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
CTX_USED_DBR(ctx, rnum);
ia64_set_dbr(rnum, dbreg.val);
ia64_srlz_d();
thread->dbr[rnum] = dbreg.val;
......@@ -2031,27 +2162,35 @@ pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
if (ctx->ctx_fl_system) {
/* enable dcr pp */
ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP);
#ifdef CONFIG_SMP
local_cpu_data->pfm_dcr_pp = 1;
this_cpu(pfm_dcr_pp) = 1;
#else
pfm_tasklist_toggle_pp(1);
#endif
/* set user level psr.pp */
ia64_psr(regs)->pp = 1;
/* start monitoring at kernel level */
__asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
/* enable dcr pp */
ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP);
ia64_srlz_i();
} else {
if ((task->thread.flags & IA64_THREAD_PM_VALID) == 0) {
printk("perfmon: pfm_start task flag not set for [%d]\n", task->pid);
return -EINVAL;
}
/* set user level psr.up */
ia64_psr(regs)->up = 1;
/* start monitoring at kernel level */
__asm__ __volatile__ ("sum psr.up;;"::: "memory");
ia64_srlz_i();
}
ia64_srlz_d();
return 0;
}
......@@ -2074,11 +2213,13 @@ pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
ia64_psr(regs)->pp = 0;
ia64_psr(regs)->up = 0; /* just to make sure! */
/* make sure monitoring is stopped */
__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
ia64_srlz_i();
#ifdef CONFIG_SMP
local_cpu_data->pfm_syst_wide = 1;
local_cpu_data->pfm_dcr_pp = 0;
this_cpu(pfm_syst_wide) = 1;
this_cpu(pfm_dcr_pp) = 0;
#endif
} else {
/*
......@@ -2089,21 +2230,21 @@ pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
ia64_psr(regs)->pp = 0; /* just to make sure! */
ia64_psr(regs)->up = 0;
/* make sure monitoring is stopped */
__asm__ __volatile__ ("rum psr.up;;"::: "memory");
/*
* allow user control (user monitors only)
if (task == ctx->ctx_owner) {
*/
{
DBprintk(("clearing psr.sp for [%d]\n", current->pid));
ia64_psr(regs)->sp = 0;
}
ia64_srlz_i();
DBprintk(("clearing psr.sp for [%d]\n", current->pid));
/* allow user level control */
ia64_psr(regs)->sp = 0;
/* PMU state will be saved/restored on ctxsw */
task->thread.flags |= IA64_THREAD_PM_VALID;
}
SET_PMU_OWNER(task);
ctx->ctx_flags.state = PFM_CTX_ENABLED;
atomic_set(&ctx->ctx_last_cpu, smp_processor_id());
......@@ -2114,6 +2255,40 @@ pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
return 0;
}
static int
pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
struct pt_regs *regs)
{
pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
unsigned int cnum;
int i;
for (i = 0; i < count; i++, req++) {
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
cnum = tmp.reg_num;
if (!PMC_IS_IMPL(cnum)) goto abort_mission;
tmp.reg_value = reset_pmcs[cnum];
PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
DBprintk(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, tmp.reg_value));
if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
}
return 0;
abort_mission:
PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
/*
* XXX: if this fails, we stick with the original failure, flag not updated!
*/
copy_to_user(req, &tmp, sizeof(tmp));
return -EINVAL;
}
/*
* functions MUST be listed in the increasing order of their index (see permfon.h)
*/
......@@ -2121,19 +2296,19 @@ static pfm_cmd_desc_t pfm_cmd_tab[]={
/* 0 */{ NULL, 0, 0, 0}, /* not used */
/* 1 */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 2 */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 3 */{ pfm_read_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 3 */{ pfm_read_pmds,PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 4 */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 5 */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 6 */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 7 */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 8 */{ pfm_create_context, PFM_CMD_ARG_READ, 1, sizeof(pfarg_context_t)},
/* 9 */{ pfm_destroy_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 8 */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, 1, sizeof(pfarg_context_t)},
/* 9 */{ pfm_context_destroy, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0},
/* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 12 */{ pfm_get_features, PFM_CMD_ARG_WRITE, 0, 0},
/* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)},
/* 14 */{ pfm_unprotect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 15 */{ NULL, 0, 0, 0}, /* not used */
/* 14 */{ pfm_context_unprotect, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 16 */{ NULL, 0, 0, 0}, /* not used */
/* 17 */{ NULL, 0, 0, 0}, /* not used */
/* 18 */{ NULL, 0, 0, 0}, /* not used */
......@@ -2167,19 +2342,10 @@ check_task_state(struct task_struct *task)
* after the task is marked as STOPPED but before pfm_save_regs()
* is completed.
*/
for (;;) {
task_lock(task);
if (1 /*XXX !task_has_cpu(task)*/) break;
task_unlock(task);
do {
if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) return -EBUSY;
barrier();
cpu_relax();
} while (0 /*task_has_cpu(task)*/);
}
task_unlock(task);
if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) return -EBUSY;
DBprintk(("before wait_task_inactive [%d] state %ld\n", task->pid, task->state));
wait_task_inactive(task);
DBprintk(("after wait_task_inactive [%d] state %ld\n", task->pid, task->state));
#else
if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
......@@ -2273,7 +2439,7 @@ sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6,
}
void
pfm_ovfl_block_reset (void)
pfm_ovfl_block_reset(void)
{
struct thread_struct *th = &current->thread;
pfm_context_t *ctx = current->thread.pfm_context;
......@@ -2353,18 +2519,17 @@ pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ov
int j;
pfm_recorded_samples_count++;
idx = ia64_fetch_and_add(1, &psb->psb_index);
DBprintk(("recording index=%ld entries=%ld\n", idx-1, psb->psb_entries));
DBprintk_ovfl(("recording index=%ld entries=%ld\n", idx-1, psb->psb_entries));
/*
* XXX: there is a small chance that we could run out on index before resetting
* but index is unsigned long, so it will take some time.....
* We use > instead of == because fetch_and_add() is off by one (see below)
*
* This case can happen in non-blocking mode or with multiple processes.
* For non-blocking, we need to reload and continue.
*/
* XXX: there is a small chance that we could run out on index before resetting
* but index is unsigned long, so it will take some time.....
* We use > instead of == because fetch_and_add() is off by one (see below)
*
* This case can happen in non-blocking mode or with multiple processes.
* For non-blocking, we need to reload and continue.
*/
if (idx > psb->psb_entries) return 0;
/* first entry is really entry 0, not 1 caused by fetch_and_add */
......@@ -2375,7 +2540,7 @@ pfm_recorded_samples_count++;
/*
* initialize entry header
*/
h->pid = task->pid;
h->pid = current->pid;
h->cpu = smp_processor_id();
h->rate = 0; /* XXX: add the sampling rate used here */
h->ip = regs ? regs->cr_iip : 0x0; /* where did the fault happened */
......@@ -2403,24 +2568,27 @@ pfm_recorded_samples_count++;
} else {
*e = ia64_get_pmd(j); /* slow */
}
DBprintk(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
DBprintk_ovfl(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
e++;
}
pfm_stats.pfm_recorded_samples_count++;
/*
* make the new entry visible to user, needs to be atomic
*/
ia64_fetch_and_add(1, &psb->psb_hdr->hdr_count);
DBprintk(("index=%ld entries=%ld hdr_count=%ld\n",
DBprintk_ovfl(("index=%ld entries=%ld hdr_count=%ld\n",
idx, psb->psb_entries, psb->psb_hdr->hdr_count));
/*
* sampling buffer full ?
*/
if (idx == (psb->psb_entries-1)) {
DBprintk(("sampling buffer full\n"));
DBprintk_ovfl(("sampling buffer full\n"));
/*
* XXX: must reset buffer in blocking mode and lost notified
*/
pfm_stats.pfm_full_smpl_buffer_count++;
return 1;
}
return 0;
......@@ -2433,15 +2601,13 @@ pfm_recorded_samples_count++;
* new value of pmc[0]. if 0x0 then unfreeze, else keep frozen
*/
static unsigned long
pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
{
unsigned long mask;
struct thread_struct *t;
pfm_context_t *ctx;
unsigned long old_val;
unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL;
int i;
int my_cpu = smp_processor_id();
int ret = 1;
struct siginfo si;
/*
......@@ -2457,18 +2623,7 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
* valid one, i.e. the one that caused the interrupt.
*/
if (task == NULL) {
DBprintk(("owners[%d]=NULL\n", my_cpu));
return 0x1;
}
t = &task->thread;
ctx = task->thread.pfm_context;
if (!ctx) {
printk("perfmon: Spurious overflow interrupt: process %d has no PFM context\n",
task->pid);
return 0;
}
/*
* XXX: debug test
......@@ -2490,12 +2645,12 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
mask = pmc0 >> PMU_FIRST_COUNTER;
DBprintk(("pmc0=0x%lx pid=%d iip=0x%lx, %s"
" mode used_pmds=0x%lx save_pmcs=0x%lx reload_pmcs=0x%lx\n",
DBprintk_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s"
" mode used_pmds=0x%lx used_pmcs=0x%lx reload_pmcs=0x%lx\n",
pmc0, task->pid, (regs ? regs->cr_iip : 0),
CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
ctx->ctx_used_pmds[0],
ctx->ctx_saved_pmcs[0],
ctx->ctx_used_pmcs[0],
ctx->ctx_reload_pmcs[0]));
/*
......@@ -2506,7 +2661,7 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
/* skip pmd which did not overflow */
if ((mask & 0x1) == 0) continue;
DBprintk(("PMD[%d] overflowed hw_pmd=0x%lx soft_pmd=0x%lx\n",
DBprintk_ovfl(("pmd[%d] overflowed hw_pmd=0x%lx soft_pmd=0x%lx\n",
i, ia64_get_pmd(i), ctx->ctx_soft_pmds[i].val));
/*
......@@ -2518,8 +2673,7 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
old_val = ctx->ctx_soft_pmds[i].val;
ctx->ctx_soft_pmds[i].val = 1 + pmu_conf.perf_ovfl_val + pfm_read_soft_counter(ctx, i);
DBprintk(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx\n",
DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx\n",
i, ctx->ctx_soft_pmds[i].val, old_val,
ia64_get_pmd(i) & pmu_conf.perf_ovfl_val));
......@@ -2536,7 +2690,7 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
ovfl_pmds |= 1UL << i;
DBprintk(("soft_pmd[%d] overflowed flags=0x%x, ovfl=0x%lx\n", i, ctx->ctx_soft_pmds[i].flags, ovfl_pmds));
DBprintk_ovfl(("soft_pmd[%d] overflowed flags=0x%x, ovfl=0x%lx\n", i, ctx->ctx_soft_pmds[i].flags, ovfl_pmds));
if (PMC_OVFL_NOTIFY(ctx, i)) {
ovfl_notify |= 1UL << i;
......@@ -2575,7 +2729,8 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
* No overflow requiring a user level notification
*/
if (ovfl_notify == 0UL) {
pfm_reset_regs(ctx, &ovfl_pmds, PFM_RELOAD_SHORT_RESET);
if (ovfl_pmds)
pfm_reset_regs(ctx, &ovfl_pmds, PFM_RELOAD_SHORT_RESET);
return 0x0;
}
......@@ -2650,7 +2805,7 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
* necessarily go to the signal handler (if any) when it goes back to
* user mode.
*/
DBprintk(("[%d] sending notification to [%d]\n",
DBprintk_ovfl(("[%d] sending notification to [%d]\n",
task->pid, ctx->ctx_notify_task->pid));
......@@ -2683,7 +2838,7 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
* before, changing it to NULL will still maintain this invariant.
* Of course, when it is equal to current it cannot change at this point.
*/
DBprintk(("block=%d notify [%d] current [%d]\n",
DBprintk_ovfl(("block=%d notify [%d] current [%d]\n",
ctx->ctx_fl_block,
ctx->ctx_notify_task ? ctx->ctx_notify_task->pid: -1,
current->pid ));
......@@ -2694,7 +2849,7 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
} else {
lost_notify: /* XXX: more to do here, to convert to non-blocking (reset values) */
DBprintk(("notification task has disappeared !\n"));
DBprintk_ovfl(("notification task has disappeared !\n"));
/*
* for a non-blocking context, we make sure we do not fall into the
* pfm_overflow_notify() trap. Also in the case of a blocking context with lost
......@@ -2716,7 +2871,7 @@ pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
*/
ctx->ctx_fl_frozen = 1;
DBprintk(("reload pmc0=0x%x must_block=%ld\n",
DBprintk_ovfl(("return pmc0=0x%x must_block=%ld\n",
ctx->ctx_fl_frozen ? 0x1 : 0x0, t->pfm_ovfl_block_reset));
return ctx->ctx_fl_frozen ? 0x1 : 0x0;
......@@ -2727,8 +2882,9 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
{
u64 pmc0;
struct task_struct *task;
pfm_context_t *ctx;
pfm_ovfl_intr_count++;
pfm_stats.pfm_ovfl_intr_count++;
/*
* srlz.d done before arriving here
......@@ -2742,24 +2898,54 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
* assumes : if any PM[0].bit[63-1] is set, then PMC[0].fr = 1
*/
if ((pmc0 & ~0x1UL)!=0UL && (task=PMU_OWNER())!= NULL) {
/*
* assumes, PMC[0].fr = 1 at this point
*
* XXX: change protype to pass &pmc0
* we assume that pmc0.fr is always set here
*/
pmc0 = pfm_overflow_handler(task, pmc0, regs);
ctx = task->thread.pfm_context;
/* we never explicitely freeze PMU here */
if (pmc0 == 0) {
ia64_set_pmc(0, 0);
ia64_srlz_d();
/* sanity check */
if (!ctx) {
printk("perfmon: Spurious overflow interrupt: process %d has no PFM context\n",
task->pid);
return;
}
#ifdef CONFIG_SMP
/*
* Because an IPI has higher priority than the PMU overflow interrupt, it is
* possible that the handler be interrupted by a request from another CPU to fetch
* the PMU state of the currently active context. The task may have just been
* migrated to another CPU which is trying to restore the context. If there was
* a pending overflow interrupt when the task left this CPU, it is possible for
* the handler to get interrupt by the IPI. In which case, we fetch request
* MUST be postponed until the interrupt handler is done. The ctx_is_busy
* flag indicates such a condition. The other CPU must busy wait until it's cleared.
*/
atomic_set(&ctx->ctx_is_busy, 1);
#endif
/*
* assume PMC[0].fr = 1 at this point
*/
pmc0 = pfm_overflow_handler(task, ctx, pmc0, regs);
/*
* We always clear the overflow status bits and either unfreeze
* or keep the PMU frozen.
*/
ia64_set_pmc(0, pmc0);
ia64_srlz_d();
#ifdef CONFIG_SMP
/*
* announce that we are doing with the context
*/
atomic_set(&ctx->ctx_is_busy, 0);
#endif
} else {
pfm_spurious_ovfl_intr_count++;
pfm_stats.pfm_spurious_ovfl_intr_count++;
DBprintk(("perfmon: Spurious PMU overflow interrupt on CPU%d: pmc0=0x%lx owner=%p\n",
smp_processor_id(), pmc0, (void *)PMU_OWNER()));
printk("perfmon: Spurious PMU overflow interrupt on CPU%d: pmc0=0x%lx owner=%p\n",
smp_processor_id(), pmc0, (void *)PMU_OWNER());
}
}
......@@ -2773,27 +2959,30 @@ perfmon_proc_info(char *page)
#define cpu_is_online(i) 1
#endif
char *p = page;
u64 pmc0 = ia64_get_pmc(0);
int i;
p += sprintf(p, "perfmon enabled: %s\n", pmu_conf.pfm_is_disabled ? "No": "Yes");
p += sprintf(p, "monitors_pmcs0]=0x%lx\n", pmu_conf.monitor_pmcs[0]);
p += sprintf(p, "counter_pmcds[0]=0x%lx\n", pmu_conf.counter_pmds[0]);
p += sprintf(p, "overflow interrupts=%lu\n", pfm_ovfl_intr_count);
p += sprintf(p, "spurious overflow interrupts=%lu\n", pfm_spurious_ovfl_intr_count);
p += sprintf(p, "recorded samples=%lu\n", pfm_recorded_samples_count);
p += sprintf(p, "CPU%d.pmc[0]=%lx\nPerfmon debug: %s\n",
smp_processor_id(), pmc0, pfm_debug_mode ? "On" : "Off");
p += sprintf(p, "enabled : %s\n", pmu_conf.pfm_is_disabled ? "No": "Yes");
p += sprintf(p, "fastctxsw : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No");
p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.perf_ovfl_val);
p += sprintf(p, "overflow intrs : %lu\n", pfm_stats.pfm_ovfl_intr_count);
p += sprintf(p, "spurious intrs : %lu\n", pfm_stats.pfm_spurious_ovfl_intr_count);
p += sprintf(p, "recorded samples : %lu\n", pfm_stats.pfm_recorded_samples_count);
p += sprintf(p, "smpl buffer full : %lu\n", pfm_stats.pfm_full_smpl_buffer_count);
#ifdef CONFIG_SMP
p += sprintf(p, "CPU%d cpu_data.pfm_syst_wide=%d cpu_data.dcr_pp=%d\n",
smp_processor_id(), local_cpu_data->pfm_syst_wide, local_cpu_data->pfm_dcr_pp);
p += sprintf(p, "CPU%d syst_wide : %d\n"
"CPU%d dcr_pp : %d\n",
smp_processor_id(),
this_cpu(pfm_syst_wide),
smp_processor_id(),
this_cpu(pfm_dcr_pp));
#endif
LOCK_PFS();
p += sprintf(p, "proc_sessions=%lu\nsys_sessions=%lu\nsys_use_dbregs=%lu\nptrace_use_dbregs=%lu\n",
p += sprintf(p, "proc_sessions : %lu\n"
"sys_sessions : %lu\n"
"sys_use_dbregs : %lu\n"
"ptrace_use_dbregs: %lu\n",
pfm_sessions.pfs_task_sessions,
pfm_sessions.pfs_sys_sessions,
pfm_sessions.pfs_sys_use_dbregs,
......@@ -2803,12 +2992,28 @@ perfmon_proc_info(char *page)
for(i=0; i < NR_CPUS; i++) {
if (cpu_is_online(i)) {
p += sprintf(p, "CPU%d.pmu_owner: %-6d\n",
p += sprintf(p, "CPU%d owner : %-6d\n",
i,
pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1);
}
}
for(i=0; pmd_desc[i].type != PFM_REG_NONE; i++) {
p += sprintf(p, "PMD%-2d: %d 0x%lx 0x%lx\n",
i,
pmd_desc[i].type,
pmd_desc[i].dep_pmd[0],
pmd_desc[i].dep_pmc[0]);
}
for(i=0; pmc_desc[i].type != PFM_REG_NONE; i++) {
p += sprintf(p, "PMC%-2d: %d 0x%lx 0x%lx\n",
i,
pmc_desc[i].type,
pmc_desc[i].dep_pmd[0],
pmc_desc[i].dep_pmc[0]);
}
return p - page;
}
......@@ -2840,7 +3045,7 @@ pfm_syst_wide_update_task(struct task_struct *task, int mode)
/*
* propagate the value of the dcr_pp bit to the psr
*/
ia64_psr(regs)->pp = mode ? local_cpu_data->pfm_dcr_pp : 0;
ia64_psr(regs)->pp = mode ? this_cpu(pfm_dcr_pp) : 0;
}
#endif
......@@ -2867,6 +3072,7 @@ pfm_save_regs (struct task_struct *task)
* It will be restored from ipsr when going back to user level
*/
__asm__ __volatile__ ("rum psr.up;;"::: "memory");
ia64_srlz_i();
ctx->ctx_saved_psr = psr;
......@@ -2922,13 +3128,9 @@ pfm_lazy_save_regs (struct task_struct *task)
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1) t->pmd[i] =ia64_get_pmd(i);
}
/*
* XXX: simplify to pmc0 only
*/
mask = ctx->ctx_saved_pmcs[0];
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1) t->pmc[i] = ia64_get_pmc(i);
}
/* save pmc0 */
t->pmc[0] = ia64_get_pmc(0);
/* not owned by this CPU */
atomic_set(&ctx->ctx_last_cpu, -1);
......@@ -2966,6 +3168,12 @@ pfm_handle_fetch_regs(void *info)
PMU_OWNER() ? PMU_OWNER()->pid: -1,
atomic_read(&ctx->ctx_saving_in_progress)));
/* must wait until not busy before retrying whole request */
if (atomic_read(&ctx->ctx_is_busy)) {
arg->retval = 2;
return;
}
/* must wait if saving was interrupted */
if (atomic_read(&ctx->ctx_saving_in_progress)) {
arg->retval = 1;
......@@ -2978,9 +3186,9 @@ pfm_handle_fetch_regs(void *info)
return;
}
DBprintk(("saving state for [%d] save_pmcs=0x%lx all_pmcs=0x%lx used_pmds=0x%lx\n",
DBprintk(("saving state for [%d] used_pmcs=0x%lx reload_pmcs=0x%lx used_pmds=0x%lx\n",
arg->task->pid,
ctx->ctx_saved_pmcs[0],
ctx->ctx_used_pmcs[0],
ctx->ctx_reload_pmcs[0],
ctx->ctx_used_pmds[0]));
......@@ -2993,17 +3201,15 @@ pfm_handle_fetch_regs(void *info)
/*
* XXX needs further optimization.
* Also must take holes into account
*/
mask = ctx->ctx_used_pmds[0];
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1) t->pmd[i] =ia64_get_pmd(i);
}
mask = ctx->ctx_saved_pmcs[0];
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1) t->pmc[i] = ia64_get_pmc(i);
if (mask & 0x1) t->pmd[i] = ia64_get_pmd(i);
}
/* save pmc0 */
t->pmc[0] = ia64_get_pmc(0);
/* not owned by this CPU */
atomic_set(&ctx->ctx_last_cpu, -1);
......@@ -3032,11 +3238,17 @@ pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx)
arg.task = task;
arg.retval = -1;
if (atomic_read(&ctx->ctx_is_busy)) {
must_wait_busy:
while (atomic_read(&ctx->ctx_is_busy));
}
if (atomic_read(&ctx->ctx_saving_in_progress)) {
DBprintk(("no IPI, must wait for [%d] to be saved on [%d]\n", task->pid, cpu));
must_wait_saving:
/* busy wait */
while (atomic_read(&ctx->ctx_saving_in_progress));
DBprintk(("done saving for [%d] on [%d]\n", task->pid, cpu));
return;
}
DBprintk(("calling CPU %d from CPU %d\n", cpu, smp_processor_id()));
......@@ -3056,11 +3268,8 @@ pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx)
* This is the case, where we interrupted the saving which started just at the time we sent the
* IPI.
*/
if (arg.retval == 1) {
DBprintk(("must wait for [%d] to be saved on [%d]\n", task->pid, cpu));
while (atomic_read(&ctx->ctx_saving_in_progress));
DBprintk(("done saving for [%d] on [%d]\n", task->pid, cpu));
}
if (arg.retval == 1) goto must_wait_saving;
if (arg.retval == 2) goto must_wait_busy;
}
#endif /* CONFIG_SMP */
......@@ -3114,55 +3323,53 @@ pfm_load_regs (struct task_struct *task)
pfm_fetch_regs(cpu, task, ctx);
}
#endif
t = &task->thread;
t = &task->thread;
/*
* XXX: will be replaced by assembly routine
* We clear all unused PMDs to avoid leaking information
* To avoid leaking information to the user level when psr.sp=0,
* we must reload ALL implemented pmds (even the ones we don't use).
* In the kernel we only allow PFM_READ_PMDS on registers which
* we initialized or requested (sampling) so there is no risk there.
*
* As an optimization, we will only reload the PMD that we use when
* the context is in protected mode, i.e. psr.sp=1 because then there
* is no leak possible.
*/
mask = ctx->ctx_used_pmds[0];
mask = pfm_sysctl.fastctxsw || ctx->ctx_fl_protected ? ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0];
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1)
ia64_set_pmd(i, t->pmd[i]);
else
ia64_set_pmd(i, 0UL);
if (mask & 0x1) ia64_set_pmd(i, t->pmd[i] & pmu_conf.perf_ovfl_val);
}
/* XXX: will need to clear all unused pmd, for security */
/*
* skip pmc[0] to avoid side-effects,
* all PMCs are systematically reloaded, unsued get default value
* to avoid picking up stale configuration
* PMC0 is never set in the mask because it is always restored
* separately.
*
* ALL PMCs are systematically reloaded, unused registers
* get their default (PAL reset) values to avoid picking up
* stale configuration.
*/
mask = ctx->ctx_reload_pmcs[0]>>1;
for (i=1; mask; i++, mask>>=1) {
mask = ctx->ctx_reload_pmcs[0];
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1) ia64_set_pmc(i, t->pmc[i]);
}
/*
* restore debug registers when used for range restrictions.
* We must restore the unused registers to avoid picking up
* stale information.
* we restore ALL the debug registers to avoid picking up
* stale state.
*/
mask = ctx->ctx_used_ibrs[0];
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1)
if (ctx->ctx_fl_using_dbreg) {
for (i=0; i < pmu_conf.num_ibrs; i++) {
ia64_set_ibr(i, t->ibr[i]);
else
ia64_set_ibr(i, 0UL);
}
mask = ctx->ctx_used_dbrs[0];
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1)
}
ia64_srlz_i();
for (i=0; i < pmu_conf.num_dbrs; i++) {
ia64_set_dbr(i, t->dbr[i]);
else
ia64_set_dbr(i, 0UL);
}
}
ia64_srlz_d();
if (t->pmc[0] & ~0x1) {
ia64_srlz_d();
pfm_overflow_handler(task, t->pmc[0], NULL);
pfm_overflow_handler(task, ctx, t->pmc[0], NULL);
}
/*
......@@ -3215,7 +3422,7 @@ ia64_reset_pmu(struct task_struct *task)
* When restoring context, we must restore ALL pmcs, even the ones
* that the task does not use to avoid leaks and possibly corruption
* of the sesion because of configuration conflicts. So here, we
* initializaed the table used in the context switch restore routine.
* initialize the entire set used in the context switch restore routine.
*/
t->pmc[i] = reset_pmcs[i];
DBprintk((" pmc[%d]=0x%lx\n", i, reset_pmcs[i]));
......@@ -3224,39 +3431,61 @@ ia64_reset_pmu(struct task_struct *task)
}
/*
* clear reset values for PMD.
* XX: good up to 64 PMDS. Suppose that zero is a valid value.
* XXX: good up to 64 PMDS. Suppose that zero is a valid value.
*/
mask = pmu_conf.impl_regs[4];
for(i=0; mask; mask>>=1, i++) {
if (mask & 0x1) ia64_set_pmd(i, 0UL);
t->pmd[i] = 0UL;
}
/*
* On context switched restore, we must restore ALL pmc even
* On context switched restore, we must restore ALL pmc and ALL pmd even
* when they are not actively used by the task. In UP, the incoming process
* may otherwise pick up left over PMC state from the previous process.
* may otherwise pick up left over PMC, PMD state from the previous process.
* As opposed to PMD, stale PMC can cause harm to the incoming
* process because they may change what is being measured.
* Therefore, we must systematically reinstall the entire
* PMC state. In SMP, the same thing is possible on the
* same CPU but also on between 2 CPUs.
* same CPU but also on between 2 CPUs.
*
* The problem with PMD is information leaking especially
* to user level when psr.sp=0
*
* There is unfortunately no easy way to avoid this problem
* on either UP or SMP. This definitively slows down the
* pfm_load_regs().
* on either UP or SMP. This definitively slows down the
* pfm_load_regs() function.
*/
/*
* We must include all the PMC in this mask to make sure we don't
* see any side effect of the stale state, such as opcode matching
* see any side effect of a stale state, such as opcode matching
* or range restrictions, for instance.
*
* We never directly restore PMC0 so we do not include it in the mask.
*/
ctx->ctx_reload_pmcs[0] = pmu_conf.impl_regs[0];
ctx->ctx_reload_pmcs[0] = pmu_conf.impl_regs[0] & ~0x1;
/*
* We must include all the PMD in this mask to avoid picking
* up stale value and leak information, especially directly
* at the user level when psr.sp=0
*/
ctx->ctx_reload_pmds[0] = pmu_conf.impl_regs[4];
/*
* Keep track of the pmds we want to sample
* XXX: may be we don't need to save/restore the DEAR/IEAR pmds
* but we do need the BTB for sure. This is because of a hardware
* buffer of 1 only for non-BTB pmds.
*
* We ignore the unimplemented pmds specified by the user
*/
ctx->ctx_used_pmds[0] = ctx->ctx_smpl_regs[0] & pmu_conf.impl_regs[4];
ctx->ctx_used_pmcs[0] = 1; /* always save/restore PMC[0] */
/*
* useful in case of re-enable after disable
*/
ctx->ctx_used_pmds[0] = 0UL;
ctx->ctx_used_ibrs[0] = 0UL;
ctx->ctx_used_dbrs[0] = 0UL;
......@@ -3278,7 +3507,7 @@ pfm_flush_regs (struct task_struct *task)
{
pfm_context_t *ctx;
u64 pmc0;
unsigned long mask, mask2, val;
unsigned long mask2, val;
int i;
ctx = task->thread.pfm_context;
......@@ -3300,22 +3529,28 @@ pfm_flush_regs (struct task_struct *task)
*/
if (ctx->ctx_fl_system) {
__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
/* disable dcr pp */
ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
/* stop monitoring */
__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
ia64_srlz_i();
#ifdef CONFIG_SMP
local_cpu_data->pfm_syst_wide = 0;
local_cpu_data->pfm_dcr_pp = 0;
this_cpu(pfm_syst_wide) = 0;
this_cpu(pfm_dcr_pp) = 0;
#else
pfm_tasklist_toggle_pp(0);
#endif
} else {
/* stop monitoring */
__asm__ __volatile__ ("rum psr.up;;"::: "memory");
ia64_srlz_i();
/* no more save/restore on ctxsw */
current->thread.flags &= ~IA64_THREAD_PM_VALID;
}
......@@ -3349,7 +3584,7 @@ pfm_flush_regs (struct task_struct *task)
ia64_srlz_d();
/*
* We don't need to restore psr, because we are on our way out anyway
* We don't need to restore psr, because we are on our way out
*/
/*
......@@ -3365,10 +3600,12 @@ pfm_flush_regs (struct task_struct *task)
if (atomic_read(&ctx->ctx_last_cpu) != smp_processor_id())
printk("perfmon: [%d] last_cpu=%d\n", task->pid, atomic_read(&ctx->ctx_last_cpu));
mask = pmc0 >> PMU_FIRST_COUNTER;
mask2 = ctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER;
for (i = PMU_FIRST_COUNTER; mask2; i++, mask>>=1, mask2>>=1) {
/*
* we save all the used pmds
* we take care of overflows for pmds used as counters
*/
mask2 = ctx->ctx_used_pmds[0];
for (i = 0; mask2; i++, mask2>>=1) {
/* skip non used pmds */
if ((mask2 & 0x1) == 0) continue;
......@@ -3376,7 +3613,6 @@ pfm_flush_regs (struct task_struct *task)
val = ia64_get_pmd(i);
if (PMD_IS_COUNTING(i)) {
DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n", task->pid, i, ctx->ctx_soft_pmds[i].val, val & pmu_conf.perf_ovfl_val));
/* collect latest results */
......@@ -3389,15 +3625,19 @@ pfm_flush_regs (struct task_struct *task)
*/
task->thread.pmd[i] = 0;
/* take care of overflow inline */
if (mask & 0x1) {
/*
* take care of overflow inline
*/
if (pmc0 & (1UL << i)) {
ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.perf_ovfl_val;
DBprintk(("[%d] pmd[%d] overflowed soft_pmd=0x%lx\n",
task->pid, i, ctx->ctx_soft_pmds[i].val));
}
} else {
DBprintk(("[%d] pmd[%d] hw_pmd=0x%lx\n", task->pid, i, val));
/* not a counter, just save value as is */
/*
* not a counter, just save value as is
*/
task->thread.pmd[i] = val;
}
}
......@@ -3409,38 +3649,78 @@ pfm_flush_regs (struct task_struct *task)
}
/*
* task is the newly created task, pt_regs for new child
*/
int
pfm_inherit(struct task_struct *task, struct pt_regs *regs)
{
pfm_context_t *ctx = current->thread.pfm_context;
pfm_context_t *ctx;
pfm_context_t *nctx;
struct thread_struct *th = &task->thread;
struct thread_struct *thread;
unsigned long m;
int i;
/*
* the new task was copied from parent and therefore points
* to the parent's context at this point
*/
ctx = task->thread.pfm_context;
thread = &task->thread;
/*
* make sure child cannot mess up the monitoring session
*/
ia64_psr(regs)->sp = 1;
DBprintk(("enabling psr.sp for [%d]\n", task->pid));
/*
* remove any sampling buffer mapping from child user
* address space. Must be done for all cases of inheritance.
*/
if (ctx->ctx_smpl_vaddr) pfm_remove_smpl_mapping(task);
/*
* if there was a virtual mapping for the sampling buffer
* the mapping is NOT inherited across fork() (see VM_DONTCOPY),
* so we don't have to explicitely remove it here.
*
*
* Part of the clearing of fields is also done in
* copy_thread() because the fiels are outside the
* pfm_context structure and can affect tasks not
* using perfmon.
*/
/* clear pending notification */
task->thread.pfm_ovfl_block_reset = 0;
/*
* clear cpu pinning restriction for child
*/
if (ctx->ctx_fl_system) {
set_cpus_allowed(task, ctx->ctx_saved_cpus_allowed);
DBprintk(("setting cpus_allowed for [%d] to 0x%lx from 0x%lx\n",
task->pid,
ctx->ctx_saved_cpus_allowed,
current->cpus_allowed));
}
/*
* takes care of easiest case first
*/
if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_NONE) {
DBprintk(("removing PFM context for [%d]\n", task->pid));
task->thread.pfm_context = NULL;
task->thread.pfm_ovfl_block_reset = 0;
task->thread.pfm_context = NULL;
/*
* we must clear psr.up because the new child does
* not have a context and the PM_VALID flag is cleared
* in copy_thread().
*
* we do not clear psr.pp because it is always
* controlled by the system wide logic and we should
* never be here when system wide is running anyway
*/
ia64_psr(regs)->up = 0;
/* copy_thread() clears IA64_THREAD_PM_VALID */
return 0;
......@@ -3454,69 +3734,82 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs)
if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_ONCE) {
nctx->ctx_fl_inherit = PFM_FL_INHERIT_NONE;
atomic_set(&nctx->ctx_last_cpu, -1);
/*
* task is not yet visible in the tasklist, so we do
* not need to lock the newly created context.
* However, we must grab the tasklist_lock to ensure
* that the ctx_owner or ctx_notify_task do not disappear
* while we increment their check counters.
*/
read_lock(&tasklist_lock);
DBprintk(("downgrading to INHERIT_NONE for [%d]\n", task->pid));
}
/*
* task is not yet visible in the tasklist, so we do
* not need to lock the newly created context.
* However, we must grab the tasklist_lock to ensure
* that the ctx_owner or ctx_notify_task do not disappear
* while we increment their check counters.
*/
read_lock(&tasklist_lock);
if (nctx->ctx_notify_task)
atomic_inc(&nctx->ctx_notify_task->thread.pfm_notifiers_check);
if (nctx->ctx_notify_task)
atomic_inc(&nctx->ctx_notify_task->thread.pfm_notifiers_check);
if (nctx->ctx_owner)
atomic_inc(&nctx->ctx_owner->thread.pfm_owners_check);
if (nctx->ctx_owner)
atomic_inc(&nctx->ctx_owner->thread.pfm_owners_check);
read_unlock(&tasklist_lock);
read_unlock(&tasklist_lock);
DBprintk(("downgrading to INHERIT_NONE for [%d]\n", task->pid));
LOCK_PFS();
pfm_sessions.pfs_task_sessions++;
UNLOCK_PFS();
}
LOCK_PFS();
pfm_sessions.pfs_task_sessions++;
UNLOCK_PFS();
/* initialize counters in new context */
m = pmu_conf.counter_pmds[0] >> PMU_FIRST_COUNTER;
m = nctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER;
for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) {
if (m & 0x1) {
if ((m & 0x1) && pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) {
nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].ival & ~pmu_conf.perf_ovfl_val;
th->pmd[i] = nctx->ctx_soft_pmds[i].ival & pmu_conf.perf_ovfl_val;
thread->pmd[i] = nctx->ctx_soft_pmds[i].ival & pmu_conf.perf_ovfl_val;
}
/* what about the other pmds? zero or keep as is */
}
/* clear BTB index register */
th->pmd[16] = 0;
/*
* clear BTB index register
* XXX: CPU-model specific knowledge!
*/
thread->pmd[16] = 0;
/* if sampling then increment number of users of buffer */
if (nctx->ctx_psb) {
/*
* XXX: nopt very pretty!
*/
nctx->ctx_fl_frozen = 0;
nctx->ctx_ovfl_regs[0] = 0UL;
atomic_set(&nctx->ctx_last_cpu, -1);
/*
* here nctx->ctx_psb == ctx->ctx_psb
*
* increment reference count to sampling
* buffer, if any. Note that this is independent
* from the virtual mapping. The latter is never
* inherited while the former will be if context
* is setup to something different from PFM_FL_INHERIT_NONE
*/
if (nctx->ctx_psb) {
LOCK_PSB(nctx->ctx_psb);
nctx->ctx_psb->psb_refcnt++;
DBprintk(("updated smpl @ %p refcnt=%lu psb_flags=0x%x\n",
ctx->ctx_psb->psb_hdr,
ctx->ctx_psb->psb_refcnt,
ctx->ctx_psb->psb_flags));
UNLOCK_PSB(nctx->ctx_psb);
/*
* remove any pointer to sampling buffer mapping
*/
nctx->ctx_smpl_vaddr = 0;
}
nctx->ctx_fl_frozen = 0;
nctx->ctx_ovfl_regs[0] = 0UL;
sema_init(&nctx->ctx_restart_sem, 0); /* reset this semaphore to locked */
/* clear pending notification */
th->pfm_ovfl_block_reset = 0;
/* link with new task */
th->pfm_context = nctx;
thread->pfm_context = nctx;
DBprintk(("nctx=%p for process [%d]\n", (void *)nctx, task->pid));
......@@ -3526,7 +3819,7 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs)
*/
if (current->thread.flags & IA64_THREAD_PM_VALID) {
DBprintk(("setting PM_VALID for [%d]\n", task->pid));
th->flags |= IA64_THREAD_PM_VALID;
thread->flags |= IA64_THREAD_PM_VALID;
}
return 0;
......@@ -3555,9 +3848,9 @@ pfm_context_exit(struct task_struct *task)
LOCK_PSB(psb);
DBprintk(("sampling buffer from [%d] @%p size %ld vma_flag=0x%x\n",
DBprintk(("sampling buffer from [%d] @%p size %ld refcnt=%lu psb_flags=0x%x\n",
task->pid,
psb->psb_hdr, psb->psb_size, psb->psb_flags));
psb->psb_hdr, psb->psb_size, psb->psb_refcnt, psb->psb_flags));
/*
* in the case where we are the last user, we may be able to free
......@@ -3580,7 +3873,7 @@ pfm_context_exit(struct task_struct *task)
*
* See pfm_vm_close() and pfm_cleanup_smpl_buf() for more details.
*/
if ((psb->psb_flags & PFM_PSB_VMA) == 0) {
if ((psb->psb_flags & PSB_HAS_VMA) == 0) {
DBprintk(("cleaning sampling buffer from [%d] @%p size %ld\n",
task->pid,
......@@ -3612,7 +3905,7 @@ pfm_context_exit(struct task_struct *task)
* direct pointer to a task structure thereby bypassing the tasklist.
* We must make sure that, if we have task!= NULL, the target task is still
* present and is identical to the initial task specified
* during pfm_create_context(). It may already be detached from the tasklist but
* during pfm_context_create(). It may already be detached from the tasklist but
* that's okay. Note that it is okay if we miss the deadline and the task scans
* the list for nothing, it will affect performance but not correctness.
* The correctness is ensured by using the ctx_lock which prevents the
......@@ -3761,6 +4054,8 @@ pfm_cleanup_owners(struct task_struct *task)
}
}
read_unlock(&tasklist_lock);
atomic_set(&task->thread.pfm_owners_check, 0);
}
......@@ -3818,6 +4113,8 @@ pfm_cleanup_notifiers(struct task_struct *task)
}
}
read_unlock(&tasklist_lock);
atomic_set(&task->thread.pfm_notifiers_check, 0);
}
static struct irqaction perfmon_irqaction = {
......@@ -3836,6 +4133,12 @@ pfm_pmu_snapshot(void)
if (i >= pmu_conf.num_pmcs) break;
if (PMC_IS_IMPL(i)) reset_pmcs[i] = ia64_get_pmc(i);
}
#ifdef CONFIG_MCKINLEY
/*
* set the 'stupid' enable bit to power the PMU!
*/
reset_pmcs[4] |= 1UL << 23;
#endif
}
/*
......@@ -3903,23 +4206,12 @@ perfmon_init (void)
*/
pfm_pmu_snapshot();
/*
* list the pmc registers used to control monitors
* XXX: unfortunately this information is not provided by PAL
*
* We start with the architected minimum and then refine for each CPU model
*/
pmu_conf.monitor_pmcs[0] = PMM(4)|PMM(5)|PMM(6)|PMM(7);
/*
* architected counters
* setup the register configuration descriptions for the CPU
*/
pmu_conf.counter_pmds[0] |= PMM(4)|PMM(5)|PMM(6)|PMM(7);
pmu_conf.pmc_desc = pmc_desc;
pmu_conf.pmd_desc = pmd_desc;
#ifdef CONFIG_ITANIUM
pmu_conf.monitor_pmcs[0] |= PMM(10)|PMM(11)|PMM(12);
/* Itanium does not add more counters */
#endif
/* we are all set */
pmu_conf.pfm_is_disabled = 0;
......@@ -3928,6 +4220,8 @@ perfmon_init (void)
*/
perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL);
pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
spin_lock_init(&pfm_sessions.pfs_lock);
return 0;
......@@ -3942,7 +4236,6 @@ perfmon_init_percpu (void)
ia64_srlz_d();
}
#else /* !CONFIG_PERFMON */
asmlinkage int
......
......@@ -194,13 +194,15 @@ ia64_save_extra (struct task_struct *task)
pfm_save_regs(task);
# ifdef CONFIG_SMP
if (local_cpu_data->pfm_syst_wide)
if (this_cpu(pfm_syst_wide))
pfm_syst_wide_update_task(task, 0);
# endif
#endif
#ifdef CONFIG_IA32_SUPPORT
if (IS_IA32_PROCESS(ia64_task_regs(task)))
ia32_save_state(task);
#endif
}
void
......@@ -214,12 +216,14 @@ ia64_load_extra (struct task_struct *task)
pfm_load_regs(task);
# ifdef CONFIG_SMP
if (local_cpu_data->pfm_syst_wide) pfm_syst_wide_update_task(task, 1);
if (this_cpu(pfm_syst_wide)) pfm_syst_wide_update_task(task, 1);
# endif
#endif
#ifdef CONFIG_IA32_SUPPORT
if (IS_IA32_PROCESS(ia64_task_regs(task)))
ia32_load_state(task);
#endif
}
/*
......@@ -357,6 +361,8 @@ copy_thread (int nr, unsigned long clone_flags,
*/
atomic_set(&p->thread.pfm_notifiers_check, 0);
atomic_set(&p->thread.pfm_owners_check, 0);
/* clear list of sampling buffer to free for new task */
p->thread.pfm_smpl_buf_list = NULL;
if (current->thread.pfm_context) retval = pfm_inherit(p, child_ptregs);
#endif
......@@ -566,9 +572,8 @@ exit_thread (void)
pfm_flush_regs(current);
/* free debug register resources */
if ((current->thread.flags & IA64_THREAD_DBG_VALID) != 0) {
if (current->thread.flags & IA64_THREAD_DBG_VALID)
pfm_release_debug_registers(current);
}
#endif
}
......
......@@ -559,6 +559,24 @@ cpu_init (void)
*/
identify_cpu(my_cpu_info);
#ifdef CONFIG_MCKINLEY
{
#define FEATURE_SET 16
struct ia64_pal_retval iprv;
if (my_cpu_data->family == 0x1f) {
PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) {
PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
(iprv.v1 | 0x80), FEATURE_SET, 0);
}
}
}
#endif
/* Clear the stack memory reserved for pt_regs: */
memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
......@@ -570,7 +588,7 @@ cpu_init (void)
* shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
* be fine).
*/
ia64_set_dcr( IA64_DCR_DM | IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
ia64_set_dcr( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC);
#ifndef CONFIG_SMP
ia64_set_fpu_owner(0);
......
......@@ -143,9 +143,10 @@ copy_siginfo_to_user (siginfo_t *to, siginfo_t *from)
{
if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
return -EFAULT;
if (from->si_code < 0)
return __copy_to_user(to, from, sizeof(siginfo_t));
else {
if (from->si_code < 0) {
if (__copy_to_user(to, from, sizeof(siginfo_t)))
return -EFAULT;
} else {
int err;
/*
......
......@@ -478,6 +478,17 @@ swiotlb_dma_address (struct scatterlist *sg)
return SG_ENT_PHYS_ADDRESS(sg);
}
/*
* Return whether the given PCI device DMA address mask can be supported properly. For
* example, if your device can only drive the low 24-bits during PCI bus mastering, then
* you would pass 0x00ffffff as the mask to this function.
*/
int
swiotlb_pci_dma_supported (struct pci_dev *hwdev, u64 mask)
{
return 1;
}
EXPORT_SYMBOL(swiotlb_init);
EXPORT_SYMBOL(swiotlb_map_single);
EXPORT_SYMBOL(swiotlb_unmap_single);
......@@ -488,3 +499,4 @@ EXPORT_SYMBOL(swiotlb_sync_sg);
EXPORT_SYMBOL(swiotlb_dma_address);
EXPORT_SYMBOL(swiotlb_alloc_consistent);
EXPORT_SYMBOL(swiotlb_free_consistent);
EXPORT_SYMBOL(swiotlb_pci_dma_supported);
......@@ -251,7 +251,7 @@ put_gate_page (struct page *page, unsigned long address)
void __init
ia64_mmu_init (void *my_cpu_data)
{
unsigned long flags, rid, pta, impl_va_bits;
unsigned long psr, rid, pta, impl_va_bits;
extern void __init tlb_init (void);
#ifdef CONFIG_DISABLE_VHPT
# define VHPT_ENABLE_BIT 0
......@@ -263,7 +263,7 @@ ia64_mmu_init (void *my_cpu_data)
* Set up the kernel identity mapping for regions 6 and 5. The mapping for region
* 7 is setup up in _start().
*/
ia64_clear_ic(flags);
psr = ia64_clear_ic();
rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET);
ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (IA64_GRANULE_SHIFT << 2));
......@@ -277,7 +277,7 @@ ia64_mmu_init (void *my_cpu_data)
ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), PAGE_SHIFT);
__restore_flags(flags);
ia64_set_psr(psr);
ia64_srlz_i();
/*
......
......@@ -18,7 +18,7 @@ EXTRA_CFLAGS := -DLITTLE_ENDIAN
O_TARGET := sgiio.o
ifeq ($(CONFIG_MODULES),y)
export-objs = pciio.o hcl.o
export-objs = pciio.o hcl.o pci_dma.o
endif
obj-y := stubs.o sgi_if.o pciio.o xtalk.o xbow.o xswitch.o klgraph_hack.o \
......
......@@ -4,6 +4,9 @@
* for more details.
*
* Copyright (C) 2000,2002 Silicon Graphics, Inc. All rights reserved.
*
* Routines for PCI DMA mapping. See Documentation/DMA-mapping.txt for
* a description of how these routines should be used.
*/
#include <linux/types.h>
......@@ -12,6 +15,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/module.h>
#include <asm/delay.h>
#include <asm/io.h>
......@@ -46,7 +50,7 @@ get_free_pciio_dmamap(devfs_handle_t pci_bus)
/*
* Darn, we need to get the maps allocated for this bus.
*/
for (i=0; i<MAX_PCI_XWIDGET; i++) {
for (i = 0; i < MAX_PCI_XWIDGET; i++) {
if (busnum_to_pcibr_vhdl[i] == pci_bus) {
sn1_dma_map = busnum_to_atedmamaps[i];
}
......@@ -314,22 +318,18 @@ sn1_pci_map_sg (struct pci_dev *hwdev,
}
/*
* It is a 32bit card and we cannot do Direct mapping.
* Let's 32Bit Page map the request.
* It is a 32 bit card and we cannot do direct mapping,
* so we use an ATE.
*/
dma_map = NULL;
#ifdef CONFIG_IA64_SGI_SN1
dma_map = pciio_dmamap_alloc(vhdl, NULL, sg->length,
PCIIO_BYTE_STREAM | PCIIO_DMA_DATA);
#else
dma_map = pciio_dmamap_alloc(vhdl, NULL, sg->length, PCIIO_DMA_DATA);
#endif
dma_map = 0;
dma_map = pciio_dmamap_alloc(vhdl, NULL, sg->length,
DMA_DATA_FLAGS);
if (!dma_map) {
printk("pci_map_sg: Unable to allocate anymore 32Bits Page Map entries.\n");
printk(KERN_ERR "sn_pci_map_sg: Unable to allocate "
"anymore 32 bit page map entries.\n");
BUG();
}
dma_addr = (dma_addr_t)pciio_dmamap_addr(dma_map, temp_ptr, sg->length);
/* printk("pci_map_sg: dma_map 0x%p Phys Addr 0x%p dma_addr 0x%p\n", dma_map, temp_ptr, dma_addr); */
dma_addr = pciio_dmamap_addr(dma_map, phys_addr, sg->length);
sg->address = (char *)dma_addr;
sg->page = (char *)dma_map;
......@@ -372,7 +372,17 @@ sn1_pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int
}
/*
/**
* sn_pci_map_single - map a single region for DMA
* @hwdev: device to map for
* @ptr: kernel virtual address of the region to map
* @size: size of the region
* @direction: DMA direction
*
* Map the region pointed to by @ptr for DMA and return the
* DMA address. Also known as platform_pci_map_single() by
* the IA64 machvec code.
*
* We map this to the one step pciio_dmamap_trans interface rather than
* the two step pciio_dmamap_alloc/pciio_dmamap_addr because we have
* no way of saving the dmamap handle from the alloc to later free
......
......@@ -75,7 +75,7 @@ sgi_mcatest(void)
if (mcatest == 5) {
int zzzspec(long);
int i;
long flags, dcr, res, val, addr=0xff00000000UL;
long psr, dcr, res, val, addr=0xff00000000UL;
dcr = ia64_get_dcr();
for (i=0; i<5; i++) {
......@@ -87,11 +87,11 @@ sgi_mcatest(void)
ia64_set_dcr(dcr);
res = ia64_sn_probe_io_slot(0xff00000000UL, 8, &val);
printk("zzzspec: probe %ld, 0x%lx\n", res, val);
ia64_clear_ic(flags);
psr = ia64_clear_ic();
ia64_itc(0x2, 0xe00000ff00000000UL,
pte_val(pfn_pte(0xff00000000UL >> PAGE_SHIFT,
__pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW))), _PAGE_SIZE_256M);
local_irq_restore(flags);
ia64_set_psr(psr);
ia64_srlz_i ();
}
......
......@@ -41,7 +41,8 @@ SECTIONS
/* Read-only data */
__gp = ALIGN(16) + 0x200000; /* gp must be 16-byte aligned for exc. table */
. = ALIGN(16);
__gp = . + 0x200000; /* gp must be 16-byte aligned for exc. table */
/* Global data */
_data = .;
......
......@@ -7,9 +7,9 @@
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999 Hewlett-Packard Co.
* Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
* Copyright (C) 1999, 2002 Hewlett-Packard Co.
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
*/
#include <linux/init.h>
#include <linux/string.h>
......@@ -258,8 +258,9 @@ extern void efi_map_pal_code (void);
extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
extern void efi_gettimeofday (struct timeval *tv);
extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */
extern u64 efi_get_iobase (void);
extern u32 efi_mem_type (u64 phys_addr);
extern u64 efi_get_iobase (void);
extern u32 efi_mem_type (unsigned long phys_addr);
extern u64 efi_mem_attributes (unsigned long phys_addr);
/*
* Variable Attributes
......
......@@ -38,7 +38,7 @@
* the way of the program that it will "exec", and that there is
* sufficient room for the brk.
*/
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000)
/*
......
......@@ -2,8 +2,8 @@
#define _ASM_IA64_KREGS_H
/*
* Copyright (C) 2001 Hewlett-Packard Co
* Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 2001-2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
/*
* This file defines the kernel register usage convention used by Linux/ia64.
......@@ -31,4 +31,121 @@
#define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */
#define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- & register-stacks */
/* Processor status register bits: */
#define IA64_PSR_BE_BIT 1
#define IA64_PSR_UP_BIT 2
#define IA64_PSR_AC_BIT 3
#define IA64_PSR_MFL_BIT 4
#define IA64_PSR_MFH_BIT 5
#define IA64_PSR_IC_BIT 13
#define IA64_PSR_I_BIT 14
#define IA64_PSR_PK_BIT 15
#define IA64_PSR_DT_BIT 17
#define IA64_PSR_DFL_BIT 18
#define IA64_PSR_DFH_BIT 19
#define IA64_PSR_SP_BIT 20
#define IA64_PSR_PP_BIT 21
#define IA64_PSR_DI_BIT 22
#define IA64_PSR_SI_BIT 23
#define IA64_PSR_DB_BIT 24
#define IA64_PSR_LP_BIT 25
#define IA64_PSR_TB_BIT 26
#define IA64_PSR_RT_BIT 27
/* The following are not affected by save_flags()/restore_flags(): */
#define IA64_PSR_CPL0_BIT 32
#define IA64_PSR_CPL1_BIT 33
#define IA64_PSR_IS_BIT 34
#define IA64_PSR_MC_BIT 35
#define IA64_PSR_IT_BIT 36
#define IA64_PSR_ID_BIT 37
#define IA64_PSR_DA_BIT 38
#define IA64_PSR_DD_BIT 39
#define IA64_PSR_SS_BIT 40
#define IA64_PSR_RI_BIT 41
#define IA64_PSR_ED_BIT 43
#define IA64_PSR_BN_BIT 44
#define IA64_PSR_BE (__IA64_UL(1) << IA64_PSR_BE_BIT)
#define IA64_PSR_UP (__IA64_UL(1) << IA64_PSR_UP_BIT)
#define IA64_PSR_AC (__IA64_UL(1) << IA64_PSR_AC_BIT)
#define IA64_PSR_MFL (__IA64_UL(1) << IA64_PSR_MFL_BIT)
#define IA64_PSR_MFH (__IA64_UL(1) << IA64_PSR_MFH_BIT)
#define IA64_PSR_IC (__IA64_UL(1) << IA64_PSR_IC_BIT)
#define IA64_PSR_I (__IA64_UL(1) << IA64_PSR_I_BIT)
#define IA64_PSR_PK (__IA64_UL(1) << IA64_PSR_PK_BIT)
#define IA64_PSR_DT (__IA64_UL(1) << IA64_PSR_DT_BIT)
#define IA64_PSR_DFL (__IA64_UL(1) << IA64_PSR_DFL_BIT)
#define IA64_PSR_DFH (__IA64_UL(1) << IA64_PSR_DFH_BIT)
#define IA64_PSR_SP (__IA64_UL(1) << IA64_PSR_SP_BIT)
#define IA64_PSR_PP (__IA64_UL(1) << IA64_PSR_PP_BIT)
#define IA64_PSR_DI (__IA64_UL(1) << IA64_PSR_DI_BIT)
#define IA64_PSR_SI (__IA64_UL(1) << IA64_PSR_SI_BIT)
#define IA64_PSR_DB (__IA64_UL(1) << IA64_PSR_DB_BIT)
#define IA64_PSR_LP (__IA64_UL(1) << IA64_PSR_LP_BIT)
#define IA64_PSR_TB (__IA64_UL(1) << IA64_PSR_TB_BIT)
#define IA64_PSR_RT (__IA64_UL(1) << IA64_PSR_RT_BIT)
/* The following are not affected by save_flags()/restore_flags(): */
#define IA64_PSR_IS (__IA64_UL(1) << IA64_PSR_IS_BIT)
#define IA64_PSR_MC (__IA64_UL(1) << IA64_PSR_MC_BIT)
#define IA64_PSR_IT (__IA64_UL(1) << IA64_PSR_IT_BIT)
#define IA64_PSR_ID (__IA64_UL(1) << IA64_PSR_ID_BIT)
#define IA64_PSR_DA (__IA64_UL(1) << IA64_PSR_DA_BIT)
#define IA64_PSR_DD (__IA64_UL(1) << IA64_PSR_DD_BIT)
#define IA64_PSR_SS (__IA64_UL(1) << IA64_PSR_SS_BIT)
#define IA64_PSR_RI (__IA64_UL(3) << IA64_PSR_RI_BIT)
#define IA64_PSR_ED (__IA64_UL(1) << IA64_PSR_ED_BIT)
#define IA64_PSR_BN (__IA64_UL(1) << IA64_PSR_BN_BIT)
/* User mask bits: */
#define IA64_PSR_UM (IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH)
/* Default Control Register */
#define IA64_DCR_PP_BIT 0 /* privileged performance monitor default */
#define IA64_DCR_BE_BIT 1 /* big-endian default */
#define IA64_DCR_LC_BIT 2 /* ia32 lock-check enable */
#define IA64_DCR_DM_BIT 8 /* defer TLB miss faults */
#define IA64_DCR_DP_BIT 9 /* defer page-not-present faults */
#define IA64_DCR_DK_BIT 10 /* defer key miss faults */
#define IA64_DCR_DX_BIT 11 /* defer key permission faults */
#define IA64_DCR_DR_BIT 12 /* defer access right faults */
#define IA64_DCR_DA_BIT 13 /* defer access bit faults */
#define IA64_DCR_DD_BIT 14 /* defer debug faults */
#define IA64_DCR_PP (__IA64_UL(1) << IA64_DCR_PP_BIT)
#define IA64_DCR_BE (__IA64_UL(1) << IA64_DCR_BE_BIT)
#define IA64_DCR_LC (__IA64_UL(1) << IA64_DCR_LC_BIT)
#define IA64_DCR_DM (__IA64_UL(1) << IA64_DCR_DM_BIT)
#define IA64_DCR_DP (__IA64_UL(1) << IA64_DCR_DP_BIT)
#define IA64_DCR_DK (__IA64_UL(1) << IA64_DCR_DK_BIT)
#define IA64_DCR_DX (__IA64_UL(1) << IA64_DCR_DX_BIT)
#define IA64_DCR_DR (__IA64_UL(1) << IA64_DCR_DR_BIT)
#define IA64_DCR_DA (__IA64_UL(1) << IA64_DCR_DA_BIT)
#define IA64_DCR_DD (__IA64_UL(1) << IA64_DCR_DD_BIT)
/* Interrupt Status Register */
#define IA64_ISR_X_BIT 32 /* execute access */
#define IA64_ISR_W_BIT 33 /* write access */
#define IA64_ISR_R_BIT 34 /* read access */
#define IA64_ISR_NA_BIT 35 /* non-access */
#define IA64_ISR_SP_BIT 36 /* speculative load exception */
#define IA64_ISR_RS_BIT 37 /* mandatory register-stack exception */
#define IA64_ISR_IR_BIT 38 /* invalid register frame exception */
#define IA64_ISR_CODE_MASK 0xf
#define IA64_ISR_X (__IA64_UL(1) << IA64_ISR_X_BIT)
#define IA64_ISR_W (__IA64_UL(1) << IA64_ISR_W_BIT)
#define IA64_ISR_R (__IA64_UL(1) << IA64_ISR_R_BIT)
#define IA64_ISR_NA (__IA64_UL(1) << IA64_ISR_NA_BIT)
#define IA64_ISR_SP (__IA64_UL(1) << IA64_ISR_SP_BIT)
#define IA64_ISR_RS (__IA64_UL(1) << IA64_ISR_RS_BIT)
#define IA64_ISR_IR (__IA64_UL(1) << IA64_ISR_IR_BIT)
/* ISR code field for non-access instructions */
#define IA64_ISR_CODE_TPA 0
#define IA64_ISR_CODE_FC 1
#define IA64_ISR_CODE_PROBE 2
#define IA64_ISR_CODE_TAK 3
#define IA64_ISR_CODE_LFETCH 4
#define IA64_ISR_CODE_PROBEF 5
#endif /* _ASM_IA64_kREGS_H */
......@@ -18,6 +18,7 @@ struct pci_dev;
struct pt_regs;
struct scatterlist;
struct irq_desc;
struct page;
typedef void ia64_mv_setup_t (char **);
typedef void ia64_mv_cpu_init_t(void);
......@@ -45,6 +46,8 @@ typedef void ia64_mv_pci_unmap_sg (struct pci_dev *, struct scatterlist *, int,
typedef void ia64_mv_pci_dma_sync_single (struct pci_dev *, dma_addr_t, size_t, int);
typedef void ia64_mv_pci_dma_sync_sg (struct pci_dev *, struct scatterlist *, int, int);
typedef unsigned long ia64_mv_pci_dma_address (struct scatterlist *);
typedef int ia64_mv_pci_dma_supported (struct pci_dev *, u64);
/*
* WARNING: The legacy I/O space is _architected_. Platforms are
* expected to follow this architected model (see Section 10.7 in the
......@@ -101,6 +104,7 @@ extern void machvec_noop (void);
# define platform_pci_dma_sync_single ia64_mv.sync_single
# define platform_pci_dma_sync_sg ia64_mv.sync_sg
# define platform_pci_dma_address ia64_mv.dma_address
# define platform_pci_dma_supported ia64_mv.dma_supported
# define platform_irq_desc ia64_mv.irq_desc
# define platform_irq_to_vector ia64_mv.irq_to_vector
# define platform_local_vector_to_irq ia64_mv.local_vector_to_irq
......@@ -136,6 +140,7 @@ struct ia64_machine_vector {
ia64_mv_pci_dma_sync_single *sync_single;
ia64_mv_pci_dma_sync_sg *sync_sg;
ia64_mv_pci_dma_address *dma_address;
ia64_mv_pci_dma_supported *dma_supported;
ia64_mv_irq_desc *irq_desc;
ia64_mv_irq_to_vector *irq_to_vector;
ia64_mv_local_vector_to_irq *local_vector_to_irq;
......@@ -172,6 +177,7 @@ struct ia64_machine_vector {
platform_pci_dma_sync_single, \
platform_pci_dma_sync_sg, \
platform_pci_dma_address, \
platform_pci_dma_supported, \
platform_irq_desc, \
platform_irq_to_vector, \
platform_local_vector_to_irq, \
......@@ -269,6 +275,9 @@ extern ia64_mv_pci_dma_address swiotlb_dma_address;
#ifndef platform_pci_dma_address
# define platform_pci_dma_address swiotlb_dma_address
#endif
#ifndef platform_pci_dma_supported
# define platform_pci_dma_supported swiotlb_pci_dma_supported
#endif
#ifndef platform_irq_desc
# define platform_irq_desc __ia64_irq_desc
#endif
......
......@@ -11,6 +11,7 @@ extern ia64_mv_pci_unmap_single sba_unmap_single;
extern ia64_mv_pci_map_sg sba_map_sg;
extern ia64_mv_pci_unmap_sg sba_unmap_sg;
extern ia64_mv_pci_dma_address sba_dma_address;
extern ia64_mv_pci_dma_supported sba_dma_supported;
/*
* This stuff has dual use!
......@@ -33,42 +34,6 @@ extern ia64_mv_pci_dma_address sba_dma_address;
#define platform_pci_dma_sync_single ((ia64_mv_pci_dma_sync_single *) machvec_noop)
#define platform_pci_dma_sync_sg ((ia64_mv_pci_dma_sync_sg *) machvec_noop)
#define platform_pci_dma_address sba_dma_address
#endif /* _ASM_IA64_MACHVEC_HPZX1_h */
#ifndef _ASM_IA64_MACHVEC_HPZX1_h
#define _ASM_IA64_MACHVEC_HPZX1_h
extern ia64_mv_setup_t dig_setup;
extern ia64_mv_pci_fixup_t hpzx1_pci_fixup;
extern ia64_mv_map_nr_t map_nr_dense;
extern ia64_mv_pci_alloc_consistent sba_alloc_consistent;
extern ia64_mv_pci_free_consistent sba_free_consistent;
extern ia64_mv_pci_map_single sba_map_single;
extern ia64_mv_pci_unmap_single sba_unmap_single;
extern ia64_mv_pci_map_sg sba_map_sg;
extern ia64_mv_pci_unmap_sg sba_unmap_sg;
extern ia64_mv_pci_dma_address sba_dma_address;
/*
* This stuff has dual use!
*
* For a generic kernel, the macros are used to initialize the
* platform's machvec structure. When compiling a non-generic kernel,
* the macros are used directly.
*/
#define platform_name "hpzx1"
#define platform_setup dig_setup
#define platform_pci_fixup hpzx1_pci_fixup
#define platform_map_nr map_nr_dense
#define platform_pci_dma_init ((ia64_mv_pci_dma_init *) machvec_noop)
#define platform_pci_alloc_consistent sba_alloc_consistent
#define platform_pci_free_consistent sba_free_consistent
#define platform_pci_map_single sba_map_single
#define platform_pci_unmap_single sba_unmap_single
#define platform_pci_map_sg sba_map_sg
#define platform_pci_unmap_sg sba_unmap_sg
#define platform_pci_dma_sync_single ((ia64_mv_pci_dma_sync_single *) machvec_noop)
#define platform_pci_dma_sync_sg ((ia64_mv_pci_dma_sync_sg *) machvec_noop)
#define platform_pci_dma_address sba_dma_address
#define platform_pci_dma_supported sba_dma_supported
#endif /* _ASM_IA64_MACHVEC_HPZX1_h */
......@@ -106,6 +106,7 @@
#define IA64_SWITCH_STACK_AR_RNAT_OFFSET 536 /* 0x218 */
#define IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET 544 /* 0x220 */
#define IA64_SWITCH_STACK_PR_OFFSET 552 /* 0x228 */
#define IA64_SIGCONTEXT_IP_OFFSET 40 /* 0x28 */
#define IA64_SIGCONTEXT_AR_BSP_OFFSET 72 /* 0x48 */
#define IA64_SIGCONTEXT_AR_FPSR_OFFSET 104 /* 0x68 */
#define IA64_SIGCONTEXT_AR_RNAT_OFFSET 80 /* 0x50 */
......
......@@ -68,20 +68,27 @@ do { \
*/
#define MAP_NR_DENSE(addr) (((unsigned long) (addr) - PAGE_OFFSET) >> PAGE_SHIFT)
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#ifdef CONFIG_IA64_GENERIC
# include <asm/machvec.h>
# define virt_to_page(kaddr) (mem_map + platform_map_nr(kaddr))
# define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT)
# define page_to_pfn(page) ((unsigned long) (page - mem_map))
# define pfn_to_page(pfn) (mem_map + (pfn))
#elif defined (CONFIG_IA64_SGI_SN1)
# ifndef CONFIG_DISCONTIGMEM
# define virt_to_page(kaddr) (mem_map + MAP_NR_DENSE(kaddr))
# define page_to_phys(page) XXX fix me
# define page_to_pfn(page) XXX fix me
# define pfn_to_page(pfn) XXX fix me
# endif
#else
# define virt_to_page(kaddr) (mem_map + MAP_NR_DENSE(kaddr))
# define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT)
# define page_to_pfn(page) ((unsigned long) (page - mem_map))
# define pfn_to_page(pfn) (mem_map + (pfn))
#endif
#define VALID_PAGE(page) ((page - mem_map) < max_mapnr)
typedef union ia64_va {
struct {
......@@ -105,7 +112,7 @@ typedef union ia64_va {
#define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;})
#define REGION_SIZE REGION_NUMBER(1)
#define REGION_KERNEL 7
#define REGION_KERNEL 7
#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; } while (0)
#define PAGE_BUG(page) do { BUG(); } while (0)
......
......@@ -58,6 +58,7 @@ pcibios_penalize_isa_irq (int irq)
#define pci_dma_sync_single platform_pci_dma_sync_single
#define pci_dma_sync_sg platform_pci_dma_sync_sg
#define sg_dma_address platform_pci_dma_address
#define pci_dma_supported platform_pci_dma_supported
/* pci_unmap_{single,page} is not a nop, thus... */
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
......@@ -73,17 +74,6 @@ pcibios_penalize_isa_irq (int irq)
#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
(((PTR)->LEN_NAME) = (VAL))
/*
* Return whether the given PCI device DMA address mask can be supported properly. For
* example, if your device can only drive the low 24-bits during PCI bus mastering, then
* you would pass 0x00ffffff as the mask to this function.
*/
static inline int
pci_dma_supported (struct pci_dev *hwdev, u64 mask)
{
return 1;
}
#define pci_map_page(dev,pg,off,size,dir) \
pci_map_single((dev), page_address(pg) + (off), (size), (dir))
#define pci_unmap_page(dev,dma_addr,size,dir) \
......
......@@ -23,6 +23,7 @@
#define PFM_GET_FEATURES 0x0c
#define PFM_DEBUG 0x0d
#define PFM_UNPROTECT_CONTEXT 0x0e
#define PFM_GET_PMC_RESET_VAL 0x0f
/*
......@@ -173,6 +174,8 @@ extern int pfm_cleanup_smpl_buf(struct task_struct *);
extern void pfm_syst_wide_update_task(struct task_struct *, int);
extern void pfm_ovfl_block_reset (void);
extern int pfm_syst_wide;
#endif /* __KERNEL__ */
#endif /* _ASM_IA64_PERFMON_H */
......@@ -207,20 +207,14 @@ ia64_phys_addr_valid (unsigned long addr)
#define VMALLOC_END (0xa000000000000000 + (1UL << (4*PAGE_SHIFT - 9)))
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
* Conversion functions: convert page frame number (pfn) and a protection value to a page
* table entry (pte).
*/
#define mk_pte(page,pgprot) \
({ \
pte_t __pte; \
\
pte_val(__pte) = (page_to_phys(page)) | pgprot_val(pgprot); \
__pte; \
})
/* This takes a physical page address that is used by the remapping functions */
#define mk_pte_phys(physpage, pgprot) \
({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
#define pfn_pte(pfn, pgprot) \
({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; })
/* Extract pfn from pte. */
#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT)
#define pte_modify(_pte, newprot) \
(__pte((pte_val(_pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)))
......
......@@ -55,123 +55,6 @@
#define MCA_bus 0
#define MCA_bus__is_a_macro /* for versions in ksyms.c */
/* Processor status register bits: */
#define IA64_PSR_BE_BIT 1
#define IA64_PSR_UP_BIT 2
#define IA64_PSR_AC_BIT 3
#define IA64_PSR_MFL_BIT 4
#define IA64_PSR_MFH_BIT 5
#define IA64_PSR_IC_BIT 13
#define IA64_PSR_I_BIT 14
#define IA64_PSR_PK_BIT 15
#define IA64_PSR_DT_BIT 17
#define IA64_PSR_DFL_BIT 18
#define IA64_PSR_DFH_BIT 19
#define IA64_PSR_SP_BIT 20
#define IA64_PSR_PP_BIT 21
#define IA64_PSR_DI_BIT 22
#define IA64_PSR_SI_BIT 23
#define IA64_PSR_DB_BIT 24
#define IA64_PSR_LP_BIT 25
#define IA64_PSR_TB_BIT 26
#define IA64_PSR_RT_BIT 27
/* The following are not affected by save_flags()/restore_flags(): */
#define IA64_PSR_CPL0_BIT 32
#define IA64_PSR_CPL1_BIT 33
#define IA64_PSR_IS_BIT 34
#define IA64_PSR_MC_BIT 35
#define IA64_PSR_IT_BIT 36
#define IA64_PSR_ID_BIT 37
#define IA64_PSR_DA_BIT 38
#define IA64_PSR_DD_BIT 39
#define IA64_PSR_SS_BIT 40
#define IA64_PSR_RI_BIT 41
#define IA64_PSR_ED_BIT 43
#define IA64_PSR_BN_BIT 44
#define IA64_PSR_BE (__IA64_UL(1) << IA64_PSR_BE_BIT)
#define IA64_PSR_UP (__IA64_UL(1) << IA64_PSR_UP_BIT)
#define IA64_PSR_AC (__IA64_UL(1) << IA64_PSR_AC_BIT)
#define IA64_PSR_MFL (__IA64_UL(1) << IA64_PSR_MFL_BIT)
#define IA64_PSR_MFH (__IA64_UL(1) << IA64_PSR_MFH_BIT)
#define IA64_PSR_IC (__IA64_UL(1) << IA64_PSR_IC_BIT)
#define IA64_PSR_I (__IA64_UL(1) << IA64_PSR_I_BIT)
#define IA64_PSR_PK (__IA64_UL(1) << IA64_PSR_PK_BIT)
#define IA64_PSR_DT (__IA64_UL(1) << IA64_PSR_DT_BIT)
#define IA64_PSR_DFL (__IA64_UL(1) << IA64_PSR_DFL_BIT)
#define IA64_PSR_DFH (__IA64_UL(1) << IA64_PSR_DFH_BIT)
#define IA64_PSR_SP (__IA64_UL(1) << IA64_PSR_SP_BIT)
#define IA64_PSR_PP (__IA64_UL(1) << IA64_PSR_PP_BIT)
#define IA64_PSR_DI (__IA64_UL(1) << IA64_PSR_DI_BIT)
#define IA64_PSR_SI (__IA64_UL(1) << IA64_PSR_SI_BIT)
#define IA64_PSR_DB (__IA64_UL(1) << IA64_PSR_DB_BIT)
#define IA64_PSR_LP (__IA64_UL(1) << IA64_PSR_LP_BIT)
#define IA64_PSR_TB (__IA64_UL(1) << IA64_PSR_TB_BIT)
#define IA64_PSR_RT (__IA64_UL(1) << IA64_PSR_RT_BIT)
/* The following are not affected by save_flags()/restore_flags(): */
#define IA64_PSR_IS (__IA64_UL(1) << IA64_PSR_IS_BIT)
#define IA64_PSR_MC (__IA64_UL(1) << IA64_PSR_MC_BIT)
#define IA64_PSR_IT (__IA64_UL(1) << IA64_PSR_IT_BIT)
#define IA64_PSR_ID (__IA64_UL(1) << IA64_PSR_ID_BIT)
#define IA64_PSR_DA (__IA64_UL(1) << IA64_PSR_DA_BIT)
#define IA64_PSR_DD (__IA64_UL(1) << IA64_PSR_DD_BIT)
#define IA64_PSR_SS (__IA64_UL(1) << IA64_PSR_SS_BIT)
#define IA64_PSR_RI (__IA64_UL(3) << IA64_PSR_RI_BIT)
#define IA64_PSR_ED (__IA64_UL(1) << IA64_PSR_ED_BIT)
#define IA64_PSR_BN (__IA64_UL(1) << IA64_PSR_BN_BIT)
/* User mask bits: */
#define IA64_PSR_UM (IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH)
/* Default Control Register */
#define IA64_DCR_PP_BIT 0 /* privileged performance monitor default */
#define IA64_DCR_BE_BIT 1 /* big-endian default */
#define IA64_DCR_LC_BIT 2 /* ia32 lock-check enable */
#define IA64_DCR_DM_BIT 8 /* defer TLB miss faults */
#define IA64_DCR_DP_BIT 9 /* defer page-not-present faults */
#define IA64_DCR_DK_BIT 10 /* defer key miss faults */
#define IA64_DCR_DX_BIT 11 /* defer key permission faults */
#define IA64_DCR_DR_BIT 12 /* defer access right faults */
#define IA64_DCR_DA_BIT 13 /* defer access bit faults */
#define IA64_DCR_DD_BIT 14 /* defer debug faults */
#define IA64_DCR_PP (__IA64_UL(1) << IA64_DCR_PP_BIT)
#define IA64_DCR_BE (__IA64_UL(1) << IA64_DCR_BE_BIT)
#define IA64_DCR_LC (__IA64_UL(1) << IA64_DCR_LC_BIT)
#define IA64_DCR_DM (__IA64_UL(1) << IA64_DCR_DM_BIT)
#define IA64_DCR_DP (__IA64_UL(1) << IA64_DCR_DP_BIT)
#define IA64_DCR_DK (__IA64_UL(1) << IA64_DCR_DK_BIT)
#define IA64_DCR_DX (__IA64_UL(1) << IA64_DCR_DX_BIT)
#define IA64_DCR_DR (__IA64_UL(1) << IA64_DCR_DR_BIT)
#define IA64_DCR_DA (__IA64_UL(1) << IA64_DCR_DA_BIT)
#define IA64_DCR_DD (__IA64_UL(1) << IA64_DCR_DD_BIT)
/* Interrupt Status Register */
#define IA64_ISR_X_BIT 32 /* execute access */
#define IA64_ISR_W_BIT 33 /* write access */
#define IA64_ISR_R_BIT 34 /* read access */
#define IA64_ISR_NA_BIT 35 /* non-access */
#define IA64_ISR_SP_BIT 36 /* speculative load exception */
#define IA64_ISR_RS_BIT 37 /* mandatory register-stack exception */
#define IA64_ISR_IR_BIT 38 /* invalid register frame exception */
#define IA64_ISR_CODE_MASK 0xf
#define IA64_ISR_X (__IA64_UL(1) << IA64_ISR_X_BIT)
#define IA64_ISR_W (__IA64_UL(1) << IA64_ISR_W_BIT)
#define IA64_ISR_R (__IA64_UL(1) << IA64_ISR_R_BIT)
#define IA64_ISR_NA (__IA64_UL(1) << IA64_ISR_NA_BIT)
#define IA64_ISR_SP (__IA64_UL(1) << IA64_ISR_SP_BIT)
#define IA64_ISR_RS (__IA64_UL(1) << IA64_ISR_RS_BIT)
#define IA64_ISR_IR (__IA64_UL(1) << IA64_ISR_IR_BIT)
/* ISR code field for non-access instructions */
#define IA64_ISR_CODE_TPA 0
#define IA64_ISR_CODE_FC 1
#define IA64_ISR_CODE_PROBE 2
#define IA64_ISR_CODE_TAK 3
#define IA64_ISR_CODE_LFETCH 4
#define IA64_ISR_CODE_PROBEF 5
#define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */
#define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */
#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */
......@@ -290,8 +173,6 @@ extern struct cpuinfo_ia64 {
__u64 ipi_count;
__u64 prof_counter;
__u64 prof_multiplier;
__u32 pfm_syst_wide;
__u32 pfm_dcr_pp;
#endif
} cpu_info __per_cpu_data;
......@@ -632,14 +513,22 @@ ia64_invala (void)
asm volatile ("invala" ::: "memory");
}
static inline __u64
ia64_clear_ic (void)
{
__u64 psr;
asm volatile ("mov %0=psr;; rsm psr.i | psr.ic;; srlz.i;;" : "=r"(psr) :: "memory");
return psr;
}
/*
* Save the processor status flags in FLAGS and then clear the interrupt collection and
* interrupt enable bits. Don't trigger any mandatory RSE references while this bit is
* off!
* Restore the psr.
*/
#define ia64_clear_ic(flags) \
asm volatile ("mov %0=psr;; rsm psr.i | psr.ic;; srlz.i;;" \
: "=r"(flags) :: "memory");
static inline void
ia64_set_psr (__u64 psr)
{
asm volatile (";; mov psr.l=%0;; srlz.d" :: "r" (psr) : "memory");
}
/*
* Insert a translation into an instruction and/or data translation
......
......@@ -14,6 +14,7 @@
*/
#include <linux/config.h>
#include <asm/kregs.h>
#include <asm/page.h>
#define KERNEL_START (PAGE_OFFSET + 68*1024*1024)
......@@ -30,7 +31,7 @@ struct pci_vector_struct {
__u16 bus; /* PCI Bus number */
__u32 pci_id; /* ACPI split 16 bits device, 16 bits function (see section 6.1.1) */
__u8 pin; /* PCI PIN (0 = A, 1 = B, 2 = C, 3 = D) */
__u8 irq; /* IRQ assigned */
__u32 irq; /* IRQ assigned */
};
extern struct ia64_boot_param {
......@@ -135,16 +136,21 @@ do { \
} \
} while (0)
# define local_irq_restore(x) \
do { \
unsigned long ip, old_psr, psr = (x); \
\
__asm__ __volatile__ (";;mov %0=psr; mov psr.l=%1;; srlz.d" \
: "=&r" (old_psr) : "r" (psr) : "memory"); \
if ((old_psr & (1UL << 14)) && !(psr & (1UL << 14))) { \
__asm__ ("mov %0=ip" : "=r"(ip)); \
last_cli_ip = ip; \
} \
# define local_irq_restore(x) \
do { \
unsigned long ip, old_psr, psr = (x); \
\
__asm__ __volatile__ ("mov %0=psr;" \
"cmp.ne p6,p7=%1,r0;;" \
"(p6) ssm psr.i;" \
"(p7) rsm psr.i;;" \
"srlz.d" \
: "=&r" (old_psr) : "r"((psr) & IA64_PSR_I) \
: "p6", "p7", "memory"); \
if ((old_psr & IA64_PSR_I) && !(psr & IA64_PSR_I)) { \
__asm__ ("mov %0=ip" : "=r"(ip)); \
last_cli_ip = ip; \
} \
} while (0)
#else /* !CONFIG_IA64_DEBUG_IRQ */
......@@ -153,8 +159,12 @@ do { \
: "=r" (x) :: "memory")
# define local_irq_disable() __asm__ __volatile__ (";; rsm psr.i;;" ::: "memory")
/* (potentially) setting psr.i requires data serialization: */
# define local_irq_restore(x) __asm__ __volatile__ (";; mov psr.l=%0;; srlz.d" \
:: "r" (x) : "memory")
# define local_irq_restore(x) __asm__ __volatile__ ("cmp.ne p6,p7=%0,r0;;" \
"(p6) ssm psr.i;" \
"(p7) rsm psr.i;;" \
"srlz.d" \
:: "r"((x) & IA64_PSR_I) \
: "p6", "p7", "memory")
#endif /* !CONFIG_IA64_DEBUG_IRQ */
#define local_irq_enable() __asm__ __volatile__ (";; ssm psr.i;; srlz.d" ::: "memory")
......
......@@ -222,6 +222,7 @@
#define __NR_futex 1230
#define __NR_sched_setaffinity 1231
#define __NR_sched_getaffinity 1232
#define __NR_security 1233
#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment