Commit 4786b4ee authored by Linus Torvalds

Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6: (27 commits)
  [IA64] kdump: Add crash_save_vmcoreinfo for INIT
  [IA64] Fix NUMA configuration issue
  [IA64] Itanium Spec updates
  [IA64] Untangle sync_icache_dcache() page size determination
  [IA64] arch/ia64/kernel/: use time_* macros
  [IA64] remove redundant display of free swap space in show_mem()
  [IA64] make IOMMU respect the segment boundary limits
  [IA64] kprobes: kprobe-booster for ia64
  [IA64] fix getpid and set_tid_address fast system calls for pid namespaces
  [IA64] Replace explicit jiffies tests with time_* macros.
  [IA64] use goto to jump out do/while_each_thread
  [IA64] Fix unlock ordering in smp_callin
  [IA64] pgd_offset() constfication.
  [IA64] kdump: crash.c coding style fix
  [IA64] kdump: add kdump_on_fatal_mca
  [IA64] Minimize per_cpu reservations.
  [IA64] Correct pernodesize calculation.
  [IA64] Kernel parameter for max number of concurrent global TLB purges
  [IA64] Multiple outstanding ptc.g instruction support
  [IA64] Implement smp_call_function_mask for ia64
  ...
parents 253ba4e7 71b264f8
......@@ -1362,6 +1362,10 @@ and is between 256 and 4096 characters. It is defined in the file
nowb [ARM]
nptcg= [IA64] Override max number of concurrent global TLB
purges which is reported from either PAL_VM_SUMMARY or
SAL PALO.
numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
one of ['zone', 'node', 'default'] can be specified
This can be set from sysctl after boot.
......
......@@ -283,6 +283,17 @@ config FORCE_MAX_ZONEORDER
default "17" if HUGETLB_PAGE
default "11"
config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
default n
help
Select this option to enable more accurate task and CPU time
accounting. This is done by reading a CPU counter on each
kernel entry and exit and on transitions within the kernel
between system, softirq and hardirq state, so there is a
small performance impact.
If in doubt, say N here.
config SMP
bool "Symmetric multi-processing support"
help
......@@ -611,6 +622,9 @@ config IRQ_PER_CPU
bool
default y
config IOMMU_HELPER
def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC)
source "arch/ia64/hp/sim/Kconfig"
source "arch/ia64/Kconfig.debug"
......
......@@ -35,6 +35,7 @@
#include <linux/nodemask.h>
#include <linux/bitops.h> /* hweight64() */
#include <linux/crash_dump.h>
#include <linux/iommu-helper.h>
#include <asm/delay.h> /* ia64_get_itc() */
#include <asm/io.h>
......@@ -460,6 +461,13 @@ get_iovp_order (unsigned long size)
return order;
}
/*
 * ptr_to_pide - translate a position in the resource bitmap to a bit index.
 * @ioc: IO MMU structure; ioc->res_map is the start of the bitmap
 * @res_ptr: address of a word inside ioc->res_map
 * @bitshiftcnt: bit offset within the word at @res_ptr
 *
 * The byte distance between @res_ptr and the start of the map is converted
 * to a bit address (eight bits per byte) and @bitshiftcnt is added on top.
 */
static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
				 unsigned int bitshiftcnt)
{
	unsigned long byte_off;

	byte_off = (unsigned long)res_ptr - (unsigned long)ioc->res_map;

	return (byte_off << 3) + bitshiftcnt;
}
/**
* sba_search_bitmap - find free space in IO PDIR resource bitmap
* @ioc: IO MMU structure which owns the pdir we are interested in.
......@@ -471,15 +479,25 @@ get_iovp_order (unsigned long size)
* Cool perf optimization: search for log2(size) bits at a time.
*/
static SBA_INLINE unsigned long
sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
sba_search_bitmap(struct ioc *ioc, struct device *dev,
unsigned long bits_wanted, int use_hint)
{
unsigned long *res_ptr;
unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
unsigned long flags, pide = ~0UL;
unsigned long flags, pide = ~0UL, tpide;
unsigned long boundary_size;
unsigned long shift;
int ret;
ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
ASSERT(res_ptr < res_end);
boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
BUG_ON(ioc->ibase & ~iovp_mask);
shift = ioc->ibase >> iovp_shift;
spin_lock_irqsave(&ioc->res_lock, flags);
/* Allow caller to force a search through the entire resource space */
......@@ -504,9 +522,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
if (likely(*res_ptr != ~0UL)) {
bitshiftcnt = ffz(*res_ptr);
*res_ptr |= (1UL << bitshiftcnt);
pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
pide <<= 3; /* convert to bit address */
pide += bitshiftcnt;
pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
ioc->res_bitshift = bitshiftcnt + bits_wanted;
goto found_it;
}
......@@ -535,11 +551,13 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr);
ASSERT(0 != mask);
for (; mask ; mask <<= o, bitshiftcnt += o) {
if(0 == ((*res_ptr) & mask)) {
tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
ret = iommu_is_span_boundary(tpide, bits_wanted,
shift,
boundary_size);
if ((0 == ((*res_ptr) & mask)) && !ret) {
*res_ptr |= mask; /* mark resources busy! */
pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
pide <<= 3; /* convert to bit address */
pide += bitshiftcnt;
pide = tpide;
ioc->res_bitshift = bitshiftcnt + bits_wanted;
goto found_it;
}
......@@ -560,6 +578,11 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
end = res_end - qwords;
for (; res_ptr < end; res_ptr++) {
tpide = ptr_to_pide(ioc, res_ptr, 0);
ret = iommu_is_span_boundary(tpide, bits_wanted,
shift, boundary_size);
if (ret)
goto next_ptr;
for (i = 0 ; i < qwords ; i++) {
if (res_ptr[i] != 0)
goto next_ptr;
......@@ -572,8 +595,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
res_ptr[i] = ~0UL;
res_ptr[i] |= RESMAP_MASK(bits);
pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
pide <<= 3; /* convert to bit address */
pide = tpide;
res_ptr += qwords;
ioc->res_bitshift = bits;
goto found_it;
......@@ -605,7 +627,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
* resource bit map.
*/
static int
sba_alloc_range(struct ioc *ioc, size_t size)
sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
{
unsigned int pages_needed = size >> iovp_shift;
#ifdef PDIR_SEARCH_TIMING
......@@ -622,9 +644,9 @@ sba_alloc_range(struct ioc *ioc, size_t size)
/*
** "seek and ye shall find"...praying never hurts either...
*/
pide = sba_search_bitmap(ioc, pages_needed, 1);
pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
if (unlikely(pide >= (ioc->res_size << 3))) {
pide = sba_search_bitmap(ioc, pages_needed, 0);
pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
if (unlikely(pide >= (ioc->res_size << 3))) {
#if DELAYED_RESOURCE_CNT > 0
unsigned long flags;
......@@ -653,7 +675,7 @@ sba_alloc_range(struct ioc *ioc, size_t size)
}
spin_unlock_irqrestore(&ioc->saved_lock, flags);
pide = sba_search_bitmap(ioc, pages_needed, 0);
pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
if (unlikely(pide >= (ioc->res_size << 3)))
panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
ioc->ioc_hpa);
......@@ -936,7 +958,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
pide = sba_alloc_range(ioc, size);
pide = sba_alloc_range(ioc, dev, size);
iovp = (dma_addr_t) pide << iovp_shift;
......@@ -1373,7 +1395,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
ASSERT(dma_len <= DMA_CHUNK_SIZE);
dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
| (sba_alloc_range(ioc, dma_len) << iovp_shift)
| (sba_alloc_range(ioc, dev, dma_len) << iovp_shift)
| dma_offset);
n_mappings++;
}
......
......@@ -30,7 +30,19 @@ struct elf_siginfo
int si_errno; /* errno */
};
#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* Hacks are here since the types of compat_timeval (= pair of s32) and the
* ia64-native timeval (= pair of s64) are not compatible; at least one file,
* arch/ia64/ia32/../../../fs/binfmt_elf.c, would get compiler warnings on its
* use of cputime_to_timeval(), which is usually an alias of jiffies_to_timeval().
*/
#define cputime_to_timeval(a,b) \
do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0)
#else
#define jiffies_to_timeval(a,b) \
do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0)
#endif
struct elf_prstatus
{
......
......@@ -38,6 +38,7 @@
#include <linux/eventpoll.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/regset.h>
#include <linux/stat.h>
#include <linux/ipc.h>
#include <linux/capability.h>
......@@ -2387,16 +2388,45 @@ get_free_idx (void)
return -ESRCH;
}
static void set_tls_desc(struct task_struct *p, int idx,
const struct ia32_user_desc *info, int n)
{
struct thread_struct *t = &p->thread;
struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
int cpu;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
while (n-- > 0) {
if (LDT_empty(info)) {
desc->a = 0;
desc->b = 0;
} else {
desc->a = LDT_entry_a(info);
desc->b = LDT_entry_b(info);
}
++info;
++desc;
}
if (t == &current->thread)
load_TLS(t, cpu);
put_cpu();
}
/*
* Set a given TLS descriptor:
*/
asmlinkage int
sys32_set_thread_area (struct ia32_user_desc __user *u_info)
{
struct thread_struct *t = &current->thread;
struct ia32_user_desc info;
struct desc_struct *desc;
int cpu, idx;
int idx;
if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT;
......@@ -2416,18 +2446,7 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
cpu = smp_processor_id();
if (LDT_empty(&info)) {
desc->a = 0;
desc->b = 0;
} else {
desc->a = LDT_entry_a(&info);
desc->b = LDT_entry_b(&info);
}
load_TLS(t, cpu);
set_tls_desc(current, idx, &info, 1);
return 0;
}
......@@ -2451,6 +2470,20 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
static void fill_user_desc(struct ia32_user_desc *info, int idx,
const struct desc_struct *desc)
{
info->entry_number = idx;
info->base_addr = GET_BASE(desc);
info->limit = GET_LIMIT(desc);
info->seg_32bit = GET_32BIT(desc);
info->contents = GET_CONTENTS(desc);
info->read_exec_only = !GET_WRITABLE(desc);
info->limit_in_pages = GET_LIMIT_PAGES(desc);
info->seg_not_present = !GET_PRESENT(desc);
info->useable = GET_USEABLE(desc);
}
asmlinkage int
sys32_get_thread_area (struct ia32_user_desc __user *u_info)
{
......@@ -2464,22 +2497,588 @@ sys32_get_thread_area (struct ia32_user_desc __user *u_info)
return -EINVAL;
desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
info.entry_number = idx;
info.base_addr = GET_BASE(desc);
info.limit = GET_LIMIT(desc);
info.seg_32bit = GET_32BIT(desc);
info.contents = GET_CONTENTS(desc);
info.read_exec_only = !GET_WRITABLE(desc);
info.limit_in_pages = GET_LIMIT_PAGES(desc);
info.seg_not_present = !GET_PRESENT(desc);
info.useable = GET_USEABLE(desc);
fill_user_desc(&info, idx, desc);
if (copy_to_user(u_info, &info, sizeof(info)))
return -EFAULT;
return 0;
}
/*
 * Destination of a regset read: a kernel buffer and a user-space buffer.
 * NOTE(review): presumably only one of the two is non-NULL per request --
 * confirm against the regset core.
 */
struct regset_get {
void *kbuf;
void __user *ubuf;
};
/*
 * Source of a regset write: read-only counterparts of the two buffers in
 * struct regset_get.
 */
struct regset_set {
const void *kbuf;
const void __user *ubuf;
};
/*
 * Argument bundle handed through the unwinder callback (see
 * do_regset_call()) to the do_*_get()/do_*_set() workers.
 */
struct regset_getset {
/* task whose registers are accessed */
struct task_struct *target;
const struct user_regset *regset;
/* buffers; "get" and "set" differ only in constness */
union {
struct regset_get get;
struct regset_set set;
} u;
/* current byte offset into the regset and bytes still to transfer */
unsigned int pos;
unsigned int count;
/* result returned to the caller; starts at 0 in do_regset_call() */
int ret;
};
static void getfpreg(struct task_struct *task, int regno, int *val)
{
switch (regno / sizeof(int)) {
case 0:
*val = task->thread.fcr & 0xffff;
break;
case 1:
*val = task->thread.fsr & 0xffff;
break;
case 2:
*val = (task->thread.fsr>>16) & 0xffff;
break;
case 3:
*val = task->thread.fir;
break;
case 4:
*val = (task->thread.fir>>32) & 0xffff;
break;
case 5:
*val = task->thread.fdr;
break;
case 6:
*val = (task->thread.fdr >> 32) & 0xffff;
break;
}
}
static void setfpreg(struct task_struct *task, int regno, int val)
{
switch (regno / sizeof(int)) {
case 0:
task->thread.fcr = (task->thread.fcr & (~0x1f3f))
| (val & 0x1f3f);
break;
case 1:
task->thread.fsr = (task->thread.fsr & (~0xffff)) | val;
break;
case 2:
task->thread.fsr = (task->thread.fsr & (~0xffff0000))
| (val << 16);
break;
case 3:
task->thread.fir = (task->thread.fir & (~0xffffffff)) | val;
break;
case 5:
task->thread.fdr = (task->thread.fdr & (~0xffffffff)) | val;
break;
}
}
/*
 * access_fpreg_ia32 - copy one ia32 FP stack register in or out.
 * @regno: register number relative to the top of stack
 * @reg: caller buffer of sizeof(struct _fpreg_ia32) bytes
 * @pt: pt_regs; physical registers 0-3 are taken from pt->f8 onwards
 * @sw: switch_stack; physical registers 4-7 are taken from sw->f12 onwards
 * @tos: top-of-stack index added to @regno (wrapping at 8)
 * @write: non-zero copies @reg into the register, zero copies it out
 *
 * The physical index is validated before any pointer is formed, so a
 * negative or too large value cannot address memory outside the two
 * register groups.  Valid indexes are 0..7 inclusive.
 */
static void access_fpreg_ia32(int regno, void *reg,
			      struct pt_regs *pt, struct switch_stack *sw,
			      int tos, int write)
{
	void *f;

	regno += tos;
	if (regno >= 8)
		regno -= 8;

	if (regno < 0 || regno > 7) {
		printk(KERN_ERR "regno must be between 0 and 7\n");
		return;
	}

	if (regno < 4)
		f = &pt->f8 + regno;
	else
		f = &sw->f12 + (regno - 4);

	if (write)
		memcpy(f, reg, sizeof(struct _fpreg_ia32));
	else
		memcpy(reg, f, sizeof(struct _fpreg_ia32));
}
/*
 * do_fpregs_get - unwinder callback that reads the i387 regset.
 * @info: unwind frame info; unwound to the user frame before use
 * @arg: struct regset_getset describing the request and receiving ->ret
 *
 * The first 7 ints come from getfpreg(); the rest of struct
 * ia32_user_i387_struct is the FP register stack, fetched one
 * struct _fpreg_ia32 at a time through access_fpreg_ia32().
 *
 * buf is staged so that the byte for regset offset X lives at buf[X] for
 * the environment words and at buf[X - 7 * sizeof(int)] for the register
 * stack, which is what user_regset_copyout() expects from its start_pos
 * argument.  buf is cleared up front and the register range is rounded up
 * so that a request ending inside a register is served from real register
 * contents rather than from stale stack bytes.
 */
static void do_fpregs_get(struct unw_frame_info *info, void *arg)
{
	struct regset_getset *dst = arg;
	struct task_struct *task = dst->target;
	struct pt_regs *pt;
	int start, end, tos;
	char buf[80];

	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
		return;

	memset(buf, 0, sizeof(buf));

	if (dst->pos < 7 * sizeof(int)) {
		end = min((dst->pos + dst->count),
			  (unsigned int)(7 * sizeof(int)));
		for (start = dst->pos; start < end; start += sizeof(int))
			getfpreg(task, start, (int *)(buf + start));
		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
				&dst->u.get.kbuf, &dst->u.get.ubuf, buf,
				0, 7 * sizeof(int));
		if (dst->ret || dst->count == 0)
			return;
	}

	if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
		pt = task_pt_regs(task);
		tos = (task->thread.fsr >> 11) & 7;
		end = min(dst->pos + dst->count,
			  (unsigned int)(sizeof(struct ia32_user_i387_struct)));
		start = (dst->pos - 7 * sizeof(int)) /
			sizeof(struct _fpreg_ia32);
		/* Round up: include a register that is only partly requested. */
		end = (end - 7 * sizeof(int) + sizeof(struct _fpreg_ia32) - 1) /
			sizeof(struct _fpreg_ia32);
		/* There are eight stack registers; never stage more than that. */
		if (end > 8)
			end = 8;
		for (; start < end; start++)
			access_fpreg_ia32(start,
					  (struct _fpreg_ia32 *)buf + start,
					  pt, info->sw, tos, 0);
		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
				&dst->u.get.kbuf, &dst->u.get.ubuf,
				buf, 7 * sizeof(int),
				sizeof(struct ia32_user_i387_struct));
	}
}
/*
 * do_fpregs_set - unwinder callback that writes the i387 regset.
 * @info: unwind frame info; unwound to the user frame before use
 * @arg: struct regset_getset describing the request and receiving ->ret
 *
 * Mirrors do_fpregs_get(): the first 7 ints are applied through
 * setfpreg(), the remainder through access_fpreg_ia32() in write mode.
 */
static void do_fpregs_set(struct unw_frame_info *info, void *arg)
{
struct regset_getset *dst = arg;
struct task_struct *task = dst->target;
struct pt_regs *pt;
char buf[80];
int end, start, tos;
if (dst->count == 0 || unw_unwind_to_user(info) < 0)
return;
/* Environment words: stage them at buf[offset], then apply one by one. */
if (dst->pos < 7 * sizeof(int)) {
/* remember where this request started; copyin advances dst->pos */
start = dst->pos;
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf, buf,
0, 7 * sizeof(int));
if (dst->ret)
return;
for (; start < dst->pos; start += sizeof(int))
setfpreg(task, start, *((int *)(buf + start)));
if (dst->count == 0)
return;
}
/* FP register stack: buf[0] now corresponds to offset 7 * sizeof(int). */
if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
/* index of the first register touched (rounded down) */
start = (dst->pos - 7 * sizeof(int)) /
sizeof(struct _fpreg_ia32);
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf,
buf, 7 * sizeof(int),
sizeof(struct ia32_user_i387_struct));
if (dst->ret)
return;
pt = task_pt_regs(task);
/* top-of-stack field taken from bits 11-13 of fsr */
tos = (task->thread.fsr >> 11) & 7;
/* index one past the last register completely covered (rounded down) */
end = (dst->pos - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
/*
 * NOTE(review): when the request starts or ends inside a register, the
 * rounding above either drops the trailing bytes or writes a register
 * from a slot whose leading bytes were never filled in buf -- confirm
 * whether callers can issue such unaligned requests.
 */
for (; start < end; start++)
access_fpreg_ia32(start,
(struct _fpreg_ia32 *)buf + start,
pt, info->sw, tos, 1);
if (dst->count == 0)
return;
}
}
#define OFFSET(member) ((int)(offsetof(struct ia32_user_fxsr_struct, member)))
static void getfpxreg(struct task_struct *task, int start, int end, char *buf)
{
int min_val;
min_val = min(end, OFFSET(fop));
while (start < min_val) {
if (start == OFFSET(cwd))
*((short *)buf) = task->thread.fcr & 0xffff;
else if (start == OFFSET(swd))
*((short *)buf) = task->thread.fsr & 0xffff;
else if (start == OFFSET(twd))
*((short *)buf) = (task->thread.fsr>>16) & 0xffff;
buf += 2;
start += 2;
}
/* skip fop element */
if (start == OFFSET(fop)) {
start += 2;
buf += 2;
}
while (start < end) {
if (start == OFFSET(fip))
*((int *)buf) = task->thread.fir;
else if (start == OFFSET(fcs))
*((int *)buf) = (task->thread.fir>>32) & 0xffff;
else if (start == OFFSET(foo))
*((int *)buf) = task->thread.fdr;
else if (start == OFFSET(fos))
*((int *)buf) = (task->thread.fdr>>32) & 0xffff;
else if (start == OFFSET(mxcsr))
*((int *)buf) = ((task->thread.fcr>>32) & 0xff80)
| ((task->thread.fsr>>32) & 0x3f);
buf += 4;
start += 4;
}
}
static void setfpxreg(struct task_struct *task, int start, int end, char *buf)
{
int min_val, num32;
short num;
unsigned long num64;
min_val = min(end, OFFSET(fop));
while (start < min_val) {
num = *((short *)buf);
if (start == OFFSET(cwd)) {
task->thread.fcr = (task->thread.fcr & (~0x1f3f))
| (num & 0x1f3f);
} else if (start == OFFSET(swd)) {
task->thread.fsr = (task->thread.fsr & (~0xffff)) | num;
} else if (start == OFFSET(twd)) {
task->thread.fsr = (task->thread.fsr & (~0xffff0000))
| (((int)num) << 16);
}
buf += 2;
start += 2;
}
/* skip fop element */
if (start == OFFSET(fop)) {
start += 2;
buf += 2;
}
while (start < end) {
num32 = *((int *)buf);
if (start == OFFSET(fip))
task->thread.fir = (task->thread.fir & (~0xffffffff))
| num32;
else if (start == OFFSET(foo))
task->thread.fdr = (task->thread.fdr & (~0xffffffff))
| num32;
else if (start == OFFSET(mxcsr)) {
num64 = num32 & 0xff10;
task->thread.fcr = (task->thread.fcr &
(~0xff1000000000UL)) | (num64<<32);
num64 = num32 & 0x3f;
task->thread.fsr = (task->thread.fsr &
(~0x3f00000000UL)) | (num64<<32);
}
buf += 4;
start += 4;
}
}
static void do_fpxregs_get(struct unw_frame_info *info, void *arg)
{
struct regset_getset *dst = arg;
struct task_struct *task = dst->target;
struct pt_regs *pt;
char buf[128];
int start, end, tos;
if (dst->count == 0 || unw_unwind_to_user(info) < 0)
return;
if (dst->pos < OFFSET(st_space[0])) {
end = min(dst->pos + dst->count, (unsigned int)32);
getfpxreg(task, dst->pos, end, buf);
dst->ret = user_regset_copyout(&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf, buf,
0, OFFSET(st_space[0]));
if (dst->ret || dst->count == 0)
return;
}
if (dst->pos < OFFSET(xmm_space[0])) {
pt = task_pt_regs(task);
tos = (task->thread.fsr >> 11) & 7;
end = min(dst->pos + dst->count,
(unsigned int)OFFSET(xmm_space[0]));
start = (dst->pos - OFFSET(st_space[0])) / 16;
end = (end - OFFSET(st_space[0])) / 16;
for (; start < end; start++)
access_fpreg_ia32(start, buf + 16 * start, pt,
info->sw, tos, 0);
dst->ret = user_regset_copyout(&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf,
buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
if (dst->ret || dst->count == 0)
return;
}
if (dst->pos < OFFSET(padding[0]))
dst->ret = user_regset_copyout(&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf,
&info->sw->f16, OFFSET(xmm_space[0]),
OFFSET(padding[0]));
}
static void do_fpxregs_set(struct unw_frame_info *info, void *arg)
{
struct regset_getset *dst = arg;
struct task_struct *task = dst->target;
char buf[128];
int start, end;
if (dst->count == 0 || unw_unwind_to_user(info) < 0)
return;
if (dst->pos < OFFSET(st_space[0])) {
start = dst->pos;
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf,
buf, 0, OFFSET(st_space[0]));
if (dst->ret)
return;
setfpxreg(task, start, dst->pos, buf);
if (dst->count == 0)
return;
}
if (dst->pos < OFFSET(xmm_space[0])) {
struct pt_regs *pt;
int tos;
pt = task_pt_regs(task);
tos = (task->thread.fsr >> 11) & 7;
start = (dst->pos - OFFSET(st_space[0])) / 16;
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf,
buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
if (dst->ret)
return;
end = (dst->pos - OFFSET(st_space[0])) / 16;
for (; start < end; start++)
access_fpreg_ia32(start, buf + 16 * start, pt, info->sw,
tos, 1);
if (dst->count == 0)
return;
}
if (dst->pos < OFFSET(padding[0]))
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf,
&info->sw->f16, OFFSET(xmm_space[0]),
OFFSET(padding[0]));
}
#undef OFFSET
/*
 * do_regset_call - run a regset worker with unwind information for @target.
 * @call: worker taking the unwind frame info and a struct regset_getset
 * @target: task whose registers are accessed
 * @regset, @pos, @count, @kbuf, @ubuf: request parameters, passed to the
 *	worker through the struct regset_getset
 *
 * For the current task the worker is invoked through unw_init_running();
 * for any other task a frame info is initialised with
 * unw_init_from_blocked_task() and the worker is called directly.
 * Returns the ->ret value left behind by the worker (0 if it set none).
 */
static int do_regset_call(void (*call)(struct unw_frame_info *, void *),
			  struct task_struct *target,
			  const struct user_regset *regset,
			  unsigned int pos, unsigned int count,
			  const void *kbuf, const void __user *ubuf)
{
	struct unw_frame_info ufi;
	struct regset_getset req;

	memset(&req, 0, sizeof(req));
	req.target = target;
	req.regset = regset;
	req.pos = pos;
	req.count = count;
	req.u.set.kbuf = kbuf;
	req.u.set.ubuf = ubuf;

	if (target == current) {
		unw_init_running(call, &req);
		return req.ret;
	}

	memset(&ufi, 0, sizeof(ufi));
	unw_init_from_blocked_task(&ufi, target);
	(*call)(&ufi, &req);

	return req.ret;
}
/*
 * Regset ->get handler for the i387 register set (see ia32_regsets[]):
 * forwards the request to do_fpregs_get() through do_regset_call() and
 * returns its result.
 */
static int ia32_fpregs_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
return do_regset_call(do_fpregs_get, target, regset, pos, count,
kbuf, ubuf);
}
/*
 * Regset ->set handler for the i387 register set (see ia32_regsets[]):
 * forwards the request to do_fpregs_set() through do_regset_call() and
 * returns its result.
 */
static int ia32_fpregs_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
return do_regset_call(do_fpregs_set, target, regset, pos, count,
kbuf, ubuf);
}
/*
 * Regset ->get handler for the FXSR register set (see ia32_regsets[]):
 * forwards the request to do_fpxregs_get() through do_regset_call() and
 * returns its result.
 */
static int ia32_fpxregs_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
return do_regset_call(do_fpxregs_get, target, regset, pos, count,
kbuf, ubuf);
}
/*
 * Regset ->set handler for the FXSR register set (see ia32_regsets[]):
 * forwards the request to do_fpxregs_set() through do_regset_call() and
 * returns its result.
 */
static int ia32_fpxregs_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
return do_regset_call(do_fpxregs_set, target, regset, pos, count,
kbuf, ubuf);
}
/*
 * ia32_genregs_get - regset ->get handler for the general registers.
 * @target: task to read from
 * @regset: unused here
 * @pos: byte offset of the first register
 * @count: number of bytes to produce, consumed in 4-byte steps
 * @kbuf: kernel destination, or NULL to use @ubuf
 * @ubuf: user destination, written with __put_user()
 *
 * Each register is fetched with getreg(target, pos).  Returns 0, or
 * -EFAULT when a store to user space fails.
 */
static int ia32_genregs_get(struct task_struct *target,
			    const struct user_regset *regset,
			    unsigned int pos, unsigned int count,
			    void *kbuf, void __user *ubuf)
{
	if (kbuf) {
		u32 *out = kbuf;

		for (; count > 0; pos += 4, count -= 4)
			*out++ = getreg(target, pos);
		return 0;
	}

	{
		u32 __user *uout = ubuf;

		for (; count > 0; pos += 4, count -= 4) {
			if (__put_user(getreg(target, pos), uout++))
				return -EFAULT;
		}
	}

	return 0;
}
/*
 * ia32_genregs_set - regset ->set handler for the general registers.
 * @target: task to modify
 * @regset: unused here
 * @pos: byte offset of the first register
 * @count: number of bytes to consume, in 4-byte steps
 * @kbuf: kernel source, or NULL to use @ubuf
 * @ubuf: user source, read with __get_user()
 *
 * Each value is applied with putreg(target, pos, value).  Returns 0, or
 * the __get_user() error of the first failing user read; registers
 * written before the failure stay written.
 */
static int ia32_genregs_set(struct task_struct *target,
			    const struct user_regset *regset,
			    unsigned int pos, unsigned int count,
			    const void *kbuf, const void __user *ubuf)
{
	if (kbuf) {
		const u32 *in = kbuf;

		for (; count > 0; pos += 4, count -= 4)
			putreg(target, pos, *in++);
		return 0;
	}

	{
		const u32 __user *uin = ubuf;
		u32 val;
		int err;

		for (; count > 0; pos += 4, count -= 4) {
			err = __get_user(val, uin++);
			if (err)
				return err;
			putreg(target, pos, val);
		}
	}

	return 0;
}
/*
 * ia32_tls_active - regset ->active handler for the TLS descriptors.
 *
 * Scans target->thread.tls_array from the last slot downwards and returns
 * the index of the highest non-empty slot plus one, i.e. 0 when every
 * slot is empty.
 */
static int ia32_tls_active(struct task_struct *target,
			   const struct user_regset *regset)
{
	struct thread_struct *t = &target->thread;
	int n;

	for (n = GDT_ENTRY_TLS_ENTRIES; n > 0; n--) {
		if (!desc_empty(&t->tls_array[n - 1]))
			break;
	}

	return n;
}
/*
 * ia32_tls_get - regset ->get handler for the TLS descriptors.
 * @target: task whose thread.tls_array is read
 * @regset: unused here
 * @pos: byte offset; must be a multiple of sizeof(struct ia32_user_desc)
 * @count: byte count; must be a multiple of sizeof(struct ia32_user_desc)
 * @kbuf: kernel destination, or NULL to use @ubuf
 * @ubuf: user destination
 *
 * Returns 0 on success, -EINVAL for a misaligned or out-of-range request
 * and -EFAULT when copying to user space fails.
 *
 * The range check covers @pos + @count, not just @pos, so a request can
 * never walk past the end of tls_array.  Each user descriptor is cleared
 * before it is filled so that bytes not assigned by fill_user_desc()
 * cannot expose stale memory to the reader.
 */
static int ia32_tls_get(struct task_struct *target,
		const struct user_regset *regset, unsigned int pos,
		unsigned int count, void *kbuf, void __user *ubuf)
{
	const unsigned int total =
		GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc);
	const struct desc_struct *tls;

	if (pos > total || count > total - pos ||
	    (pos % sizeof(struct ia32_user_desc)) != 0 ||
	    (count % sizeof(struct ia32_user_desc)) != 0)
		return -EINVAL;

	pos /= sizeof(struct ia32_user_desc);
	count /= sizeof(struct ia32_user_desc);

	tls = &target->thread.tls_array[pos];

	if (kbuf) {
		struct ia32_user_desc *info = kbuf;

		while (count-- > 0) {
			memset(info, 0, sizeof(*info));
			fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++,
				       tls++);
		}
	} else {
		struct ia32_user_desc __user *u_info = ubuf;

		while (count-- > 0) {
			struct ia32_user_desc info;

			memset(&info, 0, sizeof(info));
			fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++);
			if (__copy_to_user(u_info++, &info, sizeof(info)))
				return -EFAULT;
		}
	}

	return 0;
}
/*
 * ia32_tls_set - regset ->set handler for the TLS descriptors.
 * @target: task whose thread.tls_array is written
 * @regset: unused here
 * @pos: byte offset; must be a multiple of sizeof(struct ia32_user_desc)
 * @count: byte count; must be a multiple of sizeof(struct ia32_user_desc)
 * @kbuf: kernel source, or NULL to use @ubuf
 * @ubuf: user source
 *
 * Returns 0 on success, -EINVAL for a misaligned or out-of-range request
 * and -EFAULT when copying from user space fails.
 *
 * The range check covers @pos + @count.  Without it a large @count would
 * overrun the on-stack infobuf during the user copy and let
 * set_tls_desc() write past the end of tls_array.
 */
static int ia32_tls_set(struct task_struct *target,
		const struct user_regset *regset, unsigned int pos,
		unsigned int count, const void *kbuf, const void __user *ubuf)
{
	struct ia32_user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
	const unsigned int total =
		GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc);
	const struct ia32_user_desc *info;

	if (pos > total || count > total - pos ||
	    (pos % sizeof(struct ia32_user_desc)) != 0 ||
	    (count % sizeof(struct ia32_user_desc)) != 0)
		return -EINVAL;

	if (kbuf)
		info = kbuf;
	else if (__copy_from_user(infobuf, ubuf, count))
		return -EFAULT;
	else
		info = infobuf;

	set_tls_desc(target,
		     GDT_ENTRY_TLS_MIN + (pos / sizeof(struct ia32_user_desc)),
		     info, count / sizeof(struct ia32_user_desc));

	return 0;
}
/*
* This should match arch/i386/kernel/ptrace.c:native_regsets.
* XXX ioperm? vm86?
*/
/*
 * Register sets exported for ia32 tasks.  Each entry names the core-dump
 * note type, the number and size of its elements, and the handlers
 * defined in this file.
 */
static const struct user_regset ia32_regsets[] = {
/* general registers, accessed as 4-byte words */
{
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_regs_struct32)/4,
.size = 4, .align = 4,
.get = ia32_genregs_get, .set = ia32_genregs_set
},
/* i387 state (struct ia32_user_i387_struct), accessed as 4-byte words */
{
.core_note_type = NT_PRFPREG,
.n = sizeof(struct ia32_user_i387_struct) / 4,
.size = 4, .align = 4,
.get = ia32_fpregs_get, .set = ia32_fpregs_set
},
/* FXSR state (struct ia32_user_fxsr_struct), accessed as 4-byte words */
{
.core_note_type = NT_PRXFPREG,
.n = sizeof(struct ia32_user_fxsr_struct) / 4,
.size = 4, .align = 4,
.get = ia32_fpxregs_get, .set = ia32_fpxregs_set
},
/* TLS descriptors, one struct ia32_user_desc per element */
{
.core_note_type = NT_386_TLS,
.n = GDT_ENTRY_TLS_ENTRIES,
.bias = GDT_ENTRY_TLS_MIN,
.size = sizeof(struct ia32_user_desc),
.align = sizeof(struct ia32_user_desc),
.active = ia32_tls_active,
.get = ia32_tls_get, .set = ia32_tls_set,
},
};
/*
 * Regset view that publishes ia32_regsets[] under the name "i386" with
 * ELF machine type EM_386.
 */
const struct user_regset_view user_ia32_view = {
.name = "i386", .e_machine = EM_386,
.regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets)
};
long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
__u32 len_low, __u32 len_high, int advice)
{
......
......@@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
static struct acpi_table_slit __initdata *slit_table;
cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
{
......@@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
(pa->apic_id << 8) | (pa->local_sapic_eid);
/* nid should be overridden as logical node id later */
node_cpuid[srat_num_cpus].nid = pxm;
cpu_set(srat_num_cpus, early_cpu_possible_map);
srat_num_cpus++;
}
......@@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void)
}
/* set logical node id in cpu structure */
for (i = 0; i < srat_num_cpus; i++)
for_each_possible_early_cpu(i)
node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
printk(KERN_INFO "Number of logical nodes in system = %d\n",
......
......@@ -7,6 +7,7 @@
#define ASM_OFFSETS_C 1
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/clocksource.h>
#include <asm-ia64/processor.h>
......@@ -34,17 +35,29 @@ void foo(void)
DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
BUILD_BUG_ON(sizeof(struct upid) != 32);
DEFINE(IA64_UPID_SHIFT, 5);
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
#endif
BLANK();
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
......
......@@ -24,6 +24,7 @@ int kdump_status[NR_CPUS];
static atomic_t kdump_cpu_frozen;
atomic_t kdump_in_progress;
static int kdump_on_init = 1;
static int kdump_on_fatal_mca = 1;
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
......@@ -118,6 +119,7 @@ machine_crash_shutdown(struct pt_regs *pt)
static void
machine_kdump_on_init(void)
{
crash_save_vmcoreinfo();
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
......@@ -148,7 +150,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
if (!kdump_on_init)
if (!kdump_on_init && !kdump_on_fatal_mca)
return NOTIFY_DONE;
if (!ia64_kimage) {
......@@ -173,32 +175,38 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
return NOTIFY_DONE;
switch (val) {
case DIE_INIT_MONARCH_PROCESS:
case DIE_INIT_MONARCH_PROCESS:
if (kdump_on_init) {
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
break;
case DIE_INIT_MONARCH_LEAVE:
}
break;
case DIE_INIT_MONARCH_LEAVE:
if (kdump_on_init)
machine_kdump_on_init();
break;
case DIE_INIT_SLAVE_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_RENDZVOUS_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_MONARCH_LEAVE:
/* die_register->signr indicate if MCA is recoverable */
if (!args->signr)
machine_kdump_on_init();
break;
break;
case DIE_INIT_SLAVE_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_RENDZVOUS_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_MONARCH_LEAVE:
/* die_register->signr indicate if MCA is recoverable */
if (kdump_on_fatal_mca && !args->signr) {
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
machine_kdump_on_init();
}
break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
static ctl_table kdump_on_init_table[] = {
static ctl_table kdump_ctl_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
......@@ -207,6 +215,14 @@ static ctl_table kdump_on_init_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_fatal_mca",
.data = &kdump_on_fatal_mca,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
......@@ -215,7 +231,7 @@ static ctl_table sys_table[] = {
.ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = kdump_on_init_table,
.child = kdump_ctl_table,
},
{ .ctl_name = 0 }
};
......
......@@ -37,6 +37,7 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mca.h>
#include <asm/tlbflush.h>
#define EFI_DEBUG 0
......@@ -403,6 +404,41 @@ efi_get_pal_addr (void)
return NULL;
}
/*
 * palo_checksum - sum @length bytes of @buffer modulo 256.
 *
 * handle_palo() treats a table as valid when this sum is zero.
 */
static u8 __init palo_checksum(u8 *buffer, u32 length)
{
	u8 total = 0;
	u32 i;

	for (i = 0; i < length; i++)
		total = (u8) (total + buffer[i]);

	return total;
}
/*
* Parse and handle PALO table which is published at:
* http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
*/
/*
 * handle_palo - validate the PALO table and apply its TLB purge limit.
 * @palo_phys: physical address of the table
 *
 * The table is accepted only if its signature matches PALO_SIG and the
 * byte sum over palo->length bytes is zero.  On success max_tlb_purges is
 * passed to setup_ptcg_sem() tagged NPTCG_FROM_PALO; otherwise a message
 * is logged and nothing is changed.
 */
static void __init handle_palo(unsigned long palo_phys)
{
	struct palo_table *palo = __va(palo_phys);

	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1) != 0) {
		printk(KERN_INFO "PALO signature incorrect.\n");
		return;
	}

	if (palo_checksum((u8 *)palo, palo->length) != 0) {
		printk(KERN_INFO "PALO checksum incorrect.\n");
		return;
	}

	setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
}
void
efi_map_pal_code (void)
{
......@@ -432,6 +468,7 @@ efi_init (void)
u64 efi_desc_size;
char *cp, vendor[100] = "unknown";
int i;
unsigned long palo_phys;
/*
* It's too early to be able to use the standard kernel command line
......@@ -496,6 +533,8 @@ efi_init (void)
efi.hcdp = EFI_INVALID_TABLE_ADDR;
efi.uga = EFI_INVALID_TABLE_ADDR;
palo_phys = EFI_INVALID_TABLE_ADDR;
for (i = 0; i < (int) efi.systab->nr_tables; i++) {
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
efi.mps = config_tables[i].table;
......@@ -515,10 +554,17 @@ efi_init (void)
} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
efi.hcdp = config_tables[i].table;
printk(" HCDP=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid,
PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
palo_phys = config_tables[i].table;
printk(" PALO=0x%lx", config_tables[i].table);
}
}
printk("\n");
if (palo_phys != EFI_INVALID_TABLE_ADDR)
handle_palo(palo_phys);
runtime = __va(efi.systab->runtime);
efi.get_time = phys_get_time;
efi.set_time = phys_set_time;
......
......@@ -710,6 +710,16 @@ ENTRY(ia64_leave_syscall)
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
.work_processed_syscall:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
adds r2=PT(LOADRS)+16,r12
(pUStk) mov.m r22=ar.itc // fetch time at leave
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
;;
(p6) ld4 r31=[r18] // load current_thread_info()->flags
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
adds r3=PT(AR_BSPSTORE)+16,r12 // deferred
;;
#else
adds r2=PT(LOADRS)+16,r12
adds r3=PT(AR_BSPSTORE)+16,r12
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
......@@ -718,6 +728,7 @@ ENTRY(ia64_leave_syscall)
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
nop.i 0
;;
#endif
mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
......@@ -737,12 +748,21 @@ ENTRY(ia64_leave_syscall)
ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
#else
mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
#endif
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop 0
......@@ -759,7 +779,11 @@ ENTRY(ia64_leave_syscall)
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
(pUStk) st1 [r15]=r17 // M2|3
#else
(pUStk) st1 [r14]=r17 // M2|3
#endif
ld8.fill r13=[r3],16 // M0|1
mov f8=f0 // F clear f8
;;
......@@ -775,12 +799,22 @@ ENTRY(ia64_leave_syscall)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
cover // B add current frame into dirty partition & set cr.ifs
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
mov r19=ar.bsp // M2 get new backing store pointer
st8 [r14]=r22 // M save time at leave
mov f10=f0 // F clear f10
mov r22=r0 // A clear r22
movl r14=__kernel_syscall_via_epc // X
;;
#else
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
nop.m 0
movl r14=__kernel_syscall_via_epc // X
;;
#endif
mov.m ar.csd=r0 // M2 clear ar.csd
mov.m ar.ccv=r0 // M2 clear ar.ccv
mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
......@@ -913,10 +947,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
.pred.rel.mutex pUStk,pKStk
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
(pUStk) mov.m r22=ar.itc // M fetch time at leave
nop.i 0
;;
#else
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop.i 0
nop.i 0
;;
#endif
ld8 r29=[r16],16 // load cr.ipsr
ld8 r28=[r17],16 // load cr.iip
;;
......@@ -938,15 +980,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
#else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
#endif
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
#endif
;;
ld8.fill r14=[r16],16
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
// mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
// mib : mov add br -> mib : ld8 add br
// bbb_ : br nop cover;; mbb_ : mov br cover;;
//
// no one require bsp in r16 if (pKStk) branch is selected.
(pUStk) st8 [r3]=r22 // save time at leave
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
;;
ld8.fill r3=[r16] // deferred
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
mov r16=ar.bsp // get existing backing store pointer
#else
ld8.fill r3=[r16]
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
......@@ -954,6 +1018,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
mov r16=ar.bsp // get existing backing store pointer
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
#endif
/*
* Restore user backing store.
......
......@@ -61,13 +61,29 @@ ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
;;
ld8 r17=[r17] // r17 = current->group_leader
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
add r8=IA64_TASK_TGID_OFFSET,r16
add r17=IA64_TASK_TGIDLINK_OFFSET,r17
;;
and r9=TIF_ALLWORK_MASK,r9
ld4 r8=[r8] // r8 = current->tgid
ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
;;
add r8=IA64_PID_LEVEL_OFFSET,r17
;;
ld4 r8=[r8] // r8 = pid->level
add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
;;
shl r8=r8,IA64_UPID_SHIFT
;;
add r17=r17,r8 // r17 = &pid->numbers[pid->level]
;;
ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
;;
mov r17=0
;;
cmp.ne p8,p0=0,r9
(p8) br.spnt.many fsys_fallback_syscall
......@@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address)
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
add r17=IA64_TASK_TGIDLINK_OFFSET,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
;;
and r9=TIF_ALLWORK_MASK,r9
add r8=IA64_TASK_PID_OFFSET,r16
add r8=IA64_PID_LEVEL_OFFSET,r17
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
;;
ld4 r8=[r8]
ld4 r8=[r8] // r8 = pid->level
add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
;;
shl r8=r8,IA64_UPID_SHIFT
;;
add r17=r17,r8 // r17 = &pid->numbers[pid->level]
;;
ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
;;
cmp.ne p8,p0=0,r9
mov r17=-1
;;
......@@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday)
// Note that instructions are optimized for McKinley. McKinley can
// process two bundles simultaneously and therefore we continuously
// try to feed the CPU two bundles and then a stop.
//
// Additional note that code has changed a lot. Optimization is TBD.
// Comments begin with "?" are maybe outdated.
tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle
mov pr = r30,0xc000 // Set predicates according to function
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
tnat.nz p6,p0 = r31 // guard against Nat argument
(p6) br.cond.spnt.few .fail_einval
movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
;;
ld4 r2 = [r2] // process work pending flags
movl r29 = itc_jitter_data // itc_jitter
add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
ld4 r2 = [r2] // process work pending flags
;;
(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
mov pr = r30,0xc000 // Set predicates according to function
;;
and r2 = TIF_ALLWORK_MASK,r2
(p6) br.cond.spnt.few .fail_einval // ? deferred branch
add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
;;
add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
(p6) br.cond.spnt.many fsys_fallback_syscall
(p6) br.cond.spnt.many fsys_fallback_syscall
;;
// Begin critical section
.time_redo:
......@@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday)
(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
(p13) ld8 r25 = [r19] // get itc_lastcycle value
;; // ? could be removed by moving the last add upward
ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
;;
ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
......@@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday)
EX(.fail_efault, probe.w.fault r31, 3)
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
;;
// ? simulate tbit.nz.or p7,p0 = r28,0
getf.sig r2 = f8
mf
;;
ld4 r10 = [r20] // gtod_lock.sequence
shr.u r2 = r2,r23 // shift by factor
;; // ? overloaded 3 bundles!
;;
add r8 = r8,r2 // Add xtime.nsecs
cmp4.ne p7,p0 = r28,r10
(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
......@@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
;;
mov r8 = r0
(p14) getf.sig r2 = f8
;;
mov r8 = r0
(p14) shr.u r21 = r2, 4
;;
EX(.fail_efault, st8 [r31] = r9)
......@@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
mov.m r30=ar.itc // M get cycle for accounting
#else
nop.m 0
#endif
nop.i 0
;;
mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
......@@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
// mov.m r30=ar.itc is called in advance
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
;;
ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
;;
ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
ld8 r21=[r17] // cumulated utime
sub r22=r19,r18 // stime before leave kernel
;;
st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
sub r18=r30,r19 // elapsed time in user mode
;;
add r20=r20,r22 // sum stime
add r21=r21,r18 // sum utime
;;
st8 [r16]=r20 // update stime
st8 [r17]=r21 // update utime
;;
#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
mov rp=r14 // I0 set the real return addr
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
......
......@@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock)
br.ret.sptk.many rp
END(sched_clock)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
 * cycle_to_cputime: scale a cycle count by this CPU's nsec_per_cyc factor.
 *
 * The value in r32 (the routine allocates one input register) is multiplied
 * by the 64-bit nsec_per_cyc value loaded from cpu_info. The 128-bit product
 * is then shifted right by IA64_NSEC_PER_CYC_SHIFT and the low 64 bits of
 * that result are left in r8 before returning.
 */
GLOBAL_ENTRY(cycle_to_cputime)
alloc r16=ar.pfs,1,0,0,0
addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
;;
ldf8 f8=[r8] // f8 = nsec_per_cyc
;;
setf.sig f9=r32 // f9 = cycle count argument
;;
xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
;;
getf.sig r8=f10 // (5 cyc)
getf.sig r9=f11
;;
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT // r8 = (r9:r8) >> IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
END(cycle_to_cputime)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
GLOBAL_ENTRY(start_kernel_thread)
.prologue
.save rp, r0 // this is the end of the call-chain
......
......@@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
static unsigned char count;
static long last_time;
if (jiffies - last_time > 5*HZ)
if (time_after(jiffies, last_time + 5 * HZ))
count = 0;
if (++count < 5) {
last_time = jiffies;
......
......@@ -805,8 +805,13 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
;;
mov b6=r30 // I0 setup syscall handler branch reg early
#else
nop.i 0
;;
#endif
mov.m r25=ar.unat // M2 (5 cyc)
dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
......@@ -817,7 +822,11 @@ ENTRY(break_fault)
//
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
mov.m r30=ar.itc // M get cycle for accounting
#else
mov b6=r30 // I0 setup syscall handler branch reg early
#endif
cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
......@@ -829,6 +838,30 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B
1:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
// mov.m r30=ar.itc is called in advance, and r13 is current
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
(pKStk) br.cond.spnt .skip_accounting // B unlikely skip
;;
ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp
ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave
;;
ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime
ld8 r21=[r17] // M cumulated utime
sub r22=r19,r18 // A stime before leave
;;
st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp
sub r18=r30,r19 // A elapsed time in user
;;
add r20=r20,r22 // A sum stime
add r21=r21,r18 // A sum utime
;;
st8 [r16]=r20 // M update stime
st8 [r17]=r21 // M update utime
;;
.skip_accounting:
#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
nop 0
bsw.1 // B (6 cyc) regs are saved, switch to bank 1
......@@ -928,6 +961,7 @@ END(interrupt)
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
* - r30: ar.itc for accounting (don't touch)
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
......@@ -1090,6 +1124,41 @@ END(dispatch_illegal_op_fault)
DBG_FAULT(16)
FAULT(16)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
 * There is no particular reason for this code to be here, other than
 * that there happens to be space here that would go unused otherwise.
 * If this fault ever gets "unreserved", simply move the following
 * code to a more suitable spot...
 *
 * account_sys_enter is called from SAVE_MIN* macros if accounting is
 * enabled and if the macro is entered from user mode.
 *
 * With r20 holding the ar.itc value sampled on entry, the routine updates
 * the accounting fields addressed from r13 as follows:
 *   stime += leave - stamp   (kernel time up to the last leave)
 *   utime += r20 - leave     (time since the last leave)
 *   stamp  = r20
 * It uses r16-r19 and r21-r23 as scratch registers.
 */
ENTRY(account_sys_enter)
// mov.m r20=ar.itc is called in advance, and r13 is current
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;;
ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel
;;
ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
ld8 r21=[r17] // cumulated utime
sub r22=r19,r18 // stime before leave kernel
;;
st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp
sub r18=r20,r19 // elapsed time in user mode
;;
add r23=r23,r22 // sum stime
add r21=r21,r18 // sum utime
;;
st8 [r16]=r23 // update stime
st8 [r17]=r21 // update utime
;;
br.ret.sptk.many rp
END(account_sys_enter)
#endif
.org ia64_ivt+0x4400
/////////////////////////////////////////////////////////////////////////////////////////
// 0x4400 Entry 17 (size 64 bundles) Reserved
......
......@@ -78,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = {
{ u, u, u }, /* 1F */
};
/*
 * Build a long-branch bundle: an MLX bundle holding "nop.m 0" followed by a
 * brl that jumps from @from to @to. The bundle is written at @from rounded
 * down to a 16-byte boundary.
 */
static void __kprobes set_brl_inst(void *from, void *to)
{
	/* displacement expressed in 16-byte bundles */
	s64 rel = ((s64) to - (s64) from) >> 4;
	/* upper displacement bits (from bit 20 up), positioned for slot 1 */
	u64 slot1 = ((rel >> 20) & 0x7fffffffff) << 2;
	bundle_t *brl = (bundle_t *) ((u64) from & ~0xf);

	brl->quad0.template = 0x05;	/* [MLX](stop) */
	brl->quad0.slot0 = NOP_M_INST;	/* nop.m 0x0 */

	/* slot 1 is split between the two quads of the bundle */
	brl->quad0.slot1_p0 = slot1;
	brl->quad1.slot1_p1 = slot1 >> (64 - 46);

	/* brl.cond.sptk.many.clr rel<<4 (qp=0) */
	brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
}
/*
* In this function we check to see if the instruction
* is IP relative instruction and update the kprobe
......@@ -496,6 +510,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
}
/*
 * Tell whether the instruction held in @slot of @bundle is a break
 * instruction. Returns 1 if it is, 0 otherwise.
 */
static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
{
	unsigned long inst;
	unsigned int opcode;
	unsigned int tmpl = bundle->quad0.template;

	/* For an MLX bundle probed at slot 1, look at slot 2 instead. */
	if (slot == 1 && bundle_encoding[tmpl][1] == L)
		slot = 2;

	/* Fetch the instruction and its major opcode from the chosen slot. */
	get_kprobe_inst(bundle, slot, &inst, &opcode);

	/*
	 * A break instruction requires:
	 *   Bits 37:40  major opcode to be zero
	 *   Bits 27:32  X6 to be zero
	 *   Bits 32:35  X3 to be zero
	 */
	if (opcode != 0)
		return 0;

	return ((inst >> 27) & 0x1FF) == 0;
}
/*
 * Decide whether the bundle can be boosted. A bundle that may modify IP or
 * may trigger an exception cannot be.
 *
 * Every slot from @slot through slot 2 is checked against the exception
 * tables and for a break instruction; then the bundle template is checked.
 * Returns 1 when boosting is possible, 0 otherwise.
 */
static int __kprobes can_boost(bundle_t *bundle, uint slot,
			       unsigned long bundle_addr)
{
	/* template with its lowest bit dropped */
	unsigned int tmpl = bundle->quad0.template & 0x1e;

	for (;;) {
		/* an exception may occur in this bundle */
		if (search_exception_tables(bundle_addr + slot))
			return 0;
		if (__is_ia64_break_inst(bundle, slot))
			return 0;
		if (++slot >= 3)
			break;
	}

	if (tmpl >= 0x10)	/* including B unit */
		return 0;
	if (tmpl == 0x04)	/* including X unit */
		return 0;
	if (tmpl == 0x06)	/* undefined */
		return 0;

	return 1;
}
/*
 * Set up the long-jump bundle for @p when its bundle is boostable, and
 * clear the boostable flag of any probe registered at a lower address
 * within the same bundle.
 */
static void __kprobes prepare_booster(struct kprobe *p)
{
	unsigned long probe_addr = (unsigned long)p->addr;
	unsigned long bundle_base = probe_addr & ~0xFULL;
	unsigned int slot = probe_addr & 0xf;
	unsigned long cursor;
	struct kprobe *earlier;

	if (can_boost(&p->ainsn.insn[0].bundle, slot, bundle_base)) {
		/* second copied bundle branches to the bundle after the probed one */
		set_brl_inst(&p->ainsn.insn[1].bundle,
			     (bundle_t *)bundle_base + 1);
		p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
	}

	/* disables boosters in previous slots */
	for (cursor = bundle_base; cursor < probe_addr; cursor++) {
		earlier = get_kprobe((void *)cursor);
		if (!earlier)
			continue;
		earlier->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
	}
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
unsigned long addr = (unsigned long) p->addr;
......@@ -530,6 +615,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
prepare_booster(p);
return 0;
}
......@@ -543,7 +630,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
src = &p->opcode.bundle;
flush_icache_range((unsigned long)p->ainsn.insn,
(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
(unsigned long)p->ainsn.insn +
sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
switch (p->ainsn.slot) {
case 0:
dest->quad0.slot0 = src->quad0.slot0;
......@@ -584,13 +673,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
mutex_lock(&kprobe_mutex);
free_insn_slot(p->ainsn.insn, 0);
free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
mutex_unlock(&kprobe_mutex);
}
/*
* We are resuming execution after a single step fault, so the pt_regs
* structure reflects the register state after we executed the instruction
* located in the kprobe (p->ainsn.insn.bundle). We still need to adjust
* located in the kprobe (p->ainsn.insn->bundle). We still need to adjust
* the ip to point back to the original stack address. To set the IP address
* to original stack address, handle the case where we need to fixup the
* relative IP address and/or fixup branch register.
......@@ -607,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
if (slot == 1 && bundle_encoding[template][1] == L)
slot = 2;
if (p->ainsn.inst_flag) {
if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
/* Fix relative IP address */
......@@ -686,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
{
unsigned int slot = ia64_psr(regs)->ri;
unsigned int template, major_opcode;
unsigned long kprobe_inst;
unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
bundle_t bundle;
memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
template = bundle.quad0.template;
/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
if (slot == 1 && bundle_encoding[template][1] == L)
slot++;
/* Get Kprobe probe instruction at given slot*/
get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
/* For break instruction,
* Bits 37:40 Major opcode to be zero
* Bits 27:32 X6 to be zero
* Bits 32:35 X3 to be zero
*/
if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
/* Not a break instruction */
return 0;
}
/* Is a break instruction */
return 1;
return __is_ia64_break_inst(&bundle, slot);
}
static int __kprobes pre_kprobes_handler(struct die_args *args)
......@@ -802,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
return 1;
ss_probe:
#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
ia64_psr(regs)->ri = p->ainsn.slot;
regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
/* turn single stepping off */
ia64_psr(regs)->ss = 0;
reset_current_kprobe();
preempt_enable_no_resched();
return 1;
}
#endif
prepare_ss(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
......
......@@ -69,6 +69,7 @@
* 2007-04-27 Russ Anderson <rja@sgi.com>
* Support multiple cpus going through OS_MCA in the same event.
*/
#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
......@@ -97,6 +98,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/tlb.h>
#include "mca_drv.h"
#include "entry.h"
......@@ -112,6 +114,7 @@ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */
unsigned long __per_cpu_mca[NR_CPUS];
......@@ -293,7 +296,8 @@ static void ia64_mlogbuf_dump_from_init(void)
if (mlogbuf_finished)
return;
if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
if (mlogbuf_timestamp &&
time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
" and the system seems to be messed up.\n");
ia64_mlogbuf_finish(0);
......@@ -1182,6 +1186,49 @@ ia64_wait_for_slaves(int monarch, const char *type)
return;
}
/*
 * mca_insert_tr
 *
 * Purge and re-insert every valid translation register entry recorded for
 * the current CPU in __per_cpu_idtrs, switching the region register for the
 * duration of the insert when the entry was recorded with a different rid.
 *
 * iord: which set to reload; it indexes __per_cpu_idtrs (as iord-1) and is
 *       passed on as the target mask -- 1: itr, 2: dtr.
 */
static void mca_insert_tr(u64 iord)
{
	struct ia64_tr_entry *ent;
	unsigned long saved_psr;
	u64 cur_rr;
	int slot;
	int cpu = smp_processor_id();

	saved_psr = ia64_clear_ic();
	for (slot = IA64_TR_ALLOC_BASE; slot < IA64_TR_ALLOC_MAX; slot++) {
		ent = &__per_cpu_idtrs[cpu][iord-1][slot];

		/* skip entries whose pte has bit 0 clear */
		if (!(ent->pte & 0x1))
			continue;

		/* switch to the recorded region register value if needed */
		cur_rr = ia64_get_rr(ent->ifa);
		if (cur_rr != ent->rr) {
			ia64_set_rr(ent->ifa, ent->rr);
			ia64_srlz_d();
		}

		/* purge first, then insert again into the same slot */
		ia64_ptr(iord, ent->ifa, ent->itir >> 2);
		ia64_srlz_i();
		if (iord & 0x1) {
			ia64_itr(0x1, slot, ent->ifa, ent->pte, ent->itir >> 2);
			ia64_srlz_i();
		}
		if (iord & 0x2) {
			ia64_itr(0x2, slot, ent->ifa, ent->pte, ent->itir >> 2);
			ia64_srlz_i();
		}

		/* put the previous region register value back */
		if (cur_rr != ent->rr) {
			ia64_set_rr(ent->ifa, cur_rr);
			ia64_srlz_d();
		}
	}
	ia64_set_psr(saved_psr);
}
/*
* ia64_mca_handler
*
......@@ -1266,16 +1313,17 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish(1);
#ifdef CONFIG_KEXEC
atomic_set(&kdump_in_progress, 1);
monarch_cpu = -1;
#endif
}
if (__get_cpu_var(ia64_mca_tr_reload)) {
mca_insert_tr(0x1); /*Reload dynamic itrs*/
mca_insert_tr(0x2); /*Reload dynamic dtrs*/
}
if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
== NOTIFY_STOP)
ia64_mca_spin(__func__);
if (atomic_dec_return(&mca_count) > 0) {
int i;
......
......@@ -219,8 +219,13 @@ ia64_reload_tr:
mov r20=IA64_TR_CURRENT_STACK
;;
itr.d dtr[r20]=r16
GET_THIS_PADDR(r2, ia64_mca_tr_reload)
mov r18 = 1
;;
srlz.d
;;
st8 [r2] =r18
;;
done_tlb_purge_and_reload:
......
......@@ -3,6 +3,18 @@
#include "entry.h"
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/* read ar.itc in advance, and use it before leaving bank 0 */
#define ACCOUNT_GET_STAMP \
(pUStk) mov.m r20=ar.itc;
/* call account_sys_enter, but only when the pUStk predicate is set */
#define ACCOUNT_SYS_ENTER \
(pUStk) br.call.spnt rp=account_sys_enter \
;;
#else
/* accounting disabled: both hooks expand to nothing */
#define ACCOUNT_GET_STAMP
#define ACCOUNT_SYS_ENTER
#endif
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
......@@ -122,11 +134,13 @@
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
ACCOUNT_SYS_ENTER \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
......
......@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
for(node=0; node < MAX_NUMNODES; node++)
cpus_clear(node_to_cpu_mask[node]);
for(cpu = 0; cpu < NR_CPUS; ++cpu) {
for_each_possible_early_cpu(cpu) {
node = -1;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
......
......@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
while (offp < (s32 *) end) {
wp = (u64 *) ia64_imva((char *) offp + *offp);
wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
wp[1] = 0x0004000000000200UL;
wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
wp[3] = 0x0084006880000200UL;
wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
wp[1] = 0x0084006880000200UL;
wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
wp[3] = 0x0004000000000200UL;
ia64_fc(wp); ia64_fc(wp + 2);
++offp;
}
......
......@@ -4204,10 +4204,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
do_each_thread (g, t) {
if (t->thread.pfm_context == ctx) {
ret = 0;
break;
goto out;
}
} while_each_thread (g, t);
out:
read_unlock(&tasklist_lock);
DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
......
......@@ -625,42 +625,12 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
do_dump_task_fpu(current, info, arg);
}
/*
 * Copy the general registers of @task into @regs.
 *
 * The running task is handled through unw_init_running(); any other task
 * is unwound from its blocked state first. Always returns 1.
 */
int
dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
{
	struct unw_frame_info frame;

	if (task == current) {
		unw_init_running(do_copy_regs, regs);
		return 1;
	}

	memset(&frame, 0, sizeof(frame));
	unw_init_from_blocked_task(&frame, task);
	do_copy_task_regs(task, &frame, regs);
	return 1;
}
/*
 * Fill @dst with the register set of the running task by invoking
 * do_copy_regs through unw_init_running(). @pt is not used here.
 */
void
ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
{
unw_init_running(do_copy_regs, dst);
}
/*
 * Copy the floating-point registers of @task into @dst.
 *
 * The running task is handled through unw_init_running(); any other task
 * is unwound from its blocked state first. Always returns 1.
 */
int
dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
{
	struct unw_frame_info frame;

	if (task == current) {
		unw_init_running(do_dump_fpu, dst);
		return 1;
	}

	memset(&frame, 0, sizeof(frame));
	unw_init_from_blocked_task(&frame, task);
	do_dump_task_fpu(task, &frame, dst);
	return 1;
}
int
dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
{
......
......@@ -3,6 +3,9 @@
*
* Copyright (C) 1999-2005 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 2006 Intel Co
* 2006-08-12 - IA64 Native Utrace implementation support added by
* Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
*
* Derived from the x86 and Alpha versions.
*/
......@@ -17,6 +20,8 @@
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/signal.h>
#include <linux/regset.h>
#include <linux/elf.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
......@@ -740,25 +745,6 @@ ia64_sync_fph (struct task_struct *task)
psr->dfh = 1;
}
static int
access_fr (struct unw_frame_info *info, int regnum, int hi,
unsigned long *data, int write_access)
{
struct ia64_fpreg fpval;
int ret;
ret = unw_get_fr(info, regnum, &fpval);
if (ret < 0)
return ret;
if (write_access) {
fpval.u.bits[hi] = *data;
ret = unw_set_fr(info, regnum, fpval);
} else
*data = fpval.u.bits[hi];
return ret;
}
/*
* Change the machine-state of CHILD such that it will return via the normal
* kernel exit-path, rather than the syscall-exit path.
......@@ -860,309 +846,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
static int
access_uarea (struct task_struct *child, unsigned long addr,
unsigned long *data, int write_access)
{
unsigned long *ptr, regnum, urbs_end, cfm;
struct switch_stack *sw;
struct pt_regs *pt;
# define pt_reg_addr(pt, reg) ((void *) \
((unsigned long) (pt) \
+ offsetof(struct pt_regs, reg)))
pt = task_pt_regs(child);
sw = (struct switch_stack *) (child->thread.ksp + 16);
if ((addr & 0x7) != 0) {
dprintk("ptrace: unaligned register address 0x%lx\n", addr);
return -1;
}
if (addr < PT_F127 + 16) {
/* accessing fph */
if (write_access)
ia64_sync_fph(child);
else
ia64_flush_fph(child);
ptr = (unsigned long *)
((unsigned long) &child->thread.fph + addr);
} else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
/* scratch registers untouched by kernel (saved in pt_regs) */
ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
} else if (addr >= PT_F12 && addr < PT_F15 + 16) {
/*
* Scratch registers untouched by kernel (saved in
* switch_stack).
*/
ptr = (unsigned long *) ((long) sw
+ (addr - PT_NAT_BITS - 32));
} else if (addr < PT_AR_LC + 8) {
/* preserved state: */
struct unw_frame_info info;
char nat = 0;
int ret;
unw_init_from_blocked_task(&info, child);
if (unw_unwind_to_user(&info) < 0)
return -1;
switch (addr) {
case PT_NAT_BITS:
return access_nat_bits(child, pt, &info,
data, write_access);
case PT_R4: case PT_R5: case PT_R6: case PT_R7:
if (write_access) {
/* read NaT bit first: */
unsigned long dummy;
ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
&dummy, &nat);
if (ret < 0)
return ret;
}
return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
&nat, write_access);
case PT_B1: case PT_B2: case PT_B3:
case PT_B4: case PT_B5:
return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
write_access);
case PT_AR_EC:
return unw_access_ar(&info, UNW_AR_EC, data,
write_access);
case PT_AR_LC:
return unw_access_ar(&info, UNW_AR_LC, data,
write_access);
default:
if (addr >= PT_F2 && addr < PT_F5 + 16)
return access_fr(&info, (addr - PT_F2)/16 + 2,
(addr & 8) != 0, data,
write_access);
else if (addr >= PT_F16 && addr < PT_F31 + 16)
return access_fr(&info,
(addr - PT_F16)/16 + 16,
(addr & 8) != 0,
data, write_access);
else {
dprintk("ptrace: rejecting access to register "
"address 0x%lx\n", addr);
return -1;
}
}
} else if (addr < PT_F9+16) {
/* scratch state */
switch (addr) {
case PT_AR_BSP:
/*
* By convention, we use PT_AR_BSP to refer to
* the end of the user-level backing store.
* Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
* to get the real value of ar.bsp at the time
* the kernel was entered.
*
* Furthermore, when changing the contents of
* PT_AR_BSP (or PT_CFM) while the task is
* blocked in a system call, convert the state
* so that the non-system-call exit
* path is used. This ensures that the proper
* state will be picked up when resuming
* execution. However, it *also* means that
* once we write PT_AR_BSP/PT_CFM, it won't be
* possible to modify the syscall arguments of
* the pending system call any longer. This
* shouldn't be an issue because modifying
* PT_AR_BSP/PT_CFM generally implies that
* we're either abandoning the pending system
* call or that we defer it's re-execution
* (e.g., due to GDB doing an inferior
* function call).
*/
urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
if (write_access) {
if (*data != urbs_end) {
if (in_syscall(pt))
convert_to_non_syscall(child,
pt,
cfm);
/*
* Simulate user-level write
* of ar.bsp:
*/
pt->loadrs = 0;
pt->ar_bspstore = *data;
}
} else
*data = urbs_end;
return 0;
case PT_CFM:
urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
if (write_access) {
if (((cfm ^ *data) & PFM_MASK) != 0) {
if (in_syscall(pt))
convert_to_non_syscall(child,
pt,
cfm);
pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
| (*data & PFM_MASK));
}
} else
*data = cfm;
return 0;
case PT_CR_IPSR:
if (write_access) {
unsigned long tmp = *data;
/* psr.ri==3 is a reserved value: SDM 2:25 */
if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
tmp &= ~IA64_PSR_RI;
pt->cr_ipsr = ((tmp & IPSR_MASK)
| (pt->cr_ipsr & ~IPSR_MASK));
} else
*data = (pt->cr_ipsr & IPSR_MASK);
return 0;
case PT_AR_RSC:
if (write_access)
pt->ar_rsc = *data | (3 << 2); /* force PL3 */
else
*data = pt->ar_rsc;
return 0;
case PT_AR_RNAT:
ptr = pt_reg_addr(pt, ar_rnat);
break;
case PT_R1:
ptr = pt_reg_addr(pt, r1);
break;
case PT_R2: case PT_R3:
ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
break;
case PT_R8: case PT_R9: case PT_R10: case PT_R11:
ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
break;
case PT_R12: case PT_R13:
ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
break;
case PT_R14:
ptr = pt_reg_addr(pt, r14);
break;
case PT_R15:
ptr = pt_reg_addr(pt, r15);
break;
case PT_R16: case PT_R17: case PT_R18: case PT_R19:
case PT_R20: case PT_R21: case PT_R22: case PT_R23:
case PT_R24: case PT_R25: case PT_R26: case PT_R27:
case PT_R28: case PT_R29: case PT_R30: case PT_R31:
ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
break;
case PT_B0:
ptr = pt_reg_addr(pt, b0);
break;
case PT_B6:
ptr = pt_reg_addr(pt, b6);
break;
case PT_B7:
ptr = pt_reg_addr(pt, b7);
break;
case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8:
case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8:
ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
break;
case PT_AR_BSPSTORE:
ptr = pt_reg_addr(pt, ar_bspstore);
break;
case PT_AR_UNAT:
ptr = pt_reg_addr(pt, ar_unat);
break;
case PT_AR_PFS:
ptr = pt_reg_addr(pt, ar_pfs);
break;
case PT_AR_CCV:
ptr = pt_reg_addr(pt, ar_ccv);
break;
case PT_AR_FPSR:
ptr = pt_reg_addr(pt, ar_fpsr);
break;
case PT_CR_IIP:
ptr = pt_reg_addr(pt, cr_iip);
break;
case PT_PR:
ptr = pt_reg_addr(pt, pr);
break;
/* scratch register */
default:
/* disallow accessing anything else... */
dprintk("ptrace: rejecting access to register "
"address 0x%lx\n", addr);
return -1;
}
} else if (addr <= PT_AR_SSD) {
ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
} else {
/* access debug registers */
if (addr >= PT_IBR) {
regnum = (addr - PT_IBR) >> 3;
ptr = &child->thread.ibr[0];
} else {
regnum = (addr - PT_DBR) >> 3;
ptr = &child->thread.dbr[0];
}
if (regnum >= 8) {
dprintk("ptrace: rejecting access to register "
"address 0x%lx\n", addr);
return -1;
}
#ifdef CONFIG_PERFMON
/*
* Check if debug registers are used by perfmon. This
* test must be done once we know that we can do the
* operation, i.e. the arguments are all valid, but
* before we start modifying the state.
*
* Perfmon needs to keep a count of how many processes
* are trying to modify the debug registers for system
* wide monitoring sessions.
*
* We also include read access here, because they may
* cause the PMU-installed debug register state
* (dbr[], ibr[]) to be reset. The two arrays are also
* used by perfmon, but we do not use
* IA64_THREAD_DBG_VALID. The registers are restored
* by the PMU context switch code.
*/
if (pfm_use_debug_registers(child)) return -1;
#endif
if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
child->thread.flags |= IA64_THREAD_DBG_VALID;
memset(child->thread.dbr, 0,
sizeof(child->thread.dbr));
memset(child->thread.ibr, 0,
sizeof(child->thread.ibr));
}
ptr += regnum;
if ((regnum & 1) && write_access) {
/* don't let the user set kernel-level breakpoints: */
*ptr = *data & ~(7UL << 56);
return 0;
}
}
if (write_access)
*ptr = *data;
else
*data = *ptr;
return 0;
}
unsigned long *data, int write_access);
static long
ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
......@@ -1626,3 +1310,892 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
if (test_thread_flag(TIF_RESTORE_RSE))
ia64_sync_krbs();
}
/* Utrace implementation starts here */
/*
 * Destination buffers for a regset read: the data goes to either a
 * kernel buffer or a user buffer.
 * NOTE(review): presumably only one of the two is non-NULL per request,
 * as with the generic user_regset helpers -- confirm.
 */
struct regset_get {
void *kbuf;
void __user *ubuf;
};
/*
 * Source buffers for a regset write.  Same member order as
 * struct regset_get, but const-qualified because the data is only read.
 */
struct regset_set {
const void *kbuf;
const void __user *ubuf;
};
/*
 * Request descriptor handed (as the opaque "arg") to the do_*regs_get()
 * and do_*regs_set() unwind callbacks.
 */
struct regset_getset {
/* task whose registers are accessed */
struct task_struct *target;
/* regset the request was issued against */
const struct user_regset *regset;
/* buffers: the "get" view for reads, the "set" view for writes */
union {
struct regset_get get;
struct regset_set set;
} u;
/* current byte offset within the regset */
unsigned int pos;
/* number of bytes still to transfer */
unsigned int count;
/* status reported back to the caller of the callback */
int ret;
};
/*
 * Read or write one of the general registers r1-r15 of @target.
 *
 * @target:       task whose register is accessed
 * @info:         unwind frame info, used for the preserved registers r4-r7
 * @addr:         byte offset of the register, expressed via ELF_GR_OFFSET()
 * @data:         value to store (write) or location to fill (read)
 * @write_access: non-zero to write *data into the register, zero to read
 *
 * r1-r3 and r8-r15 are accessed directly in the task's pt_regs; r4-r7 go
 * through the unwinder (on a write the current NaT bit is fetched first
 * and passed back unchanged).
 *
 * Returns 0 on success, a negative value from the unwinder on failure,
 * or -1 if @addr does not select a register handled here.
 */
static int
access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
		 unsigned long addr, unsigned long *data, int write_access)
{
	struct pt_regs *pt;
	unsigned long *ptr = NULL;
	int ret;
	char nat = 0;

	pt = task_pt_regs(target);
	switch (addr) {
	case ELF_GR_OFFSET(1):
		ptr = &pt->r1;
		break;
	case ELF_GR_OFFSET(2):
	case ELF_GR_OFFSET(3):
		ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
		break;
	case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
		if (write_access) {
			/* read NaT bit first: */
			unsigned long dummy;

			ret = unw_get_gr(info, addr/8, &dummy, &nat);
			if (ret < 0)
				return ret;
		}
		return unw_access_gr(info, addr/8, data, &nat, write_access);
	case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
		ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
		break;
	case ELF_GR_OFFSET(12):
	case ELF_GR_OFFSET(13):
		ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
		break;
	case ELF_GR_OFFSET(14):
		ptr = &pt->r14;
		break;
	case ELF_GR_OFFSET(15):
		ptr = &pt->r15;
		break;
	default:
		/*
		 * No case matched (e.g. a misaligned offset between two
		 * single-register cases): fail instead of dereferencing
		 * the NULL ptr below.
		 */
		return -1;
	}

	if (write_access)
		*ptr = *data;
	else
		*data = *ptr;
	return 0;
}
/*
 * Read or write one of the branch registers b0-b7 of @target.
 *
 * @target:       task whose register is accessed
 * @info:         unwind frame info, used for b1-b5
 * @addr:         byte offset of the register, expressed via ELF_BR_OFFSET()
 * @data:         value to store (write) or location to fill (read)
 * @write_access: non-zero to write, zero to read
 *
 * b0, b6 and b7 are accessed directly in the task's pt_regs; b1-b5 go
 * through unw_access_br().
 *
 * Returns 0 on success, the unwinder's result for b1-b5, or -1 if @addr
 * does not select a register handled here.
 */
static int
access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
		unsigned long addr, unsigned long *data, int write_access)
{
	struct pt_regs *pt;
	unsigned long *ptr = NULL;

	pt = task_pt_regs(target);
	switch (addr) {
	case ELF_BR_OFFSET(0):
		ptr = &pt->b0;
		break;
	case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
		return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
				     data, write_access);
	case ELF_BR_OFFSET(6):
		ptr = &pt->b6;
		break;
	case ELF_BR_OFFSET(7):
		ptr = &pt->b7;
		break;
	default:
		/*
		 * No case matched (e.g. a misaligned offset): fail instead
		 * of dereferencing the NULL ptr below.
		 */
		return -1;
	}

	if (write_access)
		*ptr = *data;
	else
		*data = *ptr;
	return 0;
}
/*
 * Read or write an application register, ip/cfm/psr, the NaT bits or the
 * predicate register of @target.
 *
 * @target:       task whose register is accessed
 * @info:         unwind frame info, used for ar.lc, ar.ec and the NaT bits
 * @addr:         byte offset of the register (ELF_AR_*_OFFSET,
 *                ELF_CR_*_OFFSET, ELF_CFM_OFFSET, ELF_NAT_OFFSET or
 *                ELF_PR_OFFSET)
 * @data:         value to store (write) or location to fill (read)
 * @write_access: non-zero to write, zero to read
 *
 * Most registers are plain loads/stores on the task's pt_regs.  ar.rsc,
 * ar.bsp, cfm and psr get special treatment (see the comments at the
 * individual cases).
 *
 * Returns 0 on success, the helper's result for the unwinder-backed
 * registers, or -1 if @addr does not select a register handled here.
 */
static int
access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
		unsigned long addr, unsigned long *data, int write_access)
{
	struct pt_regs *pt;
	unsigned long cfm, urbs_end;
	unsigned long *ptr = NULL;

	pt = task_pt_regs(target);
	if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
		switch (addr) {
		case ELF_AR_RSC_OFFSET:
			/* force PL3 */
			if (write_access)
				pt->ar_rsc = *data | (3 << 2);
			else
				*data = pt->ar_rsc;
			return 0;
		case ELF_AR_BSP_OFFSET:
			/*
			 * By convention, we use PT_AR_BSP to refer to
			 * the end of the user-level backing store.
			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
			 * to get the real value of ar.bsp at the time
			 * the kernel was entered.
			 *
			 * Furthermore, when changing the contents of
			 * PT_AR_BSP (or PT_CFM) while the task is
			 * blocked in a system call, convert the state
			 * so that the non-system-call exit
			 * path is used.  This ensures that the proper
			 * state will be picked up when resuming
			 * execution.  However, it *also* means that
			 * once we write PT_AR_BSP/PT_CFM, it won't be
			 * possible to modify the syscall arguments of
			 * the pending system call any longer.  This
			 * shouldn't be an issue because modifying
			 * PT_AR_BSP/PT_CFM generally implies that
			 * we're either abandoning the pending system
			 * call or that we defer its re-execution
			 * (e.g., due to GDB doing an inferior
			 * function call).
			 */
			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
			if (write_access) {
				if (*data != urbs_end) {
					if (in_syscall(pt))
						convert_to_non_syscall(target,
								       pt,
								       cfm);
					/*
					 * Simulate user-level write
					 * of ar.bsp:
					 */
					pt->loadrs = 0;
					pt->ar_bspstore = *data;
				}
			} else
				*data = urbs_end;
			return 0;
		case ELF_AR_BSPSTORE_OFFSET:
			ptr = &pt->ar_bspstore;
			break;
		case ELF_AR_RNAT_OFFSET:
			ptr = &pt->ar_rnat;
			break;
		case ELF_AR_CCV_OFFSET:
			ptr = &pt->ar_ccv;
			break;
		case ELF_AR_UNAT_OFFSET:
			ptr = &pt->ar_unat;
			break;
		case ELF_AR_FPSR_OFFSET:
			ptr = &pt->ar_fpsr;
			break;
		case ELF_AR_PFS_OFFSET:
			ptr = &pt->ar_pfs;
			break;
		case ELF_AR_LC_OFFSET:
			return unw_access_ar(info, UNW_AR_LC, data,
					     write_access);
		case ELF_AR_EC_OFFSET:
			return unw_access_ar(info, UNW_AR_EC, data,
					     write_access);
		case ELF_AR_CSD_OFFSET:
			ptr = &pt->ar_csd;
			break;
		case ELF_AR_SSD_OFFSET:
			ptr = &pt->ar_ssd;
			break;
		default:
			/*
			 * Inside the AR range but not on a known register
			 * (e.g. misaligned): fail rather than fall through
			 * to the NULL ptr dereference below.
			 */
			return -1;
		}
	} else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
		switch (addr) {
		case ELF_CR_IIP_OFFSET:
			ptr = &pt->cr_iip;
			break;
		case ELF_CFM_OFFSET:
			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
			if (write_access) {
				/* only touch state if the PFM bits change */
				if (((cfm ^ *data) & PFM_MASK) != 0) {
					if (in_syscall(pt))
						convert_to_non_syscall(target,
								       pt,
								       cfm);
					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
						      | (*data & PFM_MASK));
				}
			} else
				*data = cfm;
			return 0;
		case ELF_CR_IPSR_OFFSET:
			if (write_access) {
				unsigned long tmp = *data;

				/* psr.ri==3 is a reserved value: SDM 2:25 */
				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
					tmp &= ~IA64_PSR_RI;
				/* only the IPSR_MASK bits are writable */
				pt->cr_ipsr = ((tmp & IPSR_MASK)
					       | (pt->cr_ipsr & ~IPSR_MASK));
			} else
				*data = (pt->cr_ipsr & IPSR_MASK);
			return 0;
		default:
			/* same reasoning as for the AR range above */
			return -1;
		}
	} else if (addr == ELF_NAT_OFFSET)
		return access_nat_bits(target, pt, info,
				       data, write_access);
	else if (addr == ELF_PR_OFFSET)
		ptr = &pt->pr;
	else
		return -1;

	if (write_access)
		*ptr = *data;
	else
		*data = *ptr;
	return 0;
}
/*
 * Dispatch a single-register access to the helper responsible for the
 * register class that @addr falls into: r1-r15, b0-b7, or everything
 * else (handled by access_elf_areg(), which rejects unknown offsets).
 */
static int
access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
	       unsigned long addr, unsigned long *data, int write_access)
{
	int is_gpreg = (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15));
	int is_breg = (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7));

	if (is_gpreg)
		return access_elf_gpreg(target, info, addr, data,
					write_access);
	if (is_breg)
		return access_elf_breg(target, info, addr, data,
				       write_access);
	return access_elf_areg(target, info, addr, data, write_access);
}
/*
 * Unwind callback that serves a read of the general-register regset.
 *
 * @info: unwind frame info; unwound to the user-level frame first
 * @arg:  struct regset_getset describing the request (target task,
 *        byte position, byte count, kernel or user destination buffer)
 *
 * The result is reported through dst->ret; the function itself returns
 * nothing.  The requested byte range is served section by section
 * (r0, r1-r15, r16-r31, nat/pr/b0-b7, ip..ar.ssd).  For sections that
 * need access_elf_reg(), the values are first staged in tmp[] and then
 * copied out with user_regset_copyout().
 *
 * NOTE(review): if unw_unwind_to_user() fails, the function returns
 * without writing dst->ret -- confirm callers preset it appropriately.
 */
void do_gpregs_get(struct unw_frame_info *info, void *arg)
{
struct pt_regs *pt;
struct regset_getset *dst = arg;
/* staging area; the largest staged section holds 15 registers */
elf_greg_t tmp[16];
unsigned int i, index, min_copy;
if (unw_unwind_to_user(info) < 0)
return;
/*
* coredump format:
* r0-r31
* NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
* predicate registers (p0-p63)
* b0-b7
* ip cfm user-mask
* ar.rsc ar.bsp ar.bspstore ar.rnat
* ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
*/
/* Skip r0 */
/* r0 is not stored anywhere: its slot is zero-filled */
if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
&dst->u.get.kbuf,
&dst->u.get.ubuf,
0, ELF_GR_OFFSET(1));
if (dst->ret || dst->count == 0)
return;
}
/* gr1 - gr15 */
if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
/* tmp[] slot of the first requested register in this section */
index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
/* end of the part of the request that lies in this section */
min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
(dst->pos + dst->count) : ELF_GR_OFFSET(16);
for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
index++)
if (access_elf_reg(dst->target, info, i,
&tmp[index], 0) < 0) {
dst->ret = -EIO;
return;
}
dst->ret = user_regset_copyout(&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
if (dst->ret || dst->count == 0)
return;
}
/* r16-r31 */
/* copied straight out of pt_regs, starting at r16; no staging */
if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
pt = task_pt_regs(dst->target);
dst->ret = user_regset_copyout(&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
if (dst->ret || dst->count == 0)
return;
}
/* nat, pr, b0 - b7 */
if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
(dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
index++)
if (access_elf_reg(dst->target, info, i,
&tmp[index], 0) < 0) {
dst->ret = -EIO;
return;
}
dst->ret = user_regset_copyout(&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
if (dst->ret || dst->count == 0)
return;
}
/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
* ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
*/
if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
(dst->pos + dst->count) : ELF_AR_END_OFFSET;
for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
index++)
if (access_elf_reg(dst->target, info, i,
&tmp[index], 0) < 0) {
dst->ret = -EIO;
return;
}
dst->ret = user_regset_copyout(&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
}
}
/*
 * Unwind callback that serves a write to the general-register regset.
 *
 * @info: unwind frame info; unwound to the user-level frame first
 * @arg:  struct regset_getset describing the request (target task,
 *        byte position, byte count, kernel or user source buffer)
 *
 * Mirror image of do_gpregs_get(): for each section the caller's data is
 * first copied into tmp[] with user_regset_copyin(), then each register
 * is written with access_elf_reg().  The loops run from the position
 * saved before the copy up to dst->pos afterwards.
 * NOTE(review): this relies on user_regset_copyin() advancing dst->pos
 * by the amount consumed -- confirm against the regset helpers.
 *
 * The result is reported through dst->ret.
 * NOTE(review): if unw_unwind_to_user() fails, dst->ret is left
 * untouched -- confirm callers preset it appropriately.
 */
void do_gpregs_set(struct unw_frame_info *info, void *arg)
{
struct pt_regs *pt;
struct regset_getset *dst = arg;
/* staging area; the largest staged section holds 15 registers */
elf_greg_t tmp[16];
unsigned int i, index;
if (unw_unwind_to_user(info) < 0)
return;
/* Skip r0 */
/* data supplied for the r0 slot is consumed and discarded */
if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
&dst->u.set.kbuf,
&dst->u.set.ubuf,
0, ELF_GR_OFFSET(1));
if (dst->ret || dst->count == 0)
return;
}
/* gr1-gr15 */
if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
/* remember where this section's data starts before copying in */
i = dst->pos;
index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
if (dst->ret)
return;
for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
if (access_elf_reg(dst->target, info, i,
&tmp[index], 1) < 0) {
dst->ret = -EIO;
return;
}
if (dst->count == 0)
return;
}
/* gr16-gr31 */
/* copied straight into pt_regs, starting at r16; no staging */
if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
pt = task_pt_regs(dst->target);
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
if (dst->ret || dst->count == 0)
return;
}
/* nat, pr, b0 - b7 */
if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
i = dst->pos;
index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
if (dst->ret)
return;
for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
if (access_elf_reg(dst->target, info, i,
&tmp[index], 1) < 0) {
dst->ret = -EIO;
return;
}
if (dst->count == 0)
return;
}
/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
* ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
*/
if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
i = dst->pos;
index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
if (dst->ret)
return;
for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
if (access_elf_reg(dst->target, info, i,
&tmp[index], 1) < 0) {
dst->ret = -EIO;
return;
}
}
}
/*
 * Byte offset of floating-point register number i within the FP regset.
 * The argument is parenthesized so that expressions such as
 * ELF_FP_OFFSET(n + 1) expand with the intended precedence.
 */
#define ELF_FP_OFFSET(i)	((i) * sizeof(elf_fpreg_t))
/*
 * Unwind callback that serves a read of the floating-point regset.
 *
 * @info: unwind frame info; unwound to the user-level frame first
 * @arg:  struct regset_getset describing the request
 *
 * The request is served in three sections: slots 0 and 1 (zero-filled),
 * fr2-fr31 (fetched through the unwinder into tmp[] and then copied
 * out), and everything from slot 32 on (the target's thread.fph, or
 * zeros if IA64_THREAD_FPH_VALID is not set).
 *
 * The result is reported through dst->ret.
 * NOTE(review): if unw_unwind_to_user() fails, dst->ret is left
 * untouched -- confirm callers preset it appropriately.
 */
void do_fpregs_get(struct unw_frame_info *info, void *arg)
{
struct regset_getset *dst = arg;
struct task_struct *task = dst->target;
/* staging area for fr2-fr31 (30 registers) */
elf_fpreg_t tmp[30];
int index, min_copy, i;
if (unw_unwind_to_user(info) < 0)
return;
/* Skip pos 0 and 1 */
if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
&dst->u.get.kbuf,
&dst->u.get.ubuf,
0, ELF_FP_OFFSET(2));
if (dst->count == 0 || dst->ret)
return;
}
/* fr2-fr31 */
if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
/* tmp[] slot of the first requested register */
index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
/* end of the part of the request that lies in this section */
min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
dst->pos + dst->count);
/* whole registers are fetched even if only part is requested */
for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
index++)
if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
&tmp[index])) {
dst->ret = -EIO;
return;
}
dst->ret = user_regset_copyout(&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
if (dst->count == 0 || dst->ret)
return;
}
/* fph */
if (dst->count > 0) {
/* flush before testing the flag and reading thread.fph */
ia64_flush_fph(dst->target);
if (task->thread.flags & IA64_THREAD_FPH_VALID)
dst->ret = user_regset_copyout(
&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf,
&dst->target->thread.fph,
ELF_FP_OFFSET(32), -1);
else
/* Zero fill instead. */
dst->ret = user_regset_copyout_zero(
&dst->pos, &dst->count,
&dst->u.get.kbuf, &dst->u.get.ubuf,
ELF_FP_OFFSET(32), -1);
}
}
/*
 * Unwind callback that serves a write to the floating-point regset.
 *
 * @info: unwind frame info; unwound to the user-level frame first
 * @arg:  struct regset_getset describing the request
 *
 * Data for slots 0 and 1 is consumed and discarded.  Data for fr2-fr31
 * is copied into tmp[] and written register by register with
 * unw_set_fr().  A request that starts or ends in the middle of a
 * 16-byte register is completed with the register's current contents so
 * that only the requested half changes.  Data from slot 32 on is copied
 * into the target's thread.fph.
 *
 * The result is reported through dst->ret.
 * NOTE(review): if unw_unwind_to_user() fails, dst->ret is left
 * untouched -- confirm callers preset it appropriately.
 */
void do_fpregs_set(struct unw_frame_info *info, void *arg)
{
struct regset_getset *dst = arg;
/* fpreg: scratch for merging a partial write; tmp: fr2-fr31 staging */
elf_fpreg_t fpreg, tmp[30];
int index, start, end;
if (unw_unwind_to_user(info) < 0)
return;
/* Skip pos 0 and 1 */
if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
&dst->u.set.kbuf,
&dst->u.set.ubuf,
0, ELF_FP_OFFSET(2));
if (dst->count == 0 || dst->ret)
return;
}
/* fr2-fr31 */
if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
/* byte range of this section's part, taken before the copy-in */
start = dst->pos;
end = min(((unsigned int)ELF_FP_OFFSET(32)),
dst->pos + dst->count);
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
if (dst->ret)
return;
if (start & 0xF) { /* only write high part */
/* keep the register's current low half (bits[0]) */
if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
&fpreg)) {
dst->ret = -EIO;
return;
}
tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
= fpreg.u.bits[0];
/* round down so the loop below writes the whole register */
start &= ~0xFUL;
}
if (end & 0xF) { /* only write low part */
/* keep the register's current high half (bits[1]) */
if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
&fpreg)) {
dst->ret = -EIO;
return;
}
tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
= fpreg.u.bits[1];
/* round up so the loop below includes the last register */
end = (end + 0xF) & ~0xFUL;
}
for ( ; start < end ; start += sizeof(elf_fpreg_t)) {
/* tmp[] starts at fr2, hence the "- 2" */
index = start / sizeof(elf_fpreg_t);
if (unw_set_fr(info, index, tmp[index - 2])) {
dst->ret = -EIO;
return;
}
}
if (dst->ret || dst->count == 0)
return;
}
/* fph */
if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
/* sync before overwriting thread.fph */
ia64_sync_fph(dst->target);
dst->ret = user_regset_copyin(&dst->pos, &dst->count,
&dst->u.set.kbuf,
&dst->u.set.ubuf,
&dst->target->thread.fph,
ELF_FP_OFFSET(32), -1);
}
}
/*
 * Package a regset request into a struct regset_getset and run @call on
 * it with unwind info for @target.  For the current task the callback is
 * invoked via unw_init_running(); for any other task a frame info is
 * initialised with unw_init_from_blocked_task() and the callback is
 * called directly.  Returns the status the callback left in ->ret
 * (0 if it never set one).
 */
static int
do_regset_call(void (*call)(struct unw_frame_info *, void *),
	       struct task_struct *target,
	       const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       const void *kbuf, const void __user *ubuf)
{
	struct regset_getset req = {
		.target	= target,
		.regset	= regset,
		.pos	= pos,
		.count	= count,
		.u.set	= { .kbuf = kbuf, .ubuf = ubuf },
		.ret	= 0,
	};

	if (target != current) {
		struct unw_frame_info frame;

		memset(&frame, 0, sizeof(frame));
		unw_init_from_blocked_task(&frame, target);
		(*call)(&frame, &req);
	} else {
		unw_init_running(call, &req);
	}

	return req.ret;
}
/* regset ->get hook for the general registers: runs do_gpregs_get(). */
static int
gpregs_get(struct task_struct *target, const struct user_regset *regset,
	   unsigned int pos, unsigned int count,
	   void *kbuf, void __user *ubuf)
{
	int status;

	status = do_regset_call(do_gpregs_get, target, regset,
				pos, count, kbuf, ubuf);
	return status;
}
/* regset ->set hook for the general registers: runs do_gpregs_set(). */
static int
gpregs_set(struct task_struct *target, const struct user_regset *regset,
	   unsigned int pos, unsigned int count,
	   const void *kbuf, const void __user *ubuf)
{
	int status;

	status = do_regset_call(do_gpregs_set, target, regset,
				pos, count, kbuf, ubuf);
	return status;
}
/*
 * Unwind callback for gpregs_writeback(): hands the frame info to
 * do_sync_rbs() together with ia64_sync_user_rbs.  @arg is not used.
 */
static void
do_gpregs_writeback(struct unw_frame_info *info, void *arg)
{
	(void)arg;
	do_sync_rbs(info, ia64_sync_user_rbs);
}
/*
 * regset ->writeback hook: write back the register backing store.
 * ptrace does this before it stops, so that a tracer reading the user
 * memory after the thread stops will get the current register data.
 *
 * Nothing is done (and 0 is returned) when TIF_RESTORE_RSE was already
 * set on @target.  @now is not used.
 */
static int
gpregs_writeback(struct task_struct *target,
		 const struct user_regset *regset,
		 int now)
{
	if (!test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE)) {
		tsk_set_notify_resume(target);
		return do_regset_call(do_gpregs_writeback, target, regset,
				      0, 0, NULL, NULL);
	}

	return 0;
}
/*
 * regset ->active hook for the FP registers: reports 128 when the
 * target's IA64_THREAD_FPH_VALID flag is set and 32 otherwise.
 */
static int
fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	if (target->thread.flags & IA64_THREAD_FPH_VALID)
		return 128;

	return 32;
}
/* regset ->get hook for the FP registers: runs do_fpregs_get(). */
static int
fpregs_get(struct task_struct *target, const struct user_regset *regset,
	   unsigned int pos, unsigned int count,
	   void *kbuf, void __user *ubuf)
{
	int status;

	status = do_regset_call(do_fpregs_get, target, regset,
				pos, count, kbuf, ubuf);
	return status;
}
/* regset ->set hook for the FP registers: runs do_fpregs_set(). */
static int
fpregs_set(struct task_struct *target, const struct user_regset *regset,
	   unsigned int pos, unsigned int count,
	   const void *kbuf, const void __user *ubuf)
{
	int status;

	status = do_regset_call(do_fpregs_set, target, regset,
				pos, count, kbuf, ubuf);
	return status;
}
/*
 * Read or write one 8-byte word of the ptrace "user area" of @child.
 *
 * @child:        task whose state is accessed
 * @addr:         PT_* offset into the user area; must be 8-byte aligned
 * @data:         value to store (write) or location to fill (read)
 * @write_access: non-zero to write, zero to read
 *
 * Floating-point and general/branch/application register offsets are
 * translated to the corresponding regset position and served through
 * fpregs_get()/fpregs_set() or gpregs_get()/gpregs_set() with a kernel
 * buffer of sizeof(unsigned long).  Offsets that match neither table are
 * treated as debug registers (dbr/ibr) and accessed directly in
 * child->thread.
 *
 * Returns 0 on success and -1 on any failure (unaligned or rejected
 * address, regset error, perfmon owning the debug registers).
 */
static int
access_uarea(struct task_struct *child, unsigned long addr,
unsigned long *data, int write_access)
{
unsigned int pos = -1; /* an invalid value */
int ret;
unsigned long *ptr, regnum;
if ((addr & 0x7) != 0) {
dprintk("ptrace: unaligned register address 0x%lx\n", addr);
return -1;
}
/* reject the gaps between the register groups handled below */
if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
(addr >= PT_R7 + 8 && addr < PT_B1) ||
(addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
(addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
dprintk("ptrace: rejecting access to register "
"address 0x%lx\n", addr);
return -1;
}
/* floating-point registers: map the PT_F* offset to an FP regset pos */
switch (addr) {
case PT_F32 ... (PT_F127 + 15):
pos = addr - PT_F32 + ELF_FP_OFFSET(32);
break;
case PT_F2 ... (PT_F5 + 15):
pos = addr - PT_F2 + ELF_FP_OFFSET(2);
break;
case PT_F10 ... (PT_F31 + 15):
pos = addr - PT_F10 + ELF_FP_OFFSET(10);
break;
case PT_F6 ... (PT_F9 + 15):
pos = addr - PT_F6 + ELF_FP_OFFSET(6);
break;
}
/* pos was set above, so this is an FP register access */
if (pos != -1) {
if (write_access)
ret = fpregs_set(child, NULL, pos,
sizeof(unsigned long), data, NULL);
else
ret = fpregs_get(child, NULL, pos,
sizeof(unsigned long), data, NULL);
if (ret != 0)
return -1;
return 0;
}
/* everything else: map the PT_* offset to a general regset pos */
switch (addr) {
case PT_NAT_BITS:
pos = ELF_NAT_OFFSET;
break;
case PT_R4 ... PT_R7:
pos = addr - PT_R4 + ELF_GR_OFFSET(4);
break;
case PT_B1 ... PT_B5:
pos = addr - PT_B1 + ELF_BR_OFFSET(1);
break;
case PT_AR_EC:
pos = ELF_AR_EC_OFFSET;
break;
case PT_AR_LC:
pos = ELF_AR_LC_OFFSET;
break;
case PT_CR_IPSR:
pos = ELF_CR_IPSR_OFFSET;
break;
case PT_CR_IIP:
pos = ELF_CR_IIP_OFFSET;
break;
case PT_CFM:
pos = ELF_CFM_OFFSET;
break;
case PT_AR_UNAT:
pos = ELF_AR_UNAT_OFFSET;
break;
case PT_AR_PFS:
pos = ELF_AR_PFS_OFFSET;
break;
case PT_AR_RSC:
pos = ELF_AR_RSC_OFFSET;
break;
case PT_AR_RNAT:
pos = ELF_AR_RNAT_OFFSET;
break;
case PT_AR_BSPSTORE:
pos = ELF_AR_BSPSTORE_OFFSET;
break;
case PT_PR:
pos = ELF_PR_OFFSET;
break;
case PT_B6:
pos = ELF_BR_OFFSET(6);
break;
case PT_AR_BSP:
pos = ELF_AR_BSP_OFFSET;
break;
case PT_R1 ... PT_R3:
pos = addr - PT_R1 + ELF_GR_OFFSET(1);
break;
case PT_R12 ... PT_R15:
pos = addr - PT_R12 + ELF_GR_OFFSET(12);
break;
case PT_R8 ... PT_R11:
pos = addr - PT_R8 + ELF_GR_OFFSET(8);
break;
case PT_R16 ... PT_R31:
pos = addr - PT_R16 + ELF_GR_OFFSET(16);
break;
case PT_AR_CCV:
pos = ELF_AR_CCV_OFFSET;
break;
case PT_AR_FPSR:
pos = ELF_AR_FPSR_OFFSET;
break;
case PT_B0:
pos = ELF_BR_OFFSET(0);
break;
case PT_B7:
pos = ELF_BR_OFFSET(7);
break;
case PT_AR_CSD:
pos = ELF_AR_CSD_OFFSET;
break;
case PT_AR_SSD:
pos = ELF_AR_SSD_OFFSET;
break;
}
if (pos != -1) {
if (write_access)
ret = gpregs_set(child, NULL, pos,
sizeof(unsigned long), data, NULL);
else
ret = gpregs_get(child, NULL, pos,
sizeof(unsigned long), data, NULL);
if (ret != 0)
return -1;
return 0;
}
/* access debug registers */
if (addr >= PT_IBR) {
regnum = (addr - PT_IBR) >> 3;
ptr = &child->thread.ibr[0];
} else {
/* regnum is unsigned: an addr below PT_DBR wraps and is rejected below */
regnum = (addr - PT_DBR) >> 3;
ptr = &child->thread.dbr[0];
}
if (regnum >= 8) {
dprintk("ptrace: rejecting access to register "
"address 0x%lx\n", addr);
return -1;
}
#ifdef CONFIG_PERFMON
/*
* Check if debug registers are used by perfmon. This
* test must be done once we know that we can do the
* operation, i.e. the arguments are all valid, but
* before we start modifying the state.
*
* Perfmon needs to keep a count of how many processes
* are trying to modify the debug registers for system
* wide monitoring sessions.
*
* We also include read access here, because they may
* cause the PMU-installed debug register state
* (dbr[], ibr[]) to be reset. The two arrays are also
* used by perfmon, but we do not use
* IA64_THREAD_DBG_VALID. The registers are restored
* by the PMU context switch code.
*/
if (pfm_use_debug_registers(child))
return -1;
#endif
/* first debug-register access for this task: start from zeroed state */
if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
child->thread.flags |= IA64_THREAD_DBG_VALID;
memset(child->thread.dbr, 0,
sizeof(child->thread.dbr));
memset(child->thread.ibr, 0,
sizeof(child->thread.ibr));
}
ptr += regnum;
/* odd-numbered registers get bits 56-58 cleared on write */
if ((regnum & 1) && write_access) {
/* don't let the user set kernel-level breakpoints: */
*ptr = *data & ~(7UL << 56);
return 0;
}
if (write_access)
*ptr = *data;
else
*data = *ptr;
return 0;
}
/*
 * Regsets exported for native ia64 tasks: the general registers
 * (NT_PRSTATUS note) and the floating-point registers (NT_PRFPREG note).
 */
static const struct user_regset native_regsets[] = {
	{
		.core_note_type	= NT_PRSTATUS,
		.n		= ELF_NGREG,
		.size		= sizeof(elf_greg_t),
		.align		= sizeof(elf_greg_t),
		.get		= gpregs_get,
		.set		= gpregs_set,
		.writeback	= gpregs_writeback,
	},
	{
		.core_note_type	= NT_PRFPREG,
		.n		= ELF_NFPREG,
		.size		= sizeof(elf_fpreg_t),
		.align		= sizeof(elf_fpreg_t),
		.get		= fpregs_get,
		.set		= fpregs_set,
		.active		= fpregs_active,
	},
};
/* Regset view describing a native ia64 task. */
static const struct user_regset_view user_ia64_view = {
	.name		= "ia64",
	.e_machine	= EM_IA_64,
	.regsets	= native_regsets,
	.n		= ARRAY_SIZE(native_regsets),
};
/*
 * Return the regset view to use for @tsk: the ia32 view for IA-32
 * processes when CONFIG_IA32_SUPPORT is enabled, the native ia64 view
 * otherwise.
 */
const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
{
	const struct user_regset_view *view = &user_ia64_view;

#ifdef CONFIG_IA32_SUPPORT
	extern const struct user_regset_view user_ia32_view;

	if (IS_IA32_PROCESS(task_pt_regs(tsk)))
		view = &user_ia32_view;
#endif
	return view;
}
......@@ -59,6 +59,7 @@
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/hpsim.h>
......@@ -176,6 +177,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
/*
 * Counterpart of "filter_rsvd_memory()" that does NOT filter out the
 * reserved memory ranges: the range [start, end) is passed on unchanged
 * (apart from the optional skipping of physical page 0) to
 * call_pernode_memory() together with the callback given in @arg.
 * Always returns 0.
 */
int __init
filter_memory(unsigned long start, unsigned long end, void *arg)
{
	void (*callback)(unsigned long, unsigned long, int) = arg;

#if IGNORE_PFN0
	if (start == PAGE_OFFSET) {
		printk(KERN_WARNING "warning: skipping physical page 0\n");
		start += PAGE_SIZE;
		if (start >= end)
			return 0;
	}
#endif
	if (start >= end)
		return 0;

	call_pernode_memory(__pa(start), end - start, callback);
	return 0;
}
static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
......@@ -493,6 +517,8 @@ setup_arch (char **cmdline_p)
acpi_table_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
# endif
#else
# ifdef CONFIG_SMP
......@@ -946,9 +972,10 @@ cpu_init (void)
#endif
/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
if (ia64_pal_vm_summary(NULL, &vmi) == 0)
if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
else {
setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
} else {
printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
max_ctx = (1U << 15) - 1; /* use architected minimum */
}
......
......@@ -209,6 +209,19 @@ send_IPI_allbutself (int op)
}
}
/*
 * Send IPI @op to every CPU set in @mask, one send_IPI_single() call
 * per CPU.
 *
 * Called with preemption disabled.
 */
static inline void
send_IPI_mask(cpumask_t mask, int op)
{
	unsigned int target;

	for_each_cpu_mask(target, mask)
		send_IPI_single(target, op);
}
/*
* Called with preemption disabled.
*/
......@@ -401,6 +414,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
}
EXPORT_SYMBOL(smp_call_function_single);
/**
* smp_call_function_mask - run a function on a set of other CPUs
* @mask: The set of cpus to run on. The calling cpu and any cpu that
*        is not in cpu_online_map are removed from the set before use.
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
* @wait: If true, wait (atomically) until function
*        has completed on other CPUs.
*
* Returns 0 on success, else a negative status code.
* (The code below always returns 0, including when the filtered mask
* is empty.)
*
* If @wait is true, then returns once @func has returned; otherwise
* it returns just before the target cpu calls @func.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
int smp_call_function_mask(cpumask_t mask,
void (*func)(void *), void *info,
int wait)
{
struct call_data_struct data;
cpumask_t allbutself;
int cpus;
/* call_lock is held until all targets have responded */
spin_lock(&call_lock);
/* restrict the request to online cpus other than this one */
allbutself = cpu_online_map;
cpu_clear(smp_processor_id(), allbutself);
cpus_and(mask, mask, allbutself);
cpus = cpus_weight(mask);
/* nobody left to call: nothing to do */
if (!cpus) {
spin_unlock(&call_lock);
return 0;
}
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
data.func = func;
data.info = info;
atomic_set(&data.started, 0);
data.wait = wait;
if (wait)
atomic_set(&data.finished, 0);
/* publish the on-stack request through the global call_data pointer */
call_data = &data;
mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
/* Send a message to other CPUs */
if (cpus_equal(mask, allbutself))
send_IPI_allbutself(IPI_CALL_FUNC);
else
send_IPI_mask(mask, IPI_CALL_FUNC);
/* Wait for response */
/* spin until data.started reaches the number of targeted cpus */
while (atomic_read(&data.started) != cpus)
cpu_relax();
if (wait)
while (atomic_read(&data.finished) != cpus)
cpu_relax();
/* data lives on this stack frame: unpublish it before returning */
call_data = NULL;
spin_unlock(&call_lock);
return 0;
}
EXPORT_SYMBOL(smp_call_function_mask);
/*
* this function sends a 'generic call function' IPI to all other CPUs
* in the system.
......
......@@ -400,9 +400,9 @@ smp_callin (void)
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
cpu_set(cpuid, cpu_online_map);
unlock_ipi_calllock();
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
unlock_ipi_calllock();
smp_setup_percpu_timer();
......
......@@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = {
};
static struct clocksource *itc_clocksource;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#include <linux/kernel_stat.h>
extern cputime_t cycle_to_cputime(u64 cyc);
/*
 * Called from the context switch with interrupts disabled, to charge all
 * accumulated times to the current process, and to prepare accounting on
 * the next process.
 *
 * System time (the accumulated ac_stime plus the cycles since ac_stamp)
 * is always charged to @prev; user time only if any was accumulated.
 * Both tasks get the current ITC value as their new stamp and @next
 * starts with cleared accumulators.
 */
void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
{
	struct thread_info *prev_ti = task_thread_info(prev);
	struct thread_info *next_ti = task_thread_info(next);
	__u64 itc_now = ia64_get_itc();
	cputime_t sys_delta;

	sys_delta = cycle_to_cputime(prev_ti->ac_stime +
				     (itc_now - prev_ti->ac_stamp));
	account_system_time(prev, 0, sys_delta);
	account_system_time_scaled(prev, sys_delta);

	if (prev_ti->ac_utime != 0) {
		cputime_t user_delta = cycle_to_cputime(prev_ti->ac_utime);

		account_user_time(prev, user_delta);
		account_user_time_scaled(prev, user_delta);
	}

	next_ti->ac_stamp = itc_now;
	prev_ti->ac_stamp = itc_now;
	next_ti->ac_utime = 0;
	next_ti->ac_stime = 0;
}
/*
 * Account time for a transition between system, hard irq or soft irq state.
 * Note that this function is called with interrupts enabled.
 *
 * Interrupts are disabled locally while the accumulated system time of
 * @tsk is charged and its stamp is reset to the current ITC value.
 */
void account_system_vtime(struct task_struct *tsk)
{
	struct thread_info *info = task_thread_info(tsk);
	unsigned long irq_flags;
	cputime_t sys_delta;
	__u64 itc_now;

	local_irq_save(irq_flags);

	itc_now = ia64_get_itc();
	sys_delta = cycle_to_cputime(info->ac_stime +
				     (itc_now - info->ac_stamp));
	account_system_time(tsk, 0, sys_delta);
	account_system_time_scaled(tsk, sys_delta);

	info->ac_stime = 0;
	info->ac_stamp = itc_now;

	local_irq_restore(irq_flags);
}
/*
 * Called from the timer interrupt handler to charge accumulated user time
 * to the current process.  Must be called with interrupts disabled.
 *
 * Does nothing when no user time has been accumulated.  @user_tick is
 * not used.
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	struct thread_info *info = task_thread_info(p);
	cputime_t user_delta;

	if (!info->ac_utime)
		return;

	user_delta = cycle_to_cputime(info->ac_utime);
	account_user_time(p, user_delta);
	account_user_time_scaled(p, user_delta);
	info->ac_utime = 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
{
......
......@@ -13,6 +13,7 @@
* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
* 2001/01/17 Add support emulation of unaligned kernel accesses.
*/
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/tty.h>
......@@ -1290,7 +1291,7 @@ within_logging_rate_limit (void)
{
static unsigned long count, last_time;
if (jiffies - last_time > 5*HZ)
if (time_after(jiffies, last_time + 5 * HZ))
count = 0;
if (count < 5) {
last_time = jiffies;
......
......@@ -45,8 +45,6 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n",
nr_swap_pages<<(PAGE_SHIFT-10));
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
......@@ -255,7 +253,7 @@ paging_init (void)
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
efi_memmap_walk(register_active_ranges, NULL);
efi_memmap_walk(filter_memory, register_active_ranges);
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
......
......@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)
{
int cpu, n = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
for_each_possible_early_cpu(cpu)
if (node == node_cpuid[cpu].nid)
n++;
......@@ -124,6 +124,7 @@ static unsigned long __meminit compute_pernodesize(int node)
pernodesize += node * L1_CACHE_BYTES;
pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize = PAGE_ALIGN(pernodesize);
return pernodesize;
}
......@@ -142,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
#ifdef CONFIG_SMP
int cpu;
for (cpu = 0; cpu < NR_CPUS; cpu++) {
for_each_possible_early_cpu(cpu) {
if (node == node_cpuid[cpu].nid) {
memcpy(__va(cpu_data), __phys_per_cpu_start,
__per_cpu_end - __per_cpu_start);
......@@ -345,7 +346,7 @@ static void __init initialize_pernode_data(void)
#ifdef CONFIG_SMP
/* Set the node_data pointer for each per-cpu struct */
for (cpu = 0; cpu < NR_CPUS; cpu++) {
for_each_possible_early_cpu(cpu) {
node = node_cpuid[cpu].nid;
per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
}
......@@ -444,7 +445,7 @@ void __init find_memory(void)
mem_data[node].min_pfn = ~0UL;
}
efi_memmap_walk(register_active_ranges, NULL);
efi_memmap_walk(filter_memory, register_active_ranges);
/*
* Initialize the boot memory maps in reverse order since that's
......@@ -493,13 +494,9 @@ void __cpuinit *per_cpu_init(void)
int cpu;
static int first_time = 1;
if (smp_processor_id() != 0)
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
if (first_time) {
first_time = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
for_each_possible_early_cpu(cpu)
per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
}
......@@ -522,8 +519,6 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n",
nr_swap_pages<<(PAGE_SHIFT-10));
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
......
......@@ -58,7 +58,6 @@ __ia64_sync_icache_dcache (pte_t pte)
{
unsigned long addr;
struct page *page;
unsigned long order;
page = pte_page(pte);
addr = (unsigned long) page_address(page);
......@@ -66,12 +65,7 @@ __ia64_sync_icache_dcache (pte_t pte)
if (test_bit(PG_arch_1, &page->flags))
return; /* i-cache is already coherent with d-cache */
if (PageCompound(page)) {
order = compound_order(page);
flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
}
else
flush_icache_range(addr, addr + PAGE_SIZE);
flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
set_bit(PG_arch_1, &page->flags); /* mark page as clean */
}
......@@ -553,12 +547,10 @@ find_largest_hole (u64 start, u64 end, void *arg)
#endif /* CONFIG_VIRTUAL_MEM_MAP */
int __init
register_active_ranges(u64 start, u64 end, void *arg)
register_active_ranges(u64 start, u64 len, int nid)
{
int nid = paddr_to_nid(__pa(start));
u64 end = start + len;
if (nid < 0)
nid = 0;
#ifdef CONFIG_KEXEC
if (start > crashk_res.start && start < crashk_res.end)
start = crashk_res.end;
......
......@@ -27,7 +27,9 @@
*/
int num_node_memblks;
struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
struct node_cpuid_s node_cpuid[NR_CPUS];
struct node_cpuid_s node_cpuid[NR_CPUS] =
{ [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
/*
* This is a matrix with "distances" between nodes, they should be
* proportional to the memory access latency ratios.
......
......@@ -11,6 +11,9 @@
* Rohit Seth <rohit.seth@intel.com>
* Ken Chen <kenneth.w.chen@intel.com>
* Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
* Copyright (C) 2007 Intel Corp
* Fenghua Yu <fenghua.yu@intel.com>
* Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
*/
#include <linux/module.h>
#include <linux/init.h>
......@@ -26,6 +29,9 @@
#include <asm/pal.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/tlb.h>
static struct {
unsigned long mask; /* mask of supported purge page-sizes */
......@@ -39,6 +45,10 @@ struct ia64_ctx ia64_ctx = {
};
DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/
DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
/*
* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
......@@ -84,14 +94,140 @@ wrap_mmu_context (struct mm_struct *mm)
local_flush_tlb_all();
}
/*
* Implement "spinaphores" ... like counting semaphores, but they
* spin instead of sleeping. If there are ever any other users for
* this primitive it can be moved up to a spinaphore.h header.
*/
/* Spinning counting semaphore: the whole state is one atomic counter. */
struct spinaphore {
/* Current count; set by spinaphore_init(), decremented by down_spin(), incremented by up_spin(). */
atomic_t cur;
};
/* Set the spinaphore's counter to @val. */
static inline void spinaphore_init(struct spinaphore *ss, int val)
{
atomic_set(&ss->cur, val);
}
static inline void down_spin(struct spinaphore *ss)
{
	/*
	 * Try to take one unit from the counter; atomic_add_unless() refuses
	 * to add -1 while the counter is 0.  The successful path is the
	 * expected one.
	 */
	for (;;) {
		if (likely(atomic_add_unless(&ss->cur, -1, 0)))
			return;

		/* Nothing left: poll with plain reads until it looks non-zero, then retry. */
		while (atomic_read(&ss->cur) == 0)
			cpu_relax();
	}
}
/* Give one unit back to the spinaphore by incrementing its counter. */
static inline void up_spin(struct spinaphore *ss)
{
atomic_add(1, &ss->cur);
}
static struct spinaphore ptcg_sem;
static u16 nptcg = 1;
static int need_ptcg_sem = 1;
static int toolatetochangeptcgsem = 0;
/*
* Kernel parameter "nptcg=" overrides max number of concurrent global TLB
* purges which is reported from either PAL or SAL PALO.
*
* We don't have sanity checking for nptcg value. It's the user's responsibility
* for valid nptcg value on the platform. Otherwise, kernel may hang in some
* cases.
*/
static int __init
set_nptcg(char *str)
{
	/* Stays 0 unless get_option() stores a parsed value into it. */
	int max_purges = 0;

	get_option(&str, &max_purges);

	/* Hand the value over as a kernel-parameter override; no range check is done here. */
	setup_ptcg_sem(max_purges, NPTCG_FROM_KERNEL_PARAMETER);

	return 1;
}
__setup("nptcg=", set_nptcg);
/*
* Maximum number of simultaneous ptc.g purges in the system can
* be defined by PAL_VM_SUMMARY (in which case we should take
* the smallest value for any cpu in the system) or by the PAL
* override table (in which case we should ignore the value from
* PAL_VM_SUMMARY).
*
* Kernel parameter "nptcg=" overrides maximum number of simultaneous ptc.g
* purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
* we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
*
* Complicating the logic here is the fact that num_possible_cpus()
* isn't fully setup until we start bringing cpus online.
*/
void
setup_ptcg_sem(int max_purges, int nptcg_from)
{
	/* Sticky flags: once a higher-priority source has spoken, later calls defer to it. */
	static int kp_override;
	static int palo_override;
	static int firstcpu = 1;

	/*
	 * The first global purge has already been issued
	 * (ia64_global_tlb_purge() sets toolatetochangeptcgsem), so the
	 * limit can no longer be changed.
	 */
	if (toolatetochangeptcgsem) {
		BUG_ON(max_purges < nptcg);
		return;
	}

	/* "nptcg=" on the command line has the highest priority. */
	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
		kp_override = 1;
		nptcg = max_purges;
		goto resetsema;
	}
	if (kp_override) {
		/* Keep the overridden limit; only re-evaluate whether the semaphore is needed. */
		need_ptcg_sem = num_possible_cpus() > nptcg;
		return;
	}

	/* The PAL override table comes next. */
	if (nptcg_from == NPTCG_FROM_PALO) {
		palo_override = 1;

		/* In PALO max_purges == 0 really means it! */
		if (max_purges == 0)
			panic("Whoa! Platform does not support global TLB purges.\n");
		nptcg = max_purges;
		if (nptcg == PALO_MAX_TLB_PURGES) {
			need_ptcg_sem = 0;
			return;
		}
		goto resetsema;
	}
	if (palo_override) {
		if (nptcg != PALO_MAX_TLB_PURGES)
			need_ptcg_sem = (num_possible_cpus() > nptcg);
		return;
	}

	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
	if (max_purges == 0)
		max_purges = 1;

	/* Track the smallest value reported by any cpu so far. */
	if (firstcpu) {
		nptcg = max_purges;
		firstcpu = 0;
	}
	if (max_purges < nptcg)
		nptcg = max_purges;
	if (nptcg == PAL_MAX_PURGES) {
		need_ptcg_sem = 0;
		return;
	} else
		need_ptcg_sem = (num_possible_cpus() > nptcg);

resetsema:
	/*
	 * Size the semaphore from the agreed limit, not from this caller's
	 * max_purges: on the PAL_VM_SUMMARY path a later cpu may report a
	 * larger value than the minimum already recorded in nptcg, and using
	 * it would raise the limit again.  On the kernel-parameter and PALO
	 * paths nptcg was just assigned from max_purges.
	 */
	spinaphore_init(&ptcg_sem, nptcg);
}
void
ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long nbits)
{
static DEFINE_SPINLOCK(ptcg_lock);
struct mm_struct *active_mm = current->active_mm;
toolatetochangeptcgsem = 1;
if (mm != active_mm) {
/* Restore region IDs for mm */
if (mm && active_mm) {
......@@ -102,19 +238,20 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
}
}
/* HW requires global serialization of ptc.ga. */
spin_lock(&ptcg_lock);
{
do {
/*
* Flush ALAT entries also.
*/
ia64_ptcga(start, (nbits<<2));
ia64_srlz_i();
start += (1UL << nbits);
} while (start < end);
}
spin_unlock(&ptcg_lock);
if (need_ptcg_sem)
down_spin(&ptcg_sem);
do {
/*
* Flush ALAT entries also.
*/
ia64_ptcga(start, (nbits << 2));
ia64_srlz_i();
start += (1UL << nbits);
} while (start < end);
if (need_ptcg_sem)
up_spin(&ptcg_sem);
if (mm != active_mm) {
activate_context(active_mm);
......@@ -190,6 +327,9 @@ ia64_tlb_init (void)
ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
unsigned long tr_pgbits;
long status;
pal_vm_info_1_u_t vm_info_1;
pal_vm_info_2_u_t vm_info_2;
int cpu = smp_processor_id();
if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
......@@ -206,4 +346,191 @@ ia64_tlb_init (void)
local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
if (status) {
printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
per_cpu(ia64_tr_num, cpu) = 8;
return;
}
per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
if (per_cpu(ia64_tr_num, cpu) >
(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
per_cpu(ia64_tr_num, cpu) =
vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!"
"IA64_TR_ALLOC_MAX should be extended\n");
}
}
/*
* is_tr_overlap
*
* Check overlap with inserted TRs.
*/
static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
{
	u64 tr_log_size;
	u64 tr_end;
	u64 va_rr = ia64_get_rr(va);
	u64 va_rid = RR_TO_RID(va_rr);
	/*
	 * Shift an unsigned long, not an int: log_size may be 31 or more,
	 * which would overflow a plain "1 << log_size".
	 */
	u64 va_end = va + (1UL << log_size) - 1;

	/* Entries recorded under a different region ID cannot overlap. */
	if (va_rid != RR_TO_RID(p->rr))
		return 0;

	/* The recorded itir holds the log2 size shifted left by 2. */
	tr_log_size = (p->itir & 0xff) >> 2;
	tr_end = p->ifa + (1UL << tr_log_size) - 1;

	/* Inclusive ranges [va, va_end] and [p->ifa, tr_end] are disjoint. */
	if (va > tr_end || p->ifa > va_end)
		return 0;
	return 1;
}
/*
* ia64_itr_entry: insert a TR entry in virtual mode. Allocate a TR slot
*
* target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
*
* va : virtual address.
* pte : pte entries inserted.
* log_size: range to be covered.
*
* Return value: <0 : error No.
*
* >=0 : slot number allocated for TR.
* Must be called with preemption disabled.
*/
int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
{
	int i, r;
	unsigned long psr;
	struct ia64_tr_entry *p;
	int cpu = smp_processor_id();

	r = -EINVAL;
	/*
	 * Check overlap with existing TR entries.
	 *
	 * Entries are recorded at the index of their slot number (see the
	 * bookkeeping below), so the walk has to start at
	 * IA64_TR_ALLOC_BASE in the array as well as in the counter;
	 * starting the pointer at index 0 would leave the highest in-use
	 * slots unchecked.
	 */
	if (target_mask & 0x1) {
		p = &__per_cpu_idtrs[cpu][0][IA64_TR_ALLOC_BASE];
		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
								i++, p++) {
			if (p->pte & 0x1)
				if (is_tr_overlap(p, va, log_size)) {
					printk(KERN_DEBUG "Overlapped Entry"
						"Inserted for TR Reigster!!\n");
					goto out;
				}
		}
	}
	if (target_mask & 0x2) {
		p = &__per_cpu_idtrs[cpu][1][IA64_TR_ALLOC_BASE];
		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
								i++, p++) {
			if (p->pte & 0x1)
				if (is_tr_overlap(p, va, log_size)) {
					printk(KERN_DEBUG "Overlapped Entry"
						"Inserted for TR Reigster!!\n");
					goto out;
				}
		}
	}

	/*
	 * Find the lowest slot that is free in every table the caller
	 * asked for (bit 0 of the recorded pte marks a slot as in use).
	 */
	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
		switch (target_mask & 0x3) {
		case 1:
			if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1))
				goto found;
			continue;
		case 2:
			if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
				goto found;
			continue;
		case 3:
			if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) &&
			    !(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
				goto found;
			continue;
		default:
			/* Neither itr nor dtr requested. */
			r = -EINVAL;
			goto out;
		}
	}
found:
	/* The loop ran off the end: every slot is taken. */
	if (i >= per_cpu(ia64_tr_num, cpu))
		return -EBUSY;

	/* Record tr info for mca handler use! */
	if (i > per_cpu(ia64_tr_used, cpu))
		per_cpu(ia64_tr_used, cpu) = i;

	psr = ia64_clear_ic();
	if (target_mask & 0x1) {
		ia64_itr(0x1, i, va, pte, log_size);
		ia64_srlz_i();
		p = &__per_cpu_idtrs[cpu][0][i];
		p->ifa = va;
		p->pte = pte;
		p->itir = log_size << 2;
		p->rr = ia64_get_rr(va);
	}
	if (target_mask & 0x2) {
		ia64_itr(0x2, i, va, pte, log_size);
		ia64_srlz_i();
		p = &__per_cpu_idtrs[cpu][1][i];
		p->ifa = va;
		p->pte = pte;
		p->itir = log_size << 2;
		p->rr = ia64_get_rr(va);
	}
	ia64_set_psr(psr);
	r = i;
out:
	return r;
}
EXPORT_SYMBOL_GPL(ia64_itr_entry);
/*
* ia64_ptr_entry: purge a TR entry and release its slot
*
* target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
* slot: slot number to be freed.
*
* Must be called with preemption disabled.
*/
void ia64_ptr_entry(u64 target_mask, int slot)
{
int cpu = smp_processor_id();
int i;
struct ia64_tr_entry *p;
/* Only slots in [IA64_TR_ALLOC_BASE, ia64_tr_num) are handled; anything else is ignored. */
if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
return;
/* itr: purge only if the slot is recorded as in use (pte bit 0) and */
/* is_tr_overlap() accepts the entry against its own recorded range, */
/* i.e. the region ID currently read for p->ifa matches the recorded rr. */
if (target_mask & 0x1) {
p = &__per_cpu_idtrs[cpu][0][slot];
if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
/* Mark the slot free in the bookkeeping, then purge and serialize. */
p->pte = 0;
ia64_ptr(0x1, p->ifa, p->itir>>2);
ia64_srlz_i();
}
}
/* dtr: same procedure on the second table. */
if (target_mask & 0x2) {
p = &__per_cpu_idtrs[cpu][1][slot];
if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
p->pte = 0;
ia64_ptr(0x2, p->ifa, p->itir>>2);
ia64_srlz_i();
}
}
/* Recompute ia64_tr_used: walk down from the old value to the highest slot */
/* still in use in either table. If none is, i ends at IA64_TR_ALLOC_BASE - 1. */
for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) ||
(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
break;
}
per_cpu(ia64_tr_used, cpu) = i;
}
EXPORT_SYMBOL_GPL(ia64_ptr_entry);
......@@ -199,7 +199,7 @@ xpc_timeout_partition_disengage_request(unsigned long data)
struct xpc_partition *part = (struct xpc_partition *) data;
DBUG_ON(jiffies < part->disengage_request_timeout);
DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
(void) xpc_partition_disengaged(part);
......@@ -230,7 +230,7 @@ xpc_hb_beater(unsigned long dummy)
{
xpc_vars->heartbeat++;
if (jiffies >= xpc_hb_check_timeout) {
if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
wake_up_interruptible(&xpc_act_IRQ_wq);
}
......@@ -270,7 +270,7 @@ xpc_hb_checker(void *ignore)
/* checking of remote heartbeats is skewed by IRQ handling */
if (jiffies >= xpc_hb_check_timeout) {
if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
dev_dbg(xpc_part, "checking remote heartbeats\n");
xpc_check_remote_hb();
......@@ -305,7 +305,7 @@ xpc_hb_checker(void *ignore)
/* wait for IRQ or timeout */
(void) wait_event_interruptible(xpc_act_IRQ_wq,
(last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
jiffies >= xpc_hb_check_timeout ||
time_after_eq(jiffies, xpc_hb_check_timeout) ||
(volatile int) xpc_exiting));
}
......
......@@ -877,7 +877,7 @@ xpc_partition_disengaged(struct xpc_partition *part)
disengaged = (xpc_partition_engaged(1UL << partid) == 0);
if (part->disengage_request_timeout) {
if (!disengaged) {
if (jiffies < part->disengage_request_timeout) {
if (time_before(jiffies, part->disengage_request_timeout)) {
/* timelimit hasn't been reached yet */
return 0;
}
......
......@@ -35,6 +35,7 @@
#include <linux/init.h>
#include <linux/numa.h>
#include <asm/system.h>
#include <asm/numa.h>
#define COMPILER_DEPENDENT_INT64 long
#define COMPILER_DEPENDENT_UINT64 unsigned long
......@@ -115,7 +116,11 @@ extern unsigned int is_cpu_cpei_target(unsigned int cpu);
extern void set_cpei_target_cpu(unsigned int cpu);
extern unsigned int get_cpei_target_cpu(void);
extern void prefill_possible_map(void);
#ifdef CONFIG_ACPI_HOTPLUG_CPU
extern int additional_cpus;
#else
#define additional_cpus 0
#endif
#ifdef CONFIG_ACPI_NUMA
#if MAX_NUMNODES > 256
......@@ -129,6 +134,34 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
#define acpi_unlazy_tlb(x)
#ifdef CONFIG_ACPI_NUMA
extern cpumask_t early_cpu_possible_map;
#define for_each_possible_early_cpu(cpu) \
for_each_cpu_mask((cpu), early_cpu_possible_map)
/*
 * Extend early_cpu_possible_map to cover at least min_cpus cpus plus
 * reserve_cpus extra ones (capped at NR_CPUS), and give every added cpu
 * that has no node yet a node id, handed out round-robin over
 * num_online_nodes().
 */
static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus)
{
	int first, limit;
	int cpu;
	int nid = 0;

	/* Start right after the cpus already counted in the map. */
	first = cpus_weight(early_cpu_possible_map);
	limit = max(first, min_cpus);
	limit = min(limit + reserve_cpus, NR_CPUS);

	for (cpu = first; cpu < limit; cpu++) {
		cpu_set(cpu, early_cpu_possible_map);

		/* Leave cpus that already carry a node id untouched. */
		if (node_cpuid[cpu].nid != NUMA_NO_NODE)
			continue;

		node_cpuid[cpu].nid = nid;
		if (++nid >= num_online_nodes())
			nid = 0;
	}
}
#endif /* CONFIG_ACPI_NUMA */
#endif /*__KERNEL__*/
#endif /*_ASM_ACPI_H*/
/*
* include/asm-ia64/cputime.h:
* Definitions for measuring cputime on ia64 machines.
*
* Based on <asm-powerpc/cputime.h>.
*
* Copyright (C) 2007 FUJITSU LIMITED
* Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
* Otherwise we measure cpu time in jiffies using the generic definitions.
*/
#ifndef __IA64_CPUTIME_H
#define __IA64_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#include <asm-generic/cputime.h>
#else
#include <linux/time.h>
#include <linux/jiffies.h>
#include <asm/processor.h>
typedef u64 cputime_t;
typedef u64 cputime64_t;
#define cputime_zero ((cputime_t)0)
#define cputime_max ((~((cputime_t)0) >> 1) - 1)
#define cputime_add(__a, __b) ((__a) + (__b))
#define cputime_sub(__a, __b) ((__a) - (__b))
#define cputime_div(__a, __n) ((__a) / (__n))
#define cputime_halve(__a) ((__a) >> 1)
#define cputime_eq(__a, __b) ((__a) == (__b))
#define cputime_gt(__a, __b) ((__a) > (__b))
#define cputime_ge(__a, __b) ((__a) >= (__b))
#define cputime_lt(__a, __b) ((__a) < (__b))
#define cputime_le(__a, __b) ((__a) <= (__b))
#define cputime64_zero ((cputime64_t)0)
#define cputime64_add(__a, __b) ((__a) + (__b))
#define cputime64_sub(__a, __b) ((__a) - (__b))
#define cputime_to_cputime64(__ct) (__ct)
/*
* Convert cputime <-> jiffies (HZ)
*/
#define cputime_to_jiffies(__ct) ((__ct) / (NSEC_PER_SEC / HZ))
#define jiffies_to_cputime(__jif) ((__jif) * (NSEC_PER_SEC / HZ))
#define cputime64_to_jiffies64(__ct) ((__ct) / (NSEC_PER_SEC / HZ))
#define jiffies64_to_cputime64(__jif) ((__jif) * (NSEC_PER_SEC / HZ))
/*
* Convert cputime <-> milliseconds
*/
#define cputime_to_msecs(__ct) ((__ct) / NSEC_PER_MSEC)
#define msecs_to_cputime(__msecs) ((__msecs) * NSEC_PER_MSEC)
/*
* Convert cputime <-> seconds
*/
#define cputime_to_secs(__ct) ((__ct) / NSEC_PER_SEC)
#define secs_to_cputime(__secs) ((__secs) * NSEC_PER_SEC)
/*
* Convert cputime <-> timespec (nsec)
*/
/* Fold a timespec into a single nanosecond count. */
static inline cputime_t timespec_to_cputime(const struct timespec *val)
{
	cputime_t nsecs = val->tv_sec * NSEC_PER_SEC;

	nsecs += val->tv_nsec;
	return nsecs;
}
/* Split a nanosecond count into whole seconds and leftover nanoseconds. */
static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
{
	val->tv_nsec = ct % NSEC_PER_SEC;
	val->tv_sec = ct / NSEC_PER_SEC;
}
/*
* Convert cputime <-> timeval (usec)
*/
/* Fold a timeval (seconds + microseconds) into a single nanosecond count. */
static inline cputime_t timeval_to_cputime(struct timeval *val)
{
	cputime_t nsecs = val->tv_sec * NSEC_PER_SEC;

	nsecs += val->tv_usec * NSEC_PER_USEC;
	return nsecs;
}
/* Split a nanosecond count into whole seconds and leftover microseconds. */
static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
{
	const cputime_t rem_nsecs = ct % NSEC_PER_SEC;

	val->tv_sec = ct / NSEC_PER_SEC;
	val->tv_usec = rem_nsecs / NSEC_PER_USEC;
}
/*
* Convert cputime <-> clock (USER_HZ)
*/
#define cputime_to_clock_t(__ct) ((__ct) / (NSEC_PER_SEC / USER_HZ))
#define clock_t_to_cputime(__x) ((__x) * (NSEC_PER_SEC / USER_HZ))
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) cputime_to_clock_t((cputime_t)__ct)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __IA64_CPUTIME_H */
......@@ -26,6 +26,7 @@
#define ELF_ARCH EM_IA_64
#define USE_ELF_CORE_DUMP
#define CORE_DUMP_USE_REGSET
/* Least-significant four bits of ELF header's e_flags are OS-specific. The bits are
interpreted as follows by Linux: */
......@@ -154,6 +155,30 @@ extern void ia64_init_addr_space (void);
#define ELF_NGREG 128 /* we really need just 72 but let's leave some headroom... */
#define ELF_NFPREG 128 /* f0 and f1 could be omitted, but so what... */
/*
 * elf_gregset_t register offsets, in bytes from the start of the set.
 * Every slot is sizeof(elf_greg_t) wide.
 */
#define ELF_GR_0_OFFSET		0
#define ELF_NAT_OFFSET		(32 * sizeof(elf_greg_t))
#define ELF_PR_OFFSET		(33 * sizeof(elf_greg_t))
#define ELF_BR_0_OFFSET		(34 * sizeof(elf_greg_t))
#define ELF_CR_IIP_OFFSET	(42 * sizeof(elf_greg_t))
#define ELF_CFM_OFFSET		(43 * sizeof(elf_greg_t))
#define ELF_CR_IPSR_OFFSET	(44 * sizeof(elf_greg_t))
/* The index is parenthesized so expression arguments such as "n + 1" scale correctly. */
#define ELF_GR_OFFSET(i)	(ELF_GR_0_OFFSET + (i) * sizeof(elf_greg_t))
#define ELF_BR_OFFSET(i)	(ELF_BR_0_OFFSET + (i) * sizeof(elf_greg_t))
#define ELF_AR_RSC_OFFSET	(45 * sizeof(elf_greg_t))
#define ELF_AR_BSP_OFFSET	(46 * sizeof(elf_greg_t))
#define ELF_AR_BSPSTORE_OFFSET	(47 * sizeof(elf_greg_t))
#define ELF_AR_RNAT_OFFSET	(48 * sizeof(elf_greg_t))
#define ELF_AR_CCV_OFFSET	(49 * sizeof(elf_greg_t))
#define ELF_AR_UNAT_OFFSET	(50 * sizeof(elf_greg_t))
#define ELF_AR_FPSR_OFFSET	(51 * sizeof(elf_greg_t))
#define ELF_AR_PFS_OFFSET	(52 * sizeof(elf_greg_t))
#define ELF_AR_LC_OFFSET	(53 * sizeof(elf_greg_t))
#define ELF_AR_EC_OFFSET	(54 * sizeof(elf_greg_t))
#define ELF_AR_CSD_OFFSET	(55 * sizeof(elf_greg_t))
#define ELF_AR_SSD_OFFSET	(56 * sizeof(elf_greg_t))
#define ELF_AR_END_OFFSET	(57 * sizeof(elf_greg_t))
typedef unsigned long elf_fpxregset_t;
typedef unsigned long elf_greg_t;
......@@ -183,12 +208,6 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst);
struct task_struct;
extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
#define ELF_CORE_COPY_TASK_REGS(tsk, elf_gregs) dump_task_regs(tsk, elf_gregs)
#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
#define GATE_EHDR ((const struct elfhdr *) GATE_ADDR)
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
......
......@@ -30,8 +30,12 @@
#include <asm/break.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
#define MAX_INSN_SIZE 1
#define MAX_INSN_SIZE 2 /* last half is for kprobe-booster */
#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6)
#define NOP_M_INST (long)(1<<27)
#define BRL_INST(i1, i2) ((long)((0xcL << 37) | /* brl */ \
(0x1L << 12) | /* many */ \
(((i1) & 1) << 36) | ((i2) << 13))) /* imm */
typedef union cmp_inst {
struct {
......@@ -112,6 +116,7 @@ struct arch_specific_insn {
#define INST_FLAG_FIX_RELATIVE_IP_ADDR 1
#define INST_FLAG_FIX_BRANCH_REG 2
#define INST_FLAG_BREAK_INST 4
#define INST_FLAG_BOOSTABLE 8
unsigned long inst_flag;
unsigned short target_br_reg;
unsigned short slot;
......
......@@ -31,6 +31,9 @@
#define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */
#define IA64_TR_CURRENT_STACK 1 /* dtr1: maps kernel's memory- & register-stacks */
#define IA64_TR_ALLOC_BASE 2 /* itr&dtr: Base of dynamic TR resource*/
#define IA64_TR_ALLOC_MAX 32 /* Max number for dynamic use*/
/* Processor status register bits: */
#define IA64_PSR_BE_BIT 1
#define IA64_PSR_UP_BIT 2
......
......@@ -35,6 +35,7 @@ extern void find_memory (void);
extern void reserve_memory (void);
extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
extern int filter_memory (unsigned long start, unsigned long end, void *arg);
extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
......@@ -56,7 +57,7 @@ extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end);
#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
extern int register_active_ranges(u64 start, u64 end, void *arg);
extern int register_active_ranges(u64 start, u64 len, int nid);
#ifdef CONFIG_VIRTUAL_MEM_MAP
# define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is > than this */
......
......@@ -22,6 +22,8 @@
#include <asm/mmzone.h>
#define NUMA_NO_NODE -1
extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
extern pg_data_t *pgdat_list[MAX_NUMNODES];
......
......@@ -13,6 +13,7 @@
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com>
* Copyright (C) 2008 Silicon Graphics, Inc. (SGI)
*
* 99/10/01 davidm Make sure we pass zero for reserved parameters.
* 00/03/07 davidm Updated pal_cache_flush() to be in sync with PAL v2.6.
......@@ -73,6 +74,8 @@
#define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */
#define PAL_GET_HW_POLICY 48 /* Get current hardware resource sharing policy */
#define PAL_SET_HW_POLICY 49 /* Set current hardware resource sharing policy */
#define PAL_VP_INFO 50 /* Information about virtual processor features */
#define PAL_MC_HW_TRACKING 51 /* Hardware tracking status */
#define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */
#define PAL_HALT_INFO 257 /* return the low power capabilities of processor */
......@@ -504,7 +507,8 @@ typedef struct pal_cache_check_info_s {
wiv : 1, /* Way field valid */
reserved2 : 1,
dp : 1, /* Data poisoned on MBE */
reserved3 : 8,
reserved3 : 6,
hlth : 2, /* Health indicator */
index : 20, /* Cache line index */
reserved4 : 2,
......@@ -542,7 +546,9 @@ typedef struct pal_tlb_check_info_s {
dtc : 1, /* Fail in data TC */
itc : 1, /* Fail in inst. TC */
op : 4, /* Cache operation */
reserved3 : 30,
reserved3 : 6,
hlth : 2, /* Health indicator */
reserved4 : 22,
is : 1, /* instruction set (1 == ia32) */
iv : 1, /* instruction set field valid */
......@@ -633,7 +639,8 @@ typedef struct pal_uarch_check_info_s {
way : 6, /* Way of structure */
wv : 1, /* way valid */
xv : 1, /* index valid */
reserved1 : 8,
reserved1 : 6,
hlth : 2, /* Health indicator */
index : 8, /* Index or set of the uarch
* structure that failed.
*/
......@@ -1213,14 +1220,12 @@ ia64_pal_mc_drain (void)
/* Return the machine check dynamic processor state */
static inline s64
ia64_pal_mc_dynamic_state (u64 offset, u64 *size, u64 *pds)
ia64_pal_mc_dynamic_state (u64 info_type, u64 dy_buffer, u64 *size)
{
struct ia64_pal_retval iprv;
PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, offset, 0, 0);
PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, info_type, dy_buffer, 0);
if (size)
*size = iprv.v0;
if (pds)
*pds = iprv.v1;
return iprv.status;
}
......@@ -1281,15 +1286,41 @@ ia64_pal_mc_expected (u64 expected, u64 *previous)
return iprv.status;
}
/* Hardware tracking status word: raw 64-bit value overlaid with four 4-bit fields. */
typedef union pal_hw_tracking_u {
u64 pht_data;
struct {
u64 itc :4, /* Instruction cache tracking */
dct :4, /* Data cache tracking */
itt :4, /* Instruction TLB tracking */
ddt :4, /* Data TLB tracking */
reserved:48;
} pal_hw_tracking_s;
} pal_hw_tracking_u_t;
/*
* Hardware tracking status.
*/
/*
 * Issue PAL_MC_HW_TRACKING with all three arguments zero.
 * If @status is non-NULL it receives the first PAL return value (v0).
 * Returns the PAL call status.
 */
static inline s64
ia64_pal_mc_hw_tracking (u64 *status)
{
struct ia64_pal_retval iprv;
PAL_CALL(iprv, PAL_MC_HW_TRACKING, 0, 0, 0);
if (status)
*status = iprv.v0;
return iprv.status;
}
/* Register a platform dependent location with PAL to which it can save
* minimal processor state in the event of a machine check or initialization
* event.
*/
static inline s64
ia64_pal_mc_register_mem (u64 physical_addr)
ia64_pal_mc_register_mem (u64 physical_addr, u64 size, u64 *req_size)
{
struct ia64_pal_retval iprv;
PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, 0, 0);
PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, size, 0);
if (req_size)
*req_size = iprv.v0;
return iprv.status;
}
......@@ -1631,6 +1662,29 @@ ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2)
return iprv.status;
}
typedef union pal_vp_info_u {
u64 pvi_val;
struct {
u64 index: 48, /* virtual feature set info */
vmm_id: 16; /* feature set id */
} pal_vp_info_s;
} pal_vp_info_u_t;
/*
* Returns information about virtual processor features
*/
/*
 * Issue PAL_VP_INFO with @feature_set and @vp_buffer as the first two
 * arguments (the third is zero).
 * @vp_info, if non-NULL, receives the first PAL return value (v0);
 * @vmm_id, if non-NULL, receives the second (v1).
 * Returns the PAL call status.
 */
static inline s64
ia64_pal_vp_info (u64 feature_set, u64 vp_buffer, u64 *vp_info, u64 *vmm_id)
{
struct ia64_pal_retval iprv;
PAL_CALL(iprv, PAL_VP_INFO, feature_set, vp_buffer, 0);
if (vp_info)
*vp_info = iprv.v0;
if (vmm_id)
*vmm_id = iprv.v1;
return iprv.status;
}
typedef union pal_itr_valid_u {
u64 piv_val;
struct {
......
......@@ -371,7 +371,7 @@ pgd_index (unsigned long address)
/* The offset in the 1-level directory is given by the 3 region bits
(61..63) and the level-1 bits. */
static inline pgd_t*
pgd_offset (struct mm_struct *mm, unsigned long address)
pgd_offset (const struct mm_struct *mm, unsigned long address)
{
return mm->pgd + pgd_index(address);
}
......
......@@ -296,6 +296,9 @@ enum {
EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
#define SAL_PLAT_BUS_ERR_SECT_GUID \
EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \
EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \
0xca, 0x4d)
#define MAX_CACHE_ERRORS 6
#define MAX_TLB_ERRORS 6
......@@ -879,6 +882,24 @@ extern void ia64_jump_to_sal(struct sal_to_os_boot *);
extern void ia64_sal_handler_init(void *entry_point, void *gpval);
#define PALO_MAX_TLB_PURGES 0xFFFF
#define PALO_SIG "PALO"
struct palo_table {
u8 signature[4]; /* Should be "PALO" */
u32 length;
u8 minor_revision;
u8 major_revision;
u8 checksum;
u8 reserved1[5];
u16 max_tlb_purges;
u8 reserved2[6];
};
#define NPTCG_FROM_PAL 0
#define NPTCG_FROM_PALO 1
#define NPTCG_FROM_KERNEL_PARAMETER 2
#endif /* __ASSEMBLY__ */
#endif /* _ASM_IA64_SAL_H */
......@@ -38,6 +38,9 @@ ia64_get_lid (void)
return lid.f.id << 8 | lid.f.eid;
}
extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
void *info, int wait);
#define hard_smp_processor_id() ia64_get_lid()
#ifdef CONFIG_SMP
......
......@@ -210,6 +210,13 @@ struct task_struct;
extern void ia64_save_extra (struct task_struct *task);
extern void ia64_load_extra (struct task_struct *task);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
#else
# define IA64_ACCOUNT_ON_SWITCH(p,n)
#endif
#ifdef CONFIG_PERFMON
DECLARE_PER_CPU(unsigned long, pfm_syst_info);
# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
......@@ -222,6 +229,7 @@ extern void ia64_load_extra (struct task_struct *task);
|| IS_IA32_PROCESS(task_pt_regs(t)) || PERFMON_IS_SYSWIDE())
#define __switch_to(prev,next,last) do { \
IA64_ACCOUNT_ON_SWITCH(prev, next); \
if (IA64_HAS_EXTRA_STATE(prev)) \
ia64_save_extra(prev); \
if (IA64_HAS_EXTRA_STATE(next)) \
......@@ -266,6 +274,10 @@ void cpu_idle_wait(void);
void default_idle(void);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void account_system_vtime(struct task_struct *);
#endif
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
......
......@@ -31,6 +31,12 @@ struct thread_info {
mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
struct restart_block restart_block;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
__u64 ac_stamp;
__u64 ac_leave;
__u64 ac_stime;
__u64 ac_utime;
#endif
};
#define THREAD_SIZE KERNEL_STACK_SIZE
......@@ -62,9 +68,17 @@ struct thread_info {
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#define setup_thread_stack(p, org) \
*task_thread_info(p) = *task_thread_info(org); \
task_thread_info(p)->ac_stime = 0; \
task_thread_info(p)->ac_utime = 0; \
task_thread_info(p)->task = (p);
#else
#define setup_thread_stack(p, org) \
*task_thread_info(p) = *task_thread_info(org); \
task_thread_info(p)->task = (p);
#endif
#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
......
......@@ -64,6 +64,32 @@ struct mmu_gather {
struct page *pages[FREE_PTE_NR];
};
/*
 * Software record of one inserted translation register entry, filled in by
 * ia64_itr_entry() and consulted by is_tr_overlap() / ia64_ptr_entry().
 */
struct ia64_tr_entry {
u64 ifa; /* virtual address the entry was inserted for */
u64 itir; /* log2 of the mapped size, stored shifted left by 2 */
u64 pte; /* pte value inserted; bit 0 set marks the slot as in use, 0 as free */
u64 rr; /* region register value read for ifa at insertion time */
}; /*Record for tr entry!*/
extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
extern void ia64_ptr_entry(u64 target_mask, int slot);
extern struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
/*
region register macros
*/
#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001)
#define RR_VE(val) (((val) & 0x0000000000000001) << 0)
#define RR_VE_MASK 0x0000000000000001L
#define RR_VE_SHIFT 0
#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f)
#define RR_PS(val) (((val) & 0x000000000000003f) << 2)
#define RR_PS_MASK 0x00000000000000fcL
#define RR_PS_SHIFT 2
#define RR_RID_MASK 0x00000000ffffff00L
/* Extract the 24-bit region ID (bits 8..31); the argument is parenthesized so expressions are shifted as a whole. */
#define RR_TO_RID(val) (((val) >> 8) & 0xffffff)
/* Users of the generic TLB shootdown code must declare this storage space. */
DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
......
......@@ -17,6 +17,7 @@
* Now for some TLB flushing routines. This is the kind of stuff that
* can be very expensive, so try to avoid them whenever possible.
*/
extern void setup_ptcg_sem(int max_purges, int from_palo);
/*
* Flush everything (kernel mapping may also have changed due to
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment