Commit 4786b4ee authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6: (27 commits)
  [IA64] kdump: Add crash_save_vmcoreinfo for INIT
  [IA64] Fix NUMA configuration issue
  [IA64] Itanium Spec updates
  [IA64] Untangle sync_icache_dcache() page size determination
  [IA64] arch/ia64/kernel/: use time_* macros
  [IA64] remove redundant display of free swap space in show_mem()
  [IA64] make IOMMU respect the segment boundary limits
  [IA64] kprobes: kprobe-booster for ia64
  [IA64] fix getpid and set_tid_address fast system calls for pid namespaces
  [IA64] Replace explicit jiffies tests with time_* macros.
  [IA64] use goto to jump out do/while_each_thread
  [IA64] Fix unlock ordering in smp_callin
  [IA64] pgd_offset() constfication.
  [IA64] kdump: crash.c coding style fix
  [IA64] kdump: add kdump_on_fatal_mca
  [IA64] Minimize per_cpu reservations.
  [IA64] Correct pernodesize calculation.
  [IA64] Kernel parameter for max number of concurrent global TLB purges
  [IA64] Multiple outstanding ptc.g instruction support
  [IA64] Implement smp_call_function_mask for ia64
  ...
parents 253ba4e7 71b264f8
......@@ -1362,6 +1362,10 @@ and is between 256 and 4096 characters. It is defined in the file
nowb [ARM]
nptcg= [IA64] Override max number of concurrent global TLB
purges which is reported from either PAL_VM_SUMMARY or
SAL PALO.
numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
one of ['zone', 'node', 'default'] can be specified
This can be set from sysctl after boot.
......
......@@ -283,6 +283,17 @@ config FORCE_MAX_ZONEORDER
default "17" if HUGETLB_PAGE
default "11"
config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
default n
help
Select this option to enable more accurate task and CPU time
accounting. This is done by reading a CPU counter on each
kernel entry and exit and on transitions within the kernel
between system, softirq and hardirq state, so there is a
small performance impact.
If in doubt, say N here.
config SMP
bool "Symmetric multi-processing support"
help
......@@ -611,6 +622,9 @@ config IRQ_PER_CPU
bool
default y
config IOMMU_HELPER
def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC)
source "arch/ia64/hp/sim/Kconfig"
source "arch/ia64/Kconfig.debug"
......
......@@ -35,6 +35,7 @@
#include <linux/nodemask.h>
#include <linux/bitops.h> /* hweight64() */
#include <linux/crash_dump.h>
#include <linux/iommu-helper.h>
#include <asm/delay.h> /* ia64_get_itc() */
#include <asm/io.h>
......@@ -460,6 +461,13 @@ get_iovp_order (unsigned long size)
return order;
}
static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
unsigned int bitshiftcnt)
{
return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3)
+ bitshiftcnt;
}
/**
* sba_search_bitmap - find free space in IO PDIR resource bitmap
* @ioc: IO MMU structure which owns the pdir we are interested in.
......@@ -471,15 +479,25 @@ get_iovp_order (unsigned long size)
* Cool perf optimization: search for log2(size) bits at a time.
*/
static SBA_INLINE unsigned long
sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
sba_search_bitmap(struct ioc *ioc, struct device *dev,
unsigned long bits_wanted, int use_hint)
{
unsigned long *res_ptr;
unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
unsigned long flags, pide = ~0UL;
unsigned long flags, pide = ~0UL, tpide;
unsigned long boundary_size;
unsigned long shift;
int ret;
ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
ASSERT(res_ptr < res_end);
boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
BUG_ON(ioc->ibase & ~iovp_mask);
shift = ioc->ibase >> iovp_shift;
spin_lock_irqsave(&ioc->res_lock, flags);
/* Allow caller to force a search through the entire resource space */
......@@ -504,9 +522,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
if (likely(*res_ptr != ~0UL)) {
bitshiftcnt = ffz(*res_ptr);
*res_ptr |= (1UL << bitshiftcnt);
pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
pide <<= 3; /* convert to bit address */
pide += bitshiftcnt;
pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
ioc->res_bitshift = bitshiftcnt + bits_wanted;
goto found_it;
}
......@@ -535,11 +551,13 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr);
ASSERT(0 != mask);
for (; mask ; mask <<= o, bitshiftcnt += o) {
if(0 == ((*res_ptr) & mask)) {
tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
ret = iommu_is_span_boundary(tpide, bits_wanted,
shift,
boundary_size);
if ((0 == ((*res_ptr) & mask)) && !ret) {
*res_ptr |= mask; /* mark resources busy! */
pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
pide <<= 3; /* convert to bit address */
pide += bitshiftcnt;
pide = tpide;
ioc->res_bitshift = bitshiftcnt + bits_wanted;
goto found_it;
}
......@@ -560,6 +578,11 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
end = res_end - qwords;
for (; res_ptr < end; res_ptr++) {
tpide = ptr_to_pide(ioc, res_ptr, 0);
ret = iommu_is_span_boundary(tpide, bits_wanted,
shift, boundary_size);
if (ret)
goto next_ptr;
for (i = 0 ; i < qwords ; i++) {
if (res_ptr[i] != 0)
goto next_ptr;
......@@ -572,8 +595,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
res_ptr[i] = ~0UL;
res_ptr[i] |= RESMAP_MASK(bits);
pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
pide <<= 3; /* convert to bit address */
pide = tpide;
res_ptr += qwords;
ioc->res_bitshift = bits;
goto found_it;
......@@ -605,7 +627,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
* resource bit map.
*/
static int
sba_alloc_range(struct ioc *ioc, size_t size)
sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
{
unsigned int pages_needed = size >> iovp_shift;
#ifdef PDIR_SEARCH_TIMING
......@@ -622,9 +644,9 @@ sba_alloc_range(struct ioc *ioc, size_t size)
/*
** "seek and ye shall find"...praying never hurts either...
*/
pide = sba_search_bitmap(ioc, pages_needed, 1);
pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
if (unlikely(pide >= (ioc->res_size << 3))) {
pide = sba_search_bitmap(ioc, pages_needed, 0);
pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
if (unlikely(pide >= (ioc->res_size << 3))) {
#if DELAYED_RESOURCE_CNT > 0
unsigned long flags;
......@@ -653,7 +675,7 @@ sba_alloc_range(struct ioc *ioc, size_t size)
}
spin_unlock_irqrestore(&ioc->saved_lock, flags);
pide = sba_search_bitmap(ioc, pages_needed, 0);
pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
if (unlikely(pide >= (ioc->res_size << 3)))
panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
ioc->ioc_hpa);
......@@ -936,7 +958,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
pide = sba_alloc_range(ioc, size);
pide = sba_alloc_range(ioc, dev, size);
iovp = (dma_addr_t) pide << iovp_shift;
......@@ -1373,7 +1395,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
ASSERT(dma_len <= DMA_CHUNK_SIZE);
dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
| (sba_alloc_range(ioc, dma_len) << iovp_shift)
| (sba_alloc_range(ioc, dev, dma_len) << iovp_shift)
| dma_offset);
n_mappings++;
}
......
......@@ -30,7 +30,19 @@ struct elf_siginfo
int si_errno; /* errno */
};
#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* Hacks are here since types between compat_timeval (= pair of s32) and
* ia64-native timeval (= pair of s64) are not compatible, at least a file
* arch/ia64/ia32/../../../fs/binfmt_elf.c will get warnings from compiler on
* use of cputime_to_timeval(), which usually an alias of jiffies_to_timeval().
*/
#define cputime_to_timeval(a,b) \
do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0)
#else
#define jiffies_to_timeval(a,b) \
do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0)
#endif
struct elf_prstatus
{
......
This diff is collapsed.
......@@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
static struct acpi_table_slit __initdata *slit_table;
cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
{
......@@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
(pa->apic_id << 8) | (pa->local_sapic_eid);
/* nid should be overridden as logical node id later */
node_cpuid[srat_num_cpus].nid = pxm;
cpu_set(srat_num_cpus, early_cpu_possible_map);
srat_num_cpus++;
}
......@@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void)
}
/* set logical node id in cpu structure */
for (i = 0; i < srat_num_cpus; i++)
for_each_possible_early_cpu(i)
node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
printk(KERN_INFO "Number of logical nodes in system = %d\n",
......
......@@ -7,6 +7,7 @@
#define ASM_OFFSETS_C 1
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/clocksource.h>
#include <asm-ia64/processor.h>
......@@ -34,17 +35,29 @@ void foo(void)
DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
BUILD_BUG_ON(sizeof(struct upid) != 32);
DEFINE(IA64_UPID_SHIFT, 5);
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
#endif
BLANK();
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
......
......@@ -24,6 +24,7 @@ int kdump_status[NR_CPUS];
static atomic_t kdump_cpu_frozen;
atomic_t kdump_in_progress;
static int kdump_on_init = 1;
static int kdump_on_fatal_mca = 1;
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
......@@ -118,6 +119,7 @@ machine_crash_shutdown(struct pt_regs *pt)
static void
machine_kdump_on_init(void)
{
crash_save_vmcoreinfo();
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
......@@ -148,7 +150,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
if (!kdump_on_init)
if (!kdump_on_init && !kdump_on_fatal_mca)
return NOTIFY_DONE;
if (!ia64_kimage) {
......@@ -173,32 +175,38 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
return NOTIFY_DONE;
switch (val) {
case DIE_INIT_MONARCH_PROCESS:
case DIE_INIT_MONARCH_PROCESS:
if (kdump_on_init) {
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
break;
case DIE_INIT_MONARCH_LEAVE:
}
break;
case DIE_INIT_MONARCH_LEAVE:
if (kdump_on_init)
machine_kdump_on_init();
break;
case DIE_INIT_SLAVE_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_RENDZVOUS_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_MONARCH_LEAVE:
/* die_register->signr indicate if MCA is recoverable */
if (!args->signr)
machine_kdump_on_init();
break;
break;
case DIE_INIT_SLAVE_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_RENDZVOUS_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_MONARCH_LEAVE:
/* die_register->signr indicate if MCA is recoverable */
if (kdump_on_fatal_mca && !args->signr) {
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
machine_kdump_on_init();
}
break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
static ctl_table kdump_on_init_table[] = {
static ctl_table kdump_ctl_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
......@@ -207,6 +215,14 @@ static ctl_table kdump_on_init_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_fatal_mca",
.data = &kdump_on_fatal_mca,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
......@@ -215,7 +231,7 @@ static ctl_table sys_table[] = {
.ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = kdump_on_init_table,
.child = kdump_ctl_table,
},
{ .ctl_name = 0 }
};
......
......@@ -37,6 +37,7 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mca.h>
#include <asm/tlbflush.h>
#define EFI_DEBUG 0
......@@ -403,6 +404,41 @@ efi_get_pal_addr (void)
return NULL;
}
static u8 __init palo_checksum(u8 *buffer, u32 length)
{
u8 sum = 0;
u8 *end = buffer + length;
while (buffer < end)
sum = (u8) (sum + *(buffer++));
return sum;
}
/*
* Parse and handle PALO table which is published at:
* http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
*/
static void __init handle_palo(unsigned long palo_phys)
{
struct palo_table *palo = __va(palo_phys);
u8 checksum;
if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
printk(KERN_INFO "PALO signature incorrect.\n");
return;
}
checksum = palo_checksum((u8 *)palo, palo->length);
if (checksum) {
printk(KERN_INFO "PALO checksum incorrect.\n");
return;
}
setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
}
void
efi_map_pal_code (void)
{
......@@ -432,6 +468,7 @@ efi_init (void)
u64 efi_desc_size;
char *cp, vendor[100] = "unknown";
int i;
unsigned long palo_phys;
/*
* It's too early to be able to use the standard kernel command line
......@@ -496,6 +533,8 @@ efi_init (void)
efi.hcdp = EFI_INVALID_TABLE_ADDR;
efi.uga = EFI_INVALID_TABLE_ADDR;
palo_phys = EFI_INVALID_TABLE_ADDR;
for (i = 0; i < (int) efi.systab->nr_tables; i++) {
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
efi.mps = config_tables[i].table;
......@@ -515,10 +554,17 @@ efi_init (void)
} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
efi.hcdp = config_tables[i].table;
printk(" HCDP=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid,
PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
palo_phys = config_tables[i].table;
printk(" PALO=0x%lx", config_tables[i].table);
}
}
printk("\n");
if (palo_phys != EFI_INVALID_TABLE_ADDR)
handle_palo(palo_phys);
runtime = __va(efi.systab->runtime);
efi.get_time = phys_get_time;
efi.set_time = phys_set_time;
......
......@@ -710,6 +710,16 @@ ENTRY(ia64_leave_syscall)
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
.work_processed_syscall:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
adds r2=PT(LOADRS)+16,r12
(pUStk) mov.m r22=ar.itc // fetch time at leave
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
;;
(p6) ld4 r31=[r18] // load current_thread_info()->flags
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
adds r3=PT(AR_BSPSTORE)+16,r12 // deferred
;;
#else
adds r2=PT(LOADRS)+16,r12
adds r3=PT(AR_BSPSTORE)+16,r12
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
......@@ -718,6 +728,7 @@ ENTRY(ia64_leave_syscall)
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
nop.i 0
;;
#endif
mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
......@@ -737,12 +748,21 @@ ENTRY(ia64_leave_syscall)
ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
#else
mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
#endif
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop 0
......@@ -759,7 +779,11 @@ ENTRY(ia64_leave_syscall)
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
(pUStk) st1 [r15]=r17 // M2|3
#else
(pUStk) st1 [r14]=r17 // M2|3
#endif
ld8.fill r13=[r3],16 // M0|1
mov f8=f0 // F clear f8
;;
......@@ -775,12 +799,22 @@ ENTRY(ia64_leave_syscall)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
cover // B add current frame into dirty partition & set cr.ifs
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
mov r19=ar.bsp // M2 get new backing store pointer
st8 [r14]=r22 // M save time at leave
mov f10=f0 // F clear f10
mov r22=r0 // A clear r22
movl r14=__kernel_syscall_via_epc // X
;;
#else
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
nop.m 0
movl r14=__kernel_syscall_via_epc // X
;;
#endif
mov.m ar.csd=r0 // M2 clear ar.csd
mov.m ar.ccv=r0 // M2 clear ar.ccv
mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
......@@ -913,10 +947,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
.pred.rel.mutex pUStk,pKStk
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
(pUStk) mov.m r22=ar.itc // M fetch time at leave
nop.i 0
;;
#else
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop.i 0
nop.i 0
;;
#endif
ld8 r29=[r16],16 // load cr.ipsr
ld8 r28=[r17],16 // load cr.iip
;;
......@@ -938,15 +980,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
#else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
#endif
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
#endif
;;
ld8.fill r14=[r16],16
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
// mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
// mib : mov add br -> mib : ld8 add br
// bbb_ : br nop cover;; mbb_ : mov br cover;;
//
// no one require bsp in r16 if (pKStk) branch is selected.
(pUStk) st8 [r3]=r22 // save time at leave
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
;;
ld8.fill r3=[r16] // deferred
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
mov r16=ar.bsp // get existing backing store pointer
#else
ld8.fill r3=[r16]
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
......@@ -954,6 +1018,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
mov r16=ar.bsp // get existing backing store pointer
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
#endif
/*
* Restore user backing store.
......
......@@ -61,13 +61,29 @@ ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
;;
ld8 r17=[r17] // r17 = current->group_leader
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
add r8=IA64_TASK_TGID_OFFSET,r16
add r17=IA64_TASK_TGIDLINK_OFFSET,r17
;;
and r9=TIF_ALLWORK_MASK,r9
ld4 r8=[r8] // r8 = current->tgid
ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
;;
add r8=IA64_PID_LEVEL_OFFSET,r17
;;
ld4 r8=[r8] // r8 = pid->level
add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
;;
shl r8=r8,IA64_UPID_SHIFT
;;
add r17=r17,r8 // r17 = &pid->numbers[pid->level]
;;
ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
;;
mov r17=0
;;
cmp.ne p8,p0=0,r9
(p8) br.spnt.many fsys_fallback_syscall
......@@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address)
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
add r17=IA64_TASK_TGIDLINK_OFFSET,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
;;
and r9=TIF_ALLWORK_MASK,r9
add r8=IA64_TASK_PID_OFFSET,r16
add r8=IA64_PID_LEVEL_OFFSET,r17
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
;;
ld4 r8=[r8]
ld4 r8=[r8] // r8 = pid->level
add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
;;
shl r8=r8,IA64_UPID_SHIFT
;;
add r17=r17,r8 // r17 = &pid->numbers[pid->level]
;;
ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
;;
cmp.ne p8,p0=0,r9
mov r17=-1
;;
......@@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday)
// Note that instructions are optimized for McKinley. McKinley can
// process two bundles simultaneously and therefore we continuously
// try to feed the CPU two bundles and then a stop.
//
// Additional note that code has changed a lot. Optimization is TBD.
// Comments begin with "?" are maybe outdated.
tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle
mov pr = r30,0xc000 // Set predicates according to function
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
tnat.nz p6,p0 = r31 // guard against Nat argument
(p6) br.cond.spnt.few .fail_einval
movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
;;
ld4 r2 = [r2] // process work pending flags
movl r29 = itc_jitter_data // itc_jitter
add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
ld4 r2 = [r2] // process work pending flags
;;
(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
mov pr = r30,0xc000 // Set predicates according to function
;;
and r2 = TIF_ALLWORK_MASK,r2
(p6) br.cond.spnt.few .fail_einval // ? deferred branch
add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
;;
add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
(p6) br.cond.spnt.many fsys_fallback_syscall
(p6) br.cond.spnt.many fsys_fallback_syscall
;;
// Begin critical section
.time_redo:
......@@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday)
(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
(p13) ld8 r25 = [r19] // get itc_lastcycle value
;; // ? could be removed by moving the last add upward
ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
;;
ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
......@@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday)
EX(.fail_efault, probe.w.fault r31, 3)
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
;;
// ? simulate tbit.nz.or p7,p0 = r28,0
getf.sig r2 = f8
mf
;;
ld4 r10 = [r20] // gtod_lock.sequence
shr.u r2 = r2,r23 // shift by factor
;; // ? overloaded 3 bundles!
;;
add r8 = r8,r2 // Add xtime.nsecs
cmp4.ne p7,p0 = r28,r10
(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
......@@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
;;
mov r8 = r0
(p14) getf.sig r2 = f8
;;
mov r8 = r0
(p14) shr.u r21 = r2, 4
;;
EX(.fail_efault, st8 [r31] = r9)
......@@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
mov.m r30=ar.itc // M get cycle for accounting
#else
nop.m 0
#endif
nop.i 0
;;
mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
......@@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
// mov.m r30=ar.itc is called in advance
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
;;
ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
;;
ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
ld8 r21=[r17] // cumulated utime
sub r22=r19,r18 // stime before leave kernel
;;
st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
sub r18=r30,r19 // elapsed time in user mode
;;
add r20=r20,r22 // sum stime
add r21=r21,r18 // sum utime
;;
st8 [r16]=r20 // update stime
st8 [r17]=r21 // update utime
;;
#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
mov rp=r14 // I0 set the real return addr
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
......
......@@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock)
br.ret.sptk.many rp
END(sched_clock)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
GLOBAL_ENTRY(cycle_to_cputime)
alloc r16=ar.pfs,1,0,0,0
addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
;;
ldf8 f8=[r8]
;;
setf.sig f9=r32
;;
xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
;;
getf.sig r8=f10 // (5 cyc)
getf.sig r9=f11
;;
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
END(cycle_to_cputime)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
GLOBAL_ENTRY(start_kernel_thread)
.prologue
.save rp, r0 // this is the end of the call-chain
......
......@@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
static unsigned char count;
static long last_time;
if (jiffies - last_time > 5*HZ)
if (time_after(jiffies, last_time + 5 * HZ))
count = 0;
if (++count < 5) {
last_time = jiffies;
......
......@@ -805,8 +805,13 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
;;
mov b6=r30 // I0 setup syscall handler branch reg early
#else
nop.i 0
;;
#endif
mov.m r25=ar.unat // M2 (5 cyc)
dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
......@@ -817,7 +822,11 @@ ENTRY(break_fault)
//
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
mov.m r30=ar.itc // M get cycle for accounting
#else
mov b6=r30 // I0 setup syscall handler branch reg early
#endif
cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
......@@ -829,6 +838,30 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B
1:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
// mov.m r30=ar.itc is called in advance, and r13 is current
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
(pKStk) br.cond.spnt .skip_accounting // B unlikely skip
;;
ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp
ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave
;;
ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime
ld8 r21=[r17] // M cumulated utime
sub r22=r19,r18 // A stime before leave
;;
st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp
sub r18=r30,r19 // A elapsed time in user
;;
add r20=r20,r22 // A sum stime
add r21=r21,r18 // A sum utime
;;
st8 [r16]=r20 // M update stime
st8 [r17]=r21 // M update utime
;;
.skip_accounting:
#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
nop 0
bsw.1 // B (6 cyc) regs are saved, switch to bank 1
......@@ -928,6 +961,7 @@ END(interrupt)
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
* - r30: ar.itc for accounting (don't touch)
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
......@@ -1090,6 +1124,41 @@ END(dispatch_illegal_op_fault)
DBG_FAULT(16)
FAULT(16)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* There is no particular reason for this code to be here, other than
* that there happens to be space here that would go unused otherwise.
* If this fault ever gets "unreserved", simply moved the following
* code to a more suitable spot...
*
* account_sys_enter is called from SAVE_MIN* macros if accounting is
* enabled and if the macro is entered from user mode.
*/
ENTRY(account_sys_enter)
// mov.m r20=ar.itc is called in advance, and r13 is current
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;;
ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel
;;
ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
ld8 r21=[r17] // cumulated utime
sub r22=r19,r18 // stime before leave kernel
;;
st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp
sub r18=r20,r19 // elapsed time in user mode
;;
add r23=r23,r22 // sum stime
add r21=r21,r18 // sum utime
;;
st8 [r16]=r23 // update stime
st8 [r17]=r21 // update utime
;;
br.ret.sptk.many rp
END(account_sys_enter)
#endif
.org ia64_ivt+0x4400
/////////////////////////////////////////////////////////////////////////////////////////
// 0x4400 Entry 17 (size 64 bundles) Reserved
......
......@@ -78,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = {
{ u, u, u }, /* 1F */
};
/* Insert a long branch code */
static void __kprobes set_brl_inst(void *from, void *to)
{
s64 rel = ((s64) to - (s64) from) >> 4;
bundle_t *brl;
brl = (bundle_t *) ((u64) from & ~0xf);
brl->quad0.template = 0x05; /* [MLX](stop) */
brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */
brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
/* brl.cond.sptk.many.clr rel<<4 (qp=0) */
brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
}
/*
* In this function we check to see if the instruction
* is IP relative instruction and update the kprobe
......@@ -496,6 +510,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
}
/* Check the instruction in the slot is break */
static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
{
unsigned int major_opcode;
unsigned int template = bundle->quad0.template;
unsigned long kprobe_inst;
/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
if (slot == 1 && bundle_encoding[template][1] == L)
slot++;
/* Get Kprobe probe instruction at given slot*/
get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
/* For break instruction,
* Bits 37:40 Major opcode to be zero
* Bits 27:32 X6 to be zero
* Bits 32:35 X3 to be zero
*/
if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
/* Not a break instruction */
return 0;
}
/* Is a break instruction */
return 1;
}
/*
* In this function, we check whether the target bundle modifies IP or
* it triggers an exception. If so, it cannot be boostable.
*/
static int __kprobes can_boost(bundle_t *bundle, uint slot,
unsigned long bundle_addr)
{
unsigned int template = bundle->quad0.template;
do {
if (search_exception_tables(bundle_addr + slot) ||
__is_ia64_break_inst(bundle, slot))
return 0; /* exception may occur in this bundle*/
} while ((++slot) < 3);
template &= 0x1e;
if (template >= 0x10 /* including B unit */ ||
template == 0x04 /* including X unit */ ||
template == 0x06) /* undefined */
return 0;
return 1;
}
/* Prepare long jump bundle and disables other boosters if need */
static void __kprobes prepare_booster(struct kprobe *p)
{
unsigned long addr = (unsigned long)p->addr & ~0xFULL;
unsigned int slot = (unsigned long)p->addr & 0xf;
struct kprobe *other_kp;
if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
}
/* disables boosters in previous slots */
for (; addr < (unsigned long)p->addr; addr++) {
other_kp = get_kprobe((void *)addr);
if (other_kp)
other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
}
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
unsigned long addr = (unsigned long) p->addr;
......@@ -530,6 +615,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
prepare_booster(p);
return 0;
}
......@@ -543,7 +630,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
src = &p->opcode.bundle;
flush_icache_range((unsigned long)p->ainsn.insn,
(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
(unsigned long)p->ainsn.insn +
sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
switch (p->ainsn.slot) {
case 0:
dest->quad0.slot0 = src->quad0.slot0;
......@@ -584,13 +673,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
mutex_lock(&kprobe_mutex);
free_insn_slot(p->ainsn.insn, 0);
free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
mutex_unlock(&kprobe_mutex);
}
/*
* We are resuming execution after a single step fault, so the pt_regs
* structure reflects the register state after we executed the instruction
* located in the kprobe (p->ainsn.insn.bundle). We still need to adjust
* located in the kprobe (p->ainsn.insn->bundle). We still need to adjust
* the ip to point back to the original stack address. To set the IP address
* to original stack address, handle the case where we need to fixup the
* relative IP address and/or fixup branch register.
......@@ -607,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
if (slot == 1 && bundle_encoding[template][1] == L)
slot = 2;
if (p->ainsn.inst_flag) {
if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
/* Fix relative IP address */
......@@ -686,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
{
unsigned int slot = ia64_psr(regs)->ri;
unsigned int template, major_opcode;
unsigned long kprobe_inst;
unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
bundle_t bundle;
memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
template = bundle.quad0.template;
/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
if (slot == 1 && bundle_encoding[template][1] == L)
slot++;
/* Get Kprobe probe instruction at given slot*/
get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
/* For break instruction,
* Bits 37:40 Major opcode to be zero
* Bits 27:32 X6 to be zero
* Bits 32:35 X3 to be zero
*/
if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
/* Not a break instruction */
return 0;
}
/* Is a break instruction */
return 1;
return __is_ia64_break_inst(&bundle, slot);
}
static int __kprobes pre_kprobes_handler(struct die_args *args)
......@@ -802,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
return 1;
ss_probe:
#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
ia64_psr(regs)->ri = p->ainsn.slot;
regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
/* turn single stepping off */
ia64_psr(regs)->ss = 0;
reset_current_kprobe();
preempt_enable_no_resched();
return 1;
}
#endif
prepare_ss(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
......
......@@ -69,6 +69,7 @@
* 2007-04-27 Russ Anderson <rja@sgi.com>
* Support multiple cpus going through OS_MCA in the same event.
*/
#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
......@@ -97,6 +98,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/tlb.h>
#include "mca_drv.h"
#include "entry.h"
......@@ -112,6 +114,7 @@ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */
unsigned long __per_cpu_mca[NR_CPUS];
......@@ -293,7 +296,8 @@ static void ia64_mlogbuf_dump_from_init(void)
if (mlogbuf_finished)
return;
if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
if (mlogbuf_timestamp &&
time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
" and the system seems to be messed up.\n");
ia64_mlogbuf_finish(0);
......@@ -1182,6 +1186,49 @@ ia64_wait_for_slaves(int monarch, const char *type)
return;
}
/* mca_insert_tr
*
* Switch rid when TR reload and needed!
* iord: 1: itr, 2: itr;
*
*/
static void mca_insert_tr(u64 iord)
{
int i;
u64 old_rr;
struct ia64_tr_entry *p;
unsigned long psr;
int cpu = smp_processor_id();
psr = ia64_clear_ic();
for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
p = &__per_cpu_idtrs[cpu][iord-1][i];
if (p->pte & 0x1) {
old_rr = ia64_get_rr(p->ifa);
if (old_rr != p->rr) {
ia64_set_rr(p->ifa, p->rr);
ia64_srlz_d();
}
ia64_ptr(iord, p->ifa, p->itir >> 2);
ia64_srlz_i();
if (iord & 0x1) {
ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
ia64_srlz_i();
}
if (iord & 0x2) {
ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
ia64_srlz_i();
}
if (old_rr != p->rr) {
ia64_set_rr(p->ifa, old_rr);
ia64_srlz_d();
}
}
}
ia64_set_psr(psr);
}
/*
* ia64_mca_handler
*
......@@ -1266,16 +1313,17 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish(1);
#ifdef CONFIG_KEXEC
atomic_set(&kdump_in_progress, 1);
monarch_cpu = -1;
#endif
}
if (__get_cpu_var(ia64_mca_tr_reload)) {
mca_insert_tr(0x1); /*Reload dynamic itrs*/
mca_insert_tr(0x2); /*Reload dynamic itrs*/
}
if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
== NOTIFY_STOP)
ia64_mca_spin(__func__);
if (atomic_dec_return(&mca_count) > 0) {
int i;
......
......@@ -219,8 +219,13 @@ ia64_reload_tr:
mov r20=IA64_TR_CURRENT_STACK
;;
itr.d dtr[r20]=r16
GET_THIS_PADDR(r2, ia64_mca_tr_reload)
mov r18 = 1
;;
srlz.d
;;
st8 [r2] =r18
;;
done_tlb_purge_and_reload:
......
......@@ -3,6 +3,18 @@
#include "entry.h"
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/* read ar.itc in advance, and use it before leaving bank 0 */
#define ACCOUNT_GET_STAMP \
(pUStk) mov.m r20=ar.itc;
#define ACCOUNT_SYS_ENTER \
(pUStk) br.call.spnt rp=account_sys_enter \
;;
#else
#define ACCOUNT_GET_STAMP
#define ACCOUNT_SYS_ENTER
#endif
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
......@@ -122,11 +134,13 @@
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
ACCOUNT_SYS_ENTER \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
......
......@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
for(node=0; node < MAX_NUMNODES; node++)
cpus_clear(node_to_cpu_mask[node]);
for(cpu = 0; cpu < NR_CPUS; ++cpu) {
for_each_possible_early_cpu(cpu) {
node = -1;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
......
......@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
while (offp < (s32 *) end) {
wp = (u64 *) ia64_imva((char *) offp + *offp);
wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
wp[1] = 0x0004000000000200UL;
wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
wp[3] = 0x0084006880000200UL;
wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
wp[1] = 0x0084006880000200UL;
wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
wp[3] = 0x0004000000000200UL;
ia64_fc(wp); ia64_fc(wp + 2);
++offp;
}
......
......@@ -4204,10 +4204,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
do_each_thread (g, t) {
if (t->thread.pfm_context == ctx) {
ret = 0;
break;
goto out;
}
} while_each_thread (g, t);
out:
read_unlock(&tasklist_lock);
DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
......
......@@ -625,42 +625,12 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
do_dump_task_fpu(current, info, arg);
}
int
dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
{
struct unw_frame_info tcore_info;
if (current == task) {
unw_init_running(do_copy_regs, regs);
} else {
memset(&tcore_info, 0, sizeof(tcore_info));
unw_init_from_blocked_task(&tcore_info, task);
do_copy_task_regs(task, &tcore_info, regs);
}
return 1;
}
void
ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
{
unw_init_running(do_copy_regs, dst);
}
int
dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
{
struct unw_frame_info tcore_info;
if (current == task) {
unw_init_running(do_dump_fpu, dst);
} else {
memset(&tcore_info, 0, sizeof(tcore_info));
unw_init_from_blocked_task(&tcore_info, task);
do_dump_task_fpu(task, &tcore_info, dst);
}
return 1;
}
int
dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
{
......
This diff is collapsed.
......@@ -59,6 +59,7 @@
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/hpsim.h>
......@@ -176,6 +177,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
/*
* Similar to "filter_rsvd_memory()", but the reserved memory ranges
* are not filtered out.
*/
int __init
filter_memory(unsigned long start, unsigned long end, void *arg)
{
void (*func)(unsigned long, unsigned long, int);
#if IGNORE_PFN0
if (start == PAGE_OFFSET) {
printk(KERN_WARNING "warning: skipping physical page 0\n");
start += PAGE_SIZE;
if (start >= end)
return 0;
}
#endif
func = arg;
if (start < end)
call_pernode_memory(__pa(start), end - start, func);
return 0;
}
static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
......@@ -493,6 +517,8 @@ setup_arch (char **cmdline_p)
acpi_table_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
# endif
#else
# ifdef CONFIG_SMP
......@@ -946,9 +972,10 @@ cpu_init (void)
#endif
/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
if (ia64_pal_vm_summary(NULL, &vmi) == 0)
if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
else {
setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
} else {
printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
max_ctx = (1U << 15) - 1; /* use architected minimum */
}
......
......@@ -209,6 +209,19 @@ send_IPI_allbutself (int op)
}
}
/*
* Called with preemption disabled.
*/
static inline void
send_IPI_mask(cpumask_t mask, int op)
{
unsigned int cpu;
for_each_cpu_mask(cpu, mask) {
send_IPI_single(cpu, op);
}
}
/*
* Called with preemption disabled.
*/
......@@ -401,6 +414,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
}
EXPORT_SYMBOL(smp_call_function_single);
/**
* smp_call_function_mask(): Run a function on a set of other CPUs.
* <mask> The set of cpus to run on. Must not include the current cpu.
* <func> The function to run. This must be fast and non-blocking.
* <info> An arbitrary pointer to pass to the function.
* <wait> If true, wait (atomically) until function
* has completed on other CPUs.
*
* Returns 0 on success, else a negative status code.
*
* If @wait is true, then returns once @func has returned; otherwise
* it returns just before the target cpu calls @func.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
int smp_call_function_mask(cpumask_t mask,
void (*func)(void *), void *info,
int wait)
{
struct call_data_struct data;
cpumask_t allbutself;
int cpus;
spin_lock(&call_lock);
allbutself = cpu_online_map;
cpu_clear(smp_processor_id(), allbutself);
cpus_and(mask, mask, allbutself);
cpus = cpus_weight(mask);
if (!cpus) {
spin_unlock(&call_lock);
return 0;
}
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
data.func = func;
data.info = info;
atomic_set(&data.started, 0);
data.wait = wait;
if (wait)
atomic_set(&data.finished, 0);
call_data = &data;
mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
/* Send a message to other CPUs */
if (cpus_equal(mask, allbutself))
send_IPI_allbutself(IPI_CALL_FUNC);
else
send_IPI_mask(mask, IPI_CALL_FUNC);
/* Wait for response */
while (atomic_read(&data.started) != cpus)
cpu_relax();
if (wait)
while (atomic_read(&data.finished) != cpus)
cpu_relax();
call_data = NULL;
spin_unlock(&call_lock);
return 0;
}
EXPORT_SYMBOL(smp_call_function_mask);
/*
* this function sends a 'generic call function' IPI to all other CPUs
* in the system.
......
......@@ -400,9 +400,9 @@ smp_callin (void)
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
cpu_set(cpuid, cpu_online_map);
unlock_ipi_calllock();
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
unlock_ipi_calllock();
smp_setup_percpu_timer();
......
......@@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = {
};
static struct clocksource *itc_clocksource;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#include <linux/kernel_stat.h>
extern cputime_t cycle_to_cputime(u64 cyc);
/*
* Called from the context switch with interrupts disabled, to charge all
* accumulated times to the current process, and to prepare accounting on
* the next process.
*/
void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
{
struct thread_info *pi = task_thread_info(prev);
struct thread_info *ni = task_thread_info(next);
cputime_t delta_stime, delta_utime;
__u64 now;
now = ia64_get_itc();
delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
account_system_time(prev, 0, delta_stime);
account_system_time_scaled(prev, delta_stime);
if (pi->ac_utime) {
delta_utime = cycle_to_cputime(pi->ac_utime);
account_user_time(prev, delta_utime);
account_user_time_scaled(prev, delta_utime);
}
pi->ac_stamp = ni->ac_stamp = now;
ni->ac_stime = ni->ac_utime = 0;
}
/*
* Account time for a transition between system, hard irq or soft irq state.
* Note that this function is called with interrupts enabled.
*/
void account_system_vtime(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
unsigned long flags;
cputime_t delta_stime;
__u64 now;
local_irq_save(flags);
now = ia64_get_itc();
delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
account_system_time(tsk, 0, delta_stime);
account_system_time_scaled(tsk, delta_stime);
ti->ac_stime = 0;
ti->ac_stamp = now;
local_irq_restore(flags);
}
/*
* Called from the timer interrupt handler to charge accumulated user time
* to the current process. Must be called with interrupts disabled.
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
struct thread_info *ti = task_thread_info(p);
cputime_t delta_utime;
if (ti->ac_utime) {
delta_utime = cycle_to_cputime(ti->ac_utime);
account_user_time(p, delta_utime);
account_user_time_scaled(p, delta_utime);
ti->ac_utime = 0;
}
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
{
......
......@@ -13,6 +13,7 @@
* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
* 2001/01/17 Add support emulation of unaligned kernel accesses.
*/
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/tty.h>
......@@ -1290,7 +1291,7 @@ within_logging_rate_limit (void)
{
static unsigned long count, last_time;
if (jiffies - last_time > 5*HZ)
if (time_after(jiffies, last_time + 5 * HZ))
count = 0;
if (count < 5) {
last_time = jiffies;
......
......@@ -45,8 +45,6 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n",
nr_swap_pages<<(PAGE_SHIFT-10));
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
......@@ -255,7 +253,7 @@ paging_init (void)
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
efi_memmap_walk(register_active_ranges, NULL);
efi_memmap_walk(filter_memory, register_active_ranges);
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
......
......@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)
{
int cpu, n = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
for_each_possible_early_cpu(cpu)
if (node == node_cpuid[cpu].nid)
n++;
......@@ -124,6 +124,7 @@ static unsigned long __meminit compute_pernodesize(int node)
pernodesize += node * L1_CACHE_BYTES;
pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize = PAGE_ALIGN(pernodesize);
return pernodesize;
}
......@@ -142,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
#ifdef CONFIG_SMP
int cpu;
for (cpu = 0; cpu < NR_CPUS; cpu++) {
for_each_possible_early_cpu(cpu) {
if (node == node_cpuid[cpu].nid) {
memcpy(__va(cpu_data), __phys_per_cpu_start,
__per_cpu_end - __per_cpu_start);
......@@ -345,7 +346,7 @@ static void __init initialize_pernode_data(void)
#ifdef CONFIG_SMP
/* Set the node_data pointer for each per-cpu struct */
for (cpu = 0; cpu < NR_CPUS; cpu++) {
for_each_possible_early_cpu(cpu) {
node = node_cpuid[cpu].nid;
per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
}
......@@ -444,7 +445,7 @@ void __init find_memory(void)
mem_data[node].min_pfn = ~0UL;
}
efi_memmap_walk(register_active_ranges, NULL);
efi_memmap_walk(filter_memory, register_active_ranges);
/*
* Initialize the boot memory maps in reverse order since that's
......@@ -493,13 +494,9 @@ void __cpuinit *per_cpu_init(void)
int cpu;
static int first_time = 1;
if (smp_processor_id() != 0)
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
if (first_time) {
first_time = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
for_each_possible_early_cpu(cpu)
per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
}
......@@ -522,8 +519,6 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n",
nr_swap_pages<<(PAGE_SHIFT-10));
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
......
......@@ -58,7 +58,6 @@ __ia64_sync_icache_dcache (pte_t pte)
{
unsigned long addr;
struct page *page;
unsigned long order;
page = pte_page(pte);
addr = (unsigned long) page_address(page);
......@@ -66,12 +65,7 @@ __ia64_sync_icache_dcache (pte_t pte)
if (test_bit(PG_arch_1, &page->flags))
return; /* i-cache is already coherent with d-cache */
if (PageCompound(page)) {
order = compound_order(page);
flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
}
else
flush_icache_range(addr, addr + PAGE_SIZE);
flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
set_bit(PG_arch_1, &page->flags); /* mark page as clean */
}
......@@ -553,12 +547,10 @@ find_largest_hole (u64 start, u64 end, void *arg)
#endif /* CONFIG_VIRTUAL_MEM_MAP */
int __init
register_active_ranges(u64 start, u64 end, void *arg)
register_active_ranges(u64 start, u64 len, int nid)
{
int nid = paddr_to_nid(__pa(start));
u64 end = start + len;
if (nid < 0)
nid = 0;
#ifdef CONFIG_KEXEC
if (start > crashk_res.start && start < crashk_res.end)
start = crashk_res.end;
......
......@@ -27,7 +27,9 @@
*/
int num_node_memblks;
struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
struct node_cpuid_s node_cpuid[NR_CPUS];
struct node_cpuid_s node_cpuid[NR_CPUS] =
{ [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
/*
* This is a matrix with "distances" between nodes, they should be
* proportional to the memory access latency ratios.
......
This diff is collapsed.
......@@ -199,7 +199,7 @@ xpc_timeout_partition_disengage_request(unsigned long data)
struct xpc_partition *part = (struct xpc_partition *) data;
DBUG_ON(jiffies < part->disengage_request_timeout);
DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
(void) xpc_partition_disengaged(part);
......@@ -230,7 +230,7 @@ xpc_hb_beater(unsigned long dummy)
{
xpc_vars->heartbeat++;
if (jiffies >= xpc_hb_check_timeout) {
if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
wake_up_interruptible(&xpc_act_IRQ_wq);
}
......@@ -270,7 +270,7 @@ xpc_hb_checker(void *ignore)
/* checking of remote heartbeats is skewed by IRQ handling */
if (jiffies >= xpc_hb_check_timeout) {
if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
dev_dbg(xpc_part, "checking remote heartbeats\n");
xpc_check_remote_hb();
......@@ -305,7 +305,7 @@ xpc_hb_checker(void *ignore)
/* wait for IRQ or timeout */
(void) wait_event_interruptible(xpc_act_IRQ_wq,
(last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
jiffies >= xpc_hb_check_timeout ||
time_after_eq(jiffies, xpc_hb_check_timeout) ||
(volatile int) xpc_exiting));
}
......
......@@ -877,7 +877,7 @@ xpc_partition_disengaged(struct xpc_partition *part)
disengaged = (xpc_partition_engaged(1UL << partid) == 0);
if (part->disengage_request_timeout) {
if (!disengaged) {
if (jiffies < part->disengage_request_timeout) {
if (time_before(jiffies, part->disengage_request_timeout)) {
/* timelimit hasn't been reached yet */
return 0;
}
......
......@@ -35,6 +35,7 @@
#include <linux/init.h>
#include <linux/numa.h>
#include <asm/system.h>
#include <asm/numa.h>
#define COMPILER_DEPENDENT_INT64 long
#define COMPILER_DEPENDENT_UINT64 unsigned long
......@@ -115,7 +116,11 @@ extern unsigned int is_cpu_cpei_target(unsigned int cpu);
extern void set_cpei_target_cpu(unsigned int cpu);
extern unsigned int get_cpei_target_cpu(void);
extern void prefill_possible_map(void);
#ifdef CONFIG_ACPI_HOTPLUG_CPU
extern int additional_cpus;
#else
#define additional_cpus 0
#endif
#ifdef CONFIG_ACPI_NUMA
#if MAX_NUMNODES > 256
......@@ -129,6 +134,34 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
#define acpi_unlazy_tlb(x)
#ifdef CONFIG_ACPI_NUMA
extern cpumask_t early_cpu_possible_map;
#define for_each_possible_early_cpu(cpu) \
for_each_cpu_mask((cpu), early_cpu_possible_map)
static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus)
{
int low_cpu, high_cpu;
int cpu;
int next_nid = 0;
low_cpu = cpus_weight(early_cpu_possible_map);
high_cpu = max(low_cpu, min_cpus);
high_cpu = min(high_cpu + reserve_cpus, NR_CPUS);
for (cpu = low_cpu; cpu < high_cpu; cpu++) {
cpu_set(cpu, early_cpu_possible_map);
if (node_cpuid[cpu].nid == NUMA_NO_NODE) {
node_cpuid[cpu].nid = next_nid;
next_nid++;
if (next_nid >= num_online_nodes())
next_nid = 0;
}
}
}
#endif /* CONFIG_ACPI_NUMA */
#endif /*__KERNEL__*/
#endif /*_ASM_ACPI_H*/
/*
* include/asm-ia64/cputime.h:
* Definitions for measuring cputime on ia64 machines.
*
* Based on <asm-powerpc/cputime.h>.
*
* Copyright (C) 2007 FUJITSU LIMITED
* Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
* Otherwise we measure cpu time in jiffies using the generic definitions.
*/
#ifndef __IA64_CPUTIME_H
#define __IA64_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#include <asm-generic/cputime.h>
#else
#include <linux/time.h>
#include <linux/jiffies.h>
#include <asm/processor.h>
typedef u64 cputime_t;
typedef u64 cputime64_t;
#define cputime_zero ((cputime_t)0)
#define cputime_max ((~((cputime_t)0) >> 1) - 1)
#define cputime_add(__a, __b) ((__a) + (__b))
#define cputime_sub(__a, __b) ((__a) - (__b))
#define cputime_div(__a, __n) ((__a) / (__n))
#define cputime_halve(__a) ((__a) >> 1)
#define cputime_eq(__a, __b) ((__a) == (__b))
#define cputime_gt(__a, __b) ((__a) > (__b))
#define cputime_ge(__a, __b) ((__a) >= (__b))
#define cputime_lt(__a, __b) ((__a) < (__b))
#define cputime_le(__a, __b) ((__a) <= (__b))
#define cputime64_zero ((cputime64_t)0)
#define cputime64_add(__a, __b) ((__a) + (__b))
#define cputime64_sub(__a, __b) ((__a) - (__b))
#define cputime_to_cputime64(__ct) (__ct)
/*
* Convert cputime <-> jiffies (HZ)
*/
#define cputime_to_jiffies(__ct) ((__ct) / (NSEC_PER_SEC / HZ))
#define jiffies_to_cputime(__jif) ((__jif) * (NSEC_PER_SEC / HZ))
#define cputime64_to_jiffies64(__ct) ((__ct) / (NSEC_PER_SEC / HZ))
#define jiffies64_to_cputime64(__jif) ((__jif) * (NSEC_PER_SEC / HZ))
/*
* Convert cputime <-> milliseconds
*/
#define cputime_to_msecs(__ct) ((__ct) / NSEC_PER_MSEC)
#define msecs_to_cputime(__msecs) ((__msecs) * NSEC_PER_MSEC)
/*
* Convert cputime <-> seconds
*/
#define cputime_to_secs(__ct) ((__ct) / NSEC_PER_SEC)
#define secs_to_cputime(__secs) ((__secs) * NSEC_PER_SEC)
/*
* Convert cputime <-> timespec (nsec)
*/
static inline cputime_t timespec_to_cputime(const struct timespec *val)
{
cputime_t ret = val->tv_sec * NSEC_PER_SEC;
return (ret + val->tv_nsec);
}
static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
{
val->tv_sec = ct / NSEC_PER_SEC;
val->tv_nsec = ct % NSEC_PER_SEC;
}
/*
* Convert cputime <-> timeval (msec)
*/
static inline cputime_t timeval_to_cputime(struct timeval *val)
{
cputime_t ret = val->tv_sec * NSEC_PER_SEC;
return (ret + val->tv_usec * NSEC_PER_USEC);
}
static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
{
val->tv_sec = ct / NSEC_PER_SEC;
val->tv_usec = (ct % NSEC_PER_SEC) / NSEC_PER_USEC;
}
/*
* Convert cputime <-> clock (USER_HZ)
*/
#define cputime_to_clock_t(__ct) ((__ct) / (NSEC_PER_SEC / USER_HZ))
#define clock_t_to_cputime(__x) ((__x) * (NSEC_PER_SEC / USER_HZ))
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) cputime_to_clock_t((cputime_t)__ct)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __IA64_CPUTIME_H */
......@@ -26,6 +26,7 @@
#define ELF_ARCH EM_IA_64
#define USE_ELF_CORE_DUMP
#define CORE_DUMP_USE_REGSET
/* Least-significant four bits of ELF header's e_flags are OS-specific. The bits are
interpreted as follows by Linux: */
......@@ -154,6 +155,30 @@ extern void ia64_init_addr_space (void);
#define ELF_NGREG 128 /* we really need just 72 but let's leave some headroom... */
#define ELF_NFPREG 128 /* f0 and f1 could be omitted, but so what... */
/* elf_gregset_t register offsets */
#define ELF_GR_0_OFFSET 0
#define ELF_NAT_OFFSET (32 * sizeof(elf_greg_t))
#define ELF_PR_OFFSET (33 * sizeof(elf_greg_t))
#define ELF_BR_0_OFFSET (34 * sizeof(elf_greg_t))
#define ELF_CR_IIP_OFFSET (42 * sizeof(elf_greg_t))
#define ELF_CFM_OFFSET (43 * sizeof(elf_greg_t))
#define ELF_CR_IPSR_OFFSET (44 * sizeof(elf_greg_t))
#define ELF_GR_OFFSET(i) (ELF_GR_0_OFFSET + i * sizeof(elf_greg_t))
#define ELF_BR_OFFSET(i) (ELF_BR_0_OFFSET + i * sizeof(elf_greg_t))
#define ELF_AR_RSC_OFFSET (45 * sizeof(elf_greg_t))
#define ELF_AR_BSP_OFFSET (46 * sizeof(elf_greg_t))
#define ELF_AR_BSPSTORE_OFFSET (47 * sizeof(elf_greg_t))
#define ELF_AR_RNAT_OFFSET (48 * sizeof(elf_greg_t))
#define ELF_AR_CCV_OFFSET (49 * sizeof(elf_greg_t))
#define ELF_AR_UNAT_OFFSET (50 * sizeof(elf_greg_t))
#define ELF_AR_FPSR_OFFSET (51 * sizeof(elf_greg_t))
#define ELF_AR_PFS_OFFSET (52 * sizeof(elf_greg_t))
#define ELF_AR_LC_OFFSET (53 * sizeof(elf_greg_t))
#define ELF_AR_EC_OFFSET (54 * sizeof(elf_greg_t))
#define ELF_AR_CSD_OFFSET (55 * sizeof(elf_greg_t))
#define ELF_AR_SSD_OFFSET (56 * sizeof(elf_greg_t))
#define ELF_AR_END_OFFSET (57 * sizeof(elf_greg_t))
typedef unsigned long elf_fpxregset_t;
typedef unsigned long elf_greg_t;
......@@ -183,12 +208,6 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst);
struct task_struct;
extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
#define ELF_CORE_COPY_TASK_REGS(tsk, elf_gregs) dump_task_regs(tsk, elf_gregs)
#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
#define GATE_EHDR ((const struct elfhdr *) GATE_ADDR)
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
......
......@@ -30,8 +30,12 @@
#include <asm/break.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
#define MAX_INSN_SIZE 1
#define MAX_INSN_SIZE 2 /* last half is for kprobe-booster */
#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6)
#define NOP_M_INST (long)(1<<27)
#define BRL_INST(i1, i2) ((long)((0xcL << 37) | /* brl */ \
(0x1L << 12) | /* many */ \
(((i1) & 1) << 36) | ((i2) << 13))) /* imm */
typedef union cmp_inst {
struct {
......@@ -112,6 +116,7 @@ struct arch_specific_insn {
#define INST_FLAG_FIX_RELATIVE_IP_ADDR 1
#define INST_FLAG_FIX_BRANCH_REG 2
#define INST_FLAG_BREAK_INST 4
#define INST_FLAG_BOOSTABLE 8
unsigned long inst_flag;
unsigned short target_br_reg;
unsigned short slot;
......
......@@ -31,6 +31,9 @@
#define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */
#define IA64_TR_CURRENT_STACK 1 /* dtr1: maps kernel's memory- & register-stacks */
#define IA64_TR_ALLOC_BASE 2 /* itr&dtr: Base of dynamic TR resource*/
#define IA64_TR_ALLOC_MAX 32 /* Max number for dynamic use*/
/* Processor status register bits: */
#define IA64_PSR_BE_BIT 1
#define IA64_PSR_UP_BIT 2
......
......@@ -35,6 +35,7 @@ extern void find_memory (void);
extern void reserve_memory (void);
extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
extern int filter_memory (unsigned long start, unsigned long end, void *arg);
extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
......@@ -56,7 +57,7 @@ extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end);
#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
extern int register_active_ranges(u64 start, u64 end, void *arg);
extern int register_active_ranges(u64 start, u64 len, int nid);
#ifdef CONFIG_VIRTUAL_MEM_MAP
# define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is > than this */
......
......@@ -22,6 +22,8 @@
#include <asm/mmzone.h>
#define NUMA_NO_NODE -1
extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
extern pg_data_t *pgdat_list[MAX_NUMNODES];
......
......@@ -13,6 +13,7 @@
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com>
* Copyright (C) 2008 Silicon Graphics, Inc. (SGI)
*
* 99/10/01 davidm Make sure we pass zero for reserved parameters.
* 00/03/07 davidm Updated pal_cache_flush() to be in sync with PAL v2.6.
......@@ -73,6 +74,8 @@
#define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */
#define PAL_GET_HW_POLICY 48 /* Get current hardware resource sharing policy */
#define PAL_SET_HW_POLICY 49 /* Set current hardware resource sharing policy */
#define PAL_VP_INFO 50 /* Information about virtual processor features */
#define PAL_MC_HW_TRACKING 51 /* Hardware tracking status */
#define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */
#define PAL_HALT_INFO 257 /* return the low power capabilities of processor */
......@@ -504,7 +507,8 @@ typedef struct pal_cache_check_info_s {
wiv : 1, /* Way field valid */
reserved2 : 1,
dp : 1, /* Data poisoned on MBE */
reserved3 : 8,
reserved3 : 6,
hlth : 2, /* Health indicator */
index : 20, /* Cache line index */
reserved4 : 2,
......@@ -542,7 +546,9 @@ typedef struct pal_tlb_check_info_s {
dtc : 1, /* Fail in data TC */
itc : 1, /* Fail in inst. TC */
op : 4, /* Cache operation */
reserved3 : 30,
reserved3 : 6,
hlth : 2, /* Health indicator */
reserved4 : 22,
is : 1, /* instruction set (1 == ia32) */
iv : 1, /* instruction set field valid */
......@@ -633,7 +639,8 @@ typedef struct pal_uarch_check_info_s {
way : 6, /* Way of structure */
wv : 1, /* way valid */
xv : 1, /* index valid */
reserved1 : 8,
reserved1 : 6,
hlth : 2, /* Health indicator */
index : 8, /* Index or set of the uarch
* structure that failed.
*/
......@@ -1213,14 +1220,12 @@ ia64_pal_mc_drain (void)
/* Return the machine check dynamic processor state */
static inline s64
ia64_pal_mc_dynamic_state (u64 offset, u64 *size, u64 *pds)
ia64_pal_mc_dynamic_state (u64 info_type, u64 dy_buffer, u64 *size)
{
struct ia64_pal_retval iprv;
PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, offset, 0, 0);
PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, info_type, dy_buffer, 0);
if (size)
*size = iprv.v0;
if (pds)
*pds = iprv.v1;
return iprv.status;
}
......@@ -1281,15 +1286,41 @@ ia64_pal_mc_expected (u64 expected, u64 *previous)
return iprv.status;
}
typedef union pal_hw_tracking_u {
u64 pht_data;
struct {
u64 itc :4, /* Instruction cache tracking */
dct :4, /* Date cache tracking */
itt :4, /* Instruction TLB tracking */
ddt :4, /* Data TLB tracking */
reserved:48;
} pal_hw_tracking_s;
} pal_hw_tracking_u_t;
/*
* Hardware tracking status.
*/
static inline s64
ia64_pal_mc_hw_tracking (u64 *status)
{
struct ia64_pal_retval iprv;
PAL_CALL(iprv, PAL_MC_HW_TRACKING, 0, 0, 0);
if (status)
*status = iprv.v0;
return iprv.status;
}
/* Register a platform dependent location with PAL to which it can save
* minimal processor state in the event of a machine check or initialization
* event.
*/
static inline s64
ia64_pal_mc_register_mem (u64 physical_addr)
ia64_pal_mc_register_mem (u64 physical_addr, u64 size, u64 *req_size)
{
struct ia64_pal_retval iprv;
PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, 0, 0);
PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, size, 0);
if (req_size)
*req_size = iprv.v0;
return iprv.status;
}
......@@ -1631,6 +1662,29 @@ ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2)
return iprv.status;
}
typedef union pal_vp_info_u {
u64 pvi_val;
struct {
u64 index: 48, /* virtual feature set info */
vmm_id: 16; /* feature set id */
} pal_vp_info_s;
} pal_vp_info_u_t;
/*
* Returns infomation about virtual processor features
*/
static inline s64
ia64_pal_vp_info (u64 feature_set, u64 vp_buffer, u64 *vp_info, u64 *vmm_id)
{
struct ia64_pal_retval iprv;
PAL_CALL(iprv, PAL_VP_INFO, feature_set, vp_buffer, 0);
if (vp_info)
*vp_info = iprv.v0;
if (vmm_id)
*vmm_id = iprv.v1;
return iprv.status;
}
typedef union pal_itr_valid_u {
u64 piv_val;
struct {
......
......@@ -371,7 +371,7 @@ pgd_index (unsigned long address)
/* The offset in the 1-level directory is given by the 3 region bits
(61..63) and the level-1 bits. */
static inline pgd_t*
pgd_offset (struct mm_struct *mm, unsigned long address)
pgd_offset (const struct mm_struct *mm, unsigned long address)
{
return mm->pgd + pgd_index(address);
}
......
......@@ -296,6 +296,9 @@ enum {
EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
#define SAL_PLAT_BUS_ERR_SECT_GUID \
EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \
EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \
0xca, 0x4d)
#define MAX_CACHE_ERRORS 6
#define MAX_TLB_ERRORS 6
......@@ -879,6 +882,24 @@ extern void ia64_jump_to_sal(struct sal_to_os_boot *);
extern void ia64_sal_handler_init(void *entry_point, void *gpval);
#define PALO_MAX_TLB_PURGES 0xFFFF
#define PALO_SIG "PALO"
struct palo_table {
u8 signature[4]; /* Should be "PALO" */
u32 length;
u8 minor_revision;
u8 major_revision;
u8 checksum;
u8 reserved1[5];
u16 max_tlb_purges;
u8 reserved2[6];
};
#define NPTCG_FROM_PAL 0
#define NPTCG_FROM_PALO 1
#define NPTCG_FROM_KERNEL_PARAMETER 2
#endif /* __ASSEMBLY__ */
#endif /* _ASM_IA64_SAL_H */
......@@ -38,6 +38,9 @@ ia64_get_lid (void)
return lid.f.id << 8 | lid.f.eid;
}
extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
void *info, int wait);
#define hard_smp_processor_id() ia64_get_lid()
#ifdef CONFIG_SMP
......
......@@ -210,6 +210,13 @@ struct task_struct;
extern void ia64_save_extra (struct task_struct *task);
extern void ia64_load_extra (struct task_struct *task);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
#else
# define IA64_ACCOUNT_ON_SWITCH(p,n)
#endif
#ifdef CONFIG_PERFMON
DECLARE_PER_CPU(unsigned long, pfm_syst_info);
# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
......@@ -222,6 +229,7 @@ extern void ia64_load_extra (struct task_struct *task);
|| IS_IA32_PROCESS(task_pt_regs(t)) || PERFMON_IS_SYSWIDE())
#define __switch_to(prev,next,last) do { \
IA64_ACCOUNT_ON_SWITCH(prev, next); \
if (IA64_HAS_EXTRA_STATE(prev)) \
ia64_save_extra(prev); \
if (IA64_HAS_EXTRA_STATE(next)) \
......@@ -266,6 +274,10 @@ void cpu_idle_wait(void);
void default_idle(void);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void account_system_vtime(struct task_struct *);
#endif
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
......
......@@ -31,6 +31,12 @@ struct thread_info {
mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
struct restart_block restart_block;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
__u64 ac_stamp;
__u64 ac_leave;
__u64 ac_stime;
__u64 ac_utime;
#endif
};
#define THREAD_SIZE KERNEL_STACK_SIZE
......@@ -62,9 +68,17 @@ struct thread_info {
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#define setup_thread_stack(p, org) \
*task_thread_info(p) = *task_thread_info(org); \
task_thread_info(p)->ac_stime = 0; \
task_thread_info(p)->ac_utime = 0; \
task_thread_info(p)->task = (p);
#else
#define setup_thread_stack(p, org) \
*task_thread_info(p) = *task_thread_info(org); \
task_thread_info(p)->task = (p);
#endif
#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
......
......@@ -64,6 +64,32 @@ struct mmu_gather {
struct page *pages[FREE_PTE_NR];
};
struct ia64_tr_entry {
u64 ifa;
u64 itir;
u64 pte;
u64 rr;
}; /*Record for tr entry!*/
extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
extern void ia64_ptr_entry(u64 target_mask, int slot);
extern struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
/*
region register macros
*/
#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001)
#define RR_VE(val) (((val) & 0x0000000000000001) << 0)
#define RR_VE_MASK 0x0000000000000001L
#define RR_VE_SHIFT 0
#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f)
#define RR_PS(val) (((val) & 0x000000000000003f) << 2)
#define RR_PS_MASK 0x00000000000000fcL
#define RR_PS_SHIFT 2
#define RR_RID_MASK 0x00000000ffffff00L
#define RR_TO_RID(val) ((val >> 8) & 0xffffff)
/* Users of the generic TLB shootdown code must declare this storage space. */
DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
......
......@@ -17,6 +17,7 @@
* Now for some TLB flushing routines. This is the kind of stuff that
* can be very expensive, so try to avoid them whenever possible.
*/
extern void setup_ptcg_sem(int max_purges, int from_palo);
/*
* Flush everything (kernel mapping may also have changed due to
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment