Commit eb1a40d9 authored by Linus Torvalds

Merge http://lia64.bkbits.net/to-linus-2.5

into home.osdl.org:/home/torvalds/v2.5/linux
parents 91a7d546 d93c4308
......@@ -4,7 +4,7 @@
-----------------------------------
Started: 13-Jan-2003
Last update: 11-Feb-2003
Last update: 27-Sep-2003
David Mosberger-Tang
<davidm@hpl.hp.com>
......@@ -146,6 +146,12 @@ speed comes a set of restrictions:
task pointer is not considered sensitive: it's already exposed
through ar.k6).
o Fsyscall-handlers MUST NOT access user-memory without first
validating access-permission (this can be done typically via
probe.r.fault and/or probe.w.fault) and without guarding against
memory access exceptions (this can be done with the EX() macros
defined by asmmacro.h).
The above restrictions may seem draconian, but remember that it's
possible to trade off some of the restrictions by paying a slightly
higher overhead. For example, if an fsyscall-handler could benefit
......@@ -229,3 +235,52 @@ PSR.ed Unchanged. Note: This bit could only have an effect if an fsys-mode
PSR.bn Unchanged. Note: fsys-mode handlers may clear the bit, if needed.
Doing so requires clearing PSR.i and PSR.ic as well.
PSR.ia Unchanged. Note: the ia64 linux kernel never sets this bit.
* Using fast system calls
To use fast system calls, userspace applications simply need to call
__kernel_syscall_via_epc(). For example:
-- example fgettimeofday() call --
-- fgettimeofday.S --
#include <asm/asmmacro.h>
GLOBAL_ENTRY(fgettimeofday)
.prologue
.save ar.pfs, r11
mov r11 = ar.pfs
.body
mov r2 = 0xa000000000020660;; // gate address
// found by inspection of System.map for the
// __kernel_syscall_via_epc() function. See
// below for how to do this for real.
mov b7 = r2
mov r15 = 1087 // gettimeofday syscall
;;
br.call.sptk.many b6 = b7
;;
.restore sp
mov ar.pfs = r11
br.ret.sptk.many rp;; // return to caller
END(fgettimeofday)
-- end fgettimeofday.S --
In reality, getting the gate address is accomplished by two extra
values passed via the ELF auxiliary vector (include/asm-ia64/elf.h)
o AT_SYSINFO : is the address of __kernel_syscall_via_epc()
o AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO
The ELF DSO is a pre-linked library that is mapped in by the kernel at
the gate page. It is a proper ELF shared object so, with a dynamic
loader that recognises the library, you should be able to make calls to
the exported functions within it as with any other shared library.
AT_SYSINFO points into the kernel DSO at the
__kernel_syscall_via_epc() function for historical reasons (it was
used before the kernel DSO) and as a convenience.
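(Editor's illustration, not part of this patch: one way a userspace program
can locate AT_SYSINFO by walking the ELF auxiliary vector that follows the
environment. The address found this way replaces the hard-coded gate address
used in the fgettimeofday.S example above; the file name find_sysinfo.c is
made up for the example.)
-- find_sysinfo.c --
#include <elf.h>
#include <stdio.h>
int
main (int argc, char **argv, char **envp)
{
	Elf64_auxv_t *auxv;
	unsigned long sysinfo = 0;
	while (*envp)		/* the aux vector starts right after envp[] */
		envp++;
	for (auxv = (Elf64_auxv_t *) (envp + 1); auxv->a_type != AT_NULL; ++auxv)
		if (auxv->a_type == AT_SYSINFO)
			sysinfo = auxv->a_un.a_val;
	printf("__kernel_syscall_via_epc() is at 0x%lx\n", sysinfo);
	/*
	 * A real caller would now load this address into b7 (as in the
	 * fgettimeofday.S example) instead of an address taken from System.map.
	 */
	return 0;
}
-- end find_sysinfo.c --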
......@@ -57,6 +57,10 @@ choice
config IA64_GENERIC
bool "generic"
select NUMA
select ACPI_NUMA
select VIRTUAL_MEM_MAP
select DISCONTIGMEM
---help---
This selects the system type of your hardware. A "generic" kernel
will run on any supported IA-64 system. However, if you configure
......
......@@ -56,6 +56,7 @@ void (*pm_idle) (void);
void (*pm_power_off) (void);
unsigned char acpi_kbd_controller_present = 1;
unsigned char acpi_legacy_devices;
int acpi_disabled; /* XXX this shouldn't be needed---we can't boot without ACPI! */
......@@ -509,6 +510,9 @@ acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
acpi_kbd_controller_present = 0;
if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
acpi_legacy_devices = 1;
acpi_register_irq(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE);
return 0;
}
......
......@@ -33,16 +33,30 @@ void foo(void)
BLANK();
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
BLANK();
DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
BLANK();
DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
group_stop_count));
DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
BLANK();
DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
......@@ -158,6 +172,10 @@ void foo(void)
BLANK();
DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
BLANK();
DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
......
......@@ -4,6 +4,7 @@
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
* 18-Feb-03 louisk Implement fsys_gettimeofday().
* 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
* probably broke it along the way... ;-)
......@@ -15,6 +16,7 @@
#include <asm/percpu.h>
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/unistd.h>
......@@ -48,8 +50,7 @@ ENTRY(fsys_ni_syscall)
.body
mov r8=ENOSYS
mov r10=-1
MCKINLEY_E9_WORKAROUND
br.ret.sptk.many b6
FSYS_RETURN
END(fsys_ni_syscall)
ENTRY(fsys_getpid)
......@@ -66,8 +67,7 @@ ENTRY(fsys_getpid)
;;
cmp.ne p8,p0=0,r9
(p8) br.spnt.many fsys_fallback_syscall
MCKINLEY_E9_WORKAROUND
br.ret.sptk.many b6
FSYS_RETURN
END(fsys_getpid)
ENTRY(fsys_getppid)
......@@ -114,8 +114,7 @@ ENTRY(fsys_getppid)
mov r18=0 // i must not leak kernel bits...
mov r19=0 // i must not leak kernel bits...
#endif
MCKINLEY_E9_WORKAROUND
br.ret.sptk.many b6
FSYS_RETURN
END(fsys_getppid)
ENTRY(fsys_set_tid_address)
......@@ -141,8 +140,7 @@ ENTRY(fsys_set_tid_address)
;;
mov r17=0 // i must not leak kernel bits...
mov r18=0 // i must not leak kernel bits...
MCKINLEY_E9_WORKAROUND
br.ret.sptk.many b6
FSYS_RETURN
END(fsys_set_tid_address)
/*
......@@ -199,7 +197,7 @@ ENTRY(fsys_gettimeofday)
adds r10=IA64_CPUINFO_ITM_DELTA_OFFSET, r10
(p7) tnat.nz p6,p0=r33
(p6) br.cond.spnt.few .fail
(p6) br.cond.spnt.few .fail_einval
adds r8=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r3
movl r24=2361183241434822607 // for division hack (only for / 1000)
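(Editor's note, not part of this patch: the "division hack" constant above is
2361183241434822607 = ceiling(2^71 / 1000), so a divide by 1000 can be done as
a multiply-high plus shift for the magnitudes handled here. The standalone
check below is illustrative only; the shift count of 71 is the editor's
reading of the constant, not a description of the exact instruction sequence
used by fsys_gettimeofday.)
#include <stdio.h>
int
main (void)
{
	unsigned __int128 M = 2361183241434822607ULL;	/* ~ 2^71 / 1000 */
	unsigned long x = 987654321UL;			/* e.g. a nanosecond count */
	unsigned long q = (unsigned long) ((x * M) >> 71);
	printf("%lu / 1000 = %lu, reciprocal multiply gives %lu\n",
	       x, x / 1000UL, q);
	return 0;
}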
......@@ -225,8 +223,8 @@ ENTRY(fsys_gettimeofday)
* to store the result. That's OK as long as the stores are also
* protected by EX().
*/
EX(.fail, probe.w.fault r32, 3) // this must come _after_ NaT-check
EX(.fail, probe.w.fault r10, 3) // this must come _after_ NaT-check
EX(.fail_efault, probe.w.fault r32, 3) // this must come _after_ NaT-check
EX(.fail_efault, probe.w.fault r10, 3) // this must come _after_ NaT-check
nop 0
ldf8 f10=[r8] // f10 <- local_cpu_data->nsec_per_cyc value
......@@ -311,14 +309,13 @@ EX(.fail, probe.w.fault r10, 3) // this must come _after_ NaT-check
(p7) br.spnt.many 1b
// finally: r2 = sec, r3 = usec
EX(.fail, st8 [r32]=r2)
EX(.fail_efault, st8 [r32]=r2)
adds r9=8, r32
mov r8=r0 // success
;;
EX(.fail, st8 [r9]=r3) // store them in the timeval struct
EX(.fail_efault, st8 [r9]=r3) // store them in the timeval struct
mov r10=0
MCKINLEY_E9_WORKAROUND
br.ret.sptk.many b6 // return to caller
FSYS_RETURN
/*
* Note: We are NOT clearing the scratch registers here. Since the only things
* in those registers are time-related variables and some addresses (which
......@@ -326,12 +323,183 @@ EX(.fail, st8 [r9]=r3) // store them in the timeval struct
* and we should be fine.
*/
.fail: adds r8=EINVAL, r0 // r8 = EINVAL
adds r10=-1, r0 // r10 = -1
MCKINLEY_E9_WORKAROUND
br.ret.spnt.many b6 // return with r8 set to EINVAL
.fail_einval:
mov r8=EINVAL // r8 = EINVAL
mov r10=-1 // r10 = -1
FSYS_RETURN
.fail_efault:
mov r8=EFAULT // r8 = EFAULT
mov r10=-1 // r10 = -1
FSYS_RETURN
END(fsys_gettimeofday)
/*
* long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
*/
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
ENTRY(fsys_rt_sigprocmask)
.prologue
.altrp b6
.body
mf // ensure reading of current->blocked is ordered
add r2=IA64_TASK_BLOCKED_OFFSET,r16
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
/*
* Since we're only reading a single word, we can do it
* atomically without acquiring current->sighand->siglock. To
* be on the safe side, we need a fully-ordered load, though:
*/
ld8.acq r3=[r2] // read/prefetch current->blocked
ld4 r9=[r9]
add r31=IA64_TASK_SIGHAND_OFFSET,r16
;;
#ifdef CONFIG_SMP
ld8 r31=[r31] // r31 <- current->sighand
#endif
and r9=TIF_ALLWORK_MASK,r9
tnat.nz p6,p0=r32
;;
cmp.ne p7,p0=0,r9
tnat.nz.or p6,p0=r35
tnat.nz p8,p0=r34
;;
cmp.ne p15,p0=r0,r34 // oset != NULL?
cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
tnat.nz.or p8,p0=r33
(p6) br.spnt.few .fail_einval // fail with EINVAL
(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
(p8) br.spnt.few .fail_efault // fail with EFAULT
;;
cmp.eq p6,p7=r0,r33 // set == NULL?
add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
/* Argh, we actually have to do some work and _update_ the signal mask: */
EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
;;
rsm psr.i // mask interrupt delivery
mov ar.ccv=0
andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
#ifdef CONFIG_SMP
mov r17=1
;;
cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock
mov r8=EINVAL // default to EINVAL
;;
ld8 r3=[r2] // re-read current->blocked now that we hold the lock
cmp4.ne p6,p0=r18,r0
(p6) br.cond.spnt.many .lock_contention
;;
#else
ld8 r3=[r2] // re-read current->blocked now that we hold the lock
mov r8=EINVAL // default to EINVAL
#endif
add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
add r19=IA64_TASK_SIGNAL_OFFSET,r16
cmp4.eq p6,p0=SIG_BLOCK,r32
;;
ld8 r19=[r19] // r19 <- current->signal
cmp4.eq p7,p0=SIG_UNBLOCK,r32
cmp4.eq p8,p0=SIG_SETMASK,r32
;;
ld8 r18=[r18] // r18 <- current->pending.signal
.pred.rel.mutex p6,p7,p8
(p6) or r3=r3,r14 // SIG_BLOCK
(p7) andcm r3=r3,r14 // SIG_UNBLOCK
(p8) mov r3=r14 // SIG_SETMASK
(p6) mov r8=0 // clear error code
// recalc_sigpending()
add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
;;
ld4 r17=[r17] // r17 <- current->signal->group_stop_count
(p7) mov r8=0 // clear error code
ld8 r19=[r19] // r19 <- current->signal->shared_pending
;;
cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
(p8) mov r8=0 // clear error code
or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
;;
// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
andcm r18=r18,r3
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
mov r19=0 // i must not leak kernel bits...
(p6) br.cond.dpnt.many .sig_pending
;;
1: ld4 r17=[r9] // r17 <- current->thread_info->flags
;;
mov ar.ccv=r17
and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
;;
st8 [r2]=r3 // update current->blocked with new mask
cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18
;;
cmp.ne p6,p0=r17,r14 // update failed?
(p6) br.cond.spnt.few 1b // yes -> retry
#ifdef CONFIG_SMP
st4.rel [r31]=r0 // release the lock
#endif
ssm psr.i
cmp.ne p9,p0=r8,r0 // check for bad HOW value
;;
srlz.d // ensure psr.i is set again
mov r18=0 // i must not leak kernel bits...
(p9) br.spnt.few .fail_einval // bail out for bad HOW value
.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)
mov r2=0 // i must not leak kernel bits...
mov r3=0 // i must not leak kernel bits...
mov r8=0 // return 0
mov r9=0 // i must not leak kernel bits...
mov r14=0 // i must not leak kernel bits...
mov r17=0 // i must not leak kernel bits...
mov r31=0 // i must not leak kernel bits...
FSYS_RETURN
.sig_pending:
#ifdef CONFIG_SMP
st4.rel [r31]=r0 // release the lock
#endif
ssm psr.i
;;
srlz.d
br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
#ifdef CONFIG_SMP
.lock_contention:
/* Rather than spinning here, fall back on doing a heavy-weight syscall. */
ssm psr.i
;;
srlz.d
br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)
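(Editor's sketch, not part of this patch: the mask arithmetic the assembly
above performs for the single-word case (_NSIG_WORDS == 1), restated in plain
C for readability. The real state lives in current->blocked; locking and the
TIF_SIGPENDING recalculation are omitted, and apply_sigprocmask() is a made-up
name.)
#include <signal.h>
static unsigned long
apply_sigprocmask (int how, unsigned long blocked, unsigned long new_set)
{
	/* SIGKILL and SIGSTOP can never be blocked (the andcm with r17 above) */
	new_set &= ~((1UL << (SIGKILL - 1)) | (1UL << (SIGSTOP - 1)));
	switch (how) {
	      case SIG_BLOCK:		/* (p6) or r3=r3,r14 */
		return blocked | new_set;
	      case SIG_UNBLOCK:		/* (p7) andcm r3=r3,r14 */
		return blocked & ~new_set;
	      case SIG_SETMASK:		/* (p8) mov r3=r14 */
		return new_set;
	      default:			/* bad HOW -> caller fails with EINVAL */
		return blocked;
	}
}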
ENTRY(fsys_fallback_syscall)
.prologue
.altrp b6
......@@ -600,7 +768,7 @@ fsyscall_table:
data8 0 // sigaltstack
data8 0 // rt_sigaction
data8 0 // rt_sigpending
data8 0 // rt_sigprocmask
data8 fsys_rt_sigprocmask // rt_sigprocmask
data8 0 // rt_sigqueueinfo // 1180
data8 0 // rt_sigreturn
data8 0 // rt_sigsuspend
......
......@@ -118,8 +118,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
mov r10=-1
mov r8=ENOSYS
MCKINLEY_E9_WORKAROUND
br.ret.sptk.many b6
FSYS_RETURN
END(__kernel_syscall_via_epc)
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
......
......@@ -797,6 +797,25 @@ GLOBAL_ENTRY(ia64_switch_mode_virt)
br.ret.sptk.many rp
END(ia64_switch_mode_virt)
GLOBAL_ENTRY(ia64_delay_loop)
.prologue
{ nop 0 // work around GAS unwind info generation bug...
.save ar.lc,r2
mov r2=ar.lc
.body
;;
mov ar.lc=r32
}
;;
// force loop to be 32-byte aligned (GAS bug means we cannot use .align
// inside function body without corrupting unwind info).
{ nop 0 }
1: br.cloop.sptk.few 1b
;;
mov ar.lc=r2
br.ret.sptk.many rp
END(ia64_delay_loop)
#ifdef CONFIG_IA64_BRL_EMU
/*
......
......@@ -81,8 +81,6 @@ u64 ia64_init_stack[KERNEL_STACK_SIZE/8] __attribute__((aligned(16)));
u64 ia64_mca_sal_data_area[1356];
u64 ia64_tlb_functional;
u64 ia64_os_mca_recovery_successful;
/* TODO: need to assign min-state structure to UC memory */
u64 ia64_mca_min_state_save_info[MIN_STATE_AREA_SIZE] __attribute__((aligned(512)));
static void ia64_mca_wakeup_ipi_wait(void);
static void ia64_mca_wakeup(int cpu);
static void ia64_mca_wakeup_all(void);
......@@ -465,26 +463,6 @@ ia64_mca_register_cpev (int cpev)
#endif /* PLATFORM_MCA_HANDLERS */
/*
* routine to process and prepare to dump min_state_save
* information for debugging purposes.
*/
void
ia64_process_min_state_save (pal_min_state_area_t *pmss)
{
int i, max = MIN_STATE_AREA_SIZE;
u64 *tpmss_ptr = (u64 *)pmss;
u64 *return_min_state_ptr = ia64_mca_min_state_save_info;
for (i=0;i<max;i++) {
/* copy min-state register info for eventual return to PAL */
*return_min_state_ptr++ = *tpmss_ptr;
tpmss_ptr++; /* skip to next entry */
}
}
/*
* ia64_mca_cmc_vector_setup
*
......@@ -828,7 +806,7 @@ ia64_mca_wakeup_ipi_wait(void)
irr = ia64_getreg(_IA64_REG_CR_IRR3);
break;
}
} while (!(irr & (1 << irr_bit))) ;
} while (!(irr & (1UL << irr_bit))) ;
}
/*
......@@ -961,9 +939,8 @@ ia64_return_to_sal_check(void)
/* Default = tell SAL to return to same context */
ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
/* Register pointer to new min state values */
ia64_os_to_sal_handoff_state.imots_new_min_state =
ia64_mca_min_state_save_info;
(u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
}
/*
......@@ -2154,9 +2131,6 @@ ia64_log_proc_dev_err_info_print (sal_log_processor_info_t *slpi,
if (slpi->valid.psi_static_struct) {
spsi = (sal_processor_static_info_t *)p_data;
/* copy interrupted context PAL min-state info */
ia64_process_min_state_save(&spsi->min_state_area);
/* Print branch register contents if valid */
if (spsi->valid.br)
ia64_log_processor_regs_print(spsi->br, 8, "Branch", "br",
......
......@@ -77,12 +77,11 @@
(p6) movl r10=IA64_MCA_SAME_CONTEXT; \
(p6) add _tmp=0x18,_tmp;; \
(p6) ld8 r9=[_tmp],0x10; \
(p6) movl r22=ia64_mca_min_state_save_info;; \
(p6) mov r22=r0;; \
(p7) ld8 r8=[_tmp],0x08;; \
(p7) ld8 r9=[_tmp],0x08;; \
(p7) ld8 r10=[_tmp],0x08;; \
(p7) ld8 r22=[_tmp],0x08;; \
DATA_VA_TO_PA(r22)
(p7) ld8 r22=[_tmp],0x08;;
// now _tmp is pointing to SAL rtn save location
......@@ -97,7 +96,6 @@
.global ia64_init_stack
.global ia64_mca_sal_data_area
.global ia64_tlb_functional
.global ia64_mca_min_state_save_info
.text
.align 16
......
......@@ -130,9 +130,11 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
while (offp < (s32 *) end) {
wp = (u64 *) ia64_imva((char *) offp + *offp);
wp[0] = 0x0000000100000000;
wp[0] = 0x0000000100000000; /* nop.m 0; nop.i 0; nop.i 0 */
wp[1] = 0x0004000000000200;
ia64_fc(wp);
wp[2] = 0x0000000100000011; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
wp[3] = 0x0084006880000200;
ia64_fc(wp); ia64_fc(wp + 2);
++offp;
}
ia64_sync_i();
......
......@@ -140,7 +140,7 @@
* in UP:
* - we need to protect against PMU overflow interrupts (local_irq_disable)
*
* spin_lock_irqsave()/spin_unlock_irqrestore():
* spin_lock_irqsave()/spin_lock_irqrestore():
* in SMP: local_irq_disable + spin_lock
* in UP : local_irq_disable
*
......@@ -254,7 +254,6 @@ typedef struct {
unsigned long seed; /* seed for random-number generator */
unsigned long mask; /* mask for random-number generator */
unsigned int flags; /* notify/do not notify */
int next_reset_type;/* PFM_PMD_NO_RESET, PFM_PMD_LONG_RESET, PFM_PMD_SHORT_RESET */
unsigned long eventid; /* overflow event identifier */
} pfm_counter_t;
......@@ -267,10 +266,10 @@ typedef struct {
unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
unsigned int is_sampling:1; /* true if using a custom format */
unsigned int excl_idle:1; /* exclude idle task in system wide session */
unsigned int unsecure:1; /* exclude idle task in system wide session */
unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */
unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */
unsigned int no_msg:1; /* no message sent on overflow */
unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */
unsigned int reserved:22;
} pfm_context_flags_t;
......@@ -356,10 +355,10 @@ typedef struct pfm_context {
#define ctx_fl_using_dbreg ctx_flags.using_dbreg
#define ctx_fl_is_sampling ctx_flags.is_sampling
#define ctx_fl_excl_idle ctx_flags.excl_idle
#define ctx_fl_unsecure ctx_flags.unsecure
#define ctx_fl_going_zombie ctx_flags.going_zombie
#define ctx_fl_trap_reason ctx_flags.trap_reason
#define ctx_fl_no_msg ctx_flags.no_msg
#define ctx_fl_can_restart ctx_flags.can_restart
#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0);
#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking
......@@ -493,12 +492,11 @@ typedef struct {
typedef struct {
unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */
unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */
unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */
unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */
unsigned long pfm_sysupdt_count;
unsigned long pfm_sysupdt_cycles;
unsigned long pfm_smpl_handler_calls;
unsigned long pfm_smpl_handler_cycles;
char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
......@@ -513,10 +511,8 @@ static pfm_session_t pfm_sessions; /* global sessions information */
static struct proc_dir_entry *perfmon_dir;
static pfm_uuid_t pfm_null_uuid = {0,};
static spinlock_t pfm_smpl_fmt_lock;
static pfm_buffer_fmt_t *pfm_buffer_fmt_list;
#define LOCK_BUF_FMT_LIST() spin_lock(&pfm_smpl_fmt_lock)
#define UNLOCK_BUF_FMT_LIST() spin_unlock(&pfm_smpl_fmt_lock)
static spinlock_t pfm_buffer_fmt_lock;
static LIST_HEAD(pfm_buffer_fmt_list);
/* sysctl() controls */
static pfm_sysctl_t pfm_sysctl;
......@@ -544,14 +540,8 @@ static struct vm_operations_struct pfm_vm_ops={
close: pfm_vm_close
};
#define pfm_wait_task_inactive(t) wait_task_inactive(t)
#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v)
#define pfm_get_cpu_data(a,b) per_cpu(a, b)
typedef irqreturn_t pfm_irq_handler_t;
#define PFM_IRQ_HANDLER_RET(v) do { \
put_cpu_no_resched(); \
return IRQ_HANDLED; \
} while(0);
static inline void
pfm_put_task(struct task_struct *task)
......@@ -628,7 +618,6 @@ static struct file_system_type pfm_fs_type = {
.get_sb = pfmfs_get_sb,
.kill_sb = kill_anon_super,
};
DEFINE_PER_CPU(unsigned long, pfm_syst_info);
DEFINE_PER_CPU(struct task_struct *, pmu_owner);
DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
......@@ -734,12 +723,14 @@ pfm_read_soft_counter(pfm_context_t *ctx, int i)
static inline void
pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
{
ctx->ctx_pmds[i].val = val & ~pmu_conf.ovfl_val;
unsigned long ovfl_val = pmu_conf.ovfl_val;
ctx->ctx_pmds[i].val = val & ~ovfl_val;
/*
* writing to the unimplemented part is ignored, so we do not need to
* mask off top part
*/
ia64_set_pmd(i, val & pmu_conf.ovfl_val);
ia64_set_pmd(i, val & ovfl_val);
}
static pfm_msg_t *
......@@ -870,11 +861,12 @@ pfm_mask_monitoring(struct task_struct *task)
{
pfm_context_t *ctx = PFM_GET_CTX(task);
struct thread_struct *th = &task->thread;
unsigned long mask, val;
unsigned long mask, val, ovfl_mask;
int i;
DPRINT(("[%d] masking monitoring for [%d]\n", current->pid, task->pid));
DPRINT_ovfl(("[%d] masking monitoring for [%d]\n", current->pid, task->pid));
ovfl_mask = pmu_conf.ovfl_val;
/*
* monitoring can only be masked as a result of a valid
* counter overflow. In UP, it means that the PMU still
......@@ -904,14 +896,14 @@ pfm_mask_monitoring(struct task_struct *task)
/*
* we rebuild the full 64 bit value of the counter
*/
ctx->ctx_pmds[i].val += (val & pmu_conf.ovfl_val);
ctx->ctx_pmds[i].val += (val & ovfl_mask);
} else {
ctx->ctx_pmds[i].val = val;
}
DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
i,
ctx->ctx_pmds[i].val,
val & pmu_conf.ovfl_val));
val & ovfl_mask));
}
/*
* mask monitoring by setting the privilege level to 0
......@@ -926,6 +918,7 @@ pfm_mask_monitoring(struct task_struct *task)
if ((mask & 0x1) == 0UL) continue;
ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
th->pmcs[i] &= ~0xfUL;
DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
}
/*
* make all of this visible
......@@ -943,11 +936,12 @@ pfm_restore_monitoring(struct task_struct *task)
{
pfm_context_t *ctx = PFM_GET_CTX(task);
struct thread_struct *th = &task->thread;
unsigned long mask;
unsigned long mask, ovfl_mask;
unsigned long psr, val;
int i, is_system;
is_system = ctx->ctx_fl_system;
ovfl_mask = pmu_conf.ovfl_val;
if (task != current) {
printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
......@@ -989,8 +983,8 @@ pfm_restore_monitoring(struct task_struct *task)
* we split the 64bit value according to
* counter width
*/
val = ctx->ctx_pmds[i].val & pmu_conf.ovfl_val;
ctx->ctx_pmds[i].val &= ~pmu_conf.ovfl_val;
val = ctx->ctx_pmds[i].val & ovfl_mask;
ctx->ctx_pmds[i].val &= ~ovfl_mask;
} else {
val = ctx->ctx_pmds[i].val;
}
......@@ -1206,12 +1200,36 @@ pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_
return ret;
}
static pfm_buffer_fmt_t *
__pfm_find_buffer_fmt(pfm_uuid_t uuid)
{
struct list_head * pos;
pfm_buffer_fmt_t * entry;
list_for_each(pos, &pfm_buffer_fmt_list) {
entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
return entry;
}
return NULL;
}
/*
* find a buffer format based on its uuid
*/
static pfm_buffer_fmt_t *
pfm_find_buffer_fmt(pfm_uuid_t uuid)
{
pfm_buffer_fmt_t * fmt;
spin_lock(&pfm_buffer_fmt_lock);
fmt = __pfm_find_buffer_fmt(uuid);
spin_unlock(&pfm_buffer_fmt_lock);
return fmt;
}
int
pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
{
pfm_buffer_fmt_t *p;
int ret = 0;
/* some sanity checks */
......@@ -1224,80 +1242,44 @@ pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
* XXX: need check validity of fmt_arg_size
*/
LOCK_BUF_FMT_LIST();
p = pfm_buffer_fmt_list;
while (p) {
if (pfm_uuid_cmp(fmt->fmt_uuid, p->fmt_uuid) == 0) break;
p = p->fmt_next;
}
spin_lock(&pfm_buffer_fmt_lock);
if (p) {
if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
ret = -EBUSY;
} else {
fmt->fmt_prev = NULL;
fmt->fmt_next = pfm_buffer_fmt_list;
pfm_buffer_fmt_list = fmt;
printk(KERN_ERR "perfmon: added sampling format %s\n", fmt->fmt_name);
goto out;
}
UNLOCK_BUF_FMT_LIST();
list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
out:
spin_unlock(&pfm_buffer_fmt_lock);
return ret;
}
int
pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
{
pfm_buffer_fmt_t *p;
pfm_buffer_fmt_t *fmt;
int ret = 0;
LOCK_BUF_FMT_LIST();
p = pfm_buffer_fmt_list;
while (p) {
if (memcmp(uuid, p->fmt_uuid, sizeof(pfm_uuid_t)) == 0) break;
p = p->fmt_next;
}
if (p) {
if (p->fmt_prev)
p->fmt_prev->fmt_next = p->fmt_next;
else
pfm_buffer_fmt_list = p->fmt_next;
if (p->fmt_next)
p->fmt_next->fmt_prev = p->fmt_prev;
spin_lock(&pfm_buffer_fmt_lock);
printk(KERN_ERR "perfmon: removed sampling format: %s\n", p->fmt_name);
p->fmt_next = p->fmt_prev = NULL;
} else {
fmt = __pfm_find_buffer_fmt(uuid);
if (!fmt) {
printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
ret = -EINVAL;
goto out;
}
UNLOCK_BUF_FMT_LIST();
list_del_init(&fmt->fmt_list);
printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
out:
spin_unlock(&pfm_buffer_fmt_lock);
return ret;
}
/*
* find a buffer format based on its uuid
*/
static pfm_buffer_fmt_t *
pfm_find_buffer_fmt(pfm_uuid_t uuid, int nolock)
{
pfm_buffer_fmt_t *p;
LOCK_BUF_FMT_LIST();
for (p = pfm_buffer_fmt_list; p ; p = p->fmt_next) {
if (pfm_uuid_cmp(uuid, p->fmt_uuid) == 0) break;
}
UNLOCK_BUF_FMT_LIST();
return p;
}
static int
pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
{
......@@ -2420,7 +2402,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int
#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1)
/* invoke and lock buffer format, if found */
fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id, 0);
fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
if (fmt == NULL) {
DPRINT(("[%d] cannot find buffer format\n", task->pid));
return -EINVAL;
......@@ -2528,8 +2510,7 @@ pfm_ctx_getsize(void *arg, size_t *sz)
if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;
/* no buffer locking here, will be called again */
fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id, 1);
fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id);
if (fmt == NULL) {
DPRINT(("cannot find buffer format\n"));
return -EINVAL;
......@@ -2588,7 +2569,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
/*
* make sure the task is off any CPU
*/
pfm_wait_task_inactive(task);
wait_task_inactive(task);
/* more to come... */
......@@ -2679,7 +2660,6 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
*/
ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
ctx->ctx_fl_unsecure = (ctx_flags & PFM_FL_UNSECURE) ? 1: 0;
ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
/*
......@@ -2705,13 +2685,12 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
init_waitqueue_head(&ctx->ctx_msgq_wait);
init_waitqueue_head(&ctx->ctx_zombieq);
DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d unsecure=%d no_msg=%d ctx_fd=%d \n",
DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
ctx,
ctx_flags,
ctx->ctx_fl_system,
ctx->ctx_fl_block,
ctx->ctx_fl_excl_idle,
ctx->ctx_fl_unsecure,
ctx->ctx_fl_no_msg,
ctx->ctx_fd));
......@@ -2755,14 +2734,12 @@ pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
}
static void
pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
{
unsigned long mask = ovfl_regs[0];
unsigned long reset_others = 0UL;
unsigned long val;
int i, is_long_reset = (flag == PFM_PMD_LONG_RESET);
DPRINT_ovfl(("ovfl_regs=0x%lx flag=%d\n", ovfl_regs[0], flag));
int i;
/*
* now restore reset value on sampling overflowed counters
......@@ -2793,19 +2770,17 @@ pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
}
static void
pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
{
unsigned long mask = ovfl_regs[0];
unsigned long reset_others = 0UL;
unsigned long val;
int i, is_long_reset = (flag == PFM_PMD_LONG_RESET);
DPRINT_ovfl(("ovfl_regs=0x%lx flag=%d\n", ovfl_regs[0], flag));
int i;
if (flag == PFM_PMD_NO_RESET) return;
DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset));
if (ctx->ctx_state == PFM_CTX_MASKED) {
pfm_reset_regs_masked(ctx, ovfl_regs, flag);
pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset);
return;
}
......@@ -3084,7 +3059,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
struct thread_struct *thread = NULL;
pfarg_reg_t *req = (pfarg_reg_t *)arg;
unsigned long value, hw_value;
unsigned long value, hw_value, ovfl_mask;
unsigned int cnum;
int i, can_access_pmu = 0, state;
int is_counting, is_loaded, is_system;
......@@ -3094,6 +3069,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
state = ctx->ctx_state;
is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
is_system = ctx->ctx_fl_system;
ovfl_mask = pmu_conf.ovfl_val;
if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL;
......@@ -3162,22 +3138,21 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* when context is load we use the split value
*/
if (is_loaded) {
hw_value = value & pmu_conf.ovfl_val;
value = value & ~pmu_conf.ovfl_val;
hw_value = value & ovfl_mask;
value = value & ~ovfl_mask;
}
}
/*
* update sampling periods
* update reset values (not just for counters)
*/
ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset;
ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;
/*
* update randomization parameters
* update randomization parameters (not just for counters)
*/
ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
}
/*
* update context value
......@@ -3284,7 +3259,7 @@ static int
pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
struct thread_struct *thread = NULL;
unsigned long val = 0UL, lval ;
unsigned long val = 0UL, lval, ovfl_mask;
pfarg_reg_t *req = (pfarg_reg_t *)arg;
unsigned int cnum, reg_flags = 0;
int i, can_access_pmu = 0, state;
......@@ -3299,6 +3274,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
state = ctx->ctx_state;
is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
is_system = ctx->ctx_fl_system;
ovfl_mask = pmu_conf.ovfl_val;
if (state == PFM_CTX_ZOMBIE) return -EINVAL;
......@@ -3368,7 +3344,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
/*
* XXX: need to check for overflow when loaded
*/
val &= pmu_conf.ovfl_val;
val &= ovfl_mask;
val += ctx->ctx_pmds[cnum].val;
lval = ctx->ctx_pmds[cnum].lval;
......@@ -3672,22 +3648,48 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
*/
ctx->ctx_state = PFM_CTX_LOADED;
/*
* XXX: not really useful for self monitoring
*/
ctx->ctx_fl_can_restart = 0;
return 0;
}
/* restart another task */
/*
* if blocking, then post the semaphore.
* restart another task
*/
/*
* When PFM_CTX_MASKED, we cannot issue a restart before the previous
* one is seen by the task.
*/
if (state == PFM_CTX_MASKED) {
if (ctx->ctx_fl_can_restart == 0) return -EINVAL;
/*
* will prevent subsequent restart before this one is
* seen by other task
*/
ctx->ctx_fl_can_restart = 0;
}
/*
* if blocking, then post the semaphore if PFM_CTX_MASKED, i.e.
* the task is blocked or on its way to block. That's the normal
* restart path. If the monitoring is not masked, then the task
* can be actively monitoring and we cannot directly intervene.
* Therefore we use the trap mechanism to catch the task and
* force it to reset the buffer/reset PMDs.
*
* if non-blocking, then we ensure that the task will go into
* pfm_handle_work() before returning to user mode.
*
* We cannot explicitly reset another task, it MUST always
* be done by the task itself. This works for system wide because
* the tool that is controlling the session is doing "self-monitoring".
*
* XXX: what if the task never goes back to user?
*
* the tool that is controlling the session is logically doing
* "self-monitoring".
*/
if (CTX_OVFL_NOBLOCK(ctx) == 0) {
if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
DPRINT(("unblocking [%d] \n", task->pid));
up(&ctx->ctx_restart_sem);
} else {
......@@ -3725,6 +3727,9 @@ pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
return 0;
}
/*
* arg can be NULL and count can be zero for this function
*/
static int
pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
......@@ -3783,10 +3788,11 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
/*
* check for debug registers in system wide mode
*
* We make the reservation even when context is not loaded
* to make sure we get our slot. Note that the PFM_LOAD_CONTEXT
* may still fail if the task has DBG_VALID set.
* Even though a check is done in pfm_context_load(),
* we must repeat it here, in case the registers are
* written after the context is loaded
*/
if (is_loaded) {
LOCK_PFS();
if (first_time && is_system) {
......@@ -3795,8 +3801,8 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
else
pfm_sessions.pfs_sys_use_dbregs++;
}
UNLOCK_PFS();
}
if (ret != 0) return ret;
......@@ -4158,7 +4164,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
unsigned long *pmcs_source, *pmds_source;
int the_cpu;
int ret = 0;
int state, is_system;
int state, is_system, set_dbregs = 0;
state = ctx->ctx_state;
is_system = ctx->ctx_fl_system;
......@@ -4173,7 +4179,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
return -EINVAL;
}
DPRINT(("load_pid [%d]\n", req->load_pid));
DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
DPRINT(("cannot use blocking mode on self for [%d]\n", current->pid));
......@@ -4200,16 +4206,34 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
thread = &task->thread;
ret = -EBUSY;
ret = 0;
/*
* cannot load a context which is using range restrictions,
* into a task that is being debugged.
*/
if (ctx->ctx_fl_using_dbreg && (thread->flags & IA64_THREAD_DBG_VALID)) {
if (ctx->ctx_fl_using_dbreg) {
if (thread->flags & IA64_THREAD_DBG_VALID) {
ret = -EBUSY;
DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
goto error;
}
LOCK_PFS();
if (is_system) {
if (pfm_sessions.pfs_ptrace_use_dbregs) {
DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
ret = -EBUSY;
} else {
pfm_sessions.pfs_sys_use_dbregs++;
DPRINT(("load [%d] increased sys_use_dbreg=%lu\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
set_dbregs = 1;
}
}
UNLOCK_PFS();
if (ret) goto error;
}
/*
* SMP system-wide monitoring implies self-monitoring.
......@@ -4228,13 +4252,13 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
*/
the_cpu = ctx->ctx_cpu = smp_processor_id();
ret = -EBUSY;
/*
* now reserve the session
*/
ret = pfm_reserve_session(current, is_system, the_cpu);
if (ret) goto error;
ret = -EBUSY;
/*
* task is necessarily stopped at this point.
*
......@@ -4342,11 +4366,6 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
/* initial saved psr (stopped) */
ctx->ctx_saved_psr_up = 0UL;
ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
if (ctx->ctx_fl_unsecure) {
ia64_psr(regs)->sp = 0;
DPRINT(("context unsecured for [%d]\n", task->pid));
}
}
ret = 0;
......@@ -4354,6 +4373,14 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
error_unres:
if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
error:
/*
* we must undo the dbregs setting (for system-wide)
*/
if (ret && set_dbregs) {
LOCK_PFS();
pfm_sessions.pfs_sys_use_dbregs--;
UNLOCK_PFS();
}
/*
* release task, there is now a link with the context
*/
......@@ -4455,7 +4482,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
*/
tregs = task == current ? regs : ia64_task_regs(task);
if (task == current || ctx->ctx_fl_unsecure) {
if (task == current) {
/*
* cancel user level control
*/
......@@ -4493,7 +4520,10 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
ctx->ctx_task = NULL;
PFM_SET_WORK_PENDING(task, 0);
ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
ctx->ctx_fl_can_restart = 0;
ctx->ctx_fl_going_zombie = 0;
DPRINT(("disconnected [%d] from context\n", task->pid));
......@@ -4686,7 +4716,7 @@ pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
UNPROTECT_CTX(ctx, flags);
pfm_wait_task_inactive(task);
wait_task_inactive(task);
PROTECT_CTX(ctx, flags);
......@@ -4725,7 +4755,8 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon
PFM_CMD_IDX(cmd),
PFM_CMD_IS_VALID(cmd),
PFM_CMD_NARG(cmd),
PFM_CMD_ARG_SIZE(cmd), count));
PFM_CMD_ARG_SIZE(cmd),
count));
/*
* check if number of arguments matches what the command expects
......@@ -4842,8 +4873,10 @@ pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_reg
{
pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
pfm_ovfl_ctrl_t rst_ctrl;
int state;
int ret = 0;
state = ctx->ctx_state;
/*
* Unlock sampling buffer and reset index atomically
* XXX: not really needed when blocking
......@@ -4853,9 +4886,10 @@ pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_reg
rst_ctrl.bits.mask_monitoring = 0;
rst_ctrl.bits.reset_ovfl_pmds = 1;
/* XXX: check return value */
if (fmt->fmt_restart)
ret = (*fmt->fmt_restart)(current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
if (state == PFM_CTX_LOADED)
ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
else
ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
} else {
rst_ctrl.bits.mask_monitoring = 0;
rst_ctrl.bits.reset_ovfl_pmds = 1;
......@@ -4876,7 +4910,6 @@ pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_reg
}
}
/*
* context MUST BE LOCKED when calling
* can only be called for current
......@@ -4954,7 +4987,7 @@ pfm_handle_work(void)
reason = ctx->ctx_fl_trap_reason;
ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
DPRINT(("[%d] reason=%d\n", current->pid, reason));
DPRINT(("[%d] reason=%d state=%d\n", current->pid, reason, ctx->ctx_state));
/*
* must be done before we check non-blocking mode
......@@ -5085,7 +5118,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
{
pfm_ovfl_arg_t ovfl_arg;
unsigned long mask;
unsigned long old_val;
unsigned long old_val, ovfl_val;
unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL;
unsigned long tstamp;
pfm_ovfl_ctrl_t ovfl_ctrl;
......@@ -5102,6 +5135,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
tstamp = ia64_get_itc();
mask = pmc0 >> PMU_FIRST_COUNTER;
ovfl_val = pmu_conf.ovfl_val;
DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
"used_pmds=0x%lx reload_pmcs=0x%lx\n",
......@@ -5133,7 +5167,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
* pfm_read_pmds().
*/
old_val = ctx->ctx_pmds[i].val;
ctx->ctx_pmds[i].val += 1 + pmu_conf.ovfl_val;
ctx->ctx_pmds[i].val += 1 + ovfl_val;
/*
* check for overflow condition
......@@ -5145,7 +5179,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx smpl_pmds=0x%lx\n",
i, ctx->ctx_pmds[i].val, old_val,
ia64_get_pmd(i) & pmu_conf.ovfl_val, ovfl_pmds, ovfl_notify, smpl_pmds));
ia64_get_pmd(i) & ovfl_val, ovfl_pmds, ovfl_notify, smpl_pmds));
}
/*
......@@ -5196,6 +5230,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
if ((smpl_pmds & 0x1) == 0) continue;
ovfl_arg.smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg.smpl_pmds_values[k-1]));
}
}
......@@ -5294,6 +5329,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
if (ovfl_ctrl.bits.mask_monitoring) {
pfm_mask_monitoring(task);
ctx->ctx_state = PFM_CTX_MASKED;
ctx->ctx_fl_can_restart = 1;
}
/*
......@@ -5376,12 +5412,10 @@ pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
*/
/* sanity check */
if (!ctx) goto report_spurious;
if (!ctx) goto report_spurious1;
if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) {
printk("perfmon: current [%d] owner = [%d] PMVALID=0 state=%d\n", current->pid, task->pid, ctx->ctx_state);
goto report_spurious;
}
if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0)
goto report_spurious2;
PROTECT_CTX_NOPRINT(ctx, flags);
......@@ -5400,14 +5434,20 @@ pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
return retval;
report_spurious:
report_spurious1:
printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
this_cpu, task->pid);
pfm_unfreeze_pmu();
return -1;
report_spurious2:
printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n",
this_cpu,
task->pid);
pfm_unfreeze_pmu();
return -1;
}
static pfm_irq_handler_t
static irqreturn_t
pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
{
unsigned long start_cycles, total_cycles;
......@@ -5436,7 +5476,8 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
}
PFM_IRQ_HANDLER_RET();
put_cpu_no_resched();
return IRQ_HANDLED;
}
......@@ -5445,10 +5486,13 @@ static int
pfm_proc_info(char *page)
{
char *p = page;
pfm_buffer_fmt_t *b;
struct list_head * pos;
pfm_buffer_fmt_t * entry;
unsigned long psr;
int online_cpus = 0;
int i;
p += sprintf(p, "perfmon version : %u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN);
p += sprintf(p, "model : %s\n", pmu_conf.pmu_name);
p += sprintf(p, "fastctxsw : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No");
p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.ovfl_val);
......@@ -5462,17 +5506,17 @@ pfm_proc_info(char *page)
p += sprintf(p, "CPU%-2d smpl handler calls : %lu\n", i, pfm_stats[i].pfm_smpl_handler_calls);
p += sprintf(p, "CPU%-2d smpl handler cycles : %lu\n", i, pfm_stats[i].pfm_smpl_handler_cycles);
p += sprintf(p, "CPU%-2d spurious intrs : %lu\n", i, pfm_stats[i].pfm_spurious_ovfl_intr_count);
p += sprintf(p, "CPU%-2d sysupdt count : %lu\n", i, pfm_stats[i].pfm_sysupdt_count);
p += sprintf(p, "CPU%-2d sysupdt cycles : %lu\n", i, pfm_stats[i].pfm_sysupdt_cycles);
p += sprintf(p, "CPU%-2d replay intrs : %lu\n", i, pfm_stats[i].pfm_replay_ovfl_intr_count);
p += sprintf(p, "CPU%-2d syst_wide : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_SYST_WIDE ? 1 : 0);
p += sprintf(p, "CPU%-2d dcr_pp : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_DCR_PP ? 1 : 0);
p += sprintf(p, "CPU%-2d exclude idle : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0);
p += sprintf(p, "CPU%-2d owner : %d\n" , i, pfm_get_cpu_data(pmu_owner, i) ? pfm_get_cpu_data(pmu_owner, i)->pid: -1);
p += sprintf(p, "CPU%-2d context : %p\n" , i, pfm_get_cpu_data(pmu_ctx, i));
p += sprintf(p, "CPU%-2d activations : %lu\n", i, pfm_get_cpu_data(pmu_activation_number,i));
online_cpus++;
}
if (num_online_cpus() == 1)
if (online_cpus == 1)
{
psr = pfm_get_psr();
ia64_srlz_d();
......@@ -5495,29 +5539,30 @@ pfm_proc_info(char *page)
pfm_sessions.pfs_ptrace_use_dbregs);
UNLOCK_PFS();
LOCK_BUF_FMT_LIST();
spin_lock(&pfm_buffer_fmt_lock);
for (b = pfm_buffer_fmt_list; b ; b = b->fmt_next) {
list_for_each(pos, &pfm_buffer_fmt_list) {
entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
p += sprintf(p, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
b->fmt_uuid[0],
b->fmt_uuid[1],
b->fmt_uuid[2],
b->fmt_uuid[3],
b->fmt_uuid[4],
b->fmt_uuid[5],
b->fmt_uuid[6],
b->fmt_uuid[7],
b->fmt_uuid[8],
b->fmt_uuid[9],
b->fmt_uuid[10],
b->fmt_uuid[11],
b->fmt_uuid[12],
b->fmt_uuid[13],
b->fmt_uuid[14],
b->fmt_uuid[15],
b->fmt_name);
}
UNLOCK_BUF_FMT_LIST();
entry->fmt_uuid[0],
entry->fmt_uuid[1],
entry->fmt_uuid[2],
entry->fmt_uuid[3],
entry->fmt_uuid[4],
entry->fmt_uuid[5],
entry->fmt_uuid[6],
entry->fmt_uuid[7],
entry->fmt_uuid[8],
entry->fmt_uuid[9],
entry->fmt_uuid[10],
entry->fmt_uuid[11],
entry->fmt_uuid[12],
entry->fmt_uuid[13],
entry->fmt_uuid[14],
entry->fmt_uuid[15],
entry->fmt_name);
}
spin_unlock(&pfm_buffer_fmt_lock);
return p - page;
}
......@@ -5546,7 +5591,7 @@ perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, voi
* local_cpu_data->pfm_syst_info
*/
void
pfm_do_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
{
struct pt_regs *regs;
unsigned long dcr;
......@@ -5591,21 +5636,10 @@ pfm_do_syst_wide_update_task(struct task_struct *task, unsigned long info, int i
}
}
void
pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
{
unsigned long start, end;
pfm_stats[smp_processor_id()].pfm_sysupdt_count++;
start = ia64_get_itc();
pfm_do_syst_wide_update_task(task, info, is_ctxswin);
end = ia64_get_itc();
pfm_stats[smp_processor_id()].pfm_sysupdt_cycles += end-start;
}
#ifdef CONFIG_SMP
/*
* in 2.6, interrupts are masked when we come here and the runqueue lock is held
*/
void
pfm_save_regs(struct task_struct *task)
{
......@@ -5706,14 +5740,11 @@ pfm_save_regs(struct task_struct *task)
/*
* unfreeze PMU if had pending overflows
*/
if (t->pmcs[0] & ~1UL) pfm_unfreeze_pmu();
if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
/*
* finally, unmask interrupts and allow context
* access.
* Any pending overflow interrupt may be delivered
* here and will be treated as spurious because we
* have no PMU owner anymore.
* finally, allow context access.
* interrupts will still be masked after this call.
*/
pfm_unprotect_ctx_ctxsw(ctx, flags);
......@@ -5726,10 +5757,6 @@ pfm_save_regs(struct task_struct *task)
}
#else /* !CONFIG_SMP */
/*
* in 2.5, interrupts are masked when we come here
*/
void
pfm_save_regs(struct task_struct *task)
{
......@@ -5836,6 +5863,9 @@ pfm_lazy_save_regs (struct task_struct *task)
#endif /* CONFIG_SMP */
#ifdef CONFIG_SMP
/*
* in 2.6, interrupts are masked when we come here and the runqueue lock is held
*/
void
pfm_load_regs (struct task_struct *task)
{
......@@ -5959,18 +5989,22 @@ pfm_load_regs (struct task_struct *task)
* was saved.
*/
if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
struct pt_regs *regs = ia64_task_regs(task);
pfm_overflow_handler(task, ctx, t->pmcs[0], regs);
}
/*
* we clear PMC0, to ensure that any in flight interrupt
* will not be attributed to the new context we are installing
* because the actual overflow has been processed above already.
* No real effect until we unmask interrupts at the end of the
* function.
* reload pmc0 with the overflow information
* On McKinley PMU, this will trigger a PMU interrupt
*/
pfm_unfreeze_pmu();
ia64_set_pmc(0, t->pmcs[0]);
ia64_srlz_d();
t->pmcs[0] = 0UL;
#ifndef CONFIG_MCKINLEY
/*
* will replay the PMU interrupt
*/
DRPINT(("perfmon: resend irq for [%d]\n", task->pid));
hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
#endif
pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
}
/*
* we just did a reload, so we reset the partial reload fields
......@@ -5990,13 +6024,15 @@ pfm_load_regs (struct task_struct *task)
SET_ACTIVATION(ctx);
/*
* establish new ownership. Interrupts
* are still masked at this point.
* establish new ownership.
*/
SET_PMU_OWNER(task, ctx);
/*
* restore the psr.up bit
* restore the psr.up bit. measurement
* is active again.
* no PMU interrupt can happen at this point
* because we still have interrupts disabled.
*/
if (likely(psr_up)) pfm_set_psr_up();
......@@ -6091,42 +6127,39 @@ pfm_load_regs (struct task_struct *task)
pfm_restore_pmcs(t->pmcs, pmc_mask);
/*
* Check for pending overflow when state was last saved.
* invoke the handler if any overflow status bits are set.
*
* Any PMU overflow in flight at this point, will still
* be treated as spurious because we have no declared
* owner. Note that the first level interrupt handler
* DOES NOT TOUCH any PMC except PMC0 for which we have
* a copy already.
* check for pending overflow at the time the state
* was saved.
*/
if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
struct pt_regs *regs = ia64_task_regs(task);
pfm_overflow_handler(task, ctx, t->pmcs[0], regs);
}
/*
* reload pmc0 with the overflow information
* On McKinley PMU, this will trigger a PMU interrupt
*/
ia64_set_pmc(0, t->pmcs[0]);
ia64_srlz_d();
t->pmcs[0] = 0UL;
#ifndef CONFIG_MCKINLEY
/*
* we clear PMC0, to ensure that any in flight interrupt
* will not be attributed to the new context we are installing
* because the actual overflow has been processed above already.
*
* This is an atomic operation.
* will replay the PMU interrupt
*/
pfm_unfreeze_pmu();
DPRINT(("perfmon: resend irq for [%d]\n", task->pid));
hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
#endif
pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
}
/*
* establish new ownership. If there was an in-flight
* overflow interrupt, it will be treated as spurious
* before and after the call, because no overflow
* status bit can possibly be set. No new overflow
* can be generated because, at this point, psr.up
* is still cleared.
* establish new ownership.
*/
SET_PMU_OWNER(task, ctx);
/*
* restore the psr. This is the point at which
* new overflow interrupts can be generated again.
* restore the psr.up bit. measurement
* is active again.
* no PMU interrupt can happen at this point
* because we still have interrupts disabled.
*/
if (likely(psr_up)) pfm_set_psr_up();
}
......@@ -6139,7 +6172,7 @@ static void
pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
{
u64 pmc0;
unsigned long mask2, val, pmd_val;
unsigned long mask2, val, pmd_val, ovfl_val;
int i, can_access_pmu = 0;
int is_self;
......@@ -6187,7 +6220,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
*/
task->thread.pmcs[0] &= ~0x1;
}
ovfl_val = pmu_conf.ovfl_val;
/*
* we save all the used pmds
* we take care of overflows for counting PMDs
......@@ -6210,12 +6243,12 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
task->pid,
i,
ctx->ctx_pmds[i].val,
val & pmu_conf.ovfl_val));
val & ovfl_val));
/*
* we rebuild the full 64 bit value of the counter
*/
val = ctx->ctx_pmds[i].val + (val & pmu_conf.ovfl_val);
val = ctx->ctx_pmds[i].val + (val & ovfl_val);
/*
* now everything is in ctx_pmds[] and we need
......@@ -6228,7 +6261,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
* take care of overflow inline
*/
if (pmc0 & (1UL << i)) {
val += 1 + pmu_conf.ovfl_val;
val += 1 + ovfl_val;
DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
}
}
......@@ -6338,7 +6371,7 @@ pfm_init(void)
* initialize all our spinlocks
*/
spin_lock_init(&pfm_sessions.pfs_lock);
spin_lock_init(&pfm_smpl_fmt_lock);
spin_lock_init(&pfm_buffer_fmt_lock);
init_pfm_fs();
......@@ -6352,6 +6385,9 @@ pfm_init(void)
__initcall(pfm_init);
/*
* this function is called before pfm_init()
*/
void
pfm_init_percpu (void)
{
......@@ -6364,7 +6400,6 @@ pfm_init_percpu (void)
pfm_clear_psr_pp();
pfm_clear_psr_up();
if (smp_processor_id() == 0)
register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
......
......@@ -81,6 +81,8 @@ pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnu
*/
if (cnum == 13 && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
......@@ -98,6 +100,8 @@ pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnu
*/
if (cnum == 11 && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
......
......@@ -109,10 +109,20 @@ pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnu
if (ctx == NULL) return -EINVAL;
/*
* we must clear the debug registers if any pmc13.ena_dbrpX bit is enabled
* before they are written (fl_using_dbreg==0) to avoid picking up stale information.
* we must clear the debug registers if pmc13 has a value which enables
* memory pipeline event constraints. In this case we need to clear the
* debug registers if they have not yet been accessed. This is required
* to avoid picking up stale state.
* PMC13 is "active" if:
* one of the pmc13.cfg_dbrpXX fields is different from 0x3
* AND
* the corresponding pmc13.ena_dbrpXX is set.
*
* For now, we just check on cfg_dbrXX != 0x3.
*/
if (cnum == 13 && (*val & (0xfUL << 45)) && ctx->ctx_fl_using_dbreg == 0) {
if (cnum == 13 && ((*val & 0x18181818UL) != 0x18181818UL) && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
......@@ -128,7 +138,9 @@ pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnu
* we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
* before they are written (fl_using_dbreg==0) to avoid picking up stale information.
*/
if (cnum == 14 && ((*val & 0x2222) != 0x2222) && ctx->ctx_fl_using_dbreg == 0) {
if (cnum == 14 && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
......@@ -170,7 +182,7 @@ pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnu
&& ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
if (ret) printk("perfmon: failure check_case1\n");
if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
}
return ret ? -EINVAL : 0;
......
......@@ -30,6 +30,8 @@
#include <linux/string.h>
#include <linux/threads.h>
#include <linux/tty.h>
#include <linux/serial.h>
#include <linux/serial_core.h>
#include <linux/efi.h>
#include <linux/initrd.h>
......@@ -43,6 +45,7 @@
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/sections.h>
#include <asm/serial.h>
#include <asm/smp.h>
#include <asm/system.h>
#include <asm/unistd.h>
......@@ -221,6 +224,25 @@ find_initrd (void)
#endif
}
#ifdef CONFIG_SERIAL_8250_CONSOLE
static void __init
setup_serial_legacy (void)
{
struct uart_port port;
unsigned int i, iobase[] = {0x3f8, 0x2f8};
printk(KERN_INFO "Registering legacy COM ports for serial console\n");
memset(&port, 0, sizeof(port));
port.iotype = SERIAL_IO_PORT;
port.uartclk = BASE_BAUD * 16;
for (i = 0; i < ARRAY_SIZE(iobase); i++) {
port.line = i;
port.iobase = iobase[i];
early_serial_setup(&port);
}
}
#endif
void __init
setup_arch (char **cmdline_p)
{
......@@ -294,11 +316,22 @@ setup_arch (char **cmdline_p)
#ifdef CONFIG_SERIAL_8250_HCDP
if (efi.hcdp) {
void setup_serial_hcdp(void *);
/* Setup the serial ports described by HCDP */
setup_serial_hcdp(efi.hcdp);
}
#endif
#ifdef CONFIG_SERIAL_8250_CONSOLE
/*
* Without HCDP, we won't discover any serial ports until the serial driver looks
* in the ACPI namespace. If ACPI claims there are some legacy devices, register
* the legacy COM ports so serial console works earlier. This is slightly dangerous
* because we don't *really* know whether there's anything there, but we hope that
* all new boxes will implement HCDP.
*/
extern unsigned char acpi_legacy_devices;
if (!efi.hcdp && acpi_legacy_devices)
setup_serial_legacy();
#endif
#ifdef CONFIG_VT
# if defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
......
......@@ -65,8 +65,12 @@ itc_update (long delta_nsec)
}
/*
* Return the number of nano-seconds that elapsed since the last update to jiffy. The
* xtime_lock must be at least read-locked when calling this routine.
* Return the number of nanoseconds that have elapsed since the last
* update to jiffies. It is quite possible that the timer interrupt
* will interrupt this and result in a race for any of jiffies,
* wall_jiffies or itm_next. Thus, xtime_lock must be at least
* read-synchronised when calling this routine (see do_gettimeofday()
* below for an example).
*/
unsigned long
itc_get_offset (void)
......@@ -77,11 +81,6 @@ itc_get_offset (void)
last_tick = (cpu_data(TIME_KEEPER_ID)->itm_next
- (lost + 1)*cpu_data(TIME_KEEPER_ID)->itm_delta);
if (unlikely((long) (now - last_tick) < 0)) {
printk(KERN_ERR "CPU %d: now < last_tick (now=0x%lx,last_tick=0x%lx)!\n",
smp_processor_id(), now, last_tick);
return last_nsec_offset;
}
elapsed_cycles = now - last_tick;
return (elapsed_cycles*local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT;
}
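
The comment above points at do_gettimeofday() for the locking pattern; a
minimal read-side sketch is shown below. The wrapper name is invented, and
xtime_lock is assumed to be the seqlock it is elsewhere in 2.6-era code, so
this is an illustration of the expected usage, not code from this patch.

#include <linux/seqlock.h>
#include <linux/time.h>		/* assumed declaration site of xtime_lock */

static unsigned long example_nsec_since_tick(void)
{
	unsigned long seq, nsec;

	do {
		/* retry if a timer tick updates jiffies/itm_next under us */
		seq = read_seqbegin(&xtime_lock);
		nsec = itc_get_offset();
	} while (read_seqretry(&xtime_lock, seq));

	return nsec;
}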
......
......@@ -11,6 +11,8 @@
#include <asm/sn/sn2/io.h>
#ifdef CONFIG_IA64_GENERIC
#undef __sn_inb
#undef __sn_inw
#undef __sn_inl
......@@ -81,3 +83,5 @@ __sn_readq (void *addr)
{
return ___sn_readq (addr);
}
#endif
......@@ -68,20 +68,25 @@
* we'll patch out the work-around bundles with NOPs, so their impact is minimal.
*/
#define DO_MCKINLEY_E9_WORKAROUND
#ifdef DO_MCKINLEY_E9_WORKAROUND
.section ".data.patch.mckinley_e9", "a"
.previous
/* workaround for Itanium 2 Errata 9: */
# define MCKINLEY_E9_WORKAROUND \
.xdata4 ".data.patch.mckinley_e9", 1f-.;\
# define FSYS_RETURN \
.xdata4 ".data.patch.mckinley_e9", 1f-.; \
1:{ .mib; \
nop.m 0; \
nop.i 0; \
br.call.sptk.many b7=1f;; \
mov r16=ar.pfs; \
br.call.sptk.many b7=2f;; \
}; \
1:
2:{ .mib; \
nop.m 0; \
mov ar.pfs=r16; \
br.ret.sptk.many b6;; \
}
#else
# define MCKINLEY_E9_WORKAROUND
# define FSYS_RETURN br.ret.sptk.many b6
#endif
#endif /* _ASM_IA64_ASMMACRO_H */
......@@ -67,14 +67,15 @@ ia64_get_itc (void)
return result;
}
extern void ia64_delay_loop (unsigned long loops);
static __inline__ void
__delay (unsigned long loops)
{
if (loops < 1)
if (unlikely(loops < 1))
return;
while (loops--)
ia64_nop(0);
ia64_delay_loop (loops - 1);
}
static __inline__ void
......
......@@ -99,4 +99,6 @@ extern ia64_mv_dma_supported sn_dma_supported;
#define platform_dma_sync_sg sn_dma_sync_sg
#define platform_dma_supported sn_dma_supported
#include <asm/sn/sn2/io.h>
#endif /* _ASM_IA64_MACHVEC_SN2_H */
......@@ -108,8 +108,6 @@ enum {
IA64_MCA_NEW_CONTEXT = -1 /* SAL to return to new context */
};
#define MIN_STATE_AREA_SIZE 57
typedef struct ia64_mca_os_to_sal_state_s {
u64 imots_os_status; /* OS status to SAL as to what happened
* with the MCA handling.
......
......@@ -110,10 +110,9 @@
;; \
dep temp1 = -1, temp1, PSR_MC, 1; \
;; \
movl temp2 = start_addr; \
mov cr.ipsr = temp1; \
;; \
INST_VA_TO_PA(temp2); \
LOAD_PHYSICAL(p0, temp2, start_addr); \
;; \
mov cr.iip = temp2; \
mov cr.ifs = r0; \
......
......@@ -405,10 +405,11 @@ typedef struct pal_process_state_info_s {
* generated.
* (Trap Lost )
*/
op : 3, /* Operation that
* caused the machine
* check
mi : 1, /* More information available
* call PAL_MC_ERROR_INFO
*/
pi : 1, /* Precise instruction pointer */
pm : 1, /* Precise min-state save area */
dy : 1, /* Processor dynamic
* state valid
......@@ -450,11 +451,12 @@ typedef struct pal_process_state_info_s {
* by the processor
*/
reserved2 : 12,
reserved2 : 11,
cc : 1, /* Cache check */
tc : 1, /* TLB check */
bc : 1, /* Bus check */
uc : 1; /* Unknown check */
rc : 1, /* Register file check */
uc : 1; /* Uarch check */
} pal_processor_state_info_t;
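
For readers following the bitfield shuffle above, a hypothetical consumer of
the new fields; the bit names come straight from the structure, but the
function and its messages are invented for illustration only.

#include <linux/kernel.h>

static void example_report_psi(const pal_processor_state_info_t *psi)
{
	if (psi->mi)	/* more detail can be pulled via PAL_MC_ERROR_INFO */
		printk(KERN_INFO "PAL: additional error info available\n");
	if (psi->rc)	/* new register file check bit */
		printk(KERN_INFO "PAL: register file check reported\n");
	if (psi->uc)	/* renamed: uarch check */
		printk(KERN_INFO "PAL: uarch check reported\n");
}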
......
......@@ -38,7 +38,6 @@
*/
#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */
#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */
#define PFM_FL_UNSECURE 0x04 /* allow unsecure monitoring for non self-monitoring task */
#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */
/*
......@@ -162,8 +161,6 @@ typedef union {
*/
#define PFM_VERSION_MAJ 2U
#define PFM_VERSION_MIN 0U
#define PFM_SMPL_HDR_VERSION_MAJ 2U
#define PFM_SMPL_HDR_VERSION_MIN 0U
#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff)
#define PFM_VERSION_MINOR(x) ((x) & 0xffff)
......@@ -194,9 +191,8 @@ extern void pfm_handle_work(void);
/*
* Reset PMD register flags
*/
#define PFM_PMD_NO_RESET 0
#define PFM_PMD_SHORT_RESET 0
#define PFM_PMD_LONG_RESET 1
#define PFM_PMD_SHORT_RESET 2
typedef union {
unsigned int val;
......@@ -223,7 +219,7 @@ typedef struct {
} pfm_ovfl_arg_t;
typedef struct _pfm_buffer_fmt_t {
typedef struct {
char *fmt_name;
pfm_uuid_t fmt_uuid;
size_t fmt_arg_size;
......@@ -237,8 +233,7 @@ typedef struct _pfm_buffer_fmt_t {
int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
struct _pfm_buffer_fmt_t *fmt_next;
struct _pfm_buffer_fmt_t *fmt_prev;
struct list_head fmt_list;
} pfm_buffer_fmt_t;
extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
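
With fmt_next/fmt_prev replaced by a standard struct list_head, lookups over
registered buffer formats can use the generic list helpers. The list-head
variable name and the lookup function below are assumptions for illustration,
not the perfmon code itself.

#include <linux/list.h>
#include <linux/string.h>

static LIST_HEAD(pfm_buffer_fmt_list);	/* assumed name for the global list */

static pfm_buffer_fmt_t *example_find_fmt(pfm_uuid_t uuid)
{
	pfm_buffer_fmt_t *fmt;

	/* walk the embedded fmt_list instead of hand-rolled next/prev links */
	list_for_each_entry(fmt, &pfm_buffer_fmt_list, fmt_list) {
		if (memcmp(fmt->fmt_uuid, uuid, sizeof(pfm_uuid_t)) == 0)
			return fmt;
	}
	return NULL;
}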
......
......@@ -10,7 +10,7 @@
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
typedef unsigned int __kernel_ino_t;
typedef unsigned long __kernel_ino_t;
typedef unsigned int __kernel_mode_t;
typedef unsigned int __kernel_nlink_t;
typedef long __kernel_off_t;
......
......@@ -4,8 +4,6 @@
* Derived from the i386 version.
*/
#include <linux/config.h>
/*
* This assumes you have a 1.8432 MHz clock for your UART.
*
......@@ -15,107 +13,7 @@
*/
#define BASE_BAUD ( 1843200 / 16 )
#define CONFIG_SERIAL_DETECT_IRQ /* on IA-64, we always want to autodetect irqs */
/* Standard COM flags (except for COM4, because of the 8514 problem) */
#ifdef CONFIG_SERIAL_DETECT_IRQ
#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ)
#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ)
#else
#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF
#endif
#ifdef CONFIG_SERIAL_MANY_PORTS
#define FOURPORT_FLAGS ASYNC_FOURPORT
#define ACCENT_FLAGS 0
#define BOCA_FLAGS 0
#define HUB6_FLAGS 0
#define RS_TABLE_SIZE 64
#else
#define RS_TABLE_SIZE
#endif
/*
* The following define the access methods for the HUB6 card. All
* access is through two ports for all 24 possible chips. The card is
* selected through the high 2 bits, the port on that card with the
* "middle" 3 bits, and the register on that port with the bottom
* 3 bits.
*
* While the access port and interrupt is configurable, the default
* port locations are 0x302 for the port control register, and 0x303
* for the data read/write register. Normally, the interrupt is at irq3
* but can be anything from 3 to 7 inclusive. Note that using 3 will
* require disabling com2.
*/
#define C_P(card,port) (((card)<<6|(port)<<3) + 1)
#define STD_SERIAL_PORT_DEFNS \
/* UART CLK PORT IRQ FLAGS */ \
{ 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \
{ 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \
{ 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \
{ 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */
#ifdef CONFIG_SERIAL_MANY_PORTS
#define EXTRA_SERIAL_PORT_DEFNS \
{ 0, BASE_BAUD, 0x1A0, 9, FOURPORT_FLAGS }, /* ttyS4 */ \
{ 0, BASE_BAUD, 0x1A8, 9, FOURPORT_FLAGS }, /* ttyS5 */ \
{ 0, BASE_BAUD, 0x1B0, 9, FOURPORT_FLAGS }, /* ttyS6 */ \
{ 0, BASE_BAUD, 0x1B8, 9, FOURPORT_FLAGS }, /* ttyS7 */ \
{ 0, BASE_BAUD, 0x2A0, 5, FOURPORT_FLAGS }, /* ttyS8 */ \
{ 0, BASE_BAUD, 0x2A8, 5, FOURPORT_FLAGS }, /* ttyS9 */ \
{ 0, BASE_BAUD, 0x2B0, 5, FOURPORT_FLAGS }, /* ttyS10 */ \
{ 0, BASE_BAUD, 0x2B8, 5, FOURPORT_FLAGS }, /* ttyS11 */ \
{ 0, BASE_BAUD, 0x330, 4, ACCENT_FLAGS }, /* ttyS12 */ \
{ 0, BASE_BAUD, 0x338, 4, ACCENT_FLAGS }, /* ttyS13 */ \
{ 0, BASE_BAUD, 0x000, 0, 0 }, /* ttyS14 (spare) */ \
{ 0, BASE_BAUD, 0x000, 0, 0 }, /* ttyS15 (spare) */ \
{ 0, BASE_BAUD, 0x100, 12, BOCA_FLAGS }, /* ttyS16 */ \
{ 0, BASE_BAUD, 0x108, 12, BOCA_FLAGS }, /* ttyS17 */ \
{ 0, BASE_BAUD, 0x110, 12, BOCA_FLAGS }, /* ttyS18 */ \
{ 0, BASE_BAUD, 0x118, 12, BOCA_FLAGS }, /* ttyS19 */ \
{ 0, BASE_BAUD, 0x120, 12, BOCA_FLAGS }, /* ttyS20 */ \
{ 0, BASE_BAUD, 0x128, 12, BOCA_FLAGS }, /* ttyS21 */ \
{ 0, BASE_BAUD, 0x130, 12, BOCA_FLAGS }, /* ttyS22 */ \
{ 0, BASE_BAUD, 0x138, 12, BOCA_FLAGS }, /* ttyS23 */ \
{ 0, BASE_BAUD, 0x140, 12, BOCA_FLAGS }, /* ttyS24 */ \
{ 0, BASE_BAUD, 0x148, 12, BOCA_FLAGS }, /* ttyS25 */ \
{ 0, BASE_BAUD, 0x150, 12, BOCA_FLAGS }, /* ttyS26 */ \
{ 0, BASE_BAUD, 0x158, 12, BOCA_FLAGS }, /* ttyS27 */ \
{ 0, BASE_BAUD, 0x160, 12, BOCA_FLAGS }, /* ttyS28 */ \
{ 0, BASE_BAUD, 0x168, 12, BOCA_FLAGS }, /* ttyS29 */ \
{ 0, BASE_BAUD, 0x170, 12, BOCA_FLAGS }, /* ttyS30 */ \
{ 0, BASE_BAUD, 0x178, 12, BOCA_FLAGS }, /* ttyS31 */
#else
#define EXTRA_SERIAL_PORT_DEFNS
#endif
/* You can have up to four HUB6's in the system, but I've only
* included two cards here for a total of twelve ports.
* All legacy serial ports should be enumerated via ACPI namespace, so
* we need not list them here.
*/
#if (defined(CONFIG_HUB6) && defined(CONFIG_SERIAL_MANY_PORTS))
#define HUB6_SERIAL_PORT_DFNS \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,0) }, /* ttyS32 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,1) }, /* ttyS33 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,2) }, /* ttyS34 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,3) }, /* ttyS35 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,4) }, /* ttyS36 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,5) }, /* ttyS37 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,0) }, /* ttyS38 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,1) }, /* ttyS39 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,2) }, /* ttyS40 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,3) }, /* ttyS41 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,4) }, /* ttyS42 */ \
{ 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,5) }, /* ttyS43 */
#else
#define HUB6_SERIAL_PORT_DFNS
#endif
#define SERIAL_PORT_DFNS \
STD_SERIAL_PORT_DEFNS \
EXTRA_SERIAL_PORT_DEFNS \
HUB6_SERIAL_PORT_DFNS