ia64: Sync up with 2.5.18.

1afd09aa · David Mosberger · bf1c989a · 1afd09aa · 1afd09aa · 1afd09aa
Commit 1afd09aa authored May 30, 2002 by David Mosberger
8 changed files
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -106,6 +106,12 @@

 #define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)

+#ifdef CONFIG_SMP
+#define cpu_is_online(i) (cpu_online_map & (1UL << (i)))	/* non-zero iff bit i is set in cpu_online_map */
+#else
+#define cpu_is_online(i)        ((i) == 0)	/* without CONFIG_SMP only CPU 0 is treated as online */
+#endif
+
 /*
 * debugging
 */
@@ -277,8 +283,8 @@ typedef struct {
 typedef struct {
 	pfm_pmu_reg_type_t	type;
 	int			pm_pos;
-	int			(*read_check)(struct task_struct *task, unsigned int cnum, unsigned long *val);
-	int			(*write_check)(struct task_struct *task, unsigned int cnum, unsigned long *val);
+	int			(*read_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+	int			(*write_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
 	unsigned long		dep_pmd[4];
 	unsigned long		dep_pmc[4];
 } pfm_reg_desc_t;
@@ -902,8 +908,8 @@ pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
 		/*
 		 * and it must be a valid CPU
 		 */
-		cpu = ffs(pfx->ctx_cpu_mask);
-		if (cpu > smp_num_cpus) {
+		cpu = ffz(~pfx->ctx_cpu_mask);
+		if (cpu_is_online(cpu) == 0) {
 			DBprintk(("CPU%d is not online\n", cpu));
 			return -EINVAL;
 		}
@@ -968,7 +974,7 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int
 	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {

 		/* at this point, we know there is at least one bit set */
-		cpu = ffs(tmp.ctx_cpu_mask) - 1;
+		cpu = ffz(~tmp.ctx_cpu_mask);

 		DBprintk(("requesting CPU%d currently on CPU%d\n",cpu, smp_processor_id()));

@@ -1280,7 +1286,7 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
 		/*
 		 * execute write checker, if any
 		 */
-		if (PMC_WR_FUNC(cnum)) ret = PMC_WR_FUNC(cnum)(task, cnum, &tmp.reg_value);
+		if (PMC_WR_FUNC(cnum)) ret = PMC_WR_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
 abort_mission:
 		if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;

@@ -1371,7 +1377,7 @@ pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
 		/*
 		 * execute write checker, if any
 		 */
-		if (PMD_WR_FUNC(cnum)) ret = PMD_WR_FUNC(cnum)(task, cnum, &tmp.reg_value);
+		if (PMD_WR_FUNC(cnum)) ret = PMD_WR_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
 abort_mission:
 		if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;

@@ -1394,6 +1400,8 @@ pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun

 		/* keep track of what we use */
 		CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]);
+		/* mark this register as used as well */
+		CTX_USED_PMD(ctx, RDEP(cnum));

 		/* writes to unimplemented part is ignored, so this is safe */
 		ia64_set_pmd(cnum, tmp.reg_value & pmu_conf.perf_ovfl_val);
@@ -1438,7 +1446,7 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count
 	DBprintk(("ctx_last_cpu=%d for [%d]\n", atomic_read(&ctx->ctx_last_cpu), task->pid));

 	for (i = 0; i < count; i++, req++) {
-		unsigned long reg_val = ~0UL, ctx_val = ~0UL;
+		unsigned long ctx_val = ~0UL;

 		if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;

@@ -1462,7 +1470,7 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count
 		 */
 		if (atomic_read(&ctx->ctx_last_cpu) == smp_processor_id()){
 			ia64_srlz_d();
-			val = reg_val = ia64_get_pmd(cnum);
+			val = ia64_get_pmd(cnum);
 			DBprintk(("reading pmd[%u]=0x%lx from hw\n", cnum, val));
 		} else {
 #ifdef CONFIG_SMP
@@ -1484,7 +1492,7 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count
 			}
 #endif
 			/* context has been saved */
-			val = reg_val = th->pmd[cnum];
+			val = th->pmd[cnum];
 		}
 		if (PMD_IS_COUNTING(cnum)) {
 			/*
@@ -1493,9 +1501,7 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count

 			val &= pmu_conf.perf_ovfl_val;
 			val += ctx_val = ctx->ctx_soft_pmds[cnum].val;
-		} else {
-			val = reg_val = ia64_get_pmd(cnum);
-		}
+		} 

 		tmp.reg_value = val;

@@ -1503,14 +1509,13 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count
 		 * execute read checker, if any
 		 */
 		if (PMD_RD_FUNC(cnum)) {
-			ret = PMD_RD_FUNC(cnum)(task, cnum, &tmp.reg_value);
+			ret = PMD_RD_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
 		}

 		PFM_REG_RETFLAG_SET(tmp.reg_flags, ret);

-		DBprintk(("read pmd[%u] ret=%d soft_pmd=0x%lx reg=0x%lx pmc=0x%lx\n", 
-					cnum, ret, ctx_val, reg_val, 
-					ia64_get_pmc(cnum)));
+		DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n", 
+					cnum, ret, val, ia64_get_pmc(cnum)));

 		if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
 	}
@@ -1553,15 +1558,11 @@ pfm_use_debug_registers(struct task_struct *task)
 	 */
 	if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;

-	/*
-	 * XXX: not pretty
-	 */
 	LOCK_PFS();

 	/*
-	 * We only allow the use of debug registers when there is no system
-	 * wide monitoring 
-	 * XXX: we could relax this by 
+	 * We cannot allow setting breakpoints when system wide monitoring
+	 * sessions are using the debug registers.
 	 */
 	if (pfm_sessions.pfs_sys_use_dbregs> 0)
 		ret = -1;
@@ -1921,7 +1922,6 @@ typedef union {
 	dbr_mask_reg_t dbr;
 } dbreg_t;

-
 static int
 pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs)
 {
@@ -1963,8 +1963,8 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
 	if (ctx->ctx_fl_system) {
 		/* we mark ourselves as owner  of the debug registers */
 		ctx->ctx_fl_using_dbreg = 1;
-	} else {
-       		if (ctx->ctx_fl_using_dbreg == 0) {
+		DBprintk(("system-wide setting fl_using_dbreg for [%d]\n", task->pid));
+	} else if (first_time) {
 			ret= -EBUSY;
 			if ((thread->flags & IA64_THREAD_DBG_VALID) != 0) {
 				DBprintk(("debug registers already in use for [%d]\n", task->pid));
@@ -1973,6 +1973,7 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
 			/* we mark ourselves as owner  of the debug registers */
 			ctx->ctx_fl_using_dbreg = 1;

+			DBprintk(("setting fl_using_dbreg for [%d]\n", task->pid));
 			/* 
 			 * Given debug registers cannot be used for both debugging 
 			 * and performance monitoring at the same time, we reuse
@@ -1980,20 +1981,27 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
 			 */
 			memset(task->thread.dbr, 0, sizeof(task->thread.dbr));
 			memset(task->thread.ibr, 0, sizeof(task->thread.ibr));
+	}

-			/*
-			 * clear hardware registers to make sure we don't
-			 * pick up stale state
-			 */
-			for (i=0; i < pmu_conf.num_ibrs; i++) {
-				ia64_set_ibr(i, 0UL);
-			}
-			ia64_srlz_i();
-			for (i=0; i < pmu_conf.num_dbrs; i++) {
-				ia64_set_dbr(i, 0UL);
-			}
-			ia64_srlz_d();
+	if (first_time) {
+		DBprintk(("[%d] clearing ibrs,dbrs\n", task->pid));
+		/*
+	 	 * clear hardware registers to make sure we don't
+	 	 * pick up stale state. 
+		 *
+		 * for a system wide session, we do not use
+		 * thread.dbr, thread.ibr because this process
+		 * never leaves the current CPU and the state
+		 * is shared by all processes running on it
+	 	 */
+		for (i=0; i < pmu_conf.num_ibrs; i++) {
+			ia64_set_ibr(i, 0UL);
 		}
+		ia64_srlz_i();
+		for (i=0; i < pmu_conf.num_dbrs; i++) {
+			ia64_set_dbr(i, 0UL);
+		}
+		ia64_srlz_d();
 	}

 	ret = -EFAULT;
@@ -2361,9 +2369,9 @@ sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6,
 {
 	struct pt_regs *regs = (struct pt_regs *)&stack;
 	struct task_struct *task = current;
-	pfm_context_t *ctx = task->thread.pfm_context;
+	pfm_context_t *ctx;
 	size_t sz;
-	int ret = -ESRCH, narg;
+	int ret, narg;

 	/* 
 	 * reject any call if perfmon was disabled at initialization time
@@ -2393,6 +2401,8 @@ sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6,

 		if (pid != current->pid) {

+			ret = -ESRCH;
+
 			read_lock(&tasklist_lock);

 			task = find_task_by_pid(pid);
@@ -2407,10 +2417,11 @@ sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6,
 				ret = check_task_state(task);
 				if (ret != 0) goto abort_call;
 			}
-			ctx = task->thread.pfm_context;
 		}
 	} 

+	ctx = task->thread.pfm_context;
+
 	if (PFM_CMD_USE_CTX(cmd)) {
 		ret = -EINVAL;
 	       if (ctx == NULL) {
@@ -2953,11 +2964,6 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
 static int
 perfmon_proc_info(char *page)
 {
-#ifdef CONFIG_SMP
-#define cpu_is_online(i) (cpu_online_map & (1UL << i))
-#else
-#define cpu_is_online(i)        1
-#endif
 	char *p = page;
 	int i;


--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -542,7 +542,18 @@ cpu_init (void)
 	extern char __per_cpu_end[];
 	int cpu = smp_processor_id();

-	my_cpu_data = alloc_bootmem_pages(__per_cpu_end - __per_cpu_start);
+	if (__per_cpu_end - __per_cpu_start > PAGE_SIZE)
+		panic("Per-cpu data area too big! (%Zu > %Zu)",
+		      __per_cpu_end - __per_cpu_start, PAGE_SIZE);
+
+	/*
+	 * On the BSP, the page allocator isn't initialized by the time we get here.  On
+	 * the APs, the bootmem allocator is no longer available...
+	 */
+	if (cpu == 0)
+		my_cpu_data = alloc_bootmem_pages(__per_cpu_end - __per_cpu_start);
+	else
+		my_cpu_data = (void *) get_free_page(GFP_KERNEL);
 	memcpy(my_cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
 	__per_cpu_offset[cpu] = (char *) my_cpu_data - __per_cpu_start;
 	my_cpu_info = my_cpu_data + ((char *) &cpu_info - __per_cpu_start);

--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -428,7 +428,7 @@ do_boot_cpu (int sapicid)

 	task_for_booting_cpu = idle;

-	Dprintk("Sending wakeup vector %u to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
+	Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);

 	platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);

@@ -543,7 +543,7 @@ smp_boot_cpus (void)

 		printk("Before bogomips.\n");
 		if (!cpucount) {
-			printk(KERN_ERR "Error: only one processor found.\n");
+			printk(KERN_WARNING "Warning: only one processor found.\n");
 		} else {
 			unsigned long bogosum = 0;
  			for (cpu = 0; cpu < NR_CPUS; cpu++)

--- a/include/asm-ia64/offsets.h
+++ b/include/asm-ia64/offsets.h
@@ -6,6 +6,8 @@
 * This file was generated by arch/ia64/tools/print_offsets.awk.
 *
 */
+
+#define CLONE_IDLETASK_BIT	12
 #define IA64_TASK_SIZE			3952	/* 0xf70 */
 #define IA64_THREAD_INFO_SIZE		32	/* 0x20 */
 #define IA64_PT_REGS_SIZE		400	/* 0x190 */
@@ -16,6 +18,7 @@
 #define UNW_FRAME_INFO_SIZE		448	/* 0x1c0 */

 #define IA64_TASK_THREAD_KSP_OFFSET	1496	/* 0x5d8 */
+#define IA64_TASK_PID_OFFSET		212	/* 0xd4 */
 #define IA64_PT_REGS_CR_IPSR_OFFSET	0	/* 0x0 */
 #define IA64_PT_REGS_CR_IIP_OFFSET	8	/* 0x8 */
 #define IA64_PT_REGS_CR_IFS_OFFSET	16	/* 0x10 */

--- a/include/asm-ia64/perfmon.h
+++ b/include/asm-ia64/perfmon.h
@@ -172,9 +172,7 @@ extern int  pfm_use_debug_registers(struct task_struct *);
 extern int  pfm_release_debug_registers(struct task_struct *);
 extern int  pfm_cleanup_smpl_buf(struct task_struct *);
 extern void pfm_syst_wide_update_task(struct task_struct *, int);
-extern void pfm_ovfl_block_reset (void);
-
-extern int pfm_syst_wide;
+extern void pfm_ovfl_block_reset(void);

 #endif /* __KERNEL__ */


--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -13,6 +13,7 @@
 * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
 */
 #include <linux/config.h>
+#include <linux/percpu.h>

 #include <asm/kregs.h>
 #include <asm/page.h>
@@ -386,7 +387,8 @@ extern void ia64_save_extra (struct task_struct *task);
 extern void ia64_load_extra (struct task_struct *task);

 #if defined(CONFIG_SMP) && defined(CONFIG_PERFMON)
-# define PERFMON_IS_SYSWIDE() (local_cpu_data->pfm_syst_wide != 0)
+  extern int __per_cpu_data pfm_syst_wide;
+# define PERFMON_IS_SYSWIDE() (this_cpu(pfm_syst_wide) != 0)
 #else
 # define PERFMON_IS_SYSWIDE() (0)
 #endif

--- a/include/asm-ia64/tlb.h
+++ b/include/asm-ia64/tlb.h
-/* XXX fix me! */
+#ifndef _ASM_IA64_TLB_H
+#define _ASM_IA64_TLB_H
+/*
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * This file was derived from asm-generic/tlb.h.
+ */
+/*
+ * Removing a translation from a page table (including TLB-shootdown) is a four-step
+ * procedure:
+ *
+ *	(1) Flush (virtual) caches --- ensures virtual memory is coherent with kernel memory
+ *	    (this is a no-op on ia64).
+ *	(2) Clear the relevant portions of the page-table
+ *	(3) Flush the TLBs --- ensures that stale content is gone from CPU TLBs
+ *	(4) Release the pages that were freed up in step (2).
+ *
+ * Note that the ordering of these steps is crucial to avoid races on MP machines.
+ *
+ * The Linux kernel defines several platform-specific hooks for TLB-shootdown.  When
+ * unmapping a portion of the virtual address space, these hooks are called according to
+ * the following template:
+ *
+ *	tlb <- tlb_gather_mmu(mm);		// start unmap for address space MM
+ *	{
+ *	  for each vma that needs a shootdown do {
+ *	    tlb_start_vma(tlb, vma);
+ *	      for each page-table-entry PTE that needs to be removed do {
+ *		tlb_remove_tlb_entry(tlb, pte, address);
+ *		if (pte refers to a normal page) {
+ *		  tlb_remove_page(tlb, page);
+ *		}
+ *	      }
+ *	    tlb_end_vma(tlb, vma);
+ *	  }
+ *	}
+ *	tlb_finish_mmu(tlb, start, end);	// finish unmap for address space MM
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_SMP
+# define FREE_PTE_NR		2048	/* number of slots in mmu_gather_t.pages[] */
+# define tlb_fast_mode(tlb)	((tlb)->nr == ~0UL)
+#else
+# define FREE_PTE_NR		0	/* nothing is gathered: fast mode is unconditional below */
+# define tlb_fast_mode(tlb)	(1)
+#endif
+
+typedef struct {
+	struct mm_struct	*mm;	/* address space being unmapped; set by tlb_gather_mmu() */
+	unsigned long		nr;	/* == ~0UL => fast mode; otherwise number of entries used in pages[] */
+	unsigned long		freed;	/* number of pages freed */
+	unsigned long		start_addr;	/* first address recorded since last reset; ~0UL if none */
+	unsigned long		end_addr;	/* end (address + PAGE_SIZE) of the most recently recorded entry */
+	struct page 		*pages[FREE_PTE_NR];	/* pages queued by tlb_remove_page(), not yet released */
+} mmu_gather_t;
+
+/* Users of the generic TLB shootdown code must declare this storage space. */
+extern mmu_gather_t	mmu_gathers[NR_CPUS];
+
+/*
+ * Flush the TLB for address range START to END and, if not in fast mode, release the
+ * freed pages that were gathered up to this point.
+ *
+ * A range of a terabyte or more is handled by flushing the whole address space of
+ * tlb->mm.  A smaller range is flushed explicitly, followed by a flush of the range
+ * ia64_thash(START) to ia64_thash(END).  When pages are released, tlb->nr is reset to 0
+ * and tlb->start_addr to ~0UL so that gathering starts afresh.
+ */
+static inline void
+ia64_tlb_flush_mmu (mmu_gather_t *tlb, unsigned long start, unsigned long end)
+{
+	unsigned long nr;
+
+	if (end - start >= 1024*1024*1024*1024UL) {
+		/*
+		 * If we flush more than a tera-byte, we're probably better off just
+		 * flushing the entire address space.
+		 */
+		flush_tlb_mm(tlb->mm);
+	} else {
+		/*
+		 * XXX fix me: flush_tlb_range() should take an mm pointer instead of a
+		 * vma pointer.
+		 */
+		struct vm_area_struct vma;
+
+		vma.vm_mm = tlb->mm;	/* only vm_mm is filled in; the vma exists just to carry the mm */
+		/* flush the address range from the tlb: */
+		flush_tlb_range(&vma, start, end);
+		/* now flush the virt. page-table area mapping the address range: */
+		flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
+	}
+
+	/* lastly, release the freed pages */
+	nr = tlb->nr;	/* sample the count before it is reset below */
+	if (!tlb_fast_mode(tlb)) {
+		unsigned long i;
+		tlb->nr = 0;
+		tlb->start_addr = ~0UL;
+		for (i = 0; i < nr; ++i)
+			free_page_and_swap_cache(tlb->pages[i]);
+	}
+}
+
+/*
+ * Return a pointer to an initialized mmu_gather_t.
+ *
+ * The structure returned is the mmu_gathers[] slot indexed by smp_processor_id().  It is
+ * bound to MM and starts out with no pages freed and no address recorded
+ * (start_addr == ~0UL).  tlb->nr selects the mode: ~0UL for fast mode, 0 for gathering.
+ */
+static inline mmu_gather_t *
+tlb_gather_mmu (struct mm_struct *mm)
+{
+	mmu_gather_t *tlb = &mmu_gathers[smp_processor_id()];
+
+	tlb->mm = mm;
+	tlb->freed = 0;
+	tlb->start_addr = ~0UL;
+
+	/* Use fast mode if only one CPU is online */
+	tlb->nr = smp_num_cpus > 1 ? 0UL : ~0UL;
+	return tlb;
+}
+
+/*
+ * Called at the end of the shootdown operation to free up any resources that were
+ * collected.  The page table lock is still held at this point.
+ *
+ * tlb->freed is subtracted from mm->rss, clamped so that rss does not drop below zero.
+ * The TLB is then flushed for the caller-supplied range START to END, and finally
+ * check_pgt_cache() is called.
+ */
+static inline void
+tlb_finish_mmu (mmu_gather_t *tlb, unsigned long start, unsigned long end)
+{
+	unsigned long freed = tlb->freed;
+	struct mm_struct *mm = tlb->mm;
+	unsigned long rss = mm->rss;
+
+	if (rss < freed)
+		freed = rss;	/* never subtract more than rss holds */
+	mm->rss = rss - freed;
+	/*
+	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
+	 * tlb->end_addr.
+	 */
+	ia64_tlb_flush_mmu(tlb, start, end);
+
+	/* keep the page table cache within bounds */
+	check_pgt_cache();
+}
+
+/*
+ * Remove TLB entry for PTE mapped at virtual address ADDRESS.  This is called for any
+ * PTE, not just those pointing to (normal) physical memory.
+ *
+ * Nothing is flushed here: the address is only recorded in TLB.  The first address seen
+ * after a reset becomes start_addr, and end_addr is moved to the end of the page at
+ * ADDRESS on every call.  PTE itself is not examined.
+ */
+static inline void
+tlb_remove_tlb_entry (mmu_gather_t *tlb, pte_t pte, unsigned long address)
+{
+	if (tlb->start_addr == ~0UL)
+		tlb->start_addr = address;
+	tlb->end_addr = address + PAGE_SIZE;
+}
+
+/*
+ * Logically, this routine frees PAGE.  On MP machines, the actual freeing of the page
+ * must be delayed until after the TLB has been flushed (see comments at the beginning of
+ * this file).
+ *
+ * In fast mode the page is freed right away.  Otherwise it is queued in tlb->pages[],
+ * and once FREE_PTE_NR pages are queued the range tlb->start_addr to tlb->end_addr is
+ * flushed, which also releases the queued pages.
+ */
+static inline void
+tlb_remove_page (mmu_gather_t *tlb, struct page *page)
+{
+	if (tlb_fast_mode(tlb)) {
+		free_page_and_swap_cache(page);
+		return;
+	}
+	tlb->pages[tlb->nr++] = page;
+	if (tlb->nr >= FREE_PTE_NR)
+		ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
+}
+
 #define tlb_start_vma(tlb, vma)			do { } while (0)
 #define tlb_end_vma(tlb, vma)			do { } while (0)
-#define tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
-#define tlb_flush(tlb)				flush_tlb_mm((tlb)->mm)

-#include <asm-generic/tlb.h>
+#endif /* _ASM_IA64_TLB_H */
--- a/include/asm-ia64/tlbflush.h
+++ b/include/asm-ia64/tlbflush.h
@@ -70,12 +70,10 @@ flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
 static inline void
 flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long end)
 {
-	struct vm_area_struct vma;
-
-	if (REGION_NUMBER(start) != REGION_NUMBER(end))
-		printk("flush_tlb_pgtables: can't flush across regions!!\n");
-	vma.vm_mm = mm;
-	flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
+	/*
+	 * Deprecated.  The virtual page table is now flushed via the normal gather/flush
+	 * interface (see tlb.h).
+	 */
 }

 #define flush_tlb_kernel_range(start, end)	flush_tlb_all()	/* XXX fix me */