Commit b015de48 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] ppc64: shared processor support, from Dave Engebretsen

From: Anton Blanchard <anton@samba.org>

Initial round of code to add shared processor support into 2.6.  This adds
h_call interfaces, paca/VPA fields, and vpa register.  It also adds new idle
loop code.
parent 5a843935
......@@ -273,7 +273,7 @@ chrp_init(unsigned long r3, unsigned long r4, unsigned long r5,
ppc_md.progress = chrp_progress;
/* build up the firmware_features bitmask field
/* Build up the firmware_features bitmask field
* using contents of device-tree/ibm,hypertas-functions.
* Ultimately this functionality may be moved into prom.c prom_init().
*/
......
/*
* idle.c
* Idle daemon for PowerPC. Idle daemon will handle any action
* that needs to be taken when the system becomes idle.
*
* Originally Written by Cort Dougan (cort@cs.nmt.edu)
*
* iSeries support added by Mike Corrigan <mikejc@us.ibm.com>
*
* Additional shared processor, SMT, and firmware support
* Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
......@@ -26,79 +34,115 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/time.h>
#ifdef CONFIG_PPC_ISERIES
#include <asm/iSeries/LparData.h>
#include <asm/iSeries/HvCall.h>
#include <asm/iSeries/ItLpQueue.h>
unsigned long maxYieldTime = 0;
unsigned long minYieldTime = 0xffffffffffffffffUL;
extern long cede_processor(void);
extern long poll_pending(void);
int (*idle_loop)(void);
#ifdef CONFIG_PPC_ISERIES
/*
 * yield_shared_processor - give an idle shared iSeries processor back to
 * the hypervisor.
 *
 * Enables the IPI, LP event, LP prod and timeout wake-up sources, performs
 * a timed yield of one jiffy, and then, if no LP event is pending for this
 * processor, marks the processor yielded/idle and yields again until the
 * next jiffy update.  The duration measured after the first yield is folded
 * into the maxYieldTime/minYieldTime statistics.  Pending iSeries events
 * are processed before returning.
 *
 * NOTE(review): two HvCall_yieldProcessor() calls are issued per
 * invocation; confirm that both are intended.
 */
static void yield_shared_processor(void)
{
	unsigned long tb;
	unsigned long yieldTime;
	struct paca_struct *lpaca = get_paca();

	HvCall_setEnabledInterrupts(HvCall_MaskIPI |
				    HvCall_MaskLpEvent |
				    HvCall_MaskLpProd |
				    HvCall_MaskTimeout);

	tb = get_tb();
	/* Compute future tb value when yield should expire */
	HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy);

	/*
	 * Check this processor's own LP queue (lpaca), not element 0 of
	 * the global paca array.
	 */
	if (!ItLpQueue_isLpIntPending(lpaca->lpQueuePtr)) {
		/*
		 * Compute future tb value when yield should expire.
		 * We want to be woken up when the next decrementer is
		 * to fire.
		 */
		local_irq_disable();
		lpaca->yielded = 1;		/* Indicate a prod is desired */
		lpaca->xLpPaca.xIdle = 1;	/* Inform the HV we are idle */

		yieldTime = get_tb() - tb;
		if (yieldTime > maxYieldTime)
			maxYieldTime = yieldTime;

		HvCall_yieldProcessor(HvCall_YieldTimed,
				      lpaca->next_jiffy_update_tb);

		if (yieldTime < minYieldTime)
			minYieldTime = yieldTime;

		lpaca->yielded = 0;		/* Back to IPI's */
		local_irq_enable();

		/*
		 * The decrementer stops during the yield.  Force a fake
		 * decrementer here and let the timer_interrupt code sort
		 * out the actual time.
		 */
		lpaca->xLpPaca.xIntDword.xFields.xDecrInt = 1;
	}

	process_iSeries_events();
}
int cpu_idle(void)
/*
 * iSeries_idle - idle loop assigned to idle_loop when CONFIG_PPC_ISERIES
 * is defined.
 *
 * On a shared processor (xLpPaca.xSharedProc set) pending LP events are
 * processed and the processor is handed back through
 * yield_shared_processor().  Otherwise need_resched is exchanged for -1
 * and, if it was previously zero, LP events are polled until need_resched
 * is no longer -1.  Once a reschedule is requested, xIdle is cleared and
 * schedule() is called.
 *
 * NOTE(review): as written this block opens two loops ("while (1) {" and
 * "for (;;) {") but only one of them is closed, so a closing brace appears
 * to be missing, and need_resched is tested twice in a row before the
 * yield.  This looks like two revisions of the loop merged together;
 * confirm which variant is intended before building.
 */
int iSeries_idle(void)
{
struct paca_struct *lpaca;
long oldval;
unsigned long CTRL;
#warning fix iseries run light
/* Compiled out until the run-light handling flagged above is fixed. */
#if 0
/* endless loop with no priority at all */
current->nice = 20;
current->counter = -100;
/* ensure iSeries run light will be out when idle */
current->thread.flags &= ~PPC_FLAG_RUN_LIGHT;
CTRL = mfspr(CTRLF);
CTRL &= ~RUNLATCH;
mtspr(CTRLT, CTRL);
#endif
init_idle();
lpaca = get_paca();
while (1) {
for (;;) {
if (lpaca->xLpPaca.xSharedProc) {
/* Shared processor: drain pending LP events, then yield if idle. */
if (ItLpQueue_isLpIntPending(lpaca->lpQueuePtr))
process_iSeries_events();
if (!need_resched())
if (!current->need_resched)
yield_shared_processor();
} else {
/* Avoid an IPI by setting need_resched */
oldval = xchg(&current->need_resched, -1);
if (!oldval) {
/* Poll for LP events until need_resched is changed from -1. */
while(current->need_resched == -1) {
HMT_medium();
if (ItLpQueue_isLpIntPending(lpaca->lpQueuePtr))
process_iSeries_events();
HMT_low();
}
}
}
HMT_medium();
/* A reschedule was requested: clear the idle indication and run it. */
if (current->need_resched) {
lpaca->xLpPaca.xIdle = 0;
schedule();
check_pgt_cache();
}
}
return 0;
}
#endif
int default_idle(void)
{
long oldval;
while (1) {
oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
if (!oldval) {
set_thread_flag(TIF_POLLING_NRFLAG);
while (!need_resched()) {
HMT_medium();
if (ItLpQueue_isLpIntPending(lpaca->lpQueuePtr))
process_iSeries_events();
barrier();
HMT_low();
}
......@@ -107,47 +151,160 @@ int cpu_idle(void)
} else {
set_need_resched();
}
}
if (need_resched())
schedule();
}
return 0;
}
#else /* CONFIG_PPC_ISERIES */
int cpu_idle(void)
/*
 * dedicated_idle - idle loop selected by idle_setup() when the SPLPAR
 * firmware feature is present but the processor is not shared.
 *
 * Each pass marks the processor idle in the lppaca, then polls for work.
 * For the first naca->smt_snooze_delay microseconds the thread merely
 * lowers its priority.  After that it either cedes to the hypervisor
 * (SMT dynamic mode with a busy partner thread) or gives the hypervisor a
 * chance to run pending work via poll_pending().  When a reschedule is
 * requested the idle indication is cleared and schedule() is called.
 *
 * Never returns in practice; the trailing return only satisfies the
 * int (*idle_loop)(void) signature.
 */
int dedicated_idle(void)
{
	long oldval;
	struct paca_struct *lpaca = get_paca(), *ppaca;
	unsigned long start_snooze;

	/* The partner thread's paca is the one whose index differs in bit 0. */
	ppaca = &paca[(lpaca->xPacaIndex) ^ 1];

	while (1) {
		/* Indicate to the HV that we are idle.  Now would be
		 * a good time to find other work to dispatch. */
		lpaca->xLpPaca.xIdle = 1;

		/*
		 * Test-and-clear exactly once per pass: a second call would
		 * always find the flag already cleared and silently drop a
		 * pending reschedule request.
		 */
		oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
		if (!oldval) {
			set_thread_flag(TIF_POLLING_NRFLAG);
			start_snooze = __get_tb();
			while (!need_resched()) {
				/*
				 * NOTE(review): the barrier()/HMT_low() pair
				 * here and the HMT_medium() ahead of
				 * HMT_very_low() below look redundant next to
				 * the priority changes around them; confirm
				 * they are intended.
				 */
				barrier();
				HMT_low();
				/* need_resched could be 1 or 0 at this
				 * point.  If it is 0, set it to 0, so
				 * an IPI/Prod is sent.  If it is 1, keep
				 * it that way & schedule work.
				 */
				if (__get_tb() <
				    (start_snooze +
				     naca->smt_snooze_delay*tb_ticks_per_usec)) {
					HMT_low(); /* Low thread priority */
					continue;
				}

				HMT_medium();
				HMT_very_low(); /* Low power mode */

				/* If the SMT mode is system controlled & the
				 * partner thread is doing work, switch into
				 * ST mode.
				 */
				if ((naca->smt_state == SMT_DYNAMIC) &&
				    (!(ppaca->xLpPaca.xIdle))) {
					/* Indicate we are no longer polling for
					 * work, and then clear need_resched.  If
					 * need_resched was 1, set it back to 1
					 * and schedule work
					 */
					clear_thread_flag(TIF_POLLING_NRFLAG);
					oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
					if (oldval == 1) {
						set_need_resched();
						break;
					}

					/* DRENG: Go HMT_medium here ? */
					local_irq_disable();
					lpaca->yielded = 1;

					/* SMT dynamic mode.  Cede will result
					 * in this thread going dormant, if the
					 * partner thread is still doing work.
					 * Thread wakes up if partner goes idle,
					 * an interrupt is presented, or a prod
					 * occurs.  Returning from the cede
					 * enables external interrupts.
					 */
					cede_processor();

					lpaca->yielded = 0;
				} else {
					/* Give the HV an opportunity at the
					 * processor, since we are not doing
					 * any work.
					 */
					poll_pending();
				}
			}
		} else {
			/* The flag was set when we cleared it: restore it. */
			set_need_resched();
		}

		HMT_medium();
		lpaca->xLpPaca.xIdle = 0;
		schedule();
	}
	return 0;
}
/*
 * shared_idle - idle loop selected by idle_setup() for a shared processor.
 *
 * Each pass flags the processor as idle in the lppaca; if no reschedule is
 * pending it cedes the processor to the hypervisor, then clears the idle
 * flag and calls schedule().  The loop has no exit; the final return only
 * matches the int (*idle_loop)(void) signature.
 */
int shared_idle(void)
{
	struct paca_struct *self = get_paca();

	for (;;) {
		/*
		 * Tell the hypervisor this processor is idle so it can
		 * dispatch other work here.
		 */
		self->xLpPaca.xIdle = 1;

		if (!need_resched()) {
			local_irq_disable();
			self->yielded = 1;

			/*
			 * Hand the processor to the hypervisor.  Control
			 * comes back after an external interrupt (which is
			 * driven before we resume here) or after a prod from
			 * another processor.  External interrupts are enabled
			 * again on return.
			 */
			cede_processor();

			self->yielded = 0;
		}

		/* Back to normal priority and no longer idle: go run work. */
		HMT_medium();
		self->xLpPaca.xIdle = 0;
		schedule();
	}

	return 0;
}
#endif /* CONFIG_PPC_ISERIES */
int cpu_idle(void)
{
idle_loop();
return 0;
}
void default_idle(void)
/*
 * idle_setup - choose the idle loop implementation and store it in
 * idle_loop.
 *
 * With CONFIG_PPC_ISERIES the iSeries loop is always used.  Otherwise:
 *   - platform without PLATFORM_PSERIES bits -> default_idle
 *   - pSeries without FW_FEATURE_SPLPAR      -> default_idle
 *   - SPLPAR, shared processor               -> shared_idle
 *   - SPLPAR, dedicated processor            -> dedicated_idle
 * The choice is reported with printk().
 *
 * Always returns 1.
 */
int idle_setup(void)
{
	/* NOTE(review): purpose of this barrier() is not evident here; confirm. */
	barrier();

#ifdef CONFIG_PPC_ISERIES
	idle_loop = iSeries_idle;
#else
	if (!(systemcfg->platform & PLATFORM_PSERIES)) {
		printk("idle_setup: unknown platform, use default_idle\n");
		idle_loop = default_idle;
	} else if (!(cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)) {
		printk("idle = default_idle\n");
		idle_loop = default_idle;
	} else if (get_paca()->xLpPaca.xSharedProc) {
		printk("idle = shared_idle\n");
		idle_loop = shared_idle;
	} else {
		printk("idle = dedicated_idle\n");
		idle_loop = dedicated_idle;
	}
#endif

	return 1;
}
......@@ -37,6 +37,31 @@
#include <asm/hvcall.h>
#include <asm/prom.h>
/*
 * poll_pending - issue the H_POLL_PENDING hypervisor call.
 *
 * The three output words of the call are not used; they are all written
 * to the same scratch variable.  Returns the status from plpar_hcall().
 */
long poll_pending(void)
{
	unsigned long scratch;
	long status;

	status = plpar_hcall(H_POLL_PENDING, 0, 0, 0, 0,
			     &scratch, &scratch, &scratch);
	return status;
}
/*
 * prod_processor - issue the H_PROD hypervisor call.
 *
 * The hypervisor status is discarded; always returns 0.
 *
 * NOTE(review): no arguments are passed with the opcode; confirm whether
 * H_PROD expects a target processor.
 */
long prod_processor(void)
{
	(void)plpar_hcall_norets(H_PROD);

	return 0;
}
/*
 * cede_processor - issue the H_CEDE hypervisor call, giving up this
 * processor until the hypervisor returns control.
 *
 * The hypervisor status is discarded; always returns 0.
 */
long cede_processor(void)
{
	(void)plpar_hcall_norets(H_CEDE);

	return 0;
}
/*
 * register_vpa - issue the H_REGISTER_VPA hypervisor call.
 *
 * @flags: flags word passed through to the hypervisor
 * @proc:  processor the area belongs to
 * @vpa:   address of the area, passed through unchanged
 *
 * The hypervisor status is discarded; always returns 0.
 */
long register_vpa(unsigned long flags, unsigned long proc, unsigned long vpa)
{
	(void)plpar_hcall_norets(H_REGISTER_VPA, flags, proc, vpa);

	return 0;
}
long plpar_pte_remove(unsigned long flags,
unsigned long ptex,
unsigned long avpn,
......
......@@ -58,6 +58,7 @@ extern void iSeries_init_early( void );
extern void pSeries_init_early( void );
extern void pSeriesLP_init_early(void);
extern void mm_init_ppc64( void );
extern void vpa_init(int cpu);
unsigned long decr_overclock = 1;
unsigned long decr_overclock_proc0 = 1;
......@@ -211,6 +212,13 @@ void setup_system(unsigned long r3, unsigned long r4, unsigned long r5,
mm_init_ppc64();
if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
vpa_init(boot_cpuid);
}
/* Select the correct idle loop for the platform. */
idle_setup();
switch (systemcfg->platform) {
#ifdef CONFIG_PPC_ISERIES
case PLATFORM_ISERIES_LPAR:
......
......@@ -266,6 +266,16 @@ static void __init smp_space_timers(unsigned int max_cpus)
}
#ifdef CONFIG_PPC_PSERIES
/*
 * vpa_init - register the Virtual Processor Area (VPA) of a cpu.
 *
 * @cpu: index into the paca array of the processor being set up
 *
 * Logs the registration, sets the SLB restore highwater mark in the cpu's
 * lppaca to 64 and registers the lppaca's physical address through
 * register_vpa().  The result of register_vpa() is not checked.
 */
void vpa_init(int cpu)
{
	/* NOTE(review): meaning of this flag bit comes from the hypervisor interface; confirm. */
	unsigned long flags = 1UL << (63 - 18);
	unsigned long vpa_addr;

	/* Register the Virtual Processor Area (VPA) */
	printk(KERN_INFO "register_vpa: cpu 0x%x\n", cpu);

	paca[cpu].xLpPaca.xSLBCount = 64; /* SLB restore highwater mark */

	vpa_addr = __pa((unsigned long)&(paca[cpu].xLpPaca));
	register_vpa(flags, cpu, vpa_addr);
}
static void __devinit pSeries_setup_cpu(int cpu)
{
if (OpenPIC_Addr) {
......@@ -670,6 +680,12 @@ int __devinit start_secondary(void *unused)
if (smp_ops->take_timebase)
smp_ops->take_timebase();
get_paca()->yielded = 0;
if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
vpa_init(cpu);
}
local_irq_enable();
return cpu_idle(NULL);
......
......@@ -9,6 +9,14 @@
#define H_PTEG_Full -6 /* PTEG is full */
#define H_Not_Found -7 /* PTE was not found" */
#define H_Reserved_DABR -8 /* DABR address is reserved by the hypervisor on this processor" */
#define H_NoMem -9
#define H_Authority -10
#define H_Permission -11
#define H_Dropped -12
#define H_SourceParm -13
#define H_DestParm -14
#define H_RemoteParm -15
#define H_Resource -16
/* Flags */
#define H_LARGE_PAGE (1UL<<(63-16))
......@@ -58,6 +66,16 @@
#define H_IPOLL 0x70
#define H_XIRR 0x74
#define H_PERFMON 0x7c
#define H_MIGRATE_DMA 0x78
#define H_REGISTER_VPA 0xDC
#define H_CEDE 0xE0
#define H_CONFER 0xE4
#define H_PROD 0xE8
#define H_GET_PPP 0xEC
#define H_SET_PPP 0xF0
#define H_SET_PURR 0xF4
#define H_PIC 0xF8
#define H_POLL_PENDING 0x1D8
/* plpar_hcall() -- Generic call interface using above opcodes
*
......
......@@ -110,7 +110,10 @@ struct ItLpPaca
u64 xPDCSavedSPRG1; // Saved SPRG1 for PMC int x68-x6F
u64 xPDCSavedSRR0; // Saved SRR0 for PMC int x70-x77
volatile u32 xVirtualDecr; // Virtual DECR for shared procsx78-x7B
u32 xRsvd2_2; // Reserved x7C-x7F
u16 xSLBCount; // # of SLBs to maintain x7C-x7D
u8 xIdle; // Indicate OS is idle x7E
u8 xRsvd2_2; // Reserved x7F
//=============================================================================
// CACHE_LINE_3 0x0100 - 0x007F: This line is shared with other processors
......
......@@ -42,23 +42,28 @@ static inline void isync(void)
#endif
/* Macros for adjusting thread priority (hardware multi-threading) */
#if defined(CONFIG_PPC_ISERIES) || defined(CONFIG_HMT)
#define HMT_very_low() asm volatile("or 31,31,31 # very low priority")
#define HMT_low() asm volatile("or 1,1,1 # low priority")
#define HMT_medium_low() asm volatile("or 6,6,6 # medium low priority")
#define HMT_medium() asm volatile("or 2,2,2 # medium priority")
#define HMT_medium_high() asm volatile("or 5,5,5 # medium high priority")
#define HMT_high() asm volatile("or 3,3,3 # high priority")
#define HMT_VERY_LOW "\tor 31,31,31 # very low priority\n"
#define HMT_LOW "\tor 1,1,1 # low priority\n"
#define HMT_MEDIUM_LOW "\tor 6,6,6 # medium low priority\n"
#define HMT_MEDIUM "\tor 2,2,2 # medium priority\n"
#define HMT_MEDIUM_HIGH "\tor 5,5,5 # medium high priority\n"
#define HMT_HIGH "\tor 3,3,3 # high priority\n"
#else
#define HMT_low() do { } while(0)
#define HMT_medium() do { } while(0)
#define HMT_high() do { } while(0)
#define HMT_LOW
#define HMT_MEDIUM
#define HMT_HIGH
#endif
/*
* Various operational modes for SMT
* Off : never run threaded
* On : always run threaded
* Dynamic: Allow the system to switch modes as needed
*/
#define SMT_OFF 0
#define SMT_ON 1
#define SMT_DYNAMIC 2
#endif
......@@ -37,7 +37,12 @@ struct naca_struct {
u32 dCacheL1LinesPerPage; /* L1 d-cache lines / page 0x64 */
u32 iCacheL1LogLineSize; /* L1 i-cache line size Log2 0x68 */
u32 iCacheL1LinesPerPage; /* L1 i-cache lines / page 0x6c */
u64 resv0[2]; /* Reserved 0x70 - 0x7F */
u64 smt_snooze_delay; /* Delay (in usec) before 0x70 */
/* entering ST mode */
u8 smt_state; /* 0 = SMT off 0x78 */
/* 1 = SMT on */
/* 2 = SMT dynamic */
u8 resv0[7]; /* Reserved 0x70 - 0x7F */
};
extern struct naca_struct *naca;
......
......@@ -94,7 +94,9 @@ struct paca_struct {
u32 *prof_buffer; /* iSeries profiling buffer 0x38 */
u32 *prof_stext; /* iSeries start of kernel text 0x40 */
u32 prof_len; /* iSeries length of profile buffer -1 0x48 */
u8 rsvd2[128-76]; /* 0x4C */
u8 yielded; /* 0 = this processor is running 0x4c */
/* 1 = this processor is yielded */
u8 rsvd2[128-77]; /* 0x49 */
/*=====================================================================================
* CACHE_LINE_3 0x0100 - 0x017F
......@@ -117,7 +119,7 @@ struct paca_struct {
struct ItLpRegSave xRegSav; /* Register save for proc */
/*=====================================================================================
* CACHE_LINE_17-18 0x0800 - 0x0EFF Reserved
* CACHE_LINE_17-18 0x0800 - 0x08FF Reserved
*=====================================================================================
*/
struct rtas_args xRtas; /* Per processor RTAS struct */
......@@ -126,10 +128,12 @@ struct paca_struct {
u8 rsvd5[256-16-sizeof(struct rtas_args)];
/*=====================================================================================
* CACHE_LINE_19-30 0x0800 - 0x0EFF Reserved
* CACHE_LINE_19-30 0x0900 - 0x0EFF Reserved
*=====================================================================================
*/
u8 rsvd6[0x600];
u64 slb_shadow[0x20];
u64 dispatch_log;
u8 rsvd6[0x500 - 0x8];
/*=====================================================================================
* CACHE_LINE_31 0x0F00 - 0x0F7F Exception stack
......
......@@ -378,6 +378,7 @@
#define PLATFORM_PSERIES 0x0100
#define PLATFORM_PSERIES_LPAR 0x0101
#define PLATFORM_ISERIES_LPAR 0x0201
#define PLATFORM_LPAR 0x0001
/*
* List of interrupt controllers.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment