Commit f073cc8f authored by Cliff Wickman, committed by Ingo Molnar

x86, UV: Clean up uv_tlb.c

SGI UV's uv_tlb.c driver has become rather hard to read, with overly large
functions, non-standard coding style and (way) too long variable, constant
and function names and non-obvious code flow sequences.

This patch improves the readability and maintainability of the driver
significantly, by doing the following strict code cleanups with no side
effects:

 - Split long functions into shorter logical functions.

 - Shortened some variable and structure member names.

 - Added special functions for reads and writes of MMR regs with
   very long names.

 - Added the 'tunables' table to shorten tunables_write().

 - Added the 'stat_description' table to shorten uv_ptc_proc_write().

 - Pass fewer 'stat' arguments where it can be derived from the 'bcp'
   argument.

 - Made function definitions consistently fit on one line, and made them inline in a few (short) cases.

 - Moved some small structures and an atomic inline function to the header file.

 - Moved some local variables to the blocks where they are used.

 - Updated the copyright date.

 - Shortened uv_write_global_mmr64() etc. using some aliasing; no
   line breaks. Renamed many uv_.. functions that are not exported.

 - Aligned structure fields.
    [ note that not all structures are aligned the same way though; I'd like
      to keep the extensive commenting in some of them. ]

 - Shortened some long structure names.

 - Standard pass/fail exit from init_per_cpu()

 - Vertical alignment for mass initializations.

 - More separation between blocks of code.

Tested on a 16-processor Altix UV.
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: penberg@kernel.org
Link: http://lkml.kernel.org/r/E1QOw12-0004MN-Lp@eag09.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 2a919596
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* SGI UV Broadcast Assist Unit definitions * SGI UV Broadcast Assist Unit definitions
* *
* Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved.
*/ */
#ifndef _ASM_X86_UV_UV_BAU_H #ifndef _ASM_X86_UV_UV_BAU_H
...@@ -35,9 +35,9 @@ ...@@ -35,9 +35,9 @@
#define MAX_CPUS_PER_UVHUB 64 #define MAX_CPUS_PER_UVHUB 64
#define MAX_CPUS_PER_SOCKET 32 #define MAX_CPUS_PER_SOCKET 32
#define UV_ADP_SIZE 64 /* hardware-provided max. */ #define ADP_SZ 64 /* hardware-provided max. */
#define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */ #define UV_CPUS_PER_AS 32 /* hardware-provided max. */
#define UV_ITEMS_PER_DESCRIPTOR 8 #define ITEMS_PER_DESC 8
/* the 'throttle' to prevent the hardware stay-busy bug */ /* the 'throttle' to prevent the hardware stay-busy bug */
#define MAX_BAU_CONCURRENT 3 #define MAX_BAU_CONCURRENT 3
#define UV_ACT_STATUS_MASK 0x3 #define UV_ACT_STATUS_MASK 0x3
...@@ -48,7 +48,7 @@ ...@@ -48,7 +48,7 @@
#define UV2_NET_ENDPOINT_INTD 0x28 #define UV2_NET_ENDPOINT_INTD 0x28
#define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ #define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \
UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD)
#define UV_DESC_BASE_PNODE_SHIFT 49 #define UV_DESC_PSHIFT 49
#define UV_PAYLOADQ_PNODE_SHIFT 49 #define UV_PAYLOADQ_PNODE_SHIFT 49
#define UV_PTC_BASENAME "sgi_uv/ptc_statistics" #define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
#define UV_BAU_BASENAME "sgi_uv/bau_tunables" #define UV_BAU_BASENAME "sgi_uv/bau_tunables"
...@@ -56,7 +56,8 @@ ...@@ -56,7 +56,8 @@
#define UV_BAU_TUNABLES_FILE "bau_tunables" #define UV_BAU_TUNABLES_FILE "bau_tunables"
#define WHITESPACE " \t\n" #define WHITESPACE " \t\n"
#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
#define cpubit_isset(cpu, bau_local_cpumask) \
test_bit((cpu), (bau_local_cpumask).bits)
/* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ /* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */
/* /*
...@@ -72,25 +73,37 @@ ...@@ -72,25 +73,37 @@
UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \
UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD) UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD)
#define BAU_MISC_CONTROL_MULT_MASK 3 #define BAU_MISC_CONTROL_MULT_MASK 3
#define UVH_AGING_PRESCALE_SEL 0x000000b000UL #define UVH_AGING_PRESCALE_SEL 0x000000b000UL
/* [30:28] URGENCY_7 an index into a table of times */ /* [30:28] URGENCY_7 an index into a table of times */
#define BAU_URGENCY_7_SHIFT 28 #define BAU_URGENCY_7_SHIFT 28
#define BAU_URGENCY_7_MASK 7 #define BAU_URGENCY_7_MASK 7
#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL #define UVH_TRANSACTION_TIMEOUT 0x000000b200UL
/* [45:40] BAU - BAU transaction timeout select - a multiplier */ /* [45:40] BAU - BAU transaction timeout select - a multiplier */
#define BAU_TRANS_SHIFT 40 #define BAU_TRANS_SHIFT 40
#define BAU_TRANS_MASK 0x3f #define BAU_TRANS_MASK 0x3f
/*
* shorten some awkward names
*/
#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT
#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT
#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD
#define write_gmmr uv_write_global_mmr64
#define write_lmmr uv_write_local_mmr
#define read_lmmr uv_read_local_mmr
#define read_gmmr uv_read_global_mmr64
/* /*
* bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
*/ */
#define DESC_STATUS_IDLE 0 #define DS_IDLE 0
#define DESC_STATUS_ACTIVE 1 #define DS_ACTIVE 1
#define DESC_STATUS_DESTINATION_TIMEOUT 2 #define DS_DESTINATION_TIMEOUT 2
#define DESC_STATUS_SOURCE_TIMEOUT 3 #define DS_SOURCE_TIMEOUT 3
/* /*
* bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2
* values 1 and 5 will not occur * values 1 and 5 will not occur
...@@ -111,22 +124,22 @@ ...@@ -111,22 +124,22 @@
* threshholds at which to use IPI to free resources * threshholds at which to use IPI to free resources
*/ */
/* after this # consecutive 'plugged' timeouts, use IPI to release resources */ /* after this # consecutive 'plugged' timeouts, use IPI to release resources */
#define PLUGSB4RESET 100 #define PLUGSB4RESET 100
/* after this many consecutive timeouts, use IPI to release resources */ /* after this many consecutive timeouts, use IPI to release resources */
#define TIMEOUTSB4RESET 1 #define TIMEOUTSB4RESET 1
/* at this number uses of IPI to release resources, giveup the request */ /* at this number uses of IPI to release resources, giveup the request */
#define IPI_RESET_LIMIT 1 #define IPI_RESET_LIMIT 1
/* after this # consecutive successes, bump up the throttle if it was lowered */ /* after this # consecutive successes, bump up the throttle if it was lowered */
#define COMPLETE_THRESHOLD 5 #define COMPLETE_THRESHOLD 5
#define UV_LB_SUBNODEID 0x10 #define UV_LB_SUBNODEID 0x10
/* these two are the same for UV1 and UV2: */ /* these two are the same for UV1 and UV2: */
#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT #define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK #define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK
/* 4 bits of software ack period */ /* 4 bits of software ack period */
#define UV2_ACK_MASK 0x7UL #define UV2_ACK_MASK 0x7UL
#define UV2_ACK_UNITS_SHFT 3 #define UV2_ACK_UNITS_SHFT 3
#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT #define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT
#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
...@@ -149,9 +162,16 @@ ...@@ -149,9 +162,16 @@
/* /*
* tuning the action when the numalink network is extremely delayed * tuning the action when the numalink network is extremely delayed
*/ */
#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in microseconds */ #define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in
#define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */ microseconds */
#define CONGESTED_PERIOD 30 /* time for the bau to be disabled, in seconds */ #define CONGESTED_REPS 10 /* long delays averaged over
this many broadcasts */
#define CONGESTED_PERIOD 30 /* time for the bau to be
disabled, in seconds */
/* see msg_type: */
#define MSG_NOOP 0
#define MSG_REGULAR 1
#define MSG_RETRY 2
/* /*
* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
...@@ -163,8 +183,8 @@ ...@@ -163,8 +183,8 @@
* 'base_dest_nasid' field of the header corresponds to the * 'base_dest_nasid' field of the header corresponds to the
* destination nodeID associated with that specified bit. * destination nodeID associated with that specified bit.
*/ */
struct bau_target_uvhubmask { struct bau_targ_hubmask {
unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
}; };
/* /*
...@@ -173,7 +193,7 @@ struct bau_target_uvhubmask { ...@@ -173,7 +193,7 @@ struct bau_target_uvhubmask {
* enough bits for max. cpu's per uvhub) * enough bits for max. cpu's per uvhub)
*/ */
struct bau_local_cpumask { struct bau_local_cpumask {
unsigned long bits; unsigned long bits;
}; };
/* /*
...@@ -194,14 +214,14 @@ struct bau_local_cpumask { ...@@ -194,14 +214,14 @@ struct bau_local_cpumask {
* The payload is software-defined for INTD transactions * The payload is software-defined for INTD transactions
*/ */
struct bau_msg_payload { struct bau_msg_payload {
unsigned long address; /* signifies a page or all TLB's unsigned long address; /* signifies a page or all
of the cpu */ TLB's of the cpu */
/* 64 bits */ /* 64 bits */
unsigned short sending_cpu; /* filled in by sender */ unsigned short sending_cpu; /* filled in by sender */
/* 16 bits */ /* 16 bits */
unsigned short acknowledge_count;/* filled in by destination */ unsigned short acknowledge_count; /* filled in by destination */
/* 16 bits */ /* 16 bits */
unsigned int reserved1:32; /* not usable */ unsigned int reserved1:32; /* not usable */
}; };
...@@ -210,93 +230,96 @@ struct bau_msg_payload { ...@@ -210,93 +230,96 @@ struct bau_msg_payload {
* see table 4.2.3.0.1 in broacast_assist spec. * see table 4.2.3.0.1 in broacast_assist spec.
*/ */
struct bau_msg_header { struct bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */ /* bits 5:0 */
unsigned int base_dest_nasid:15; /* nasid of the */ unsigned int base_dest_nasid:15; /* nasid of the first bit */
/* bits 20:6 */ /* first bit in uvhub map */ /* bits 20:6 */ /* in uvhub map */
unsigned int command:8; /* message type */ unsigned int command:8; /* message type */
/* bits 28:21 */ /* bits 28:21 */
/* 0x38: SN3net EndPoint Message */ /* 0x38: SN3net EndPoint Message */
unsigned int rsvd_1:3; /* must be zero */ unsigned int rsvd_1:3; /* must be zero */
/* bits 31:29 */ /* bits 31:29 */
/* int will align on 32 bits */ /* int will align on 32 bits */
unsigned int rsvd_2:9; /* must be zero */ unsigned int rsvd_2:9; /* must be zero */
/* bits 40:32 */ /* bits 40:32 */
/* Suppl_A is 56-41 */ /* Suppl_A is 56-41 */
unsigned int sequence:16;/* message sequence number */ unsigned int sequence:16; /* message sequence number */
/* bits 56:41 */ /* becomes bytes 16-17 of msg */ /* bits 56:41 */ /* becomes bytes 16-17 of msg */
/* Address field (96:57) is never used as an /* Address field (96:57) is
address (these are address bits 42:3) */ never used as an address
(these are address bits
unsigned int rsvd_3:1; /* must be zero */ 42:3) */
unsigned int rsvd_3:1; /* must be zero */
/* bit 57 */ /* bit 57 */
/* address bits 27:4 are payload */ /* address bits 27:4 are payload */
/* these next 24 (58-81) bits become bytes 12-14 of msg */ /* these next 24 (58-81) bits become bytes 12-14 of msg */
/* bits 65:58 land in byte 12 */ /* bits 65:58 land in byte 12 */
unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ unsigned int replied_to:1; /* sent as 0 by the source to
byte 12 */
/* bit 58 */ /* bit 58 */
unsigned int msg_type:3; /* software type of the message*/ unsigned int msg_type:3; /* software type of the
message */
/* bits 61:59 */ /* bits 61:59 */
unsigned int canceled:1; /* message canceled, resource to be freed*/ unsigned int canceled:1; /* message canceled, resource
is to be freed*/
/* bit 62 */ /* bit 62 */
unsigned int payload_1a:1;/* not currently used */ unsigned int payload_1a:1; /* not currently used */
/* bit 63 */ /* bit 63 */
unsigned int payload_1b:2;/* not currently used */ unsigned int payload_1b:2; /* not currently used */
/* bits 65:64 */ /* bits 65:64 */
/* bits 73:66 land in byte 13 */ /* bits 73:66 land in byte 13 */
unsigned int payload_1ca:6;/* not currently used */ unsigned int payload_1ca:6; /* not currently used */
/* bits 71:66 */ /* bits 71:66 */
unsigned int payload_1c:2;/* not currently used */ unsigned int payload_1c:2; /* not currently used */
/* bits 73:72 */ /* bits 73:72 */
/* bits 81:74 land in byte 14 */ /* bits 81:74 land in byte 14 */
unsigned int payload_1d:6;/* not currently used */ unsigned int payload_1d:6; /* not currently used */
/* bits 79:74 */ /* bits 79:74 */
unsigned int payload_1e:2;/* not currently used */ unsigned int payload_1e:2; /* not currently used */
/* bits 81:80 */ /* bits 81:80 */
unsigned int rsvd_4:7; /* must be zero */ unsigned int rsvd_4:7; /* must be zero */
/* bits 88:82 */ /* bits 88:82 */
unsigned int sw_ack_flag:1;/* software acknowledge flag */ unsigned int swack_flag:1; /* software acknowledge flag */
/* bit 89 */ /* bit 89 */
/* INTD trasactions at destination are to /* INTD trasactions at
wait for software acknowledge */ destination are to wait for
unsigned int rsvd_5:6; /* must be zero */ software acknowledge */
unsigned int rsvd_5:6; /* must be zero */
/* bits 95:90 */ /* bits 95:90 */
unsigned int rsvd_6:5; /* must be zero */ unsigned int rsvd_6:5; /* must be zero */
/* bits 100:96 */ /* bits 100:96 */
unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ unsigned int int_both:1; /* if 1, interrupt both sockets
on the uvhub */
/* bit 101*/ /* bit 101*/
unsigned int fairness:3;/* usually zero */ unsigned int fairness:3; /* usually zero */
/* bits 104:102 */ /* bits 104:102 */
unsigned int multilevel:1; /* multi-level multicast format */ unsigned int multilevel:1; /* multi-level multicast
format */
/* bit 105 */ /* bit 105 */
/* 0 for TLB: endpoint multi-unicast messages */ /* 0 for TLB: endpoint multi-unicast messages */
unsigned int chaining:1;/* next descriptor is part of this activation*/ unsigned int chaining:1; /* next descriptor is part of
this activation*/
/* bit 106 */ /* bit 106 */
unsigned int rsvd_7:21; /* must be zero */ unsigned int rsvd_7:21; /* must be zero */
/* bits 127:107 */ /* bits 127:107 */
}; };
/* see msg_type: */
#define MSG_NOOP 0
#define MSG_REGULAR 1
#define MSG_RETRY 2
/* /*
* The activation descriptor: * The activation descriptor:
* The format of the message to send, plus all accompanying control * The format of the message to send, plus all accompanying control
* Should be 64 bytes * Should be 64 bytes
*/ */
struct bau_desc { struct bau_desc {
struct bau_target_uvhubmask distribution; struct bau_targ_hubmask distribution;
/* /*
* message template, consisting of header and payload: * message template, consisting of header and payload:
*/ */
struct bau_msg_header header; struct bau_msg_header header;
struct bau_msg_payload payload; struct bau_msg_payload payload;
}; };
/* /*
* -payload-- ---------header------ * -payload-- ---------header------
...@@ -315,59 +338,51 @@ struct bau_desc { ...@@ -315,59 +338,51 @@ struct bau_desc {
* are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
* bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120)
* (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
* sw_ack_vector and payload_2) * swack_vec and payload_2)
* "Enabling Software Acknowledgment mode (see Section 4.3.3 Software * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
* Acknowledge Processing) also selects 32 byte (17 bytes usable) payload * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
* operation." * operation."
*/ */
struct bau_payload_queue_entry { struct bau_pq_entry {
unsigned long address; /* signifies a page or all TLB's unsigned long address; /* signifies a page or all TLB's
of the cpu */ of the cpu */
/* 64 bits, bytes 0-7 */ /* 64 bits, bytes 0-7 */
unsigned short sending_cpu; /* cpu that sent the message */
unsigned short sending_cpu; /* cpu that sent the message */
/* 16 bits, bytes 8-9 */ /* 16 bits, bytes 8-9 */
unsigned short acknowledge_count; /* filled in by destination */
unsigned short acknowledge_count; /* filled in by destination */
/* 16 bits, bytes 10-11 */ /* 16 bits, bytes 10-11 */
/* these next 3 bytes come from bits 58-81 of the message header */ /* these next 3 bytes come from bits 58-81 of the message header */
unsigned short replied_to:1; /* sent as 0 by the source */ unsigned short replied_to:1; /* sent as 0 by the source */
unsigned short msg_type:3; /* software message type */ unsigned short msg_type:3; /* software message type */
unsigned short canceled:1; /* sent as 0 by the source */ unsigned short canceled:1; /* sent as 0 by the source */
unsigned short unused1:3; /* not currently using */ unsigned short unused1:3; /* not currently using */
/* byte 12 */ /* byte 12 */
unsigned char unused2a; /* not currently using */
unsigned char unused2a; /* not currently using */
/* byte 13 */ /* byte 13 */
unsigned char unused2; /* not currently using */ unsigned char unused2; /* not currently using */
/* byte 14 */ /* byte 14 */
unsigned char swack_vec; /* filled in by the hardware */
unsigned char sw_ack_vector; /* filled in by the hardware */
/* byte 15 (bits 127:120) */ /* byte 15 (bits 127:120) */
unsigned short sequence; /* message sequence number */
unsigned short sequence; /* message sequence number */
/* bytes 16-17 */ /* bytes 16-17 */
unsigned char unused4[2]; /* not currently using bytes 18-19 */ unsigned char unused4[2]; /* not currently using bytes 18-19 */
/* bytes 18-19 */ /* bytes 18-19 */
int number_of_cpus; /* filled in at destination */
int number_of_cpus; /* filled in at destination */
/* 32 bits, bytes 20-23 (aligned) */ /* 32 bits, bytes 20-23 (aligned) */
unsigned char unused5[8]; /* not using */
unsigned char unused5[8]; /* not using */
/* bytes 24-31 */ /* bytes 24-31 */
}; };
struct msg_desc { struct msg_desc {
struct bau_payload_queue_entry *msg; struct bau_pq_entry *msg;
int msg_slot; int msg_slot;
int sw_ack_slot; int swack_slot;
struct bau_payload_queue_entry *va_queue_first; struct bau_pq_entry *queue_first;
struct bau_payload_queue_entry *va_queue_last; struct bau_pq_entry *queue_last;
}; };
struct reset_args { struct reset_args {
int sender; int sender;
}; };
/* /*
...@@ -375,112 +390,226 @@ struct reset_args { ...@@ -375,112 +390,226 @@ struct reset_args {
*/ */
struct ptc_stats { struct ptc_stats {
/* sender statistics */ /* sender statistics */
unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ unsigned long s_giveup; /* number of fall backs to
unsigned long s_requestor; /* number of shootdown requests */ IPI-style flushes */
unsigned long s_stimeout; /* source side timeouts */ unsigned long s_requestor; /* number of shootdown
unsigned long s_dtimeout; /* destination side timeouts */ requests */
unsigned long s_time; /* time spent in sending side */ unsigned long s_stimeout; /* source side timeouts */
unsigned long s_retriesok; /* successful retries */ unsigned long s_dtimeout; /* destination side timeouts */
unsigned long s_ntargcpu; /* total number of cpu's targeted */ unsigned long s_time; /* time spent in sending side */
unsigned long s_ntargself; /* times the sending cpu was targeted */ unsigned long s_retriesok; /* successful retries */
unsigned long s_ntarglocals; /* targets of cpus on the local blade */ unsigned long s_ntargcpu; /* total number of cpu's
unsigned long s_ntargremotes; /* targets of cpus on remote blades */ targeted */
unsigned long s_ntarglocaluvhub; /* targets of the local hub */ unsigned long s_ntargself; /* times the sending cpu was
unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ targeted */
unsigned long s_ntarguvhub; /* total number of uvhubs targeted */ unsigned long s_ntarglocals; /* targets of cpus on the local
unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/ blade */
unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */ unsigned long s_ntargremotes; /* targets of cpus on remote
unsigned long s_ntarguvhub4; /* number of times target hubs >= 4 */ blades */
unsigned long s_ntarguvhub2; /* number of times target hubs >= 2 */ unsigned long s_ntarglocaluvhub; /* targets of the local hub */
unsigned long s_ntarguvhub1; /* number of times target hubs == 1 */ unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */
unsigned long s_resets_plug; /* ipi-style resets from plug state */ unsigned long s_ntarguvhub; /* total number of uvhubs
unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ targeted */
unsigned long s_busy; /* status stayed busy past s/w timer */ unsigned long s_ntarguvhub16; /* number of times target
unsigned long s_throttles; /* waits in throttle */ hubs >= 16*/
unsigned long s_retry_messages; /* retry broadcasts */ unsigned long s_ntarguvhub8; /* number of times target
unsigned long s_bau_reenabled; /* for bau enable/disable */ hubs >= 8 */
unsigned long s_bau_disabled; /* for bau enable/disable */ unsigned long s_ntarguvhub4; /* number of times target
hubs >= 4 */
unsigned long s_ntarguvhub2; /* number of times target
hubs >= 2 */
unsigned long s_ntarguvhub1; /* number of times target
hubs == 1 */
unsigned long s_resets_plug; /* ipi-style resets from plug
state */
unsigned long s_resets_timeout; /* ipi-style resets from
timeouts */
unsigned long s_busy; /* status stayed busy past
s/w timer */
unsigned long s_throttles; /* waits in throttle */
unsigned long s_retry_messages; /* retry broadcasts */
unsigned long s_bau_reenabled; /* for bau enable/disable */
unsigned long s_bau_disabled; /* for bau enable/disable */
/* destination statistics */ /* destination statistics */
unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ unsigned long d_alltlb; /* times all tlb's on this
unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ cpu were flushed */
unsigned long d_multmsg; /* interrupts with multiple messages */ unsigned long d_onetlb; /* times just one tlb on this
unsigned long d_nomsg; /* interrupts with no message */ cpu was flushed */
unsigned long d_time; /* time spent on destination side */ unsigned long d_multmsg; /* interrupts with multiple
unsigned long d_requestee; /* number of messages processed */ messages */
unsigned long d_retries; /* number of retry messages processed */ unsigned long d_nomsg; /* interrupts with no message */
unsigned long d_canceled; /* number of messages canceled by retries */ unsigned long d_time; /* time spent on destination
unsigned long d_nocanceled; /* retries that found nothing to cancel */ side */
unsigned long d_resets; /* number of ipi-style requests processed */ unsigned long d_requestee; /* number of messages
unsigned long d_rcanceled; /* number of messages canceled by resets */ processed */
unsigned long d_retries; /* number of retry messages
processed */
unsigned long d_canceled; /* number of messages canceled
by retries */
unsigned long d_nocanceled; /* retries that found nothing
to cancel */
unsigned long d_resets; /* number of ipi-style requests
processed */
unsigned long d_rcanceled; /* number of messages canceled
by resets */
};
struct tunables {
int *tunp;
int deflt;
}; };
struct hub_and_pnode { struct hub_and_pnode {
short uvhub; short uvhub;
short pnode; short pnode;
};
struct socket_desc {
short num_cpus;
short cpu_number[MAX_CPUS_PER_SOCKET];
};
struct uvhub_desc {
unsigned short socket_mask;
short num_cpus;
short uvhub;
short pnode;
struct socket_desc socket[2];
}; };
/* /*
* one per-cpu; to locate the software tables * one per-cpu; to locate the software tables
*/ */
struct bau_control { struct bau_control {
struct bau_desc *descriptor_base; struct bau_desc *descriptor_base;
struct bau_payload_queue_entry *va_queue_first; struct bau_pq_entry *queue_first;
struct bau_payload_queue_entry *va_queue_last; struct bau_pq_entry *queue_last;
struct bau_payload_queue_entry *bau_msg_head; struct bau_pq_entry *bau_msg_head;
struct bau_control *uvhub_master; struct bau_control *uvhub_master;
struct bau_control *socket_master; struct bau_control *socket_master;
struct ptc_stats *statp; struct ptc_stats *statp;
unsigned long timeout_interval; unsigned long timeout_interval;
unsigned long set_bau_on_time; unsigned long set_bau_on_time;
atomic_t active_descriptor_count; atomic_t active_descriptor_count;
int plugged_tries; int plugged_tries;
int timeout_tries; int timeout_tries;
int ipi_attempts; int ipi_attempts;
int conseccompletes; int conseccompletes;
int baudisabled; int baudisabled;
int set_bau_off; int set_bau_off;
short cpu; short cpu;
short osnode; short osnode;
short uvhub_cpu; short uvhub_cpu;
short uvhub; short uvhub;
short cpus_in_socket; short cpus_in_socket;
short cpus_in_uvhub; short cpus_in_uvhub;
short partition_base_pnode; short partition_base_pnode;
unsigned short message_number; unsigned short message_number;
unsigned short uvhub_quiesce; unsigned short uvhub_quiesce;
short socket_acknowledge_count[DEST_Q_SIZE]; short socket_acknowledge_count[DEST_Q_SIZE];
cycles_t send_message; cycles_t send_message;
spinlock_t uvhub_lock; spinlock_t uvhub_lock;
spinlock_t queue_lock; spinlock_t queue_lock;
/* tunables */ /* tunables */
int max_bau_concurrent; int max_concurr;
int max_bau_concurrent_constant; int max_concurr_const;
int plugged_delay; int plugged_delay;
int plugsb4reset; int plugsb4reset;
int timeoutsb4reset; int timeoutsb4reset;
int ipi_reset_limit; int ipi_reset_limit;
int complete_threshold; int complete_threshold;
int congested_response_us; int cong_response_us;
int congested_reps; int cong_reps;
int congested_period; int cong_period;
cycles_t period_time; cycles_t period_time;
long period_requests; long period_requests;
struct hub_and_pnode *target_hub_and_pnode; struct hub_and_pnode *thp;
}; };
static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) static unsigned long read_mmr_uv2_status(void)
{
return read_lmmr(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2);
}
/*
 * Write @mmr_image to the UVH_BAU_DATA_BROADCAST MMR for the given @pnode,
 * through write_gmmr (the short alias for uv_write_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image)
{
	write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image);
}
/*
 * Write @mmr_image to the UVH_LB_BAU_SB_DESCRIPTOR_BASE MMR for the given
 * @pnode, through write_gmmr (the short alias for uv_write_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_descriptor_base(int pnode, unsigned long mmr_image)
{
	write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image);
}
/*
 * Write @index to the local UVH_LB_BAU_SB_ACTIVATION_CONTROL MMR, through
 * write_lmmr (the short alias for uv_write_local_mmr).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_activation(unsigned long index)
{
	write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
}
/*
 * Write @mmr_image to the UVH_LB_BAU_SB_ACTIVATION_CONTROL MMR for the given
 * @pnode. This is the global-MMR counterpart of write_mmr_activation(): it
 * goes through write_gmmr (the short alias for uv_write_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_gmmr_activation(int pnode, unsigned long mmr_image)
{
	write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image);
}
/*
 * Write @mmr_image to the UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST MMR for the
 * given @pnode, through write_gmmr (the short alias for
 * uv_write_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image)
{
	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image);
}
/*
 * Write @mmr_image to the UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL MMR for the
 * given @pnode, through write_gmmr (the short alias for
 * uv_write_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_payload_tail(int pnode, unsigned long mmr_image)
{
	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image);
}
/*
 * Write @mmr_image to the UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST MMR for the
 * given @pnode, through write_gmmr (the short alias for
 * uv_write_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_payload_last(int pnode, unsigned long mmr_image)
{
	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image);
}
/*
 * Write @mmr_image to the UVH_LB_BAU_MISC_CONTROL MMR for the given @pnode,
 * through write_gmmr (the short alias for uv_write_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_misc_control(int pnode, unsigned long mmr_image)
{
	write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
}
/*
 * Return the value of the UVH_LB_BAU_MISC_CONTROL MMR for the given @pnode,
 * read through read_gmmr (the short alias for uv_read_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline unsigned long read_mmr_misc_control(int pnode)
{
	return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL);
}
/*
 * Write @mr to the local UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS MMR.
 * Note the write goes to the _ALIAS register, whereas read_mmr_sw_ack()
 * reads UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE.
 *
 * Calls uv_write_local_mmr directly; that is the function the write_lmmr
 * alias expands to.
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_sw_ack(unsigned long mr)
{
	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
}
/*
 * Return the value of the local UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE MMR,
 * read through read_lmmr (the short alias for uv_read_local_mmr).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline unsigned long read_mmr_sw_ack(void)
{
	return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
}
/*
 * Return the value of the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE MMR for the
 * given @pnode. This is the global-MMR counterpart of read_mmr_sw_ack(): it
 * goes through read_gmmr (the short alias for uv_read_global_mmr64).
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline unsigned long read_gmmr_sw_ack(int pnode)
{
	return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
}
/*
 * Write @mr to the UVH_BAU_DATA_CONFIG MMR for the given @pnode.
 *
 * Calls uv_write_global_mmr64 directly; that is the function the write_gmmr
 * alias expands to.
 *
 * Declared inline because this definition lives in a header: a plain
 * 'static' function would be emitted, and flagged as unused, in every
 * including file that never calls it.
 */
static inline void write_mmr_data_config(int pnode, unsigned long mr)
{
	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr);
}
static inline int bau_uvhub_isset(int uvhub, struct bau_targ_hubmask *dstp)
{ {
return constant_test_bit(uvhub, &dstp->bits[0]); return constant_test_bit(uvhub, &dstp->bits[0]);
} }
static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) static inline void bau_uvhub_set(int pnode, struct bau_targ_hubmask *dstp)
{ {
__set_bit(pnode, &dstp->bits[0]); __set_bit(pnode, &dstp->bits[0]);
} }
static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, static inline void bau_uvhubs_clear(struct bau_targ_hubmask *dstp,
int nbits) int nbits)
{ {
bitmap_zero(&dstp->bits[0], nbits); bitmap_zero(&dstp->bits[0], nbits);
} }
static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) static inline int bau_uvhub_weight(struct bau_targ_hubmask *dstp)
{ {
return bitmap_weight((unsigned long *)&dstp->bits[0], return bitmap_weight((unsigned long *)&dstp->bits[0],
UV_DISTRIBUTION_SIZE); UV_DISTRIBUTION_SIZE);
...@@ -491,9 +620,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) ...@@ -491,9 +620,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
bitmap_zero(&dstp->bits, nbits); bitmap_zero(&dstp->bits, nbits);
} }
#define cpubit_isset(cpu, bau_local_cpumask) \
test_bit((cpu), (bau_local_cpumask).bits)
extern void uv_bau_message_intr1(void); extern void uv_bau_message_intr1(void);
extern void uv_bau_timeout_intr1(void); extern void uv_bau_timeout_intr1(void);
...@@ -501,7 +627,7 @@ struct atomic_short { ...@@ -501,7 +627,7 @@ struct atomic_short {
short counter; short counter;
}; };
/** /*
* atomic_read_short - read a short atomic variable * atomic_read_short - read a short atomic variable
* @v: pointer of type atomic_short * @v: pointer of type atomic_short
* *
...@@ -512,14 +638,14 @@ static inline int atomic_read_short(const struct atomic_short *v) ...@@ -512,14 +638,14 @@ static inline int atomic_read_short(const struct atomic_short *v)
return v->counter; return v->counter;
} }
/** /*
* atomic_add_short_return - add and return a short int * atom_asr - add and return a short int
* @i: short value to add * @i: short value to add
* @v: pointer of type atomic_short * @v: pointer of type atomic_short
* *
* Atomically adds @i to @v and returns @i + @v * Atomically adds @i to @v and returns @i + @v
*/ */
static inline int atomic_add_short_return(short i, struct atomic_short *v) static inline int atom_asr(short i, struct atomic_short *v)
{ {
short __i = i; short __i = i;
asm volatile(LOCK_PREFIX "xaddw %0, %1" asm volatile(LOCK_PREFIX "xaddw %0, %1"
...@@ -528,4 +654,26 @@ static inline int atomic_add_short_return(short i, struct atomic_short *v) ...@@ -528,4 +654,26 @@ static inline int atomic_add_short_return(short i, struct atomic_short *v)
return i + __i; return i + __i;
} }
/*
 * atomic_inc_unless_ge - increment *v by one only while it is below a limit
 * @lock: spinlock held across the compare and the increment
 * @v:    counter to increment
 * @u:    limit; no increment happens once *v has reached or passed it
 *
 * Returns 1 if *v was < u and has been incremented.
 * Returns 0 if *v was >= u and has been left unchanged.
 *
 * The check and the increment are both done under @lock.
 *
 * This resembles atomic_add_unless(), but it also copes with 'u' having
 * been lowered below the current value of *v: atomic_add_unless() can
 * only stop when the values are equal.
 */
static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
{
	int incremented = 0;

	spin_lock(lock);
	if (atomic_read(v) < u) {
		atomic_inc(v);
		incremented = 1;
	}
	spin_unlock(lock);

	return incremented;
}
#endif /* _ASM_X86_UV_UV_BAU_H */ #endif /* _ASM_X86_UV_UV_BAU_H */
/* /*
* SGI UltraViolet TLB flush routines. * SGI UltraViolet TLB flush routines.
* *
* (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
* *
* This code is released under the GNU General Public License version 2 or * This code is released under the GNU General Public License version 2 or
* later. * later.
...@@ -35,6 +35,7 @@ static int timeout_base_ns[] = { ...@@ -35,6 +35,7 @@ static int timeout_base_ns[] = {
5242880, 5242880,
167772160 167772160
}; };
static int timeout_us; static int timeout_us;
static int nobau; static int nobau;
static int baudisabled; static int baudisabled;
...@@ -42,20 +43,70 @@ static spinlock_t disable_lock; ...@@ -42,20 +43,70 @@ static spinlock_t disable_lock;
static cycles_t congested_cycles; static cycles_t congested_cycles;
/* tunables: */ /* tunables: */
static int max_bau_concurrent = MAX_BAU_CONCURRENT; static int max_concurr = MAX_BAU_CONCURRENT;
static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; static int max_concurr_const = MAX_BAU_CONCURRENT;
static int plugged_delay = PLUGGED_DELAY; static int plugged_delay = PLUGGED_DELAY;
static int plugsb4reset = PLUGSB4RESET; static int plugsb4reset = PLUGSB4RESET;
static int timeoutsb4reset = TIMEOUTSB4RESET; static int timeoutsb4reset = TIMEOUTSB4RESET;
static int ipi_reset_limit = IPI_RESET_LIMIT; static int ipi_reset_limit = IPI_RESET_LIMIT;
static int complete_threshold = COMPLETE_THRESHOLD; static int complete_threshold = COMPLETE_THRESHOLD;
static int congested_response_us = CONGESTED_RESPONSE_US; static int congested_respns_us = CONGESTED_RESPONSE_US;
static int congested_reps = CONGESTED_REPS; static int congested_reps = CONGESTED_REPS;
static int congested_period = CONGESTED_PERIOD; static int congested_period = CONGESTED_PERIOD;
/*
 * One entry per tunable variable declared above: a pointer to the variable
 * paired with the same constant that variable is statically initialized
 * with.
 * NOTE(review): presumably walked by tunables_write() to set or restore each
 * value; that function and the layout of struct tunables are not visible
 * here -- confirm before reordering entries.
 */
static struct tunables tunables[] = {
{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
{&plugged_delay, PLUGGED_DELAY},
{&plugsb4reset, PLUGSB4RESET},
{&timeoutsb4reset, TIMEOUTSB4RESET},
{&ipi_reset_limit, IPI_RESET_LIMIT},
{&complete_threshold, COMPLETE_THRESHOLD},
{&congested_respns_us, CONGESTED_RESPONSE_US},
{&congested_reps, CONGESTED_REPS},
{&congested_period, CONGESTED_PERIOD}
};
static struct dentry *tunables_dir; static struct dentry *tunables_dir;
static struct dentry *tunables_file; static struct dentry *tunables_file;
static int __init setup_nobau(char *arg) /* these correspond to the statistics printed by ptc_seq_show() */
/*
 * Human-readable legend for the statistics, one "name: meaning" string per
 * statistic.  Keep the entries in the same order as the values they
 * describe.
 */
static char *stat_description[] = {
	/* sender-side statistics */
	"sent: number of shootdown messages sent",
	"stime: time spent sending messages",
	"numuvhubs: number of hubs targeted with shootdown",
	"numuvhubs16: number times 16 or more hubs targeted",
	"numuvhubs8: number times 8 or more hubs targeted",
	"numuvhubs4: number times 4 or more hubs targeted",
	"numuvhubs2: number times 2 or more hubs targeted",
	"numuvhubs1: number times 1 hub targeted",
	"numcpus: number of cpus targeted with shootdown",
	"dto: number of destination timeouts",
	"retries: destination timeout retries sent",
	/* was "rok: : ..." -- stray second colon removed */
	"rok: destination timeouts successfully retried",
	"resetp: ipi-style resource resets for plugs",
	"resett: ipi-style resource resets for timeouts",
	"giveup: fall-backs to ipi-style shootdowns",
	"sto: number of source timeouts",
	"bz: number of stay-busy's",
	"throt: number times spun in throttle",
	"swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
	/* destination-side statistics */
	"recv: shootdown messages received",
	"rtime: time spent processing messages",
	"all: shootdown all-tlb messages",
	"one: shootdown one-tlb messages",
	"mult: interrupts that found multiple messages",
	"none: interrupts that found no messages",
	"retry: number of retry messages processed",
	"canc: number messages canceled by retries",
	"nocan: number retries that found nothing to cancel",
	"reset: number of ipi-style reset requests processed",
	"rcan: number messages canceled by reset requests",
	/* BAU enable/disable transitions */
	"disable: number times use of the BAU was disabled",
	"enable: number times use of the BAU was re-enabled"
};
static int __init
setup_nobau(char *arg)
{ {
nobau = 1; nobau = 1;
return 0; return 0;
...@@ -63,7 +114,7 @@ static int __init setup_nobau(char *arg) ...@@ -63,7 +114,7 @@ static int __init setup_nobau(char *arg)
early_param("nobau", setup_nobau); early_param("nobau", setup_nobau);
/* base pnode in this partition */ /* base pnode in this partition */
static int uv_partition_base_pnode __read_mostly; static int uv_base_pnode __read_mostly;
/* position of pnode (which is nasid>>1): */ /* position of pnode (which is nasid>>1): */
static int uv_nshift __read_mostly; static int uv_nshift __read_mostly;
static unsigned long uv_mmask __read_mostly; static unsigned long uv_mmask __read_mostly;
...@@ -109,60 +160,52 @@ static int __init uvhub_to_first_apicid(int uvhub) ...@@ -109,60 +160,52 @@ static int __init uvhub_to_first_apicid(int uvhub)
* clear of the Timeout bit (as well) will free the resource. No reply will * clear of the Timeout bit (as well) will free the resource. No reply will
* be sent (the hardware will only do one reply per message). * be sent (the hardware will only do one reply per message).
*/ */
static inline void uv_reply_to_message(struct msg_desc *mdp, static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp)
struct bau_control *bcp)
{ {
unsigned long dw; unsigned long dw;
struct bau_payload_queue_entry *msg; struct bau_pq_entry *msg;
msg = mdp->msg; msg = mdp->msg;
if (!msg->canceled) { if (!msg->canceled) {
dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
msg->sw_ack_vector; write_mmr_sw_ack(dw);
uv_write_local_mmr(
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
} }
msg->replied_to = 1; msg->replied_to = 1;
msg->sw_ack_vector = 0; msg->swack_vec = 0;
} }
/* /*
* Process the receipt of a RETRY message * Process the receipt of a RETRY message
*/ */
static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, static void bau_process_retry_msg(struct msg_desc *mdp,
struct bau_control *bcp) struct bau_control *bcp)
{ {
int i; int i;
int cancel_count = 0; int cancel_count = 0;
int slot2;
unsigned long msg_res; unsigned long msg_res;
unsigned long mmr = 0; unsigned long mmr = 0;
struct bau_payload_queue_entry *msg; struct bau_pq_entry *msg = mdp->msg;
struct bau_payload_queue_entry *msg2; struct bau_pq_entry *msg2;
struct ptc_stats *stat; struct ptc_stats *stat = bcp->statp;
msg = mdp->msg;
stat = bcp->statp;
stat->d_retries++; stat->d_retries++;
/* /*
* cancel any message from msg+1 to the retry itself * cancel any message from msg+1 to the retry itself
*/ */
for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
if (msg2 > mdp->va_queue_last) if (msg2 > mdp->queue_last)
msg2 = mdp->va_queue_first; msg2 = mdp->queue_first;
if (msg2 == msg) if (msg2 == msg)
break; break;
/* same conditions for cancellation as uv_do_reset */ /* same conditions for cancellation as do_reset */
if ((msg2->replied_to == 0) && (msg2->canceled == 0) && if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
(msg2->sw_ack_vector) && ((msg2->sw_ack_vector & (msg2->swack_vec) && ((msg2->swack_vec &
msg->sw_ack_vector) == 0) && msg->swack_vec) == 0) &&
(msg2->sending_cpu == msg->sending_cpu) && (msg2->sending_cpu == msg->sending_cpu) &&
(msg2->msg_type != MSG_NOOP)) { (msg2->msg_type != MSG_NOOP)) {
slot2 = msg2 - mdp->va_queue_first; mmr = read_mmr_sw_ack();
mmr = uv_read_local_mmr msg_res = msg2->swack_vec;
(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
msg_res = msg2->sw_ack_vector;
/* /*
* This is a message retry; clear the resources held * This is a message retry; clear the resources held
* by the previous message only if they timed out. * by the previous message only if they timed out.
...@@ -170,6 +213,7 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, ...@@ -170,6 +213,7 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
* situation to report. * situation to report.
*/ */
if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
unsigned long mr;
/* /*
* is the resource timed out? * is the resource timed out?
* make everyone ignore the cancelled message. * make everyone ignore the cancelled message.
...@@ -177,10 +221,8 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, ...@@ -177,10 +221,8 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
msg2->canceled = 1; msg2->canceled = 1;
stat->d_canceled++; stat->d_canceled++;
cancel_count++; cancel_count++;
uv_write_local_mmr( mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, write_mmr_sw_ack(mr);
(msg_res << UV_SW_ACK_NPENDING) |
msg_res);
} }
} }
} }
...@@ -192,20 +234,19 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, ...@@ -192,20 +234,19 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
* Do all the things a cpu should do for a TLB shootdown message. * Do all the things a cpu should do for a TLB shootdown message.
* Other cpu's may come here at the same time for this message. * Other cpu's may come here at the same time for this message.
*/ */
static void uv_bau_process_message(struct msg_desc *mdp, static void bau_process_message(struct msg_desc *mdp,
struct bau_control *bcp) struct bau_control *bcp)
{ {
int msg_ack_count;
short socket_ack_count = 0; short socket_ack_count = 0;
struct ptc_stats *stat; short *sp;
struct bau_payload_queue_entry *msg; struct atomic_short *asp;
struct ptc_stats *stat = bcp->statp;
struct bau_pq_entry *msg = mdp->msg;
struct bau_control *smaster = bcp->socket_master; struct bau_control *smaster = bcp->socket_master;
/* /*
* This must be a normal message, or retry of a normal message * This must be a normal message, or retry of a normal message
*/ */
msg = mdp->msg;
stat = bcp->statp;
if (msg->address == TLB_FLUSH_ALL) { if (msg->address == TLB_FLUSH_ALL) {
local_flush_tlb(); local_flush_tlb();
stat->d_alltlb++; stat->d_alltlb++;
...@@ -222,30 +263,32 @@ static void uv_bau_process_message(struct msg_desc *mdp, ...@@ -222,30 +263,32 @@ static void uv_bau_process_message(struct msg_desc *mdp,
* cpu number. * cpu number.
*/ */
if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
uv_bau_process_retry_msg(mdp, bcp); bau_process_retry_msg(mdp, bcp);
/* /*
* This is a sw_ack message, so we have to reply to it. * This is a swack message, so we have to reply to it.
* Count each responding cpu on the socket. This avoids * Count each responding cpu on the socket. This avoids
* pinging the count's cache line back and forth between * pinging the count's cache line back and forth between
* the sockets. * the sockets.
*/ */
socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
&smaster->socket_acknowledge_count[mdp->msg_slot]); asp = (struct atomic_short *)sp;
socket_ack_count = atom_asr(1, asp);
if (socket_ack_count == bcp->cpus_in_socket) { if (socket_ack_count == bcp->cpus_in_socket) {
int msg_ack_count;
/* /*
* Both sockets dump their completed count total into * Both sockets dump their completed count total into
* the message's count. * the message's count.
*/ */
smaster->socket_acknowledge_count[mdp->msg_slot] = 0; smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
msg_ack_count = atomic_add_short_return(socket_ack_count, asp = (struct atomic_short *)&msg->acknowledge_count;
(struct atomic_short *)&msg->acknowledge_count); msg_ack_count = atom_asr(socket_ack_count, asp);
if (msg_ack_count == bcp->cpus_in_uvhub) { if (msg_ack_count == bcp->cpus_in_uvhub) {
/* /*
* All cpus in uvhub saw it; reply * All cpus in uvhub saw it; reply
*/ */
uv_reply_to_message(mdp, bcp); reply_to_message(mdp, bcp);
} }
} }
...@@ -268,62 +311,51 @@ static int uvhub_to_first_cpu(int uvhub) ...@@ -268,62 +311,51 @@ static int uvhub_to_first_cpu(int uvhub)
* Last resort when we get a large number of destination timeouts is * Last resort when we get a large number of destination timeouts is
* to clear resources held by a given cpu. * to clear resources held by a given cpu.
* Do this with IPI so that all messages in the BAU message queue * Do this with IPI so that all messages in the BAU message queue
* can be identified by their nonzero sw_ack_vector field. * can be identified by their nonzero swack_vec field.
* *
* This is entered for a single cpu on the uvhub. * This is entered for a single cpu on the uvhub.
* The sender wants this uvhub to free a specific message's * The sender wants this uvhub to free a specific message's
* sw_ack resources. * swack resources.
*/ */
static void static void do_reset(void *ptr)
uv_do_reset(void *ptr)
{ {
int i; int i;
int slot; struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
int count = 0; struct reset_args *rap = (struct reset_args *)ptr;
unsigned long mmr; struct bau_pq_entry *msg;
unsigned long msg_res; struct ptc_stats *stat = bcp->statp;
struct bau_control *bcp;
struct reset_args *rap;
struct bau_payload_queue_entry *msg;
struct ptc_stats *stat;
bcp = &per_cpu(bau_control, smp_processor_id());
rap = (struct reset_args *)ptr;
stat = bcp->statp;
stat->d_resets++; stat->d_resets++;
/* /*
* We're looking for the given sender, and * We're looking for the given sender, and
* will free its sw_ack resource. * will free its swack resource.
* If all cpu's finally responded after the timeout, its * If all cpu's finally responded after the timeout, its
* message 'replied_to' was set. * message 'replied_to' was set.
*/ */
for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
/* uv_do_reset: same conditions for cancellation as unsigned long msg_res;
uv_bau_process_retry_msg() */ /* do_reset: same conditions for cancellation as
bau_process_retry_msg() */
if ((msg->replied_to == 0) && if ((msg->replied_to == 0) &&
(msg->canceled == 0) && (msg->canceled == 0) &&
(msg->sending_cpu == rap->sender) && (msg->sending_cpu == rap->sender) &&
(msg->sw_ack_vector) && (msg->swack_vec) &&
(msg->msg_type != MSG_NOOP)) { (msg->msg_type != MSG_NOOP)) {
unsigned long mmr;
unsigned long mr;
/* /*
* make everyone else ignore this message * make everyone else ignore this message
*/ */
msg->canceled = 1; msg->canceled = 1;
slot = msg - bcp->va_queue_first;
count++;
/* /*
* only reset the resource if it is still pending * only reset the resource if it is still pending
*/ */
mmr = uv_read_local_mmr mmr = read_mmr_sw_ack();
(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); msg_res = msg->swack_vec;
msg_res = msg->sw_ack_vector; mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
if (mmr & msg_res) { if (mmr & msg_res) {
stat->d_rcanceled++; stat->d_rcanceled++;
uv_write_local_mmr( write_mmr_sw_ack(mr);
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
(msg_res << UV_SW_ACK_NPENDING) |
msg_res);
} }
} }
} }
...@@ -334,39 +366,38 @@ uv_do_reset(void *ptr) ...@@ -334,39 +366,38 @@ uv_do_reset(void *ptr)
* Use IPI to get all target uvhubs to release resources held by * Use IPI to get all target uvhubs to release resources held by
* a given sending cpu number. * a given sending cpu number.
*/ */
static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, static void reset_with_ipi(struct bau_targ_hubmask *distribution, int sender)
int sender)
{ {
int uvhub; int uvhub;
int cpu; int maskbits;
cpumask_t mask; cpumask_t mask;
struct reset_args reset_args; struct reset_args reset_args;
reset_args.sender = sender; reset_args.sender = sender;
cpus_clear(mask); cpus_clear(mask);
/* find a single cpu for each uvhub in this distribution mask */ /* find a single cpu for each uvhub in this distribution mask */
for (uvhub = 0; maskbits = sizeof(struct bau_targ_hubmask) * BITSPERBYTE;
uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; for (uvhub = 0; uvhub < maskbits; uvhub++) {
uvhub++) { int cpu;
if (!bau_uvhub_isset(uvhub, distribution)) if (!bau_uvhub_isset(uvhub, distribution))
continue; continue;
/* find a cpu for this uvhub */ /* find a cpu for this uvhub */
cpu = uvhub_to_first_cpu(uvhub); cpu = uvhub_to_first_cpu(uvhub);
cpu_set(cpu, mask); cpu_set(cpu, mask);
} }
/* IPI all cpus; Preemption is already disabled */
smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); /* IPI all cpus; preemption is already disabled */
smp_call_function_many(&mask, do_reset, (void *)&reset_args, 1);
return; return;
} }
static inline unsigned long static inline unsigned long cycles_2_us(unsigned long long cyc)
cycles_2_us(unsigned long long cyc)
{ {
unsigned long long ns; unsigned long long ns;
unsigned long us; unsigned long us;
ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) int cpu = smp_processor_id();
>> CYC2NS_SCALE_FACTOR;
ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR;
us = ns / 1000; us = ns / 1000;
return us; return us;
} }
...@@ -376,21 +407,27 @@ cycles_2_us(unsigned long long cyc) ...@@ -376,21 +407,27 @@ cycles_2_us(unsigned long long cyc)
* leaves uvhub_quiesce set so that no new broadcasts are started by * leaves uvhub_quiesce set so that no new broadcasts are started by
* bau_flush_send_and_wait() * bau_flush_send_and_wait()
*/ */
static inline void static inline void quiesce_local_uvhub(struct bau_control *hmaster)
quiesce_local_uvhub(struct bau_control *hmaster)
{ {
atomic_add_short_return(1, (struct atomic_short *) atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
&hmaster->uvhub_quiesce);
} }
/* /*
* mark this quiet-requestor as done * mark this quiet-requestor as done
*/ */
static inline void static inline void end_uvhub_quiesce(struct bau_control *hmaster)
end_uvhub_quiesce(struct bau_control *hmaster)
{ {
atomic_add_short_return(-1, (struct atomic_short *) atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
&hmaster->uvhub_quiesce); }
static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
{
unsigned long descriptor_status;
descriptor_status = uv_read_local_mmr(mmr_offset);
descriptor_status >>= right_shift;
descriptor_status &= UV_ACT_STATUS_MASK;
return descriptor_status;
} }
/* /*
...@@ -398,31 +435,28 @@ end_uvhub_quiesce(struct bau_control *hmaster) ...@@ -398,31 +435,28 @@ end_uvhub_quiesce(struct bau_control *hmaster)
* return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
*/ */
static int uv1_wait_completion(struct bau_desc *bau_desc, static int uv1_wait_completion(struct bau_desc *bau_desc,
unsigned long mmr_offset, int right_shift, int this_cpu, unsigned long mmr_offset, int right_shift,
struct bau_control *bcp, struct bau_control *smaster, long try) struct bau_control *bcp, long try)
{ {
unsigned long descriptor_status; unsigned long descriptor_status;
cycles_t ttime; cycles_t ttm;
struct ptc_stats *stat = bcp->statp; struct ptc_stats *stat = bcp->statp;
descriptor_status = uv1_read_status(mmr_offset, right_shift);
/* spin on the status MMR, waiting for it to go idle */ /* spin on the status MMR, waiting for it to go idle */
while ((descriptor_status = (((unsigned long) while ((descriptor_status != DS_IDLE)) {
uv_read_local_mmr(mmr_offset) >>
right_shift) & UV_ACT_STATUS_MASK)) !=
DESC_STATUS_IDLE) {
/* /*
* Our software ack messages may be blocked because * Our software ack messages may be blocked because
* there are no swack resources available. As long * there are no swack resources available. As long
* as none of them has timed out hardware will NACK * as none of them has timed out hardware will NACK
* our message and its state will stay IDLE. * our message and its state will stay IDLE.
*/ */
if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { if (descriptor_status == DS_SOURCE_TIMEOUT) {
stat->s_stimeout++; stat->s_stimeout++;
return FLUSH_GIVEUP; return FLUSH_GIVEUP;
} else if (descriptor_status == } else if (descriptor_status == DS_DESTINATION_TIMEOUT) {
DESC_STATUS_DESTINATION_TIMEOUT) {
stat->s_dtimeout++; stat->s_dtimeout++;
ttime = get_cycles(); ttm = get_cycles();
/* /*
* Our retries may be blocked by all destination * Our retries may be blocked by all destination
...@@ -430,8 +464,7 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, ...@@ -430,8 +464,7 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
* pending. In that case hardware returns the * pending. In that case hardware returns the
* ERROR that looks like a destination timeout. * ERROR that looks like a destination timeout.
*/ */
if (cycles_2_us(ttime - bcp->send_message) < if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
timeout_us) {
bcp->conseccompletes = 0; bcp->conseccompletes = 0;
return FLUSH_RETRY_PLUGGED; return FLUSH_RETRY_PLUGGED;
} }
...@@ -444,93 +477,106 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, ...@@ -444,93 +477,106 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
*/ */
cpu_relax(); cpu_relax();
} }
descriptor_status = uv1_read_status(mmr_offset, right_shift);
} }
bcp->conseccompletes++; bcp->conseccompletes++;
return FLUSH_COMPLETE; return FLUSH_COMPLETE;
} }
static int uv2_wait_completion(struct bau_desc *bau_desc, /*
unsigned long mmr_offset, int right_shift, int this_cpu, * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
struct bau_control *bcp, struct bau_control *smaster, long try) */
static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu)
{ {
unsigned long descriptor_status; unsigned long descriptor_status;
unsigned long descriptor_status2; unsigned long descriptor_status2;
int cpu;
cycles_t ttime; descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL;
descriptor_status = (descriptor_status << 1) | descriptor_status2;
return descriptor_status;
}
static int uv2_wait_completion(struct bau_desc *bau_desc,
unsigned long mmr_offset, int right_shift,
struct bau_control *bcp, long try)
{
unsigned long descriptor_stat;
cycles_t ttm;
int cpu = bcp->uvhub_cpu;
struct ptc_stats *stat = bcp->statp; struct ptc_stats *stat = bcp->statp;
/* UV2 has an extra bit of status */ descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
cpu = bcp->uvhub_cpu;
/* spin on the status MMR, waiting for it to go idle */ /* spin on the status MMR, waiting for it to go idle */
descriptor_status = (((unsigned long)(uv_read_local_mmr while (descriptor_stat != UV2H_DESC_IDLE) {
(mmr_offset)) >> right_shift) & UV_ACT_STATUS_MASK);
descriptor_status2 = (((unsigned long)uv_read_local_mmr
(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2) >> cpu) & 0x1UL);
descriptor_status = (descriptor_status << 1) |
descriptor_status2;
while (descriptor_status != UV2H_DESC_IDLE) {
/* /*
* Our software ack messages may be blocked because * Our software ack messages may be blocked because
* there are no swack resources available. As long * there are no swack resources available. As long
* as none of them has timed out hardware will NACK * as none of them has timed out hardware will NACK
* our message and its state will stay IDLE. * our message and its state will stay IDLE.
*/ */
if ((descriptor_status == UV2H_DESC_SOURCE_TIMEOUT) || if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
(descriptor_status == UV2H_DESC_DEST_STRONG_NACK) || (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) ||
(descriptor_status == UV2H_DESC_DEST_PUT_ERR)) { (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
stat->s_stimeout++; stat->s_stimeout++;
return FLUSH_GIVEUP; return FLUSH_GIVEUP;
} else if (descriptor_status == UV2H_DESC_DEST_TIMEOUT) { } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
stat->s_dtimeout++; stat->s_dtimeout++;
ttime = get_cycles(); ttm = get_cycles();
/* /*
* Our retries may be blocked by all destination * Our retries may be blocked by all destination
* swack resources being consumed, and a timeout * swack resources being consumed, and a timeout
* pending. In that case hardware returns the * pending. In that case hardware returns the
* ERROR that looks like a destination timeout. * ERROR that looks like a destination timeout.
*/ */
if (cycles_2_us(ttime - bcp->send_message) < if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
timeout_us) {
bcp->conseccompletes = 0; bcp->conseccompletes = 0;
return FLUSH_RETRY_PLUGGED; return FLUSH_RETRY_PLUGGED;
} }
bcp->conseccompletes = 0; bcp->conseccompletes = 0;
return FLUSH_RETRY_TIMEOUT; return FLUSH_RETRY_TIMEOUT;
} else { } else {
/* /*
* descriptor_status is still BUSY * descriptor_stat is still BUSY
*/ */
cpu_relax(); cpu_relax();
} }
descriptor_status = (((unsigned long)(uv_read_local_mmr descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
(mmr_offset)) >> right_shift) &
UV_ACT_STATUS_MASK);
descriptor_status2 = (((unsigned long)uv_read_local_mmr
(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2) >> cpu) &
0x1UL);
descriptor_status = (descriptor_status << 1) |
descriptor_status2;
} }
bcp->conseccompletes++; bcp->conseccompletes++;
return FLUSH_COMPLETE; return FLUSH_COMPLETE;
} }
static int uv_wait_completion(struct bau_desc *bau_desc, /*
unsigned long mmr_offset, int right_shift, int this_cpu, * There are 2 status registers; each and array[32] of 2 bits. Set up for
struct bau_control *bcp, struct bau_control *smaster, long try) * which register to read and position in that register based on cpu in
* current hub.
*/
static int wait_completion(struct bau_desc *bau_desc,
struct bau_control *bcp, long try)
{ {
int right_shift;
unsigned long mmr_offset;
int cpu = bcp->uvhub_cpu;
if (cpu < UV_CPUS_PER_AS) {
mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
right_shift = cpu * UV_ACT_STATUS_SIZE;
} else {
mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
}
if (is_uv1_hub()) if (is_uv1_hub())
return uv1_wait_completion(bau_desc, mmr_offset, right_shift, return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
this_cpu, bcp, smaster, try); bcp, try);
else else
return uv2_wait_completion(bau_desc, mmr_offset, right_shift, return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
this_cpu, bcp, smaster, try); bcp, try);
} }
static inline cycles_t static inline cycles_t sec_2_cycles(unsigned long sec)
sec_2_cycles(unsigned long sec)
{ {
unsigned long ns; unsigned long ns;
cycles_t cyc; cycles_t cyc;
...@@ -541,63 +587,50 @@ sec_2_cycles(unsigned long sec) ...@@ -541,63 +587,50 @@ sec_2_cycles(unsigned long sec)
} }
/* /*
* conditionally add 1 to *v, unless *v is >= u * Our retries are blocked by all destination sw ack resources being
* return 0 if we cannot add 1 to *v because it is >= u
* return 1 if we can add 1 to *v because it is < u
* the add is atomic
*
* This is close to atomic_add_unless(), but this allows the 'u' value
* to be lowered below the current 'v'. atomic_add_unless can only stop
* on equal.
*/
static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
{
spin_lock(lock);
if (atomic_read(v) >= u) {
spin_unlock(lock);
return 0;
}
atomic_inc(v);
spin_unlock(lock);
return 1;
}
/*
* Our retries are blocked by all destination swack resources being
* in use, and a timeout is pending. In that case hardware immediately * in use, and a timeout is pending. In that case hardware immediately
* returns the ERROR that looks like a destination timeout. * returns the ERROR that looks like a destination timeout.
*/ */
static void static void destination_plugged(struct bau_desc *bau_desc,
destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, struct bau_control *bcp,
struct bau_control *hmaster, struct ptc_stats *stat) struct bau_control *hmaster, struct ptc_stats *stat)
{ {
udelay(bcp->plugged_delay); udelay(bcp->plugged_delay);
bcp->plugged_tries++; bcp->plugged_tries++;
if (bcp->plugged_tries >= bcp->plugsb4reset) { if (bcp->plugged_tries >= bcp->plugsb4reset) {
bcp->plugged_tries = 0; bcp->plugged_tries = 0;
quiesce_local_uvhub(hmaster); quiesce_local_uvhub(hmaster);
spin_lock(&hmaster->queue_lock); spin_lock(&hmaster->queue_lock);
uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); reset_with_ipi(&bau_desc->distribution, bcp->cpu);
spin_unlock(&hmaster->queue_lock); spin_unlock(&hmaster->queue_lock);
end_uvhub_quiesce(hmaster); end_uvhub_quiesce(hmaster);
bcp->ipi_attempts++; bcp->ipi_attempts++;
stat->s_resets_plug++; stat->s_resets_plug++;
} }
} }
static void static void destination_timeout(struct bau_desc *bau_desc,
destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, struct bau_control *bcp, struct bau_control *hmaster,
struct bau_control *hmaster, struct ptc_stats *stat) struct ptc_stats *stat)
{ {
hmaster->max_bau_concurrent = 1; hmaster->max_concurr = 1;
bcp->timeout_tries++; bcp->timeout_tries++;
if (bcp->timeout_tries >= bcp->timeoutsb4reset) { if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
bcp->timeout_tries = 0; bcp->timeout_tries = 0;
quiesce_local_uvhub(hmaster); quiesce_local_uvhub(hmaster);
spin_lock(&hmaster->queue_lock); spin_lock(&hmaster->queue_lock);
uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); reset_with_ipi(&bau_desc->distribution, bcp->cpu);
spin_unlock(&hmaster->queue_lock); spin_unlock(&hmaster->queue_lock);
end_uvhub_quiesce(hmaster); end_uvhub_quiesce(hmaster);
bcp->ipi_attempts++; bcp->ipi_attempts++;
stat->s_resets_timeout++; stat->s_resets_timeout++;
} }
...@@ -607,34 +640,104 @@ destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, ...@@ -607,34 +640,104 @@ destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp,
* Completions are taking a very long time due to a congested numalink * Completions are taking a very long time due to a congested numalink
* network. * network.
*/ */
static void static void disable_for_congestion(struct bau_control *bcp,
disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) struct ptc_stats *stat)
{ {
int tcpu;
struct bau_control *tbcp;
/* let only one cpu do this disabling */ /* let only one cpu do this disabling */
spin_lock(&disable_lock); spin_lock(&disable_lock);
if (!baudisabled && bcp->period_requests && if (!baudisabled && bcp->period_requests &&
((bcp->period_time / bcp->period_requests) > congested_cycles)) { ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
int tcpu;
struct bau_control *tbcp;
/* it becomes this cpu's job to turn on the use of the /* it becomes this cpu's job to turn on the use of the
BAU again */ BAU again */
baudisabled = 1; baudisabled = 1;
bcp->set_bau_off = 1; bcp->set_bau_off = 1;
bcp->set_bau_on_time = get_cycles() + bcp->set_bau_on_time = get_cycles();
sec_2_cycles(bcp->congested_period); bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
stat->s_bau_disabled++; stat->s_bau_disabled++;
for_each_present_cpu(tcpu) { for_each_present_cpu(tcpu) {
tbcp = &per_cpu(bau_control, tcpu); tbcp = &per_cpu(bau_control, tcpu);
tbcp->baudisabled = 1; tbcp->baudisabled = 1;
} }
} }
spin_unlock(&disable_lock); spin_unlock(&disable_lock);
} }
/** static void count_max_concurr(int stat, struct bau_control *bcp,
* uv_flush_send_and_wait struct bau_control *hmaster)
* {
bcp->plugged_tries = 0;
bcp->timeout_tries = 0;
if (stat != FLUSH_COMPLETE)
return;
if (bcp->conseccompletes <= bcp->complete_threshold)
return;
if (hmaster->max_concurr >= hmaster->max_concurr_const)
return;
hmaster->max_concurr++;
}
/*
 * Fold the result of one send into the statistics.
 *
 * time1, time2:      cycle stamps taken around the send
 * bcp:               sender's control structure (per-period counters)
 * stat:              statistics block to update
 * completion_status: FLUSH_* result of the send
 * try:               attempt number
 *
 * When time2 is not later than time1 no elapsed time is recorded and
 * s_requestor is decremented instead.  Otherwise the elapsed cycles are
 * added to s_time, and a send that completed on try 1 also feeds the
 * period counters, which may lead to disable_for_congestion().
 */
static void record_send_stats(cycles_t time1, cycles_t time2,
		struct bau_control *bcp, struct ptc_stats *stat,
		int completion_status, int try)
{
	int completed = (completion_status == FLUSH_COMPLETE);

	if (time2 <= time1) {
		stat->s_requestor--;
	} else {
		cycles_t delta = time2 - time1;

		stat->s_time += delta;
		if (completed && try == 1) {
			/* update the period counters before testing them */
			bcp->period_requests++;
			bcp->period_time += delta;
			if (delta > congested_cycles &&
			    bcp->period_requests > bcp->cong_reps)
				disable_for_congestion(bcp, stat);
		}
	}

	if (completed && try > 1)
		stat->s_retriesok++;
	else if (completion_status == FLUSH_GIVEUP)
		stat->s_giveup++;
}
/*
* Because of a uv1 hardware bug only a limited number of concurrent
* requests can be made.
*
* Take one slot in hmaster->active_descriptor_count, bounded by
* hmaster->max_concurr. If the first attempt fails, count one throttle
* event in stat->s_throttles and spin with cpu_relax() until an attempt
* succeeds.
*
* NOTE(review): atomic_inc_unless_ge() is defined outside this view; it is
* presumed to return non-zero only when it performed the increment, using
* the passed lock - confirm against its definition.
*/
static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
{
spinlock_t *lock = &hmaster->uvhub_lock;
atomic_t *v;
v = &hmaster->active_descriptor_count;
if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) {
/* counted once per throttled send, not once per spin iteration */
stat->s_throttles++;
do {
cpu_relax();
} while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr));
}
}
/*
* Handle the completion status of a message send.
*
* Dispatches the two retry statuses to their handlers:
* FLUSH_RETRY_PLUGGED -> destination_plugged()
* FLUSH_RETRY_TIMEOUT -> destination_timeout()
* Any other completion_status is left untouched; nothing is done here.
* bau_desc, bcp, hmaster and stat are passed through unchanged.
*/
static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
struct bau_control *bcp, struct bau_control *hmaster,
struct ptc_stats *stat)
{
if (completion_status == FLUSH_RETRY_PLUGGED)
destination_plugged(bau_desc, bcp, hmaster, stat);
else if (completion_status == FLUSH_RETRY_TIMEOUT)
destination_timeout(bau_desc, bcp, hmaster, stat);
}
/*
* Send a broadcast and wait for it to complete. * Send a broadcast and wait for it to complete.
* *
* The flush_mask contains the cpus the broadcast is to be sent to including * The flush_mask contains the cpus the broadcast is to be sent to including
...@@ -645,45 +748,23 @@ disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) ...@@ -645,45 +748,23 @@ disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
* returned to the kernel. * returned to the kernel.
*/ */
int uv_flush_send_and_wait(struct bau_desc *bau_desc, int uv_flush_send_and_wait(struct bau_desc *bau_desc,
struct cpumask *flush_mask, struct bau_control *bcp) struct cpumask *flush_mask, struct bau_control *bcp)
{ {
int right_shift;
int completion_status = 0;
int seq_number = 0; int seq_number = 0;
int completion_stat = 0;
long try = 0; long try = 0;
int cpu = bcp->uvhub_cpu;
int this_cpu = bcp->cpu;
unsigned long mmr_offset;
unsigned long index; unsigned long index;
cycles_t time1; cycles_t time1;
cycles_t time2; cycles_t time2;
cycles_t elapsed;
struct ptc_stats *stat = bcp->statp; struct ptc_stats *stat = bcp->statp;
struct bau_control *smaster = bcp->socket_master;
struct bau_control *hmaster = bcp->uvhub_master; struct bau_control *hmaster = bcp->uvhub_master;
if (is_uv1_hub() && if (is_uv1_hub())
!atomic_inc_unless_ge(&hmaster->uvhub_lock, uv1_throttle(hmaster, stat);
&hmaster->active_descriptor_count,
hmaster->max_bau_concurrent)) {
stat->s_throttles++;
do {
cpu_relax();
} while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
&hmaster->active_descriptor_count,
hmaster->max_bau_concurrent));
}
while (hmaster->uvhub_quiesce) while (hmaster->uvhub_quiesce)
cpu_relax(); cpu_relax();
if (cpu < UV_CPUS_PER_ACT_STATUS) {
mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
right_shift = cpu * UV_ACT_STATUS_SIZE;
} else {
mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
right_shift =
((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
}
time1 = get_cycles(); time1 = get_cycles();
do { do {
if (try == 0) { if (try == 0) {
...@@ -693,64 +774,134 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc, ...@@ -693,64 +774,134 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
bau_desc->header.msg_type = MSG_RETRY; bau_desc->header.msg_type = MSG_RETRY;
stat->s_retry_messages++; stat->s_retry_messages++;
} }
bau_desc->header.sequence = seq_number; bau_desc->header.sequence = seq_number;
index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
bcp->uvhub_cpu;
bcp->send_message = get_cycles(); bcp->send_message = get_cycles();
uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
write_mmr_activation(index);
try++; try++;
completion_status = uv_wait_completion(bau_desc, mmr_offset, completion_stat = wait_completion(bau_desc, bcp, try);
right_shift, this_cpu, bcp, smaster, try);
handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
if (completion_status == FLUSH_RETRY_PLUGGED) {
destination_plugged(bau_desc, bcp, hmaster, stat);
} else if (completion_status == FLUSH_RETRY_TIMEOUT) {
destination_timeout(bau_desc, bcp, hmaster, stat);
}
if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
bcp->ipi_attempts = 0; bcp->ipi_attempts = 0;
completion_status = FLUSH_GIVEUP; completion_stat = FLUSH_GIVEUP;
break; break;
} }
cpu_relax(); cpu_relax();
} while ((completion_status == FLUSH_RETRY_PLUGGED) || } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
(completion_status == FLUSH_RETRY_TIMEOUT)); (completion_stat == FLUSH_RETRY_TIMEOUT));
time2 = get_cycles(); time2 = get_cycles();
bcp->plugged_tries = 0;
bcp->timeout_tries = 0; count_max_concurr(completion_stat, bcp, hmaster);
if ((completion_status == FLUSH_COMPLETE) &&
(bcp->conseccompletes > bcp->complete_threshold) &&
(hmaster->max_bau_concurrent <
hmaster->max_bau_concurrent_constant))
hmaster->max_bau_concurrent++;
while (hmaster->uvhub_quiesce) while (hmaster->uvhub_quiesce)
cpu_relax(); cpu_relax();
atomic_dec(&hmaster->active_descriptor_count); atomic_dec(&hmaster->active_descriptor_count);
if (time2 > time1) {
elapsed = time2 - time1; record_send_stats(time1, time2, bcp, stat, completion_stat, try);
stat->s_time += elapsed;
if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { if (completion_stat == FLUSH_GIVEUP)
bcp->period_requests++; return 1;
bcp->period_time += elapsed; return 0;
if ((elapsed > congested_cycles) && }
(bcp->period_requests > bcp->congested_reps)) {
disable_for_congestion(bcp, stat); /*
* The BAU is disabled. When the disabled time period has expired, the cpu
* that disabled it must re-enable it.
* Return 0 if it is re-enabled for all cpus.
*/
static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
{
int tcpu;
struct bau_control *tbcp;
if (bcp->set_bau_off) {
if (get_cycles() >= bcp->set_bau_on_time) {
stat->s_bau_reenabled++;
baudisabled = 0;
for_each_present_cpu(tcpu) {
tbcp = &per_cpu(bau_control, tcpu);
tbcp->baudisabled = 0;
tbcp->period_requests = 0;
tbcp->period_time = 0;
} }
return 0;
} }
}
return -1;
}
static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
int remotes, struct bau_desc *bau_desc)
{
stat->s_requestor++;
stat->s_ntargcpu += remotes + locals;
stat->s_ntargremotes += remotes;
stat->s_ntarglocals += locals;
/* uvhub statistics */
hubs = bau_uvhub_weight(&bau_desc->distribution);
if (locals) {
stat->s_ntarglocaluvhub++;
stat->s_ntargremoteuvhub += (hubs - 1);
} else } else
stat->s_requestor--; stat->s_ntargremoteuvhub += hubs;
if (completion_status == FLUSH_COMPLETE && try > 1)
stat->s_retriesok++; stat->s_ntarguvhub += hubs;
else if (completion_status == FLUSH_GIVEUP) {
stat->s_giveup++; if (hubs >= 16)
return 1; stat->s_ntarguvhub16++;
else if (hubs >= 8)
stat->s_ntarguvhub8++;
else if (hubs >= 4)
stat->s_ntarguvhub4++;
else if (hubs >= 2)
stat->s_ntarguvhub2++;
else
stat->s_ntarguvhub1++;
}
/*
* Translate a cpu mask to the uvhub distribution mask in the BAU
* activation descriptor.
*/
static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
struct bau_desc *bau_desc, int *localsp, int *remotesp)
{
int cpu;
int pnode;
int cnt = 0;
struct hub_and_pnode *hpp;
for_each_cpu(cpu, flush_mask) {
/*
* The distribution vector is a bit map of pnodes, relative
* to the partition base pnode (and the partition base nasid
* in the header).
* Translate cpu to pnode and hub using a local memory array.
*/
hpp = &bcp->socket_master->thp[cpu];
pnode = hpp->pnode - bcp->partition_base_pnode;
bau_uvhub_set(pnode, &bau_desc->distribution);
cnt++;
if (hpp->uvhub == bcp->uvhub)
(*localsp)++;
else
(*remotesp)++;
} }
if (!cnt)
return 1;
return 0; return 0;
} }
/** /*
* uv_flush_tlb_others - globally purge translation cache of a virtual * globally purge translation cache of a virtual address or all TLB's
* address or all TLB's
* @cpumask: mask of all cpu's in which the address is to be removed * @cpumask: mask of all cpu's in which the address is to be removed
* @mm: mm_struct containing virtual address range * @mm: mm_struct containing virtual address range
* @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
...@@ -774,20 +925,16 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc, ...@@ -774,20 +925,16 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
* done. The returned pointer is valid till preemption is re-enabled. * done. The returned pointer is valid till preemption is re-enabled.
*/ */
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, struct mm_struct *mm, unsigned long va,
unsigned long va, unsigned int cpu) unsigned int cpu)
{ {
int locals = 0; int locals = 0;
int remotes = 0; int remotes = 0;
int hubs = 0; int hubs = 0;
int tcpu;
int tpnode;
struct bau_desc *bau_desc; struct bau_desc *bau_desc;
struct cpumask *flush_mask; struct cpumask *flush_mask;
struct ptc_stats *stat; struct ptc_stats *stat;
struct bau_control *bcp; struct bau_control *bcp;
struct bau_control *tbcp;
struct hub_and_pnode *hpp;
/* kernel was booted 'nobau' */ /* kernel was booted 'nobau' */
if (nobau) if (nobau)
...@@ -798,20 +945,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, ...@@ -798,20 +945,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
/* bau was disabled due to slow response */ /* bau was disabled due to slow response */
if (bcp->baudisabled) { if (bcp->baudisabled) {
/* the cpu that disabled it must re-enable it */ if (check_enable(bcp, stat))
if (bcp->set_bau_off) { return cpumask;
if (get_cycles() >= bcp->set_bau_on_time) {
stat->s_bau_reenabled++;
baudisabled = 0;
for_each_present_cpu(tcpu) {
tbcp = &per_cpu(bau_control, tcpu);
tbcp->baudisabled = 0;
tbcp->period_requests = 0;
tbcp->period_time = 0;
}
}
}
return cpumask;
} }
/* /*
...@@ -822,59 +957,20 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, ...@@ -822,59 +957,20 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
/* don't actually do a shootdown of the local cpu */ /* don't actually do a shootdown of the local cpu */
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
if (cpu_isset(cpu, *cpumask)) if (cpu_isset(cpu, *cpumask))
stat->s_ntargself++; stat->s_ntargself++;
bau_desc = bcp->descriptor_base; bau_desc = bcp->descriptor_base;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu;
bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
for_each_cpu(tcpu, flush_mask) {
/*
* The distribution vector is a bit map of pnodes, relative
* to the partition base pnode (and the partition base nasid
* in the header).
* Translate cpu to pnode and hub using an array stored
* in local memory.
*/
hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
tpnode = hpp->pnode - bcp->partition_base_pnode;
bau_uvhub_set(tpnode, &bau_desc->distribution);
if (hpp->uvhub == bcp->uvhub)
locals++;
else
remotes++;
}
if ((locals + remotes) == 0)
return NULL; return NULL;
stat->s_requestor++;
stat->s_ntargcpu += remotes + locals;
stat->s_ntargremotes += remotes;
stat->s_ntarglocals += locals;
remotes = bau_uvhub_weight(&bau_desc->distribution);
/* uvhub statistics */ record_send_statistics(stat, locals, hubs, remotes, bau_desc);
hubs = bau_uvhub_weight(&bau_desc->distribution);
if (locals) {
stat->s_ntarglocaluvhub++;
stat->s_ntargremoteuvhub += (hubs - 1);
} else
stat->s_ntargremoteuvhub += hubs;
stat->s_ntarguvhub += hubs;
if (hubs >= 16)
stat->s_ntarguvhub16++;
else if (hubs >= 8)
stat->s_ntarguvhub8++;
else if (hubs >= 4)
stat->s_ntarguvhub4++;
else if (hubs >= 2)
stat->s_ntarguvhub2++;
else
stat->s_ntarguvhub1++;
bau_desc->payload.address = va; bau_desc->payload.address = va;
bau_desc->payload.sending_cpu = cpu; bau_desc->payload.sending_cpu = cpu;
/* /*
* uv_flush_send_and_wait returns 0 if all cpu's were messaged, * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
* or 1 if it gave up and the original cpumask should be returned. * or 1 if it gave up and the original cpumask should be returned.
...@@ -903,26 +999,31 @@ void uv_bau_message_interrupt(struct pt_regs *regs) ...@@ -903,26 +999,31 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
{ {
int count = 0; int count = 0;
cycles_t time_start; cycles_t time_start;
struct bau_payload_queue_entry *msg; struct bau_pq_entry *msg;
struct bau_control *bcp; struct bau_control *bcp;
struct ptc_stats *stat; struct ptc_stats *stat;
struct msg_desc msgdesc; struct msg_desc msgdesc;
time_start = get_cycles(); time_start = get_cycles();
bcp = &per_cpu(bau_control, smp_processor_id()); bcp = &per_cpu(bau_control, smp_processor_id());
stat = bcp->statp; stat = bcp->statp;
msgdesc.va_queue_first = bcp->va_queue_first;
msgdesc.va_queue_last = bcp->va_queue_last; msgdesc.queue_first = bcp->queue_first;
msgdesc.queue_last = bcp->queue_last;
msg = bcp->bau_msg_head; msg = bcp->bau_msg_head;
while (msg->sw_ack_vector) { while (msg->swack_vec) {
count++; count++;
msgdesc.msg_slot = msg - msgdesc.va_queue_first;
msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; msgdesc.msg_slot = msg - msgdesc.queue_first;
msgdesc.swack_slot = ffs(msg->swack_vec) - 1;
msgdesc.msg = msg; msgdesc.msg = msg;
uv_bau_process_message(&msgdesc, bcp); bau_process_message(&msgdesc, bcp);
msg++; msg++;
if (msg > msgdesc.va_queue_last) if (msg > msgdesc.queue_last)
msg = msgdesc.va_queue_first; msg = msgdesc.queue_first;
bcp->bau_msg_head = msg; bcp->bau_msg_head = msg;
} }
stat->d_time += (get_cycles() - time_start); stat->d_time += (get_cycles() - time_start);
...@@ -930,18 +1031,17 @@ void uv_bau_message_interrupt(struct pt_regs *regs) ...@@ -930,18 +1031,17 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
stat->d_nomsg++; stat->d_nomsg++;
else if (count > 1) else if (count > 1)
stat->d_multmsg++; stat->d_multmsg++;
ack_APIC_irq(); ack_APIC_irq();
} }
/* /*
* uv_enable_timeouts * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
*
* Each target uvhub (i.e. a uvhub that has no cpu's) needs to have
* shootdown message timeouts enabled. The timeout does not cause * shootdown message timeouts enabled. The timeout does not cause
* an interrupt, but causes an error message to be returned to * an interrupt, but causes an error message to be returned to
* the sender. * the sender.
*/ */
static void __init uv_enable_timeouts(void) static void __init enable_timeouts(void)
{ {
int uvhub; int uvhub;
int nuvhubs; int nuvhubs;
...@@ -955,52 +1055,44 @@ static void __init uv_enable_timeouts(void) ...@@ -955,52 +1055,44 @@ static void __init uv_enable_timeouts(void)
continue; continue;
pnode = uv_blade_to_pnode(uvhub); pnode = uv_blade_to_pnode(uvhub);
mmr_image = mmr_image = read_mmr_misc_control(pnode);
uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
/* /*
* Set the timeout period and then lock it in, in three * Set the timeout period and then lock it in, in three
* steps; captures and locks in the period. * steps; captures and locks in the period.
* *
* To program the period, the SOFT_ACK_MODE must be off. * To program the period, the SOFT_ACK_MODE must be off.
*/ */
mmr_image &= ~((unsigned long)1 << mmr_image &= ~(1L << SOFTACK_MSHIFT);
UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); write_mmr_misc_control(pnode, mmr_image);
uv_write_global_mmr64
(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
/* /*
* Set the 4-bit period. * Set the 4-bit period.
*/ */
mmr_image &= ~((unsigned long)0xf << mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT);
UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT);
mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << write_mmr_misc_control(pnode, mmr_image);
UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
uv_write_global_mmr64
(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
/* /*
* UV1: * UV1:
* Subsequent reversals of the timebase bit (3) cause an * Subsequent reversals of the timebase bit (3) cause an
* immediate timeout of one or all INTD resources as * immediate timeout of one or all INTD resources as
* indicated in bits 2:0 (7 causes all of them to timeout). * indicated in bits 2:0 (7 causes all of them to timeout).
*/ */
mmr_image |= ((unsigned long)1 << mmr_image |= (1L << SOFTACK_MSHIFT);
UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
if (is_uv2_hub()) { if (is_uv2_hub()) {
mmr_image |= ((unsigned long)1 << UV2_LEG_SHFT); mmr_image |= (1L << UV2_LEG_SHFT);
mmr_image |= ((unsigned long)1 << UV2_EXT_SHFT); mmr_image |= (1L << UV2_EXT_SHFT);
} }
uv_write_global_mmr64 write_mmr_misc_control(pnode, mmr_image);
(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
} }
} }
static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) static void *ptc_seq_start(struct seq_file *file, loff_t *offset)
{ {
if (*offset < num_possible_cpus()) if (*offset < num_possible_cpus())
return offset; return offset;
return NULL; return NULL;
} }
static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
{ {
(*offset)++; (*offset)++;
if (*offset < num_possible_cpus()) if (*offset < num_possible_cpus())
...@@ -1008,12 +1100,11 @@ static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) ...@@ -1008,12 +1100,11 @@ static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
return NULL; return NULL;
} }
static void uv_ptc_seq_stop(struct seq_file *file, void *data) static void ptc_seq_stop(struct seq_file *file, void *data)
{ {
} }
static inline unsigned long long static inline unsigned long long usec_2_cycles(unsigned long microsec)
microsec_2_cycles(unsigned long microsec)
{ {
unsigned long ns; unsigned long ns;
unsigned long long cyc; unsigned long long cyc;
...@@ -1024,29 +1115,27 @@ microsec_2_cycles(unsigned long microsec) ...@@ -1024,29 +1115,27 @@ microsec_2_cycles(unsigned long microsec)
} }
/* /*
* Display the statistics thru /proc. * Display the statistics thru /proc/sgi_uv/ptc_statistics
* 'data' points to the cpu number * 'data' points to the cpu number
* Note: see the descriptions in stat_description[].
*/ */
static int uv_ptc_seq_show(struct seq_file *file, void *data) static int ptc_seq_show(struct seq_file *file, void *data)
{ {
struct ptc_stats *stat; struct ptc_stats *stat;
int cpu; int cpu;
cpu = *(loff_t *)data; cpu = *(loff_t *)data;
if (!cpu) { if (!cpu) {
seq_printf(file, seq_printf(file,
"# cpu sent stime self locals remotes ncpus localhub "); "# cpu sent stime self locals remotes ncpus localhub ");
seq_printf(file, seq_printf(file,
"remotehub numuvhubs numuvhubs16 numuvhubs8 "); "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
seq_printf(file, seq_printf(file,
"numuvhubs4 numuvhubs2 numuvhubs1 dto "); "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok ");
seq_printf(file, seq_printf(file,
"retries rok resetp resett giveup sto bz throt "); "resetp resett giveup sto bz throt swack recv rtime ");
seq_printf(file, seq_printf(file,
"sw_ack recv rtime all "); "all one mult none retry canc nocan reset rcan ");
seq_printf(file,
"one mult none retry canc nocan reset rcan ");
seq_printf(file, seq_printf(file,
"disable enable\n"); "disable enable\n");
} }
...@@ -1073,8 +1162,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) ...@@ -1073,8 +1162,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
/* destination side statistics */ /* destination side statistics */
seq_printf(file, seq_printf(file,
"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
uv_read_global_mmr64(uv_cpu_to_pnode(cpu), read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
stat->d_requestee, cycles_2_us(stat->d_time), stat->d_requestee, cycles_2_us(stat->d_time),
stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
stat->d_nomsg, stat->d_retries, stat->d_canceled, stat->d_nomsg, stat->d_retries, stat->d_canceled,
...@@ -1083,7 +1171,6 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) ...@@ -1083,7 +1171,6 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
seq_printf(file, "%ld %ld\n", seq_printf(file, "%ld %ld\n",
stat->s_bau_disabled, stat->s_bau_reenabled); stat->s_bau_disabled, stat->s_bau_reenabled);
} }
return 0; return 0;
} }
...@@ -1091,18 +1178,18 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) ...@@ -1091,18 +1178,18 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
* Display the tunables thru debugfs * Display the tunables thru debugfs
*/ */
static ssize_t tunables_read(struct file *file, char __user *userbuf, static ssize_t tunables_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos) size_t count, loff_t *ppos)
{ {
char *buf; char *buf;
int ret; int ret;
buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
"max_bau_concurrent plugged_delay plugsb4reset", "max_concur plugged_delay plugsb4reset",
"timeoutsb4reset ipi_reset_limit complete_threshold", "timeoutsb4reset ipi_reset_limit complete_threshold",
"congested_response_us congested_reps congested_period", "congested_response_us congested_reps congested_period",
max_bau_concurrent, plugged_delay, plugsb4reset, max_concurr, plugged_delay, plugsb4reset,
timeoutsb4reset, ipi_reset_limit, complete_threshold, timeoutsb4reset, ipi_reset_limit, complete_threshold,
congested_response_us, congested_reps, congested_period); congested_respns_us, congested_reps, congested_period);
if (!buf) if (!buf)
return -ENOMEM; return -ENOMEM;
...@@ -1113,13 +1200,16 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf, ...@@ -1113,13 +1200,16 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf,
} }
/* /*
* -1: resetf the statistics * handle a write to /proc/sgi_uv/ptc_statistics
* -1: reset the statistics
* 0: display meaning of the statistics * 0: display meaning of the statistics
*/ */
static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, static ssize_t ptc_proc_write(struct file *file, const char __user *user,
size_t count, loff_t *data) size_t count, loff_t *data)
{ {
int cpu; int cpu;
int i;
int elements;
long input_arg; long input_arg;
char optstr[64]; char optstr[64];
struct ptc_stats *stat; struct ptc_stats *stat;
...@@ -1129,79 +1219,18 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, ...@@ -1129,79 +1219,18 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
if (copy_from_user(optstr, user, count)) if (copy_from_user(optstr, user, count))
return -EFAULT; return -EFAULT;
optstr[count - 1] = '\0'; optstr[count - 1] = '\0';
if (strict_strtol(optstr, 10, &input_arg) < 0) { if (strict_strtol(optstr, 10, &input_arg) < 0) {
printk(KERN_DEBUG "%s is invalid\n", optstr); printk(KERN_DEBUG "%s is invalid\n", optstr);
return -EINVAL; return -EINVAL;
} }
if (input_arg == 0) { if (input_arg == 0) {
elements = sizeof(stat_description)/sizeof(*stat_description);
printk(KERN_DEBUG "# cpu: cpu number\n"); printk(KERN_DEBUG "# cpu: cpu number\n");
printk(KERN_DEBUG "Sender statistics:\n"); printk(KERN_DEBUG "Sender statistics:\n");
printk(KERN_DEBUG for (i = 0; i < elements; i++)
"sent: number of shootdown messages sent\n"); printk(KERN_DEBUG "%s\n", stat_description[i]);
printk(KERN_DEBUG
"stime: time spent sending messages\n");
printk(KERN_DEBUG
"numuvhubs: number of hubs targeted with shootdown\n");
printk(KERN_DEBUG
"numuvhubs16: number times 16 or more hubs targeted\n");
printk(KERN_DEBUG
"numuvhubs8: number times 8 or more hubs targeted\n");
printk(KERN_DEBUG
"numuvhubs4: number times 4 or more hubs targeted\n");
printk(KERN_DEBUG
"numuvhubs2: number times 2 or more hubs targeted\n");
printk(KERN_DEBUG
"numuvhubs1: number times 1 hub targeted\n");
printk(KERN_DEBUG
"numcpus: number of cpus targeted with shootdown\n");
printk(KERN_DEBUG
"dto: number of destination timeouts\n");
printk(KERN_DEBUG
"retries: destination timeout retries sent\n");
printk(KERN_DEBUG
"rok: : destination timeouts successfully retried\n");
printk(KERN_DEBUG
"resetp: ipi-style resource resets for plugs\n");
printk(KERN_DEBUG
"resett: ipi-style resource resets for timeouts\n");
printk(KERN_DEBUG
"giveup: fall-backs to ipi-style shootdowns\n");
printk(KERN_DEBUG
"sto: number of source timeouts\n");
printk(KERN_DEBUG
"bz: number of stay-busy's\n");
printk(KERN_DEBUG
"throt: number times spun in throttle\n");
printk(KERN_DEBUG "Destination side statistics:\n");
printk(KERN_DEBUG
"sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
printk(KERN_DEBUG
"recv: shootdown messages received\n");
printk(KERN_DEBUG
"rtime: time spent processing messages\n");
printk(KERN_DEBUG
"all: shootdown all-tlb messages\n");
printk(KERN_DEBUG
"one: shootdown one-tlb messages\n");
printk(KERN_DEBUG
"mult: interrupts that found multiple messages\n");
printk(KERN_DEBUG
"none: interrupts that found no messages\n");
printk(KERN_DEBUG
"retry: number of retry messages processed\n");
printk(KERN_DEBUG
"canc: number messages canceled by retries\n");
printk(KERN_DEBUG
"nocan: number retries that found nothing to cancel\n");
printk(KERN_DEBUG
"reset: number of ipi-style reset requests processed\n");
printk(KERN_DEBUG
"rcan: number messages canceled by reset requests\n");
printk(KERN_DEBUG
"disable: number times use of the BAU was disabled\n");
printk(KERN_DEBUG
"enable: number times use of the BAU was re-enabled\n");
} else if (input_arg == -1) { } else if (input_arg == -1) {
for_each_present_cpu(cpu) { for_each_present_cpu(cpu) {
stat = &per_cpu(ptcstats, cpu); stat = &per_cpu(ptcstats, cpu);
...@@ -1228,27 +1257,18 @@ static int local_atoi(const char *name) ...@@ -1228,27 +1257,18 @@ static int local_atoi(const char *name)
} }
/* /*
* set the tunables * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables.
* 0 values reset them to defaults * Zero values reset them to defaults.
*/ */
static ssize_t tunables_write(struct file *file, const char __user *user, static int parse_tunables_write(struct bau_control *bcp, char *instr,
size_t count, loff_t *data) int count)
{ {
int cpu;
int cnt = 0;
int val;
char *p; char *p;
char *q; char *q;
char instr[64]; int cnt = 0;
struct bau_control *bcp; int val;
int e = sizeof(tunables) / sizeof(*tunables);
if (count == 0 || count > sizeof(instr)-1)
return -EINVAL;
if (copy_from_user(instr, user, count))
return -EFAULT;
instr[count] = '\0';
/* count the fields */
p = instr + strspn(instr, WHITESPACE); p = instr + strspn(instr, WHITESPACE);
q = p; q = p;
for (; *p; p = q + strspn(q, WHITESPACE)) { for (; *p; p = q + strspn(q, WHITESPACE)) {
...@@ -1257,8 +1277,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user, ...@@ -1257,8 +1277,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user,
if (q == p) if (q == p)
break; break;
} }
if (cnt != 9) { if (cnt != e) {
printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); printk(KERN_INFO "bau tunable error: should be %d values\n", e);
return -EINVAL; return -EINVAL;
} }
...@@ -1270,97 +1290,80 @@ static ssize_t tunables_write(struct file *file, const char __user *user, ...@@ -1270,97 +1290,80 @@ static ssize_t tunables_write(struct file *file, const char __user *user,
switch (cnt) { switch (cnt) {
case 0: case 0:
if (val == 0) { if (val == 0) {
max_bau_concurrent = MAX_BAU_CONCURRENT; max_concurr = MAX_BAU_CONCURRENT;
max_bau_concurrent_constant = max_concurr_const = MAX_BAU_CONCURRENT;
MAX_BAU_CONCURRENT;
continue; continue;
} }
bcp = &per_cpu(bau_control, smp_processor_id());
if (val < 1 || val > bcp->cpus_in_uvhub) { if (val < 1 || val > bcp->cpus_in_uvhub) {
printk(KERN_DEBUG printk(KERN_DEBUG
"Error: BAU max concurrent %d is invalid\n", "Error: BAU max concurrent %d is invalid\n",
val); val);
return -EINVAL; return -EINVAL;
} }
max_bau_concurrent = val; max_concurr = val;
max_bau_concurrent_constant = val; max_concurr_const = val;
continue;
case 1:
if (val == 0)
plugged_delay = PLUGGED_DELAY;
else
plugged_delay = val;
continue; continue;
case 2: default:
if (val == 0)
plugsb4reset = PLUGSB4RESET;
else
plugsb4reset = val;
continue;
case 3:
if (val == 0)
timeoutsb4reset = TIMEOUTSB4RESET;
else
timeoutsb4reset = val;
continue;
case 4:
if (val == 0)
ipi_reset_limit = IPI_RESET_LIMIT;
else
ipi_reset_limit = val;
continue;
case 5:
if (val == 0)
complete_threshold = COMPLETE_THRESHOLD;
else
complete_threshold = val;
continue;
case 6:
if (val == 0)
congested_response_us = CONGESTED_RESPONSE_US;
else
congested_response_us = val;
continue;
case 7:
if (val == 0)
congested_reps = CONGESTED_REPS;
else
congested_reps = val;
continue;
case 8:
if (val == 0) if (val == 0)
congested_period = CONGESTED_PERIOD; *tunables[cnt].tunp = tunables[cnt].deflt;
else else
congested_period = val; *tunables[cnt].tunp = val;
continue; continue;
} }
if (q == p) if (q == p)
break; break;
} }
return 0;
}
/*
* Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables)
*/
static ssize_t tunables_write(struct file *file, const char __user *user,
size_t count, loff_t *data)
{
int cpu;
int ret;
char instr[100];
struct bau_control *bcp;
if (count == 0 || count > sizeof(instr)-1)
return -EINVAL;
if (copy_from_user(instr, user, count))
return -EFAULT;
instr[count] = '\0';
bcp = &per_cpu(bau_control, smp_processor_id());
ret = parse_tunables_write(bcp, instr, count);
if (ret)
return ret;
for_each_present_cpu(cpu) { for_each_present_cpu(cpu) {
bcp = &per_cpu(bau_control, cpu); bcp = &per_cpu(bau_control, cpu);
bcp->max_bau_concurrent = max_bau_concurrent; bcp->max_concurr = max_concurr;
bcp->max_bau_concurrent_constant = max_bau_concurrent; bcp->max_concurr_const = max_concurr;
bcp->plugged_delay = plugged_delay; bcp->plugged_delay = plugged_delay;
bcp->plugsb4reset = plugsb4reset; bcp->plugsb4reset = plugsb4reset;
bcp->timeoutsb4reset = timeoutsb4reset; bcp->timeoutsb4reset = timeoutsb4reset;
bcp->ipi_reset_limit = ipi_reset_limit; bcp->ipi_reset_limit = ipi_reset_limit;
bcp->complete_threshold = complete_threshold; bcp->complete_threshold = complete_threshold;
bcp->congested_response_us = congested_response_us; bcp->cong_response_us = congested_respns_us;
bcp->congested_reps = congested_reps; bcp->cong_reps = congested_reps;
bcp->congested_period = congested_period; bcp->cong_period = congested_period;
} }
return count; return count;
} }
static const struct seq_operations uv_ptc_seq_ops = { static const struct seq_operations uv_ptc_seq_ops = {
.start = uv_ptc_seq_start, .start = ptc_seq_start,
.next = uv_ptc_seq_next, .next = ptc_seq_next,
.stop = uv_ptc_seq_stop, .stop = ptc_seq_stop,
.show = uv_ptc_seq_show .show = ptc_seq_show
}; };
static int uv_ptc_proc_open(struct inode *inode, struct file *file) static int ptc_proc_open(struct inode *inode, struct file *file)
{ {
return seq_open(file, &uv_ptc_seq_ops); return seq_open(file, &uv_ptc_seq_ops);
} }
...@@ -1371,9 +1374,9 @@ static int tunables_open(struct inode *inode, struct file *file) ...@@ -1371,9 +1374,9 @@ static int tunables_open(struct inode *inode, struct file *file)
} }
static const struct file_operations proc_uv_ptc_operations = { static const struct file_operations proc_uv_ptc_operations = {
.open = uv_ptc_proc_open, .open = ptc_proc_open,
.read = seq_read, .read = seq_read,
.write = uv_ptc_proc_write, .write = ptc_proc_write,
.llseek = seq_lseek, .llseek = seq_lseek,
.release = seq_release, .release = seq_release,
}; };
...@@ -1407,7 +1410,7 @@ static int __init uv_ptc_init(void) ...@@ -1407,7 +1410,7 @@ static int __init uv_ptc_init(void)
return -EINVAL; return -EINVAL;
} }
tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
tunables_dir, NULL, &tunables_fops); tunables_dir, NULL, &tunables_fops);
if (!tunables_file) { if (!tunables_file) {
printk(KERN_ERR "unable to create debugfs file %s\n", printk(KERN_ERR "unable to create debugfs file %s\n",
UV_BAU_TUNABLES_FILE); UV_BAU_TUNABLES_FILE);
...@@ -1419,24 +1422,24 @@ static int __init uv_ptc_init(void) ...@@ -1419,24 +1422,24 @@ static int __init uv_ptc_init(void)
/* /*
* Initialize the sending side's sending buffers. * Initialize the sending side's sending buffers.
*/ */
static void static void activation_descriptor_init(int node, int pnode, int base_pnode)
uv_activation_descriptor_init(int node, int pnode, int base_pnode)
{ {
int i; int i;
int cpu; int cpu;
unsigned long pa; unsigned long pa;
unsigned long m; unsigned long m;
unsigned long n; unsigned long n;
size_t dsize;
struct bau_desc *bau_desc; struct bau_desc *bau_desc;
struct bau_desc *bd2; struct bau_desc *bd2;
struct bau_control *bcp; struct bau_control *bcp;
/* /*
* each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC)
* per cpu; and one per cpu on the uvhub (UV_ADP_SIZE) * per cpu; and one per cpu on the uvhub (ADP_SZ)
*/ */
bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC;
* UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
BUG_ON(!bau_desc); BUG_ON(!bau_desc);
pa = uv_gpa(bau_desc); /* need the real nasid*/ pa = uv_gpa(bau_desc); /* need the real nasid*/
...@@ -1444,27 +1447,25 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode) ...@@ -1444,27 +1447,25 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode)
m = pa & uv_mmask; m = pa & uv_mmask;
/* the 14-bit pnode */ /* the 14-bit pnode */
uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
(n << UV_DESC_BASE_PNODE_SHIFT | m));
/* /*
* Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each * Initializing all 8 (ITEMS_PER_DESC) descriptors for each
* cpu even though we only use the first one; one descriptor can * cpu even though we only use the first one; one descriptor can
* describe a broadcast to 256 uv hubs. * describe a broadcast to 256 uv hubs.
*/ */
for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
i++, bd2++) {
memset(bd2, 0, sizeof(struct bau_desc)); memset(bd2, 0, sizeof(struct bau_desc));
bd2->header.sw_ack_flag = 1; bd2->header.swack_flag = 1;
/* /*
* The base_dest_nasid set in the message header is the nasid * The base_dest_nasid set in the message header is the nasid
* of the first uvhub in the partition. The bit map will * of the first uvhub in the partition. The bit map will
* indicate destination pnode numbers relative to that base. * indicate destination pnode numbers relative to that base.
* They may not be consecutive if nasid striding is being used. * They may not be consecutive if nasid striding is being used.
*/ */
bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
bd2->header.dest_subnodeid = UV_LB_SUBNODEID; bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
bd2->header.command = UV_NET_ENDPOINT_INTD; bd2->header.command = UV_NET_ENDPOINT_INTD;
bd2->header.int_both = 1; bd2->header.int_both = 1;
/* /*
* all others need to be set to zero: * all others need to be set to zero:
* fairness chaining multilevel count replied_to * fairness chaining multilevel count replied_to
...@@ -1484,57 +1485,55 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode) ...@@ -1484,57 +1485,55 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode)
* - node is first node (kernel memory notion) on the uvhub * - node is first node (kernel memory notion) on the uvhub
* - pnode is the uvhub's physical identifier * - pnode is the uvhub's physical identifier
*/ */
static void static void pq_init(int node, int pnode)
uv_payload_queue_init(int node, int pnode)
{ {
int pn;
int cpu; int cpu;
size_t plsize;
char *cp; char *cp;
unsigned long pa; void *vp;
struct bau_payload_queue_entry *pqp; unsigned long pn;
struct bau_payload_queue_entry *pqp_malloc; unsigned long first;
unsigned long pn_first;
unsigned long last;
struct bau_pq_entry *pqp;
struct bau_control *bcp; struct bau_control *bcp;
pqp = kmalloc_node((DEST_Q_SIZE + 1) plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
* sizeof(struct bau_payload_queue_entry), vp = kmalloc_node(plsize, GFP_KERNEL, node);
GFP_KERNEL, node); pqp = (struct bau_pq_entry *)vp;
BUG_ON(!pqp); BUG_ON(!pqp);
pqp_malloc = pqp;
cp = (char *)pqp + 31; cp = (char *)pqp + 31;
pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
for_each_present_cpu(cpu) { for_each_present_cpu(cpu) {
if (pnode != uv_cpu_to_pnode(cpu)) if (pnode != uv_cpu_to_pnode(cpu))
continue; continue;
/* for every cpu on this pnode: */ /* for every cpu on this pnode: */
bcp = &per_cpu(bau_control, cpu); bcp = &per_cpu(bau_control, cpu);
bcp->va_queue_first = pqp; bcp->queue_first = pqp;
bcp->bau_msg_head = pqp; bcp->bau_msg_head = pqp;
bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); bcp->queue_last = pqp + (DEST_Q_SIZE - 1);
} }
/* /*
* need the pnode of where the memory was really allocated * need the pnode of where the memory was really allocated
*/ */
pa = uv_gpa(pqp); pn = uv_gpa(pqp) >> uv_nshift;
pn = pa >> uv_nshift; first = uv_physnodeaddr(pqp);
uv_write_global_mmr64(pnode, pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | write_mmr_payload_first(pnode, pn_first);
uv_physnodeaddr(pqp)); write_mmr_payload_tail(pnode, first);
uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, write_mmr_payload_last(pnode, last);
uv_physnodeaddr(pqp));
uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
(unsigned long)
uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)));
/* in effect, all msg_type's are set to MSG_NOOP */ /* in effect, all msg_type's are set to MSG_NOOP */
memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
} }
/* /*
* Initialization of each UV hub's structures * Initialization of each UV hub's structures
*/ */
static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) static void __init init_uvhub(int uvhub, int vector, int base_pnode)
{ {
int node; int node;
int pnode; int pnode;
...@@ -1542,24 +1541,24 @@ static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) ...@@ -1542,24 +1541,24 @@ static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
node = uvhub_to_first_node(uvhub); node = uvhub_to_first_node(uvhub);
pnode = uv_blade_to_pnode(uvhub); pnode = uv_blade_to_pnode(uvhub);
uv_activation_descriptor_init(node, pnode, base_pnode);
uv_payload_queue_init(node, pnode); activation_descriptor_init(node, pnode, base_pnode);
pq_init(node, pnode);
/* /*
* The below initialization can't be in firmware because the * The below initialization can't be in firmware because the
* messaging IRQ will be determined by the OS. * messaging IRQ will be determined by the OS.
*/ */
apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, write_mmr_data_config(pnode, ((apicid << 32) | vector));
((apicid << 32) | vector));
} }
/* /*
* We will set BAU_MISC_CONTROL with a timeout period. * We will set BAU_MISC_CONTROL with a timeout period.
* But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
* So the destination timeout period has be be calculated from them. * So the destination timeout period has to be calculated from them.
*/ */
static int static int calculate_destination_timeout(void)
calculate_destination_timeout(void)
{ {
unsigned long mmr_image; unsigned long mmr_image;
int mult1; int mult1;
...@@ -1570,8 +1569,7 @@ calculate_destination_timeout(void) ...@@ -1570,8 +1569,7 @@ calculate_destination_timeout(void)
unsigned long ts_ns; unsigned long ts_ns;
if (is_uv1_hub()) { if (is_uv1_hub()) {
mult1 = UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD & mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
BAU_MISC_CONTROL_MULT_MASK;
mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
...@@ -1583,7 +1581,7 @@ calculate_destination_timeout(void) ...@@ -1583,7 +1581,7 @@ calculate_destination_timeout(void)
/* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */
mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
if (mmr_image & ((unsigned long)1 << UV2_ACK_UNITS_SHFT)) if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
mult1 = 80; mult1 = 80;
else else
mult1 = 10; mult1 = 10;
...@@ -1593,62 +1591,69 @@ calculate_destination_timeout(void) ...@@ -1593,62 +1591,69 @@ calculate_destination_timeout(void)
return ret; return ret;
} }
/*
 * Load every present cpu's bau_control with the current values of the
 * file-level tunable variables, point it at that cpu's ptcstats area,
 * and mark the BAU as not disabled for it.
 */
static void __init init_per_cpu_tunables(void)
{
	int cpu;

	for_each_present_cpu(cpu) {
		struct bau_control *ctl = &per_cpu(bau_control, cpu);

		ctl->baudisabled = 0;
		ctl->statp = &per_cpu(ptcstats, cpu);

		/* time interval to catch a hardware stay-busy bug */
		ctl->timeout_interval = usec_2_cycles(2*timeout_us);

		/* both the working limit and its saved copy start equal */
		ctl->max_concurr = max_concurr;
		ctl->max_concurr_const = max_concurr;

		/* reset / retry related tunables */
		ctl->plugged_delay = plugged_delay;
		ctl->plugsb4reset = plugsb4reset;
		ctl->timeoutsb4reset = timeoutsb4reset;
		ctl->ipi_reset_limit = ipi_reset_limit;
		ctl->complete_threshold = complete_threshold;

		/* congestion related tunables */
		ctl->cong_response_us = congested_respns_us;
		ctl->cong_reps = congested_reps;
		ctl->cong_period = congested_period;
	}
}
/* /*
* initialize the bau_control structure for each cpu * Scan all cpus to collect blade and socket summaries.
*/ */
static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) static int __init get_cpu_topology(int base_pnode,
struct uvhub_desc *uvhub_descs,
unsigned char *uvhub_mask)
{ {
int i;
int cpu; int cpu;
int tcpu;
int pnode; int pnode;
int uvhub; int uvhub;
int have_hmaster; int socket;
short socket = 0;
unsigned short socket_mask;
unsigned char *uvhub_mask;
struct bau_control *bcp; struct bau_control *bcp;
struct uvhub_desc *bdp; struct uvhub_desc *bdp;
struct socket_desc *sdp; struct socket_desc *sdp;
struct bau_control *hmaster = NULL;
struct bau_control *smaster = NULL;
struct socket_desc {
short num_cpus;
short cpu_number[MAX_CPUS_PER_SOCKET];
};
struct uvhub_desc {
unsigned short socket_mask;
short num_cpus;
short uvhub;
short pnode;
struct socket_desc socket[2];
};
struct uvhub_desc *uvhub_descs;
timeout_us = calculate_destination_timeout();
uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
for_each_present_cpu(cpu) { for_each_present_cpu(cpu) {
bcp = &per_cpu(bau_control, cpu); bcp = &per_cpu(bau_control, cpu);
memset(bcp, 0, sizeof(struct bau_control)); memset(bcp, 0, sizeof(struct bau_control));
pnode = uv_cpu_hub_info(cpu)->pnode; pnode = uv_cpu_hub_info(cpu)->pnode;
if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
printk(KERN_EMERG printk(KERN_EMERG
"cpu %d pnode %d-%d beyond %d; BAU disabled\n", "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
cpu, pnode, base_part_pnode, cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
UV_DISTRIBUTION_SIZE);
return 1; return 1;
} }
bcp->osnode = cpu_to_node(cpu); bcp->osnode = cpu_to_node(cpu);
bcp->partition_base_pnode = uv_partition_base_pnode; bcp->partition_base_pnode = base_pnode;
uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
bdp = &uvhub_descs[uvhub]; bdp = &uvhub_descs[uvhub];
bdp->num_cpus++; bdp->num_cpus++;
bdp->uvhub = uvhub; bdp->uvhub = uvhub;
bdp->pnode = pnode; bdp->pnode = pnode;
/* kludge: 'assuming' one node per socket, and assuming that /* kludge: 'assuming' one node per socket, and assuming that
disabling a socket just leaves a gap in node numbers */ disabling a socket just leaves a gap in node numbers */
socket = bcp->osnode & 1; socket = bcp->osnode & 1;
...@@ -1657,84 +1662,129 @@ static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) ...@@ -1657,84 +1662,129 @@ static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
sdp->cpu_number[sdp->num_cpus] = cpu; sdp->cpu_number[sdp->num_cpus] = cpu;
sdp->num_cpus++; sdp->num_cpus++;
if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus); printk(KERN_EMERG "%d cpus per socket invalid\n",
sdp->num_cpus);
return 1; return 1;
} }
} }
return 0;
}
/*
 * Each socket is to get a local array of pnodes/hubs.
 *
 * Allocates smaster->thp on the socket master's node (smaster->osnode),
 * zeroes it, and records the pnode and uvhub (numa_blade_id) of every
 * present cpu, indexed by cpu number.
 *
 * An allocation failure is fatal (BUG_ON), the same way the other
 * initialization-time kmalloc_node() calls in this file are handled.
 *
 * NOTE(review): the array is sized by num_possible_cpus() but indexed by
 * cpu id; this assumes cpu ids are dense - confirm.
 */
static void make_per_cpu_thp(struct bau_control *smaster)
{
	int cpu;
	size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();

	smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
	/* stop here rather than hand a NULL pointer to memset() below */
	BUG_ON(!smaster->thp);
	memset(smaster->thp, 0, hpsz);

	for_each_present_cpu(cpu) {
		smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
		smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
	}
}
/*
 * Fill in the per_cpu bau_control of every cpu listed in one socket_desc.
 *
 * The first cpu of the socket is reported through *smasterp as the socket
 * master; it also becomes the hub master (*hmasterp) when the caller has
 * not yet been given one (i.e. *hmasterp is still NULL on entry).
 *
 * Returns 0 on success, or 1 as soon as a cpu's blade_processor_id is
 * found to be MAX_CPUS_PER_UVHUB or greater (cpus after it are left
 * unprocessed).
 */
static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
			struct bau_control **smasterp,
			struct bau_control **hmasterp)
{
	int idx = 0;

	while (idx < sdp->num_cpus) {
		int cpu = sdp->cpu_number[idx];
		struct bau_control *ctl = &per_cpu(bau_control, cpu);

		ctl->cpu = cpu;

		/* the socket's first cpu is its master */
		if (!idx) {
			*smasterp = ctl;
			/* ... and the hub's master, if none chosen so far */
			if (!*hmasterp)
				*hmasterp = ctl;
		}

		ctl->cpus_in_uvhub = bdp->num_cpus;
		ctl->cpus_in_socket = sdp->num_cpus;
		ctl->socket_master = *smasterp;
		ctl->uvhub = bdp->uvhub;
		ctl->uvhub_master = *hmasterp;
		ctl->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;

		if (ctl->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
				ctl->uvhub_cpu);
			return 1;
		}

		idx++;
	}

	return 0;
}
/*
* Summarize the blade and socket topology into the per_cpu structures.
*/
static int __init summarize_uvhub_sockets(int nuvhubs,
struct uvhub_desc *uvhub_descs,
unsigned char *uvhub_mask)
{
int socket;
int uvhub;
unsigned short socket_mask;
for (uvhub = 0; uvhub < nuvhubs; uvhub++) { for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
struct uvhub_desc *bdp;
struct bau_control *smaster = NULL;
struct bau_control *hmaster = NULL;
if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
continue; continue;
have_hmaster = 0;
bdp = &uvhub_descs[uvhub]; bdp = &uvhub_descs[uvhub];
socket_mask = bdp->socket_mask; socket_mask = bdp->socket_mask;
socket = 0; socket = 0;
while (socket_mask) { while (socket_mask) {
if (!(socket_mask & 1)) struct socket_desc *sdp;
goto nextsocket; if ((socket_mask & 1)) {
sdp = &bdp->socket[socket]; sdp = &bdp->socket[socket];
for (i = 0; i < sdp->num_cpus; i++) { if (scan_sock(sdp, bdp, &smaster, &hmaster))
cpu = sdp->cpu_number[i];
bcp = &per_cpu(bau_control, cpu);
bcp->cpu = cpu;
if (i == 0) {
smaster = bcp;
if (!have_hmaster) {
have_hmaster++;
hmaster = bcp;
}
}
bcp->cpus_in_uvhub = bdp->num_cpus;
bcp->cpus_in_socket = sdp->num_cpus;
bcp->socket_master = smaster;
bcp->uvhub = bdp->uvhub;
bcp->uvhub_master = hmaster;
bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
blade_processor_id;
if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
printk(KERN_EMERG
"%d cpus per uvhub invalid\n",
bcp->uvhub_cpu);
return 1; return 1;
}
} }
nextsocket:
socket++; socket++;
socket_mask = (socket_mask >> 1); socket_mask = (socket_mask >> 1);
/* each socket gets a local array of pnodes/hubs */ make_per_cpu_thp(smaster);
bcp = smaster;
bcp->target_hub_and_pnode = kmalloc_node(
sizeof(struct hub_and_pnode) *
num_possible_cpus(), GFP_KERNEL, bcp->osnode);
memset(bcp->target_hub_and_pnode, 0,
sizeof(struct hub_and_pnode) *
num_possible_cpus());
for_each_present_cpu(tcpu) {
bcp->target_hub_and_pnode[tcpu].pnode =
uv_cpu_hub_info(tcpu)->pnode;
bcp->target_hub_and_pnode[tcpu].uvhub =
uv_cpu_hub_info(tcpu)->numa_blade_id;
}
} }
} }
return 0;
}
/*
* initialize the bau_control structure for each cpu
*/
static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
{
unsigned char *uvhub_mask;
void *vp;
struct uvhub_desc *uvhub_descs;
timeout_us = calculate_destination_timeout();
vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
uvhub_descs = (struct uvhub_desc *)vp;
memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
return 1;
if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))
return 1;
kfree(uvhub_descs); kfree(uvhub_descs);
kfree(uvhub_mask); kfree(uvhub_mask);
for_each_present_cpu(cpu) { init_per_cpu_tunables();
bcp = &per_cpu(bau_control, cpu);
bcp->baudisabled = 0;
bcp->statp = &per_cpu(ptcstats, cpu);
/* time interval to catch a hardware stay-busy bug */
bcp->timeout_interval = microsec_2_cycles(2*timeout_us);
bcp->max_bau_concurrent = max_bau_concurrent;
bcp->max_bau_concurrent_constant = max_bau_concurrent;
bcp->plugged_delay = plugged_delay;
bcp->plugsb4reset = plugsb4reset;
bcp->timeoutsb4reset = timeoutsb4reset;
bcp->ipi_reset_limit = ipi_reset_limit;
bcp->complete_threshold = complete_threshold;
bcp->congested_response_us = congested_response_us;
bcp->congested_reps = congested_reps;
bcp->congested_period = congested_period;
}
return 0; return 0;
} }
...@@ -1747,8 +1797,9 @@ static int __init uv_bau_init(void) ...@@ -1747,8 +1797,9 @@ static int __init uv_bau_init(void)
int pnode; int pnode;
int nuvhubs; int nuvhubs;
int cur_cpu; int cur_cpu;
int cpus;
int vector; int vector;
unsigned long mmr; cpumask_var_t *mask;
if (!is_uv_system()) if (!is_uv_system())
return 0; return 0;
...@@ -1756,24 +1807,25 @@ static int __init uv_bau_init(void) ...@@ -1756,24 +1807,25 @@ static int __init uv_bau_init(void)
if (nobau) if (nobau)
return 0; return 0;
for_each_possible_cpu(cur_cpu) for_each_possible_cpu(cur_cpu) {
zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
GFP_KERNEL, cpu_to_node(cur_cpu)); zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
}
uv_nshift = uv_hub_info->m_val; uv_nshift = uv_hub_info->m_val;
uv_mmask = (1UL << uv_hub_info->m_val) - 1; uv_mmask = (1UL << uv_hub_info->m_val) - 1;
nuvhubs = uv_num_possible_blades(); nuvhubs = uv_num_possible_blades();
spin_lock_init(&disable_lock); spin_lock_init(&disable_lock);
congested_cycles = microsec_2_cycles(congested_response_us); congested_cycles = usec_2_cycles(congested_respns_us);
uv_partition_base_pnode = 0x7fffffff; uv_base_pnode = 0x7fffffff;
for (uvhub = 0; uvhub < nuvhubs; uvhub++) { for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
if (uv_blade_nr_possible_cpus(uvhub) && cpus = uv_blade_nr_possible_cpus(uvhub);
(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
uv_partition_base_pnode = uv_blade_to_pnode(uvhub); uv_base_pnode = uv_blade_to_pnode(uvhub);
} }
if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { if (init_per_cpu(nuvhubs, uv_base_pnode)) {
nobau = 1; nobau = 1;
return 0; return 0;
} }
...@@ -1781,21 +1833,21 @@ static int __init uv_bau_init(void) ...@@ -1781,21 +1833,21 @@ static int __init uv_bau_init(void)
vector = UV_BAU_MESSAGE; vector = UV_BAU_MESSAGE;
for_each_possible_blade(uvhub) for_each_possible_blade(uvhub)
if (uv_blade_nr_possible_cpus(uvhub)) if (uv_blade_nr_possible_cpus(uvhub))
uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); init_uvhub(uvhub, vector, uv_base_pnode);
uv_enable_timeouts(); enable_timeouts();
alloc_intr_gate(vector, uv_bau_message_intr1); alloc_intr_gate(vector, uv_bau_message_intr1);
for_each_possible_blade(uvhub) { for_each_possible_blade(uvhub) {
if (uv_blade_nr_possible_cpus(uvhub)) { if (uv_blade_nr_possible_cpus(uvhub)) {
unsigned long val;
unsigned long mmr;
pnode = uv_blade_to_pnode(uvhub); pnode = uv_blade_to_pnode(uvhub);
/* INIT the bau */ /* INIT the bau */
uv_write_global_mmr64(pnode, val = 1L << 63;
UVH_LB_BAU_SB_ACTIVATION_CONTROL, write_gmmr_activation(pnode, val);
((unsigned long)1 << 63));
mmr = 1; /* should be 1 to broadcast to both sockets */ mmr = 1; /* should be 1 to broadcast to both sockets */
uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, write_mmr_data_broadcast(pnode, mmr);
mmr);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment