Commit 8b6e511e, authored by Cliff Wickman, committed by Ingo Molnar

x86/uv: Work around UV2 BAU hangs

On SGI's UV2 the BAU (Broadcast Assist Unit) driver can hang
under a heavy load. To cure this:

- Disable the UV2 extended status mode (see UV2_EXT_SHFT), as
  this mode changes BAU behavior in more ways than just delivering
  an extra bit of status.  Revert status to just two meaningful bits,
  like UV1.

- Use no IPI-style resets on UV2.  Just give up the request,
  whatever the reason it failed, and let it be accomplished with
  the legacy IPI method.

- Use no alternate sending descriptor (the former UV2 workaround
  bcp->using_desc and handle_uv2_busy() stuff).  Just disable the
  use of the BAU for a period of time in favor of the legacy IPI
  method when the h/w bug leaves a descriptor busy.

  -- new tunable: giveup_limit determines the threshold at which a hub is
     so plugged that it should do all requests with the legacy IPI method for a
     period of time
  -- generalize disable_for_congestion() (renamed disable_for_period()) for
     use whenever a hub should avoid using the BAU for a period of time

Also:

 - Fix find_another_by_swack(), which is part of the UV2 bug workaround

 - Correct and clarify the statistics (new stats s_overipilimit, s_giveuplimit,
   s_enters, s_ipifordisabled, s_plugged, s_congested)
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120622131459.GC31884@sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 26ef8577
...@@ -140,6 +140,9 @@ ...@@ -140,6 +140,9 @@
#define IPI_RESET_LIMIT 1 #define IPI_RESET_LIMIT 1
/* after this # consecutive successes, bump up the throttle if it was lowered */ /* after this # consecutive successes, bump up the throttle if it was lowered */
#define COMPLETE_THRESHOLD 5 #define COMPLETE_THRESHOLD 5
/* after this # of giveups (fall back to kernel IPI's) disable the use of
the BAU for a period of time */
#define GIVEUP_LIMIT 100
#define UV_LB_SUBNODEID 0x10 #define UV_LB_SUBNODEID 0x10
...@@ -166,7 +169,6 @@ ...@@ -166,7 +169,6 @@
#define FLUSH_RETRY_TIMEOUT 2 #define FLUSH_RETRY_TIMEOUT 2
#define FLUSH_GIVEUP 3 #define FLUSH_GIVEUP 3
#define FLUSH_COMPLETE 4 #define FLUSH_COMPLETE 4
#define FLUSH_RETRY_BUSYBUG 5
/* /*
* tuning the action when the numalink network is extremely delayed * tuning the action when the numalink network is extremely delayed
...@@ -175,7 +177,7 @@ ...@@ -175,7 +177,7 @@
microseconds */ microseconds */
#define CONGESTED_REPS 10 /* long delays averaged over #define CONGESTED_REPS 10 /* long delays averaged over
this many broadcasts */ this many broadcasts */
#define CONGESTED_PERIOD 30 /* time for the bau to be #define DISABLED_PERIOD 10 /* time for the bau to be
disabled, in seconds */ disabled, in seconds */
/* see msg_type: */ /* see msg_type: */
#define MSG_NOOP 0 #define MSG_NOOP 0
...@@ -520,7 +522,12 @@ struct ptc_stats { ...@@ -520,7 +522,12 @@ struct ptc_stats {
unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */ unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
unsigned long s_enters; /* entries to the driver */ unsigned long s_overipilimit; /* over the ipi reset limit */
unsigned long s_giveuplimit; /* disables, over giveup limit*/
unsigned long s_enters; /* entries to the driver */
unsigned long s_ipifordisabled; /* fall back to IPI; disabled */
unsigned long s_plugged; /* plugged by h/w bug*/
unsigned long s_congested; /* giveup on long wait */
/* destination statistics */ /* destination statistics */
unsigned long d_alltlb; /* times all tlb's on this unsigned long d_alltlb; /* times all tlb's on this
cpu were flushed */ cpu were flushed */
...@@ -588,8 +595,7 @@ struct bau_control { ...@@ -588,8 +595,7 @@ struct bau_control {
int ipi_attempts; int ipi_attempts;
int conseccompletes; int conseccompletes;
short nobau; short nobau;
int baudisabled; short baudisabled;
int set_bau_off;
short cpu; short cpu;
short osnode; short osnode;
short uvhub_cpu; short uvhub_cpu;
...@@ -598,14 +604,16 @@ struct bau_control { ...@@ -598,14 +604,16 @@ struct bau_control {
short cpus_in_socket; short cpus_in_socket;
short cpus_in_uvhub; short cpus_in_uvhub;
short partition_base_pnode; short partition_base_pnode;
short using_desc; /* an index, like uvhub_cpu */ short busy; /* all were busy (war) */
unsigned int inuse_map;
unsigned short message_number; unsigned short message_number;
unsigned short uvhub_quiesce; unsigned short uvhub_quiesce;
short socket_acknowledge_count[DEST_Q_SIZE]; short socket_acknowledge_count[DEST_Q_SIZE];
cycles_t send_message; cycles_t send_message;
cycles_t period_end;
cycles_t period_time;
spinlock_t uvhub_lock; spinlock_t uvhub_lock;
spinlock_t queue_lock; spinlock_t queue_lock;
spinlock_t disable_lock;
/* tunables */ /* tunables */
int max_concurr; int max_concurr;
int max_concurr_const; int max_concurr_const;
...@@ -616,9 +624,9 @@ struct bau_control { ...@@ -616,9 +624,9 @@ struct bau_control {
int complete_threshold; int complete_threshold;
int cong_response_us; int cong_response_us;
int cong_reps; int cong_reps;
int cong_period; cycles_t disabled_period;
unsigned long clocks_per_100_usec; int period_giveups;
cycles_t period_time; int giveup_limit;
long period_requests; long period_requests;
struct hub_and_pnode *thp; struct hub_and_pnode *thp;
}; };
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment