Commit 1203a63d authored by Weilin Wang, committed by Namhyung Kim

perf test: Rerun failed metrics with longer workload

Rerun failed metrics with a longer workload to avoid false failures, because
the metric value test sometimes fails when it runs for only a very short
amount of time. Skip the rerun if 20 or more metrics fail.
Signed-off-by: Weilin Wang <weilin.wang@intel.com>
Tested-by: Namhyung Kim <namhyung@kernel.org>
Cc: ravi.bangoria@amd.com
Cc: Ian Rogers <irogers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Samantha Alt <samantha.alt@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Link: https://lore.kernel.org/r/20230620170027.1861012-4-weilin.wang@intel.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
parent a0f1cc18
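Note: the patch below gates the rerun on the number of first-pass failures: metrics that fail are collected into a rerun list, and only if fewer than 20 of them failed are they re-measured against the longer workload, with any metric that now passes dropped from the failure set. A minimal, self-contained sketch of that pattern (the function and variable names here are illustrative, not the actual Validator methods):

# Minimal sketch of the rerun gate this patch introduces; names are illustrative.
RERUN_LIMIT = 20   # the patch skips the rerun when 20 or more metrics fail

def validate_with_rerun(first_pass, rerun, passes):
    """first_pass maps metric name -> value; metrics failing `passes` are
    retried once via `rerun`, which re-measures them with the longer workload."""
    failures = {name: val for name, val in first_pass.items() if not passes(val)}
    if 0 < len(failures) < RERUN_LIMIT:
        second = rerun(list(failures))            # re-measure only the failed metrics
        for name, val in second.items():
            if name in failures and passes(val):
                del failures[name]                # passed on the second run
    return failures

# Example with a trivial "value must be non-negative" check:
failed = validate_with_rerun({"metric_a": 0.3, "metric_b": -0.01},
                             rerun=lambda names: {n: 0.05 for n in names},
                             passes=lambda v: v >= 0)
print(failed)   # {} -- metric_b passed on the rerun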
@@ -11,8 +11,9 @@ class Validator:
         self.rulefname = rulefname
         self.reportfname = reportfname
         self.rules = None
-        self.collectlist=metrics
-        self.metrics = set(metrics)
+        self.collectlist:str = metrics
+        self.metrics = self.__set_metrics(metrics)
+        self.skiplist = set()
         self.tolerance = t
 
         self.workloads = [x for x in workload.split(",") if x]
@@ -41,6 +42,12 @@ class Validator:
         self.debug = debug
         self.fullrulefname = fullrulefname
 
+    def __set_metrics(self, metrics=''):
+        if metrics != '':
+            return set(metrics.split(","))
+        else:
+            return set()
+
     def read_json(self, filename: str) -> dict:
         try:
             with open(Path(filename).resolve(), "r") as f:
@@ -113,7 +120,7 @@ class Validator:
         All future test(s) on this metric will fail.
 
         @param name: name of the metric
-        @returns: list with value found in self.results; list is empty when not value found.
+        @returns: list with value found in self.results; list is empty when value is not found.
         """
         results = []
         data = self.results[ridx] if ridx in self.results else self.results[0]
@@ -123,7 +130,6 @@ class Validator:
         elif name.replace('.', '1').isdigit():
             results.append(float(name))
         else:
-            self.errlist.append("Metric '%s' is not collected or the value format is incorrect"%(name))
             self.ignoremetrics.add(name)
         return results
 
@@ -138,27 +144,32 @@ class Validator:
         Failure: when metric value is negative or not provided.
         Metrics with negative value will be added into the self.failtests['PositiveValueTest'] and self.ignoremetrics.
         """
-        negmetric = set()
-        missmetric = set()
+        negmetric = dict()
         pcnt = 0
         tcnt = 0
+        rerun = list()
         for name, val in self.get_results().items():
-            if val is None or val == '':
-                missmetric.add(name)
-                self.errlist.append("Metric '%s' is not collected"%(name))
-            elif val < 0:
-                negmetric.add("{0}(={1:.4f})".format(name, val))
-                self.collectlist[0].append(name)
+            if val < 0:
+                negmetric[name] = val
+                rerun.append(name)
             else:
                 pcnt += 1
             tcnt += 1
+        if len(rerun) > 0 and len(rerun) < 20:
+            second_results = dict()
+            self.second_test(rerun, second_results)
+            for name, val in second_results.items():
+                if name not in negmetric: continue
+                if val >= 0:
+                    del negmetric[name]
+                    pcnt += 1
 
         self.failtests['PositiveValueTest']['Total Tests'] = tcnt
         self.failtests['PositiveValueTest']['Passed Tests'] = pcnt
-        if len(negmetric) or len(missmetric)> 0:
-            self.ignoremetrics.update(negmetric)
-            self.ignoremetrics.update(missmetric)
-            self.failtests['PositiveValueTest']['Failed Tests'].append({'NegativeValue':list(negmetric), 'MissingValue':list(missmetric)})
+        if len(negmetric.keys()):
+            self.ignoremetrics.update(negmetric.keys())
+            negmessage = ["{0}(={1:.4f})".format(name, val) for name, val in negmetric.items()]
+            self.failtests['PositiveValueTest']['Failed Tests'].append({'NegativeValue': negmessage})
 
         return
@@ -259,21 +270,36 @@ class Validator:
         metrics = rule['Metrics']
         passcnt = 0
         totalcnt = 0
-        faillist = []
+        faillist = list()
+        failures = dict()
+        rerun = list()
         for m in metrics:
             totalcnt += 1
             result = self.get_value(m['Name'])
-            if len(result) > 0 and self.check_bound(result[0], lbv, ubv, t):
+            if len(result) > 0 and self.check_bound(result[0], lbv, ubv, t) or m['Name'] in self.skiplist:
                 passcnt += 1
             else:
-                faillist.append({'MetricName':m['Name'], 'CollectedValue':result})
-                self.collectlist[0].append(m['Name'])
+                failures[m['Name']] = result
+                rerun.append(m['Name'])
+
+        if len(rerun) > 0 and len(rerun) < 20:
+            second_results = dict()
+            self.second_test(rerun, second_results)
+            for name, val in second_results.items():
+                if name not in failures: continue
+                if self.check_bound(val, lbv, ubv, t):
+                    passcnt += 1
+                    del failures[name]
+                else:
+                    failures[name] = val
+                    self.results[0][name] = val
 
         self.totalcnt += totalcnt
         self.passedcnt += passcnt
         self.failtests['SingleMetricTest']['Total Tests'] += totalcnt
         self.failtests['SingleMetricTest']['Passed Tests'] += passcnt
-        if len(faillist) != 0:
+        if len(failures.keys()) != 0:
+            faillist = [{'MetricName':name, 'CollectedValue':val} for name, val in failures.items()]
             self.failtests['SingleMetricTest']['Failed Tests'].append({'RuleIndex':rule['RuleIndex'],
                                                                        'RangeLower': rule['RangeLower'],
                                                                        'RangeUpper': rule['RangeUpper'],
@@ -316,7 +342,7 @@ class Validator:
         return True
 
     # Start of Collector and Converter
-    def convert(self, data: list, idx: int):
+    def convert(self, data: list, metricvalues:dict):
         """
         Convert collected metric data from the -j output to dict of {metric_name:value}.
         """
@@ -326,20 +352,29 @@ class Validator:
                 if "metric-unit" in result and result["metric-unit"] != "(null)" and result["metric-unit"] != "":
                     name = result["metric-unit"].split(" ")[1] if len(result["metric-unit"].split(" ")) > 1 \
                             else result["metric-unit"]
-                    if idx not in self.results: self.results[idx] = dict()
-                    self.results[idx][name.lower()] = float(result["metric-value"])
+                    metricvalues[name.lower()] = float(result["metric-value"])
             except ValueError as error:
                 continue
         return
 
-    def collect_perf(self, data_file: str, workload: str):
+    def _run_perf(self, metric, workload: str):
+        tool = 'perf'
+        command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
+        wl = workload.split()
+        command.extend(wl)
+        print(" ".join(command))
+        cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
+        data = [x+'}' for x in cmd.stderr.split('}\n') if x]
+        return data
+
+    def collect_perf(self, workload: str):
         """
         Collect metric data with "perf stat -M" on given workload with -a and -j.
         """
         self.results = dict()
-        tool = 'perf'
         print(f"Starting perf collection")
-        print(f"Workload: {workload}")
+        print(f"Long workload: {workload}")
         collectlist = dict()
         if self.collectlist != "":
             collectlist[0] = {x for x in self.collectlist.split(",")}
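For reference, _run_perf() above shells out to `perf stat -j -M <metric> -a <workload>` and re-splits the JSON objects that perf writes to stderr; convert() then keys each value by the metric name embedded in the "metric-unit" field. A rough illustration of that parsing step, using a made-up stderr line shaped like the fields the code reads (the field layout and values below are assumptions for illustration, not captured perf output):

import json

# Made-up stderr fragment in the shape convert() expects: one JSON object per
# line carrying "metric-value" and "metric-unit" (values are illustrative).
sample_stderr = '{"metric-value" : "7.234", "metric-unit" : "% tma_retiring"}\n'

data = [x + '}' for x in sample_stderr.split('}\n') if x]   # same re-join trick as _run_perf()
metricvalues = {}
for line in data:
    result = json.loads(line)
    unit = result["metric-unit"]
    # metric name is the token after the unit prefix, else the whole field
    name = unit.split(" ")[1] if len(unit.split(" ")) > 1 else unit
    metricvalues[name.lower()] = float(result["metric-value"])

print(metricvalues)   # {'tma_retiring': 7.234}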
@@ -353,17 +388,20 @@ class Validator:
             collectlist[rule["RuleIndex"]] = [",".join(list(set(metrics)))]
 
         for idx, metrics in collectlist.items():
-            if idx == 0: wl = "sleep 0.5".split()
-            else: wl = workload.split()
+            if idx == 0: wl = "true"
+            else: wl = workload
             for metric in metrics:
-                command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
-                command.extend(wl)
-                print(" ".join(command))
-                cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
-                data = [x+'}' for x in cmd.stderr.split('}\n') if x]
-                self.convert(data, idx)
-        self.collectlist = dict()
-        self.collectlist[0] = list()
+                data = self._run_perf(metric, wl)
+                if idx not in self.results: self.results[idx] = dict()
+                self.convert(data, self.results[idx])
+        return
+
+    def second_test(self, collectlist, second_results):
+        workload = self.workloads[self.wlidx]
+        for metric in collectlist:
+            data = self._run_perf(metric, workload)
+            self.convert(data, second_results)
 
     # End of Collector and Converter
 
     # Start of Rule Generator
@@ -381,7 +419,7 @@ class Validator:
                 if 'MetricName' not in m:
                     print("Warning: no metric name")
                     continue
-                name = m['MetricName']
+                name = m['MetricName'].lower()
                 self.metrics.add(name)
                 if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
                     self.pctgmetrics.add(name.lower())
@@ -391,14 +429,12 @@ class Validator:
         return
 
-    def remove_unsupported_rules(self, rules, skiplist: set = None):
-        for m in skiplist:
-            self.metrics.discard(m)
+    def remove_unsupported_rules(self, rules):
         new_rules = []
         for rule in rules:
             add_rule = True
             for m in rule["Metrics"]:
-                if m["Name"] not in self.metrics:
+                if m["Name"] in self.skiplist or m["Name"] not in self.metrics:
                     add_rule = False
                     break
             if add_rule:
@@ -415,15 +451,15 @@ class Validator:
         """
         data = self.read_json(self.rulefname)
         rules = data['RelationshipRules']
-        skiplist = set(data['SkipList'])
-        self.rules = self.remove_unsupported_rules(rules, skiplist)
+        self.skiplist = set([name.lower() for name in data['SkipList']])
+        self.rules = self.remove_unsupported_rules(rules)
         pctgrule = {'RuleIndex':0,
                     'TestType':'SingleMetricTest',
                     'RangeLower':'0',
                     'RangeUpper': '100',
                     'ErrorThreshold': self.tolerance,
                     'Description':'Metrics in percent unit have value with in [0, 100]',
-                    'Metrics': [{'Name': m} for m in self.pctgmetrics]}
+                    'Metrics': [{'Name': m.lower()} for m in self.pctgmetrics]}
         self.rules.append(pctgrule)
 
         # Re-index all rules to avoid repeated RuleIndex
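The rule file read by create_rules() is expected to carry a top-level "SkipList" array; the hunk above lowercases those names once so that later lookups in remove_unsupported_rules() and single_test() are case-insensitive. A tiny illustration with placeholder metric names (the JSON fragment below is not taken from an actual rule file):

import json

# Placeholder rule-file fragment; only the keys touched by this hunk are shown.
rule_json = '{"RelationshipRules": [], "SkipList": ["UNCORE_FREQ", "tma_info_system_socket_clks"]}'
data = json.loads(rule_json)

skiplist = set(name.lower() for name in data['SkipList'])
print('uncore_freq' in skiplist)   # True -- lookups are case-insensitive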
@@ -479,8 +515,9 @@ class Validator:
         self.parse_perf_metrics()
         self.create_rules()
         for i in range(0, len(self.workloads)):
+            self.wlidx = i
             self._init_data()
-            self.collect_perf(self.datafname, self.workloads[i])
+            self.collect_perf(self.workloads[i])
             # Run positive value test
             self.pos_val_test()
             for r in self.rules: