author     Gertjan van den Burg <gertjanvandenburg@gmail.com>  2020-03-12 14:33:57 +0000
committer  Gertjan van den Burg <gertjanvandenburg@gmail.com>  2020-03-12 14:33:57 +0000
commit     7ef8f6e58990fc069cccc71ed6564e8c639ea4fc (patch)
tree       9e7662a34b7d0c1f1c5d9faf6d7d6ea8672f6410 /analysis/scripts/make_table.py
initial commit
Diffstat (limited to 'analysis/scripts/make_table.py')
-rw-r--r--  analysis/scripts/make_table.py  441
1 file changed, 441 insertions(+), 0 deletions(-)
diff --git a/analysis/scripts/make_table.py b/analysis/scripts/make_table.py
new file mode 100644
index 00000000..e4747258
--- /dev/null
+++ b/analysis/scripts/make_table.py
@@ -0,0 +1,441 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Script to generate tables from summary files
+
+Metrics, experiments, methods, and datasets are hard-coded as a means of
+validation.
+
+For the "best" experiment, the RBOCPDMS method is excluded because it fails too
+often. For the other experiments, datasets with incomplete results are removed.
+
+Author: G.J.J. van den Burg
+Copyright (c) 2020 - The Alan Turing Institute
+License: See the LICENSE file.
+
+"""
+
+import argparse
+import colorama
+import json
+import os
+import sys
+import termcolor
+
+from enum import Enum
+from typing import Optional
+
+# from pydantic.dataclasses import dataclass
+from dataclasses import dataclass
+
+from latex import build_latex_table
+
+colorama.init()
+
+
+class Metric(Enum):
+ f1 = "f1"
+ cover = "cover"
+
+
+class Experiment(Enum):
+ default = "default"
+ best = "best"
+
+
+class Dataset(Enum):
+ apple = "apple"
+ bank = "bank"
+ bee_waggle_6 = "bee_waggle_6"
+ bitcoin = "bitcoin"
+ brent_spot = "brent_spot"
+ businv = "businv"
+ centralia = "centralia"
+ children_per_woman = "children_per_woman"
+ co2_canada = "co2_canada"
+ construction = "construction"
+ debt_ireland = "debt_ireland"
+ gdp_argentina = "gdp_argentina"
+ gdp_croatia = "gdp_croatia"
+ gdp_iran = "gdp_iran"
+ gdp_japan = "gdp_japan"
+ global_co2 = "global_co2"
+ homeruns = "homeruns"
+ iceland_tourism = "iceland_tourism"
+ jfk_passengers = "jfk_passengers"
+ lga_passengers = "lga_passengers"
+ nile = "nile"
+ occupancy = "occupancy"
+ ozone = "ozone"
+ quality_control_1 = "quality_control_1"
+ quality_control_2 = "quality_control_2"
+ quality_control_3 = "quality_control_3"
+ quality_control_4 = "quality_control_4"
+ quality_control_5 = "quality_control_5"
+ rail_lines = "rail_lines"
+ ratner_stock = "ratner_stock"
+ robocalls = "robocalls"
+ run_log = "run_log"
+ scanline_126007 = "scanline_126007"
+ scanline_42049 = "scanline_42049"
+ seatbelts = "seatbelts"
+ shanghai_license = "shanghai_license"
+ uk_coal_employ = "uk_coal_employ"
+ measles = "measles"
+ unemployment_nl = "unemployment_nl"
+ us_population = "us_population"
+ usd_isk = "usd_isk"
+ well_log = "well_log"
+
+
+class Method(Enum):
+ amoc = "amoc"
+ binseg = "binseg"
+ bocpd = "bocpd"
+ bocpdms = "bocpdms"
+ cpnp = "cpnp"
+ ecp = "ecp"
+ kcpa = "kcpa"
+ pelt = "pelt"
+ prophet = "prophet"
+ rbocpdms = "rbocpdms"
+ rfpop = "rfpop"
+ segneigh = "segneigh"
+ wbs = "wbs"
+
+
+# Methods that support multidimensional datasets
+MULTIMETHODS = [
+ Method.bocpd,
+ Method.bocpdms,
+ Method.ecp,
+ Method.kcpa,
+ Method.rbocpdms,
+]
+
+# Multidimensional datasets
+MULTIDATASETS = [
+ Dataset.apple,
+ Dataset.bee_waggle_6,
+ Dataset.occupancy,
+ Dataset.run_log,
+]
+
+# Datasets with missing values
+MISSING_DATASETS = [Dataset.uk_coal_employ]
+
+# Methods that handle missing values
+MISSING_METHODS = [Method.bocpdms, Method.ecp, Method.kcpa, Method.prophet]
+
+
+@dataclass
+class Result:
+ dataset: Dataset
+ experiment: Experiment
+ is_multidim: bool
+ method: Method
+ metric: Metric
+ score: Optional[float]
+ summary_file: str
+ placeholder: Optional[str]
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-s",
+ "--summary-dir",
+ help="Directory with summary files",
+ required=True,
+ )
+ parser.add_argument(
+ "-m",
+ "--metric",
+ help="Metric to use for the table",
+ choices=["f1", "cover"],
+ required=True,
+ )
+ parser.add_argument(
+ "-e",
+ "--experiment",
+ help="Experiment to make table for",
+ choices=["best", "default"],
+ required=True,
+ )
+ parser.add_argument(
+ "-d",
+ "--dim",
+ help="Dimensionality",
+ choices=["uni", "multi", "combined"],
+ required=True,
+ )
+ parser.add_argument(
+ "-f",
+ "--format",
+ help="Output format",
+ choices=["json", "tex"],
+ required=True,
+ )
+ parser.add_argument(
+ "-t",
+ "--type",
+ help="Type of table to make",
+ choices=["avg", "full"],
+ required=True,
+ )
+ return parser.parse_args()
+
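+# Example invocation (the "./summaries" path below is hypothetical; the flags
+# are the ones defined in parse_args above):
+#
+#   python make_table.py --summary-dir ./summaries --metric f1 \
+#       --experiment default --dim uni --format tex --type full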
+
+def warning(msg):
+ termcolor.cprint(msg, "yellow", file=sys.stderr)
+
+
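+# The summary files are JSON documents; the sketch below is inferred from the
+# fields this script accesses and is not an exhaustive description of the
+# format:
+#
+#   {
+#       "dataset": "<dataset name>",
+#       "dataset_ndim": <int>,
+#       "results": {
+#           "<experiment>_<method>": [
+#               {
+#                   "status": "SUCCESS" | "FAIL" | "TIMEOUT",
+#                   "scores": {"f1": <float>, "cover": <float>}
+#               },
+#               ...
+#           ],
+#           ...
+#       }
+#   }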
+def load_summary(filename):
+ with open(filename, "r") as fp:
+ data = json.load(fp)
+ return data
+
+
+def extract_score(method_results, metric=None, experiment=None):
+ """Extract a single numeric score from a list of dictionaries
+ """
+
+    if metric not in [Metric.f1, Metric.cover]:
+        raise ValueError("Unknown metric: %s" % metric)
+    if experiment not in ["default", "best"]:
+        raise ValueError("Unknown experiment: %s" % experiment)
+
+ # Collect all values for the chosen metric
+ scores = []
+ for result in method_results:
+        if result["status"] != "SUCCESS":
+ continue
+ scores.append(result["scores"][metric.name])
+
+ if len(scores) == 0:
+ return None
+
+ # check that we have only one score for the 'default' experiment
+ if experiment == "default":
+ if len(scores) > 1:
+ raise ValueError("Default experiment with more than one score!")
+ return scores[0]
+ return max(scores)
+
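+# Illustrative call of extract_score (the values are made up for
+# demonstration only):
+#
+#   extract_score(
+#       [{"status": "SUCCESS", "scores": {"f1": 0.5, "cover": 0.4}}],
+#       metric=Metric.f1,
+#       experiment="default",
+#   )  # returns 0.5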
+
+def collect_results(summary_dir=None, metric=None, experiment=None):
+ """Collect the results for the experiment on the specified metric.
+
+ Returns a list of Result objects.
+ """
+    if metric not in [Metric.f1, Metric.cover]:
+        raise ValueError("Unknown metric: %s" % metric)
+    if experiment not in ["default", "best"]:
+        raise ValueError("Unknown experiment: %s" % experiment)
+ if not os.path.isdir(summary_dir):
+ raise FileNotFoundError(summary_dir)
+
+ results = []
+ for fname in sorted(os.listdir(summary_dir)):
+ path = os.path.join(summary_dir, fname)
+ summary_data = load_summary(path)
+
+ dataset_name = summary_data["dataset"]
+ summary_results = summary_data["results"]
+
+ is_multi = summary_data["dataset_ndim"] > 1
+
+ for method in summary_results:
+ # method names are prefixed with the experiment type, so we skip
+ # the ones we don't want
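+            # (e.g. "default_pelt" is kept when experiment == "default",
+            # while "best_pelt" would be skipped)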
+ if not method.startswith(experiment + "_"):
+ continue
+
+ # extract the metric score for this experiment from the summary
+ # results for the method
+ score = extract_score(
+ summary_results[method], metric=metric, experiment=experiment
+ )
+
+ # strip the experiment from the method name
+ method_name = method[len(experiment + "_") :]
+
+ # determine the placeholder value if there is no score.
+ placeholder = set()
+ if score is None:
+                if (Dataset(dataset_name) in MISSING_DATASETS) and (
+                    Method(method_name) not in MISSING_METHODS
+                ):
+ # dataset has missing values and method can't handle it
+ placeholder.add("M")
+ else:
+ for result in summary_results[method]:
+ if result["status"] == "FAIL":
+ placeholder.add("F")
+ elif result["status"] == "TIMEOUT":
+ placeholder.add("T")
+ placeholder = "/".join(sorted(placeholder))
+
+ # create a Result object
+ res = Result(
+ dataset=Dataset(dataset_name),
+ experiment=Experiment(experiment),
+ is_multidim=is_multi,
+ method=Method(method_name),
+ metric=Metric(metric),
+ score=score,
+ summary_file=fname,
+ placeholder=placeholder or None,
+ )
+ results.append(res)
+ return results
+
+
+def average_results(results):
+ """Average the results
+
+ NOTE: This function filters out some methods/datasets for which we have
+ insufficient results.
+ """
+ experiment = list(set(r.experiment for r in results))[0]
+ # determine if we're dealing with multidimensional datasets
+ is_multi = all(r.is_multidim for r in results)
+
+    # copy the lists so that removing entries below does not mutate the
+    # module-level MULTIMETHODS constant
+    expected_methods = list(MULTIMETHODS) if is_multi else list(Method)
+
+ # keep only expected methods
+ results = list(filter(lambda r: r.method in expected_methods, results))
+
+ # remove RBOCPDMS for 'best', because it fails too often
+ if experiment == Experiment.best:
+ warning(
+ "\nWarning: Removing RBOCPDMS (experiment = %s)\n" % experiment
+ )
+ results = list(filter(lambda r: r.method != Method.rbocpdms, results))
+ expected_methods.remove(Method.rbocpdms)
+
+ # remove datasets for which we do not have complete results
+ to_remove = []
+ for dataset in set(r.dataset for r in results):
+ dset_results = filter(lambda r: r.dataset == dataset, results)
+ if any(r.score is None for r in dset_results):
+ to_remove.append(dataset)
+ if to_remove:
+ warning("\nWarning: Filtering out datasets: %r\n" % to_remove)
+    results = list(filter(lambda r: r.dataset not in to_remove, results))
+
+ # check that we are now complete: for all datasets and all methods in the
+ # remaining results, we have a non-None score.
+ assert all(r.score is not None for r in results)
+
+ # compute the average per method
+ methods = set(r.method for r in results)
+ avg = {}
+ for method in methods:
+ method_scores = [r.score for r in results if r.method == method]
+ avg_score = sum(method_scores) / len(method_scores)
+ avg[method.name] = avg_score
+
+ return avg
+
+
+def write_json(results, is_avg=None):
+    if is_avg not in [True, False]:
+ raise ValueError("is_avg should be either True or False")
+
+ output = {}
+ if is_avg:
+ output = results
+ else:
+ datasets = set(r.dataset for r in results)
+ methods = set(r.method for r in results)
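+        # the full table is nested as: dataset name -> method name -> score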
+ for d in datasets:
+ output[d.name] = {}
+ for m in methods:
+ r = next(
+ (r for r in results if r.dataset == d and r.method == m),
+ None,
+ )
+                # deliberately raises (AttributeError) if r is None, because
+                # a missing (dataset, method) pair should not occur here
+ output[d.name][m.name] = r.score
+ print(json.dumps(output, indent="\t", sort_keys=True))
+
+
+def write_latex(results, dim=None, is_avg=None):
+ if is_avg:
+ raise NotImplementedError(
+ "write_latex is not supported for is_avg = True"
+ )
+
+ methods = sorted(set(r.method.name for r in results))
+ datasets = sorted(set(r.dataset.name for r in results))
+ if dim == "combined":
+ uni_datasets = [
+            d.name for d in list(Dataset) if d not in MULTIDATASETS
+ ]
+ multi_datasets = [d.name for d in MULTIDATASETS]
+ datasets = sorted(uni_datasets) + sorted(multi_datasets)
+ first_multi = sorted(multi_datasets)[0]
+
+ textsc = lambda m: "\\textsc{%s}" % m
+ verb = lambda m: "\\verb+%s+" % m
+
+ headers = ["Dataset"] + list(map(textsc, methods))
+
+ table = []
+ for dataset in datasets:
+ row = [verb(dataset)]
+ d = Dataset(dataset)
+
+ for method in methods:
+ m = Method(method)
+ r = next((r for r in results if r.method == m and r.dataset == d))
+ row.append(r.placeholder if r.score is None else r.score)
+
+ table.append(row)
+ spec = "l" + "c" * len(methods)
+ tex = build_latex_table(table, headers, floatfmt=".3f", table_spec=spec)
+
+ if dim == "combined":
+ # add a horizontal line for these datasets
+ lines = tex.split("\n")
+ newlines = []
+ for line in lines:
+ if line.startswith(verb(first_multi)):
+ newlines.append("\\hline")
+ newlines.append(line)
+ tex = "\n".join(newlines)
+
+ print(tex)
+
+
+def main():
+ args = parse_args()
+ if args.type == "avg" and args.dim == "combined":
+ raise ValueError("Using 'avg' and 'combined' is not supported.")
+
+ results = collect_results(
+ summary_dir=args.summary_dir,
+ metric=Metric(args.metric),
+ experiment=args.experiment,
+ )
+
+ if args.dim == "uni":
+ # filter out multi
+ results = list(filter(lambda r: not r.is_multidim, results))
+ elif args.dim == "multi":
+ # filter out uni
+ results = list(filter(lambda r: r.is_multidim, results))
+
+ if args.type == "avg":
+ results = average_results(results)
+
+ if args.format == "json":
+ write_json(results, is_avg=args.type == "avg")
+ else:
+ write_latex(results, args.dim, is_avg=args.type == "avg")
+
+
+if __name__ == "__main__":
+ main()