diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-05-25 15:32:17 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-05-25 15:32:17 +0100 |
| commit | 08d3ea5916864f6f4143e6c1f622f2dd87d21d27 (patch) | |
| tree | 72aa9ab50a8c87fbe4dda0998a644aedd5c426aa /analysis/scripts | |
| parent | Correct calculation of F measure (diff) | |
| download | TCPDBench-08d3ea5916864f6f4143e6c1f622f2dd87d21d27.tar.gz TCPDBench-08d3ea5916864f6f4143e6c1f622f2dd87d21d27.zip | |
Add descriptive statistics code and results
Diffstat (limited to 'analysis/scripts')
| -rw-r--r-- | analysis/scripts/descriptive_annotations.py | 80 | ||||
| -rw-r--r-- | analysis/scripts/descriptive_length.py | 73 |
2 files changed, 153 insertions, 0 deletions
# Diff header (from the commit view):
# diff --git a/analysis/scripts/descriptive_annotations.py b/analysis/scripts/descriptive_annotations.py
# new file mode 100644
# index 00000000..2afdc422
# --- /dev/null
# +++ b/analysis/scripts/descriptive_annotations.py
# @@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-

"""Extract descriptive statistics for the time series

This script is used to extract descriptive statistics about the number of
annotations from the summary files.

Author: Gertjan van den Burg
Copyright (c) 2020 - The Alan Turing Institute
License: See the LICENSE file.

"""


import argparse
import json
import os
import statistics

# Number of summary files the summary directory is required to contain.
N_DATASETS = 42

# Maps every accepted ``--type`` value to the callable that computes it.
# This is the single source of truth for both the command line choices and
# the dispatch in ``main``.
_STAT_FUNCS = {
    "min": min,
    "max": max,
    "mean": statistics.mean,
    "std": statistics.stdev,
}


def parse_args():
    """Parse the command line arguments.

    Returns
    -------
    argparse.Namespace
        Namespace with ``summary_dir`` (directory with the summary files)
        and ``type`` (one of the keys of ``_STAT_FUNCS``). Both options are
        required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--summary-dir",
        help="Directory with summary files",
        required=True,
    )
    parser.add_argument(
        "-t",
        "--type",
        help="Type of statistic to compute",
        choices=list(_STAT_FUNCS),
        required=True,
    )
    return parser.parse_args()


def load_unique_annotations(summary_dir):
    """Count the unique annotations in every summary file.

    Every entry of ``summary_dir`` is read as a JSON document whose
    ``"annotations"`` value is a mapping to iterables of hashable items.
    NOTE(review): presumably this maps an annotator id to the change point
    indices that annotator marked -- confirm against the summary format.

    Parameters
    ----------
    summary_dir : str
        Directory that contains exactly ``N_DATASETS`` summary files.

    Returns
    -------
    list of int
        The number of distinct annotation values per file, pooled over all
        entries of ``"annotations"``, ordered by sorted file name.

    Raises
    ------
    ValueError
        If the directory does not contain exactly ``N_DATASETS`` entries.
    """
    files = os.listdir(summary_dir)
    # Explicit check instead of ``assert`` so that the validation is not
    # stripped when Python runs with -O.
    if len(files) != N_DATASETS:
        raise ValueError(
            "Expected %i summary files in %r, found %i"
            % (N_DATASETS, summary_dir, len(files))
        )

    n_uniq_anno = []
    for f in sorted(files):
        path = os.path.join(summary_dir, f)
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(path, "r", encoding="utf-8") as fp:
            data = json.load(fp)

        # Pool the annotations of all entries; the set removes duplicates.
        all_anno = set().union(*data["annotations"].values())
        n_uniq_anno.append(len(all_anno))
    return n_uniq_anno


def main():
    """Compute the requested statistic and print it to stdout.

    ``min`` and ``max`` are printed as integers, the other statistics with
    one decimal. The number is followed by a literal ``%`` character.
    NOTE(review): the ``%`` is presumably a LaTeX comment marker so that the
    output can be included in a document without trailing whitespace --
    confirm before changing the output format.
    """
    args = parse_args()
    func = _STAT_FUNCS.get(args.type)
    if func is None:
        # Defensive: argparse already restricts --type to the known choices.
        raise ValueError("Unknown type")

    n_uniq_anno = load_unique_annotations(args.summary_dir)
    if args.type in ("min", "max"):
        print("%i%%" % func(n_uniq_anno))
    else:
        print("%.1f%%" % func(n_uniq_anno))


if __name__ == "__main__":
    main()

# Diff header of the next file (continues on the following line):
# diff --git a/analysis/scripts/descriptive_length.py b/analysis/scripts/descriptive_length.py
# new file mode 100644
# index
# Remainder of the diff header for analysis/scripts/descriptive_length.py:
# 00000000..e8504b92
# --- /dev/null
# +++ b/analysis/scripts/descriptive_length.py
# @@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-

"""Extract descriptive statistics for the time series

This script is used to extract descriptive statistics regarding features of the
time series from the summary files.

Author: Gertjan van den Burg
Copyright (c) 2020 - The Alan Turing Institute
License: See the LICENSE file.

"""


import argparse
import json
import os
import statistics

# Number of summary files the summary directory is required to contain.
N_DATASETS = 42

# Maps every accepted ``--type`` value to the callable that computes it.
# This is the single source of truth for both the command line choices and
# the dispatch in ``main``.
_STAT_FUNCS = {
    "min": min,
    "max": max,
    "mean": statistics.mean,
}


def parse_args():
    """Parse the command line arguments.

    Returns
    -------
    argparse.Namespace
        Namespace with ``summary_dir`` (directory with the summary files)
        and ``type`` (one of the keys of ``_STAT_FUNCS``). Both options are
        required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--summary-dir",
        help="Directory with summary files",
        required=True,
    )
    parser.add_argument(
        "-t",
        "--type",
        help="Type of statistic to compute",
        choices=list(_STAT_FUNCS),
        required=True,
    )
    return parser.parse_args()


def load_summary_nobs(summary_dir):
    """Collect the ``"dataset_nobs"`` value of every summary file.

    Every entry of ``summary_dir`` is read as a JSON document.
    NOTE(review): ``"dataset_nobs"`` is presumably the number of
    observations (length) of the time series -- confirm against the summary
    format.

    Parameters
    ----------
    summary_dir : str
        Directory that contains exactly ``N_DATASETS`` summary files.

    Returns
    -------
    list
        The ``"dataset_nobs"`` values, ordered by sorted file name.

    Raises
    ------
    ValueError
        If the directory does not contain exactly ``N_DATASETS`` entries.
    """
    files = os.listdir(summary_dir)
    # Explicit check instead of ``assert`` so that the validation is not
    # stripped when Python runs with -O.
    if len(files) != N_DATASETS:
        raise ValueError(
            "Expected %i summary files in %r, found %i"
            % (N_DATASETS, summary_dir, len(files))
        )

    all_nobs = []
    for f in sorted(files):
        path = os.path.join(summary_dir, f)
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(path, "r", encoding="utf-8") as fp:
            data = json.load(fp)
        all_nobs.append(data["dataset_nobs"])
    return all_nobs


def main():
    """Compute the requested statistic and print it to stdout.

    ``min`` and ``max`` are printed as integers, the mean with one decimal.
    The number is followed by a literal ``%`` character.
    NOTE(review): the ``%`` is presumably a LaTeX comment marker so that the
    output can be included in a document without trailing whitespace --
    confirm before changing the output format.
    """
    args = parse_args()
    func = _STAT_FUNCS.get(args.type)
    if func is None:
        # Defensive: argparse already restricts --type to the known choices.
        raise ValueError("Unknown type")

    all_nobs = load_summary_nobs(args.summary_dir)
    if args.type in ("min", "max"):
        print("%i%%" % func(all_nobs))
    else:
        print("%.1f%%" % func(all_nobs))


if __name__ == "__main__":
    main()
