aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 37
-rw-r--r-- analysis/output/constants/SeriesLengthMax.tex 1
-rw-r--r-- analysis/output/constants/SeriesLengthMean.tex 1
-rw-r--r-- analysis/output/constants/SeriesLengthMin.tex 1
-rw-r--r-- analysis/output/constants/UniqueAnnotationsMax.tex 1
-rw-r--r-- analysis/output/constants/UniqueAnnotationsMean.tex 1
-rw-r--r-- analysis/output/constants/UniqueAnnotationsMin.tex 1
-rw-r--r-- analysis/output/constants/UniqueAnnotationsStd.tex 1
-rw-r--r-- analysis/scripts/descriptive_annotations.py 80
-rw-r--r-- analysis/scripts/descriptive_length.py 73
10 files changed, 196 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 054a3972..9f66234b 100644
--- a/Makefile
+++ b/Makefile
@@ -237,7 +237,14 @@ clean_rankplots:
+# Generated LaTeX constant files. The clean_constants target removes every
+# file listed here. The last seven entries are the descriptive statistics
+# written by descriptive_length.py and descriptive_annotations.py.
CONSTANT_TARGETS = $(CONST_DIR)/sigtest_global_best_cover_uni.tex \
$(CONST_DIR)/sigtest_global_best_f1_uni.tex \
$(CONST_DIR)/sigtest_global_default_cover_uni.tex \
- $(CONST_DIR)/sigtest_global_default_f1_uni.tex
+ $(CONST_DIR)/sigtest_global_default_f1_uni.tex \
+ $(CONST_DIR)/SeriesLengthMin.tex \
+ $(CONST_DIR)/SeriesLengthMax.tex \
+ $(CONST_DIR)/SeriesLengthMean.tex \
+ $(CONST_DIR)/UniqueAnnotationsMin.tex \
+ $(CONST_DIR)/UniqueAnnotationsMax.tex \
+ $(CONST_DIR)/UniqueAnnotationsMean.tex \
+ $(CONST_DIR)/UniqueAnnotationsStd.tex
+# Create the directory for the constant files (mkdir -p: no error if it
+# already exists). The constant rules list this target after "|", i.e. as an
+# order-only prerequisite.
const-dir:
mkdir -p $(CONST_DIR)
@@ -260,6 +267,34 @@ $(CONST_DIR)/sigtest_global_default_f1_uni.tex: $(TABLE_DIR)/default_f1_uni_full
$(SCRIPT_DIR)/significance.py | const-dir
python $(SCRIPT_DIR)/significance.py -i $< -o $@ --type best --mode global
+# Descriptive statistics of the series lengths, one LaTeX constant file per
+# statistic. Each recipe runs descriptive_length.py (the first prerequisite,
+# $<) on $(SUMMARY_DIR) and captures its standard output in the target.
+#
+# The shell creates/truncates $@ before the script starts, so a failing
+# script would otherwise leave an empty file that Make considers up to date.
+# On failure the target is therefore removed and the recipe exits non-zero.
+$(CONST_DIR)/SeriesLengthMin.tex: $(SCRIPT_DIR)/descriptive_length.py \
+	$(DATASET_SUMMARIES) | const-dir
+	python $< -s $(SUMMARY_DIR) -t min > $@ || { rm -f $@; exit 1; }
+
+$(CONST_DIR)/SeriesLengthMax.tex: $(SCRIPT_DIR)/descriptive_length.py \
+	$(DATASET_SUMMARIES) | const-dir
+	python $< -s $(SUMMARY_DIR) -t max > $@ || { rm -f $@; exit 1; }
+
+$(CONST_DIR)/SeriesLengthMean.tex: $(SCRIPT_DIR)/descriptive_length.py \
+	$(DATASET_SUMMARIES) | const-dir
+	python $< -s $(SUMMARY_DIR) -t mean > $@ || { rm -f $@; exit 1; }
+
+# Descriptive statistics of the number of unique annotations, one LaTeX
+# constant file per statistic. Each recipe runs descriptive_annotations.py
+# (the first prerequisite, $<) on $(SUMMARY_DIR) and captures its standard
+# output in the target.
+#
+# The shell creates/truncates $@ before the script starts, so a failing
+# script would otherwise leave an empty file that Make considers up to date.
+# On failure the target is therefore removed and the recipe exits non-zero.
+$(CONST_DIR)/UniqueAnnotationsMin.tex: $(SCRIPT_DIR)/descriptive_annotations.py \
+	$(DATASET_SUMMARIES) | const-dir
+	python $< -s $(SUMMARY_DIR) -t min > $@ || { rm -f $@; exit 1; }
+
+$(CONST_DIR)/UniqueAnnotationsMax.tex: $(SCRIPT_DIR)/descriptive_annotations.py \
+	$(DATASET_SUMMARIES) | const-dir
+	python $< -s $(SUMMARY_DIR) -t max > $@ || { rm -f $@; exit 1; }
+
+$(CONST_DIR)/UniqueAnnotationsMean.tex: $(SCRIPT_DIR)/descriptive_annotations.py \
+	$(DATASET_SUMMARIES) | const-dir
+	python $< -s $(SUMMARY_DIR) -t mean > $@ || { rm -f $@; exit 1; }
+
+$(CONST_DIR)/UniqueAnnotationsStd.tex: $(SCRIPT_DIR)/descriptive_annotations.py \
+	$(DATASET_SUMMARIES) | const-dir
+	python $< -s $(SUMMARY_DIR) -t std > $@ || { rm -f $@; exit 1; }
+
+# Delete every generated constant file listed in CONSTANT_TARGETS
+# (rm -f: files that are already missing are not an error).
clean_constants:
rm -f $(CONSTANT_TARGETS)
diff --git a/analysis/output/constants/SeriesLengthMax.tex b/analysis/output/constants/SeriesLengthMax.tex
new file mode 100644
index 00000000..a9000c23
--- /dev/null
+++ b/analysis/output/constants/SeriesLengthMax.tex
@@ -0,0 +1 @@
+991%
diff --git a/analysis/output/constants/SeriesLengthMean.tex b/analysis/output/constants/SeriesLengthMean.tex
new file mode 100644
index 00000000..9d3449e5
--- /dev/null
+++ b/analysis/output/constants/SeriesLengthMean.tex
@@ -0,0 +1 @@
+327.7%
diff --git a/analysis/output/constants/SeriesLengthMin.tex b/analysis/output/constants/SeriesLengthMin.tex
new file mode 100644
index 00000000..8826b25d
--- /dev/null
+++ b/analysis/output/constants/SeriesLengthMin.tex
@@ -0,0 +1 @@
+15%
diff --git a/analysis/output/constants/UniqueAnnotationsMax.tex b/analysis/output/constants/UniqueAnnotationsMax.tex
new file mode 100644
index 00000000..2f8b1fd1
--- /dev/null
+++ b/analysis/output/constants/UniqueAnnotationsMax.tex
@@ -0,0 +1 @@
+26%
diff --git a/analysis/output/constants/UniqueAnnotationsMean.tex b/analysis/output/constants/UniqueAnnotationsMean.tex
new file mode 100644
index 00000000..18a14a6b
--- /dev/null
+++ b/analysis/output/constants/UniqueAnnotationsMean.tex
@@ -0,0 +1 @@
+7.4%
diff --git a/analysis/output/constants/UniqueAnnotationsMin.tex b/analysis/output/constants/UniqueAnnotationsMin.tex
new file mode 100644
index 00000000..635c47ac
--- /dev/null
+++ b/analysis/output/constants/UniqueAnnotationsMin.tex
@@ -0,0 +1 @@
+0%
diff --git a/analysis/output/constants/UniqueAnnotationsStd.tex b/analysis/output/constants/UniqueAnnotationsStd.tex
new file mode 100644
index 00000000..0a119a8a
--- /dev/null
+++ b/analysis/output/constants/UniqueAnnotationsStd.tex
@@ -0,0 +1 @@
+7.0%
diff --git a/analysis/scripts/descriptive_annotations.py b/analysis/scripts/descriptive_annotations.py
new file mode 100644
index 00000000..2afdc422
--- /dev/null
+++ b/analysis/scripts/descriptive_annotations.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+"""Extract descriptive statistics for the time series
+
+This script is used to extract descriptive statistics about the number of
+annotations from the summary files.
+
+Author: Gertjan van den Burg
+Copyright (c) 2020 - The Alan Turing Institute
+License: See the LICENSE file.
+
+"""
+
+
+import argparse
+import json
+import os
+import statistics
+
+# Number of entries the summary directory must contain; checked when the
+# summaries are loaded.
+N_DATASETS = 42
+
+
+def parse_args():
+    """Read the command line options of this script.
+
+    Both options are mandatory: ``-s/--summary-dir`` names the directory
+    holding the summary files and ``-t/--type`` selects the statistic to
+    print ("min", "max", "mean" or "std").
+
+    Returns
+    -------
+    argparse.Namespace
+        Parsed options, available as ``summary_dir`` and ``type``.
+    """
+    statistics_available = ["min", "max", "mean", "std"]
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "-s",
+        "--summary-dir",
+        required=True,
+        help="Directory with summary files",
+    )
+    cli.add_argument(
+        "-t",
+        "--type",
+        required=True,
+        choices=statistics_available,
+        help="Type of statistic to compute",
+    )
+    return cli.parse_args()
+
+
+def load_unique_annotations(summary_dir):
+    """Count the distinct annotated points in every summary file.
+
+    Every entry of ``summary_dir`` is read as JSON. Its "annotations" field is
+    a mapping whose values are collections of annotated points (presumably
+    change point indices, one collection per annotator -- confirm against the
+    summary file format). The points of all collections are pooled and each
+    distinct point is counted once.
+
+    Parameters
+    ----------
+    summary_dir : str
+        Directory that contains exactly ``N_DATASETS`` summary files.
+
+    Returns
+    -------
+    list of int
+        Number of unique annotations per file, in sorted filename order.
+
+    Raises
+    ------
+    AssertionError
+        If the directory does not hold exactly ``N_DATASETS`` entries.
+    """
+    files = sorted(os.listdir(summary_dir))
+    # Raised explicitly rather than through an ``assert`` statement: asserts
+    # are skipped under ``python -O``, which would let the statistics be
+    # computed on an incomplete or polluted directory without any warning.
+    if len(files) != N_DATASETS:
+        raise AssertionError(
+            "Expected %i summary files in %r, found %i"
+            % (N_DATASETS, summary_dir, len(files))
+        )
+
+    n_uniq_anno = []
+    for f in files:
+        path = os.path.join(summary_dir, f)
+        with open(path, "r") as fp:
+            data = json.load(fp)
+
+        # Pool the points of all annotation lists; the set keeps each once.
+        all_anno = set()
+        for annotations in data["annotations"].values():
+            all_anno.update(annotations)
+        n_uniq_anno.append(len(all_anno))
+    return n_uniq_anno
+
+
+def main():
+    """Print one descriptive statistic of the unique annotation counts.
+
+    The statistic is chosen with ``--type``. "min" and "max" are printed as
+    integers, "mean" and "std" with one decimal. A percent sign is appended
+    to the printed number (presumably so that LaTeX treats the rest of the
+    line, including the newline, as a comment when the file is included --
+    confirm against the document that uses these constants).
+
+    Raises
+    ------
+    ValueError
+        If the requested type has no associated function.
+    """
+    args = parse_args()
+    reducers = {
+        "min": min,
+        "max": max,
+        "mean": statistics.mean,
+        "std": statistics.stdev,
+    }
+    reducer = reducers.get(args.type)
+    if reducer is None:
+        raise ValueError("Unknown type")
+
+    counts = load_unique_annotations(args.summary_dir)
+    # Extremes are whole numbers; mean and standard deviation are rounded.
+    template = "%i%%" if args.type in ["min", "max"] else "%.1f%%"
+    print(template % reducer(counts))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/analysis/scripts/descriptive_length.py b/analysis/scripts/descriptive_length.py
new file mode 100644
index 00000000..e8504b92
--- /dev/null
+++ b/analysis/scripts/descriptive_length.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+
+"""Extract descriptive statistics for the time series
+
+This script is used to extract descriptive statistics regarding features of the
+time series from the summary files.
+
+Author: Gertjan van den Burg
+Copyright (c) 2020 - The Alan Turing Institute
+License: See the LICENSE file.
+
+"""
+
+
+import argparse
+import json
+import os
+import statistics
+
+# Number of entries the summary directory must contain; checked when the
+# summaries are loaded.
+N_DATASETS = 42
+
+
+def parse_args():
+    """Parse the two required command line options.
+
+    ``-s/--summary-dir`` is the directory with the summary files and
+    ``-t/--type`` is the statistic to compute ("min", "max" or "mean").
+
+    Returns
+    -------
+    argparse.Namespace
+        Parsed options with the attributes ``summary_dir`` and ``type``.
+    """
+    option_specs = [
+        (("-s", "--summary-dir"), {"help": "Directory with summary files"}),
+        (
+            ("-t", "--type"),
+            {
+                "help": "Type of statistic to compute",
+                "choices": ["min", "max", "mean"],
+            },
+        ),
+    ]
+    cli = argparse.ArgumentParser()
+    for flags, settings in option_specs:
+        cli.add_argument(*flags, required=True, **settings)
+    return cli.parse_args()
+
+
+def load_summary_nobs(summary_dir):
+    """Collect the "dataset_nobs" value of every summary file.
+
+    Every entry of ``summary_dir`` is read as JSON and its top-level
+    "dataset_nobs" field is extracted (presumably the number of observations
+    of the series -- confirm against the summary file format).
+
+    Parameters
+    ----------
+    summary_dir : str
+        Directory that contains exactly ``N_DATASETS`` summary files.
+
+    Returns
+    -------
+    list
+        The "dataset_nobs" values, in sorted filename order.
+
+    Raises
+    ------
+    AssertionError
+        If the directory does not hold exactly ``N_DATASETS`` entries.
+    """
+    files = sorted(os.listdir(summary_dir))
+    # Raised explicitly rather than through an ``assert`` statement: asserts
+    # are skipped under ``python -O``, which would let the statistics be
+    # computed on an incomplete or polluted directory without any warning.
+    if len(files) != N_DATASETS:
+        raise AssertionError(
+            "Expected %i summary files in %r, found %i"
+            % (N_DATASETS, summary_dir, len(files))
+        )
+
+    all_nobs = []
+    for f in files:
+        path = os.path.join(summary_dir, f)
+        with open(path, "r") as fp:
+            data = json.load(fp)
+        all_nobs.append(data["dataset_nobs"])
+    return all_nobs
+
+
+def main():
+    """Print one descriptive statistic of the series lengths.
+
+    The statistic is chosen with ``--type``. "min" and "max" are printed as
+    integers and "mean" with one decimal. A percent sign is appended to the
+    printed number (presumably so that LaTeX treats the rest of the line,
+    including the newline, as a comment when the file is included -- confirm
+    against the document that uses these constants).
+
+    Raises
+    ------
+    ValueError
+        If the requested type has no associated function.
+    """
+    args = parse_args()
+    statistic = {"min": min, "mean": statistics.mean, "max": max}.get(
+        args.type
+    )
+    if statistic is None:
+        raise ValueError("Unknown type")
+
+    value = statistic(load_summary_nobs(args.summary_dir))
+    # Only the mean can be fractional; the extremes are printed as integers.
+    if args.type == "mean":
+        print("%.1f%%" % value)
+    else:
+        print("%i%%" % value)
+
+
+if __name__ == "__main__":
+    main()