about | summary | refs | log | tree | commit | diff
path: root/build_tcpd.py
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-03-10 12:27:53 +0000
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-03-10 12:27:53 +0000
commit: 7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e (patch)
tree: 10aa6710599230c889ec44407a065ee303a79348 /build_tcpd.py
download: TCPD-7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e.tar.gz
          TCPD-7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e.zip
Initial commit
Diffstat (limited to 'build_tcpd.py')
-rw-r--r-- build_tcpd.py 115
1 files changed, 115 insertions, 0 deletions
diff --git a/build_tcpd.py b/build_tcpd.py
new file mode 100644
index 0000000..9fe75ed
--- /dev/null
+++ b/build_tcpd.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Collect and verify all time series that are not packaged in the repository.
+
+Author: Gertjan van den Burg
+License: See LICENSE file.
+Copyright: 2019, The Alan Turing Institute
+
+"""
+
+import argparse
+import platform
+import os
+
+# Directory holding one subdirectory per dataset; run_dataset_func() joins it
+# with the dataset name to locate that dataset's script and output location.
+DATASET_DIR = "./datasets"
+
+# (dataset name, script filename) pairs, processed in this order by main().
+# The script is looked up inside the dataset's own subdirectory of
+# DATASET_DIR.
+TARGETS = [
+ ("apple", "get_apple.py"),
+ ("bee_waggle_6", "get_bee_waggle_6.py"),
+ ("bitcoin", "get_bitcoin.py"),
+ ("global_co2", "get_global_co2.py"),
+ ("iceland_tourism", "get_iceland_tourism.py"),
+ ("occupancy", "get_occupancy.py"),
+ ("ratner_stock", "get_ratner_stock.py"),
+ ("robocalls", "get_robocalls.py"),
+ ("scanline_126007", "get_scanline_126007.py"),
+ ("scanline_42049", "get_scanline_42049.py"),
+]
+
+
+def parse_args():
+    """Parse the command line arguments of the build script.
+
+    Recognised arguments:
+
+    - ``-v`` / ``--verbose``: flag that enables progress logging.
+    - ``-o`` / ``--output-dir``: output directory for the time series.
+    - ``action``: optional positional, either ``collect`` or ``clean``;
+      ``collect`` is used when it is omitted.
+
+    NOTE(review): nothing else in this script reads ``output_dir`` -- confirm
+    whether the option is meant to be passed on to the dataset scripts.
+
+    Returns the namespace produced by ``ArgumentParser.parse_args()``.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable logging"
+    )
+    cli.add_argument(
+        "-o",
+        "--output-dir",
+        help="Output directory to store all time series",
+    )
+    cli.add_argument(
+        "action",
+        nargs="?",
+        default="collect",
+        choices=["collect", "clean"],
+        help="Action to perform",
+    )
+    return cli.parse_args()
+
+
+def load_dataset_script(module_name, path):
+    """Load the dataset collection script as a module
+
+    This is not a *super* clean way to do this, but it maintains the modularity
+    of the dataset, where each dataset can be downloaded individually as well
+    as through this script.
+
+    Parameters
+    ----------
+    module_name : str
+        Name to give to the loaded module.
+    path : str
+        Path of the Python source file to load.
+
+    Returns
+    -------
+    module
+        The executed module object.
+
+    Raises
+    ------
+    ImportError
+        On Python 3.5+, when no import spec can be built for ``path`` (for
+        example because the file name has no recognised source suffix).
+    """
+    # The loading API differs between interpreter generations, so dispatch on
+    # the (string) major/minor version components.
+    major, minor = platform.python_version_tuple()[:2]
+    if major == "2":
+        import imp
+
+        return imp.load_source(module_name, path)
+    if major == "3" and minor in ("3", "4"):
+        from importlib.machinery import SourceFileLoader
+
+        return SourceFileLoader(module_name, path).load_module()
+
+    import importlib.util
+
+    spec = importlib.util.spec_from_file_location(module_name, path)
+    # spec_from_file_location returns None when it cannot find a loader for
+    # the path; fail with a clear message instead of an AttributeError later.
+    if spec is None or spec.loader is None:
+        raise ImportError(
+            "Cannot load dataset script %r as module %r" % (path, module_name)
+        )
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def run_dataset_func(name, script, funcname):
+    """Call the function ``funcname`` of a dataset's script.
+
+    The script is looked up at ``DATASET_DIR/<name>/<script>`` and loaded as
+    the module ``tcpd.<name>``. The requested function is then called with the
+    dataset directory passed as its ``output_dir`` keyword argument.
+    """
+    dataset_dir = os.path.join(DATASET_DIR, name)
+    dataset_module = load_dataset_script(
+        "tcpd.%s" % name, os.path.join(dataset_dir, script)
+    )
+    getattr(dataset_module, funcname)(output_dir=dataset_dir)
+
+
+def collect_dataset(name, script):
+    """Run the ``collect`` function of the script for dataset ``name``."""
+    return run_dataset_func(name, script, funcname="collect")
+
+
+def clean_dataset(name, script):
+    """Run the ``clean`` function of the script for dataset ``name``."""
+    return run_dataset_func(name, script, funcname="clean")
+
+
+def main():
+    """Run the requested action on every dataset listed in ``TARGETS``.
+
+    The action (``collect`` or ``clean``) comes from the command line. With
+    ``--verbose`` a progress line is printed per dataset, ending in ``ok``
+    once the action for that dataset has returned.
+
+    Raises ``ValueError`` if the parsed action is not a known one.
+    """
+    args = parse_args()
+
+    def log(*a, **kw):
+        # Progress output is only produced when --verbose was given.
+        if args.verbose:
+            print(*a, **kw)
+
+    handlers = {"collect": collect_dataset, "clean": clean_dataset}
+    if args.action not in handlers:
+        raise ValueError("Unknown action: %s" % args.action)
+    handler = handlers[args.action]
+
+    for name, script in TARGETS:
+        # No newline here: "ok" is appended to the same line afterwards.
+        message = "Running %s action for dataset: %s ... " % (
+            args.action,
+            name,
+        )
+        log(message, end="", flush=True)
+        handler(name, script)
+        log("ok", flush=True)
+
+
+# Run the build only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+ main()