aboutsummaryrefslogtreecommitdiff
path: root/datasets/lga_passengers/convert.py
diff options
context:
space:
mode:
author Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-03-10 12:27:53 +0000
committer Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-03-10 12:27:53 +0000
commit 7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e (patch)
tree 10aa6710599230c889ec44407a065ee303a79348 /datasets/lga_passengers/convert.py
download TCPD-7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e.tar.gz
TCPD-7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e.zip
Initial commit
Diffstat (limited to 'datasets/lga_passengers/convert.py')
-rw-r--r-- datasets/lga_passengers/convert.py 85
1 files changed, 85 insertions, 0 deletions
diff --git a/datasets/lga_passengers/convert.py b/datasets/lga_passengers/convert.py
new file mode 100644
index 0000000..b55fb04
--- /dev/null
+++ b/datasets/lga_passengers/convert.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Dataset conversion script
+
+Author: G.J.J. van den Burg
+
+"""
+
+import json
+import argparse
+import clevercsv
+
+
# Lookup table from lower-cased three-letter English month abbreviation to
# its zero-padded month number. Built once instead of on every call.
_MONTH_TO_INDEX = {
    "jan": "01",
    "feb": "02",
    "mar": "03",
    "apr": "04",
    "may": "05",
    "jun": "06",
    "jul": "07",
    "aug": "08",
    "sep": "09",
    "oct": "10",
    "nov": "11",
    "dec": "12",
}


def month2index(month):
    """Return the zero-padded month number for a month abbreviation.

    The three-letter English abbreviations ("Jan" ... "Dec") map to the
    strings "01" ... "12". Surrounding whitespace and letter case are
    ignored, so " jan" and "JAN" are accepted as well.

    Raises a KeyError when the month is not a known abbreviation.
    """
    # Only normalise real strings; other hashable values (e.g. None for a
    # missing CSV field) fall through to the lookup and raise KeyError.
    key = month.strip().lower() if isinstance(month, str) else month
    try:
        return _MONTH_TO_INDEX[key]
    except KeyError:
        raise KeyError(f"Unknown month abbreviation: {month!r}") from None
+
+
def parse_args(argv=None):
    """Parse the command line arguments of the conversion script.

    argv is an optional list of argument strings. When it is None (the
    default), argparse reads the arguments from sys.argv[1:], which is the
    behaviour of calling parse_args() without arguments.

    Returns the argparse namespace with the attributes input_file and
    output_file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="File to convert")
    parser.add_argument("output_file", help="File to write to")
    return parser.parse_args(argv)
+
+
def _lga_observations(rows):
    """Return the sorted (time, value) pairs of the rows for airport LGA."""
    # Filter before converting: rows of other airports are discarded anyway,
    # so an unparsable month or passenger count there must not abort the run.
    pairs = [
        (
            f"{row['Year']}-{month2index(row['Month'])}",
            int(row["Total Passengers"]),
        )
        for row in rows
        if row["Airport Code"] == "LGA"
    ]
    # With the zero-padded month the string order of "<year>-<month>" is the
    # date order (for years of equal width), so a plain sort is a date sort.
    pairs.sort()
    return pairs


def main():
    """Convert the input CSV file to the JSON file of the LGA series.

    Reads the CSV file given on the command line, keeps the rows whose
    "Airport Code" is "LGA", and writes a JSON document with the sorted
    year-month time axis and the integer "Total Passengers" values to the
    output file.
    """
    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.DictReader(
            fp, delimiter=",", quotechar="", escapechar=""
        )
        pairs = _lga_observations(reader)

    name = "lga_passengers"
    longname = "LaGuardia Passengers"
    time_fmt = "%Y-%m"
    time = [stamp for stamp, _ in pairs]
    values = [value for _, value in pairs]

    series = [{"label": "Number of Passengers", "type": "int", "raw": values}]

    data = {
        "name": name,
        "longname": longname,
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": time_fmt,
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }

    # Pin the encoding so the written file does not depend on the locale.
    with open(args.output_file, "w", encoding="utf-8") as fp:
        json.dump(data, fp, indent="\t")
+
+
+# Run the conversion only when this file is executed as a script.
+if __name__ == "__main__":
+ main()