#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Dataset conversion script

Reads a CSV file of airport passenger counts, keeps the records whose
"Airport Code" is "JFK", and writes them as a JSON time series description
(the "jfk_passengers" dataset) with "YYYY-MM" time labels.

Author: G.J.J. van den Burg

"""

# Provenance: datasets/jfk_passengers/convert.py as added by commit
# 7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e ("Initial commit", 2020-03-10).

import json
import argparse


# Three-letter month abbreviation -> zero-padded month number. Built once at
# import time instead of on every month2index() call.
_MONTH_INDEX = {
    "Jan": "01",
    "Feb": "02",
    "Mar": "03",
    "Apr": "04",
    "May": "05",
    "Jun": "06",
    "Jul": "07",
    "Aug": "08",
    "Sep": "09",
    "Oct": "10",
    "Nov": "11",
    "Dec": "12",
}


def month2index(month):
    """Return the zero-padded two-digit month number for *month*.

    *month* must be one of the abbreviations "Jan" ... "Dec" (exact case).
    Raises KeyError for any other value.
    """
    return _MONTH_INDEX[month]


def parse_args():
    """Parse the command line: positional input_file and output_file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="File to convert")
    parser.add_argument("output_file", help="File to write to")
    return parser.parse_args()


def build_dataset(rows):
    """Build the JSON-serialisable dataset dictionary from parsed CSV rows.

    *rows* is an iterable of mappings, one per CSV record, that provide the
    keys "Airport Code", "Year", "Month" and "Total Passengers". The rows
    are not modified.

    Only rows with "Airport Code" equal to "JFK" are converted, so a
    malformed count or month in another airport's row cannot abort the
    conversion. Raises KeyError for a missing column or an unknown month
    and ValueError for a non-integer passenger count in a JFK row.
    """
    # Filter before converting: int() and month2index() only ever see the
    # rows that actually end up in the output.
    pairs = sorted(
        (
            f"{row['Year']}-{month2index(row['Month'])}",
            int(row["Total Passengers"]),
        )
        for row in rows
        if row["Airport Code"] == "JFK"
    )
    # With the "%Y-%m" layout (zero-padded month) the lexicographic order of
    # the time strings produced by sorted() is also the chronological order.
    time = [stamp for stamp, _ in pairs]
    values = [count for _, count in pairs]

    series = [{"label": "Number of Passengers", "type": "int", "raw": values}]

    return {
        "name": "jfk_passengers",
        "longname": "JFK Passengers",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }


def main():
    """Convert the input CSV file to the JSON dataset file."""
    # clevercsv is only needed for reading the input; importing it here keeps
    # the pure helpers above importable without the third-party package.
    import clevercsv

    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.DictReader(
            fp, delimiter=",", quotechar="", escapechar=""
        )
        rows = list(reader)

    data = build_dataset(rows)

    # Explicit encoding so the output does not depend on the platform's
    # default locale encoding.
    with open(args.output_file, "w", encoding="utf-8") as fp:
        json.dump(data, fp, indent="\t")


if __name__ == "__main__":
    main()