diff options
Diffstat (limited to 'datasets/brent_spot/convert.py')
| -rw-r--r-- | datasets/brent_spot/convert.py | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/datasets/brent_spot/convert.py b/datasets/brent_spot/convert.py new file mode 100644 index 0000000..a6fcc20 --- /dev/null +++ b/datasets/brent_spot/convert.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Dataset conversion script + +Author: G.J.J. van den Burg + +""" + +import argparse +import clevercsv +import json + +SAMPLE = 10 + +def date_to_iso(datestr): + mm, dd, yyyy = list(map(int, datestr.split("/"))) + return f"{yyyy}-{mm:02d}-{dd:02d}" + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("input_file", help="File to convert") + parser.add_argument("output_file", help="File to write to") + return parser.parse_args() + + +def main(): + args = parse_args() + + with open(args.input_file, "r", newline="", encoding="ascii") as fp: + reader = clevercsv.reader( + fp, delimiter=",", quotechar="", escapechar="" + ) + rows = list(reader) + + rows = rows[5:] + rows = list(reversed(rows)) + + rows = [r for i, r in enumerate(rows) if i % SAMPLE == 0] + + idx2000 = next((i for i, x in enumerate(rows) if x[0].endswith("2000"))) + rows = rows[idx2000:] + + name = "brent_spot" + longname = "Brent Spot Price" + time = [date_to_iso(r[0]) for r in rows] + time_fmt = "%Y-%m-%d" + values = [float(r[1]) for r in rows] + + series = [{"label": "Dollars/Barrel", "type": "float", "raw": values}] + + data = { + "name": name, + "longname": longname, + "n_obs": len(time), + "n_dim": len(series), + "time": { + "type": "string", + "format": time_fmt, + "index": list(range(len(time))), + "raw": time, + }, + "series": series, + } + + with open(args.output_file, "w") as fp: + json.dump(data, fp, indent="\t") + + +if __name__ == "__main__": + main() |
