1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Dataset conversion script
Author: G.J.J. van den Burg
"""
import argparse
import json
import xlrd
# Three-letter English month abbreviations mapped to their 1-based month
# number ("Jan" -> 1 ... "Dec" -> 12).  Built from the ordered tuple below so
# the numbering follows directly from calendar order.
MONTHS = {
    abbreviation: number
    for number, abbreviation in enumerate(
        (
            "Jan",
            "Feb",
            "Mar",
            "Apr",
            "May",
            "Jun",
            "Jul",
            "Aug",
            "Sep",
            "Oct",
            "Nov",
            "Dec",
        ),
        start=1,
    )
}
def format_date(datestr):
    """Convert a ``mmm-yy`` month label into a ``YYYY-MM`` string.

    The year part may carry one extraneous trailing character
    (``mmm-yyx``); that character is discarded.

    Args:
        datestr: Label such as ``"Jan-93"`` or ``"Feb-20p"``.  The month
            part must be a key of ``MONTHS``.

    Returns:
        The month formatted as ``"YYYY-MM"``, e.g. ``"1993-01"``.

    Raises:
        ValueError: If ``datestr`` does not consist of exactly two
            ``-``-separated parts, or the year part is not two digits
            optionally followed by a single extra character.
        KeyError: If the month abbreviation is not a key of ``MONTHS``.
    """
    parts = datestr.split("-")
    if len(parts) != 2:
        raise ValueError(
            f"expected a date of the form 'mmm-yy' or 'mmm-yyx', "
            f"got {datestr!r}"
        )
    mmm, yyx = parts
    midx = MONTHS[mmm]

    if len(yyx) not in (2, 3):
        raise ValueError(
            f"year part of {datestr!r} must be 'yy' or 'yyx', got {yyx!r}"
        )
    # Drop the optional extraneous third character.
    yy = yyx[:2]
    # Reject values such as "2 " or "+1" that int() would silently accept
    # and turn into a wrong year.
    if not yy.isdecimal():
        raise ValueError(
            f"year part of {datestr!r} is not a two-digit number: {yy!r}"
        )

    # Two-digit year pivot: 90-99 map to the 1990s, everything else to the
    # 2000s.  This becomes ambiguous once the data reaches the year 2090.
    century = 1900 if yy.startswith("9") else 2000
    yyyy = century + int(yy)
    return f"{yyyy}-{midx:02}"
def parse_args(argv=None):
    """Parse the command-line arguments of the conversion script.

    Args:
        argv: Optional list of argument strings to parse.  When ``None``
            (the default) argparse reads the process command line, which
            is what a call without arguments has always done.

    Returns:
        An ``argparse.Namespace`` with the attributes ``input_file`` and
        ``output_file``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="File to convert")
    parser.add_argument("output_file", help="File to write to")
    return parser.parse_args(argv)
def _read_monthly_values(ws):
    """Read the ``YYYY-MM`` -> integer value mapping from worksheet ``ws``.

    Row index 3 must hold the header, with ``"Date"`` in its first cell.
    Data rows start at row index 4; column 0 holds the date label and
    column 1 the value.  Reading stops at the first row whose date cell is
    empty, or at the end of the sheet, whichever comes first.  If a month
    occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If the sheet has no header row at index 3 or its first
            cell is not ``"Date"``.
    """
    header_ridx = 3
    first_data_ridx = 4

    # Explicit check instead of ``assert``, which is skipped under ``-O``.
    if ws.nrows <= header_ridx or ws.row(header_ridx)[0].value != "Date":
        raise ValueError(
            f"expected 'Date' in the first cell of row {header_ridx + 1} "
            "of the first sheet"
        )

    by_month = {}
    # Bounding the loop by ``nrows`` avoids an IndexError when the data
    # runs to the very last row without a trailing empty cell.
    for ridx in range(first_data_ridx, ws.nrows):
        row = ws.row(ridx)
        # stop if date cell is empty
        if row[0].ctype == xlrd.XL_CELL_EMPTY:
            break
        date = format_date(row[0].value)
        construct = int(row[1].value)
        by_month[date] = construct
    return by_month


def _build_dataset(by_month):
    """Assemble the JSON-serialisable dataset dict from ``by_month``.

    Observations are ordered by their ``YYYY-MM`` key, which sorts
    chronologically as plain strings.
    """
    time = sorted(by_month)
    series = [
        {
            "label": "Total Private Construction Spending",
            "type": "int",
            "raw": [by_month[t] for t in time],
        }
    ]
    return {
        "name": "construction",
        "longname": "US Construction Spending",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }


def main():
    """Convert the input workbook into the JSON dataset file.

    Reads the first sheet of the workbook named by the ``input_file``
    command-line argument, collects one integer value per month, and
    writes the resulting dataset as tab-indented JSON to ``output_file``.

    Raises:
        ValueError: If the sheet does not have the expected ``"Date"``
            header, or a date label cannot be parsed.
    """
    args = parse_args()
    wb = xlrd.open_workbook(args.input_file)
    ws = wb.sheet_by_index(0)

    data = _build_dataset(_read_monthly_values(ws))

    with open(args.output_file, "w", encoding="utf-8") as fp:
        json.dump(data, fp, indent="\t")
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|