1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Author: Gertjan van den Burg
"""
import argparse
import clevercsv
import json
def parse_args(argv=None):
    """Parse the command-line arguments of the conversion script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the arguments of the running
            process, which is the behaviour existing callers rely on.

    Returns:
        An ``argparse.Namespace`` with the attributes ``input_file`` and
        ``output_file``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="File to convert")
    parser.add_argument("output_file", help="File to write to")
    # Passing None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)
def _unemployment_rate_by_year(rows):
    """Compute the unemployment percentage per year from the parsed rows.

    The rows are consumed positionally: a title row, two metadata rows,
    the header row, then the eligible, working and unemployed population
    rows. Only the header, eligible and unemployed rows are used. The
    first three cells of each of those rows are skipped (presumably row
    labels -- confirm against the input file).

    Args:
        rows: List of rows (lists of strings) as read from the CSV file.

    Returns:
        A dict mapping each year label to ``unemployed / eligible * 100``.
        The two 2001 entries are collapsed into a single ``"2001"`` key
        holding the value after revision.

    Raises:
        IndexError: If fewer than seven rows are supplied.
        KeyError: If either of the two 2001 revision columns is missing.
        ValueError: If a population cell is not an integer.
    """
    header = rows[3]
    eligible_population = rows[4]
    unemployed_population = rows[6]

    years = header[3:]
    eligible = [int(cell) for cell in eligible_population[3:]]
    unemployed = [int(cell) for cell in unemployed_population[3:]]

    # compute the percentage unemployed
    by_year = {
        y: (u / e * 100) for y, e, u in zip(years, eligible, unemployed)
    }

    # remove value of 2001 before revision
    del by_year["2001 voor revisie"]

    # rename value of 2001 after revision as simply '2001'
    by_year["2001"] = by_year.pop("2001 na revisie")
    return by_year


def main():
    """Convert the semicolon-separated input file to a JSON time series.

    Reads the file named by the ``input_file`` argument, computes the
    yearly unemployment percentage, and writes a JSON document describing
    the series to the file named by the ``output_file`` argument.
    """
    args = parse_args()
    # UTF-8-SIG drops a leading byte-order mark if the file has one.
    with open(args.input_file, "r", newline="", encoding="UTF-8-SIG") as fp:
        reader = clevercsv.reader(
            fp, delimiter=";", quotechar='"', escapechar=""
        )
        rows = list(reader)

    by_year = _unemployment_rate_by_year(rows)

    # Year labels are strings, so this is a lexicographic sort.
    time = sorted(by_year)
    values = [by_year[t] for t in time]
    series = [{"label": "V1", "type": "float", "raw": values}]

    data = {
        "name": "unemployment_nl",
        "longname": "Unemployment rate (NL)",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }

    # Explicit encoding so the output does not depend on the platform
    # default; json.dump emits ASCII-only text by default anyway.
    with open(args.output_file, "w", encoding="utf-8") as fp:
        json.dump(data, fp, indent="\t")
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|