diff options
Diffstat (limited to 'datasets/centralia')
| -rw-r--r-- | datasets/centralia/.gitignore | 1 | ||||
| -rw-r--r-- | datasets/centralia/README.md | 8 | ||||
| -rw-r--r-- | datasets/centralia/centralia.json | 67 | ||||
| -rw-r--r-- | datasets/centralia/centralia.png | bin | 0 -> 17909 bytes | |||
| -rw-r--r-- | datasets/centralia/convert.py | 64 | ||||
| -rw-r--r-- | datasets/centralia/from_wikipedia.txt | 15 |
6 files changed, 155 insertions, 0 deletions
diff --git a/datasets/centralia/.gitignore b/datasets/centralia/.gitignore new file mode 100644 index 0000000..a52cbb0 --- /dev/null +++ b/datasets/centralia/.gitignore @@ -0,0 +1 @@ +old/ diff --git a/datasets/centralia/README.md b/datasets/centralia/README.md new file mode 100644 index 0000000..caed746 --- /dev/null +++ b/datasets/centralia/README.md @@ -0,0 +1,8 @@ +# Population of Centralia, Pennsylvania + +Abandoned mining town in the US. + +Source: +[https://en.wikipedia.org/wiki/Centralia,_Pennsylvania#Demographics](https://en.wikipedia.org/wiki/Centralia,_Pennsylvania#Demographics) + + diff --git a/datasets/centralia/centralia.json b/datasets/centralia/centralia.json new file mode 100644 index 0000000..027bd53 --- /dev/null +++ b/datasets/centralia/centralia.json @@ -0,0 +1,67 @@ +{ + "name": "centralia", + "longname": "Centralia Pennsylvania Population", + "n_obs": 15, + "n_dim": 1, + "time": { + "type": "string", + "format": "%Y", + "index": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14 + ], + "raw": [ + "1870", + "1880", + "1890", + "1900", + "1910", + "1920", + "1930", + "1940", + "1950", + "1960", + "1970", + "1980", + "1990", + "2000", + "2010" + ] + }, + "series": [ + { + "label": "Population", + "type": "int", + "raw": [ + 1342, + 1886, + 2761, + 2048, + 2429, + 2336, + 2446, + 2449, + 1986, + 1435, + 1165, + 1017, + 63, + 21, + 10 + ] + } + ] +}
\ No newline at end of file diff --git a/datasets/centralia/centralia.png b/datasets/centralia/centralia.png Binary files differ new file mode 100644 index 0000000..8fa2b09 --- /dev/null +++ b/datasets/centralia/centralia.png diff --git a/datasets/centralia/convert.py b/datasets/centralia/convert.py new file mode 100644 index 0000000..51efa94 --- /dev/null +++ b/datasets/centralia/convert.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Dataset conversion script + +Author: Gertjan van den Burg + +""" + +import json +import argparse + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-s", + "--subsample", + help="Number of observations to skip during subsampling", + type=int, + ) + parser.add_argument("input_file", help="File to convert") + parser.add_argument("output_file", help="File to write to") + return parser.parse_args() + + +def main(): + args = parse_args() + + with open(args.input_file, "r") as fp: + rows = [l.strip().split("\t") for l in fp] + + time = [] + values = [] + for year, pop in rows: + time.append(year) + values.append(int(pop)) + + name = "centralia" + longname = "Centralia Pennsylvania Population" + time_fmt = "%Y" + series = [{"label": "Population", "type": "int", "raw": values}] + + data = { + "name": name, + "longname": longname, + "n_obs": len(time), + "n_dim": len(series), + "time": { + "type": "string", + "format": time_fmt, + "index": list(range(len(time))), + "raw": time, + }, + "series": series, + } + + with open(args.output_file, "w") as fp: + json.dump(data, fp, indent="\t") + + +if __name__ == "__main__": + main() diff --git a/datasets/centralia/from_wikipedia.txt b/datasets/centralia/from_wikipedia.txt new file mode 100644 index 0000000..9e9b45b --- /dev/null +++ b/datasets/centralia/from_wikipedia.txt @@ -0,0 +1,15 @@ +1870 1342 +1880 1886 +1890 2761 +1900 2048 +1910 2429 +1920 2336 +1930 2446 +1940 2449 +1950 1986 +1960 1435 +1970 1165 +1980 1017 +1990 63 +2000 21 +2010 10 |
