about summary refs log tree commit diff
path: root/datasets/centralia
diff options
context:
space:
mode:
Diffstat (limited to 'datasets/centralia')
-rw-r--r-- datasets/centralia/.gitignore 1
-rw-r--r-- datasets/centralia/README.md 8
-rw-r--r-- datasets/centralia/centralia.json 67
-rw-r--r-- datasets/centralia/centralia.png bin 0 -> 17909 bytes
-rw-r--r-- datasets/centralia/convert.py 64
-rw-r--r-- datasets/centralia/from_wikipedia.txt 15
6 files changed, 155 insertions, 0 deletions
diff --git a/datasets/centralia/.gitignore b/datasets/centralia/.gitignore
new file mode 100644
index 0000000..a52cbb0
--- /dev/null
+++ b/datasets/centralia/.gitignore
@@ -0,0 +1 @@
+old/
diff --git a/datasets/centralia/README.md b/datasets/centralia/README.md
new file mode 100644
index 0000000..caed746
--- /dev/null
+++ b/datasets/centralia/README.md
@@ -0,0 +1,8 @@
+# Population of Centralia, Pennsylvania
+
+Abandoned mining town in the US.
+
+Source:
+[https://en.wikipedia.org/wiki/Centralia,_Pennsylvania#Demographics](https://en.wikipedia.org/wiki/Centralia,_Pennsylvania#Demographics)
+
+![Plot of centralia dataset](./centralia.png)
diff --git a/datasets/centralia/centralia.json b/datasets/centralia/centralia.json
new file mode 100644
index 0000000..027bd53
--- /dev/null
+++ b/datasets/centralia/centralia.json
@@ -0,0 +1,67 @@
+{
+ "name": "centralia",
+ "longname": "Centralia Pennsylvania Population",
+ "n_obs": 15,
+ "n_dim": 1,
+ "time": {
+ "type": "string",
+ "format": "%Y",
+ "index": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14
+ ],
+ "raw": [
+ "1870",
+ "1880",
+ "1890",
+ "1900",
+ "1910",
+ "1920",
+ "1930",
+ "1940",
+ "1950",
+ "1960",
+ "1970",
+ "1980",
+ "1990",
+ "2000",
+ "2010"
+ ]
+ },
+ "series": [
+ {
+ "label": "Population",
+ "type": "int",
+ "raw": [
+ 1342,
+ 1886,
+ 2761,
+ 2048,
+ 2429,
+ 2336,
+ 2446,
+ 2449,
+ 1986,
+ 1435,
+ 1165,
+ 1017,
+ 63,
+ 21,
+ 10
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/datasets/centralia/centralia.png b/datasets/centralia/centralia.png
new file mode 100644
index 0000000..8fa2b09
--- /dev/null
+++ b/datasets/centralia/centralia.png
Binary files differ
diff --git a/datasets/centralia/convert.py b/datasets/centralia/convert.py
new file mode 100644
index 0000000..51efa94
--- /dev/null
+++ b/datasets/centralia/convert.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Dataset conversion script
+
+Author: Gertjan van den Burg
+
+"""
+
+import json
+import argparse
+
+
+def parse_args():
+    """Parse the command-line arguments of the conversion script.
+
+    Returns:
+        The ``argparse.Namespace`` holding ``subsample`` (an ``int``, or
+        ``None`` when the option is omitted), ``input_file`` and
+        ``output_file``.
+    """
+    cli = argparse.ArgumentParser()
+
+    # Optional subsampling setting.
+    subsample_opts = {
+        "help": "Number of observations to skip during subsampling",
+        "type": int,
+    }
+    cli.add_argument("-s", "--subsample", **subsample_opts)
+
+    # Required positionals, registered in command-line order.
+    positionals = (
+        ("input_file", "File to convert"),
+        ("output_file", "File to write to"),
+    )
+    for arg_name, arg_help in positionals:
+        cli.add_argument(arg_name, help=arg_help)
+
+    return cli.parse_args()
+
+
+def main():
+    """Convert the tab-separated year/population file to the dataset JSON.
+
+    Reads ``input_file``, which holds one record per line with the year
+    and the population separated by a tab, and writes a JSON document to
+    ``output_file`` containing the dataset name, the time axis and a
+    single integer "Population" series.
+
+    Raises:
+        ValueError: if a non-blank line does not consist of exactly two
+            tab-separated fields, or if the population is not an integer.
+    """
+    args = parse_args()
+    # NOTE(review): ``args.subsample`` is parsed but never used by this
+    # conversion -- confirm whether subsampling was intended here.
+
+    with open(args.input_file, "r", encoding="utf-8") as fp:
+        # Skip blank lines (e.g. a trailing empty line at the end of the
+        # file) so they do not break the two-field unpacking below.
+        rows = [line.strip().split("\t") for line in fp if line.strip()]
+
+    time = []
+    values = []
+    for year, pop in rows:
+        # Years stay strings (they are formatted per ``time_fmt``);
+        # populations are stored as integers.
+        time.append(year)
+        values.append(int(pop))
+
+    name = "centralia"
+    longname = "Centralia Pennsylvania Population"
+    time_fmt = "%Y"
+    series = [{"label": "Population", "type": "int", "raw": values}]
+
+    data = {
+        "name": name,
+        "longname": longname,
+        "n_obs": len(time),
+        "n_dim": len(series),
+        "time": {
+            "type": "string",
+            "format": time_fmt,
+            "index": list(range(len(time))),
+            "raw": time,
+        },
+        "series": series,
+    }
+
+    with open(args.output_file, "w", encoding="utf-8") as fp:
+        json.dump(data, fp, indent="\t")
+
+
+# Run the conversion only when executed as a script, not when imported.
+if __name__ == "__main__":
+ main()
diff --git a/datasets/centralia/from_wikipedia.txt b/datasets/centralia/from_wikipedia.txt
new file mode 100644
index 0000000..9e9b45b
--- /dev/null
+++ b/datasets/centralia/from_wikipedia.txt
@@ -0,0 +1,15 @@
+1870 1342
+1880 1886
+1890 2761
+1900 2048
+1910 2429
+1920 2336
+1930 2446
+1940 2449
+1950 1986
+1960 1435
+1970 1165
+1980 1017
+1990 63
+2000 21
+2010 10