aboutsummaryrefslogtreecommitdiff
path: root/datasets/construction
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2020-03-10 12:27:53 +0000
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2020-03-10 12:27:53 +0000
commit7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e (patch)
tree10aa6710599230c889ec44407a065ee303a79348 /datasets/construction
downloadTCPD-7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e.tar.gz
TCPD-7c6c2e09e3ad1d41f26869cb7b9f9882175c8a6e.zip
Initial commit
Diffstat (limited to 'datasets/construction')
-rw-r--r--datasets/construction/.gitignore1
-rw-r--r--datasets/construction/README.md26
-rw-r--r--datasets/construction/construction.json979
-rw-r--r--datasets/construction/construction.pngbin0 -> 35211 bytes
-rw-r--r--datasets/construction/convert.py114
-rw-r--r--datasets/construction/privtime.xlsbin0 -> 245248 bytes
6 files changed, 1120 insertions, 0 deletions
diff --git a/datasets/construction/.gitignore b/datasets/construction/.gitignore
new file mode 100644
index 0000000..a52cbb0
--- /dev/null
+++ b/datasets/construction/.gitignore
@@ -0,0 +1 @@
+old/
diff --git a/datasets/construction/README.md b/datasets/construction/README.md
new file mode 100644
index 0000000..c90f793
--- /dev/null
+++ b/datasets/construction/README.md
@@ -0,0 +1,26 @@
+# Total Private Construction Spending
+
+This dataset is retrieved from the US Census Bureau and concerns total private
+construction spending. Potential change points occur at recessions. The data
+is obtained [from this Census
+page](https://www.census.gov/construction/c30/historical_data.html) using the
+"Private" series from the "Not Seasonally Adjusted" column in the "Monthly"
+table. Alternatively, use [this direct
+URL](https://www.census.gov/construction/c30/xls/privtime.xls).
+
+According to [this
+page](https://web.archive.org/web/20191120160410/https://ask.census.gov/prweb/PRServletCustom/YACFBFye-rFIz_FoGtyvDRUGg1Uzu5Mn*/!STANDARD?pyActivity=pyMobileSnapStart&ArticleID=KCP-4726)
+on the US Census website, we are allowed to redistribute the data as part of
+this repository.
+
+Source: United States Census Bureau, URL: https://www.census.gov, Retrieved:
+2019-09-11.
+
+To create the ``construction.json`` file from the raw ``privtime.xls`` file,
+simply run:
+
+```
+$ python convert.py privtime.xls construction.json
+```
+
+![Plot of construction dataset](./construction.png)
diff --git a/datasets/construction/construction.json b/datasets/construction/construction.json
new file mode 100644
index 0000000..5807c85
--- /dev/null
+++ b/datasets/construction/construction.json
@@ -0,0 +1,979 @@
+{
+ "name": "construction",
+ "longname": "US Construction Spending",
+ "n_obs": 319,
+ "n_dim": 1,
+ "time": {
+ "type": "string",
+ "format": "%Y-%m",
+ "index": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 64,
+ 65,
+ 66,
+ 67,
+ 68,
+ 69,
+ 70,
+ 71,
+ 72,
+ 73,
+ 74,
+ 75,
+ 76,
+ 77,
+ 78,
+ 79,
+ 80,
+ 81,
+ 82,
+ 83,
+ 84,
+ 85,
+ 86,
+ 87,
+ 88,
+ 89,
+ 90,
+ 91,
+ 92,
+ 93,
+ 94,
+ 95,
+ 96,
+ 97,
+ 98,
+ 99,
+ 100,
+ 101,
+ 102,
+ 103,
+ 104,
+ 105,
+ 106,
+ 107,
+ 108,
+ 109,
+ 110,
+ 111,
+ 112,
+ 113,
+ 114,
+ 115,
+ 116,
+ 117,
+ 118,
+ 119,
+ 120,
+ 121,
+ 122,
+ 123,
+ 124,
+ 125,
+ 126,
+ 127,
+ 128,
+ 129,
+ 130,
+ 131,
+ 132,
+ 133,
+ 134,
+ 135,
+ 136,
+ 137,
+ 138,
+ 139,
+ 140,
+ 141,
+ 142,
+ 143,
+ 144,
+ 145,
+ 146,
+ 147,
+ 148,
+ 149,
+ 150,
+ 151,
+ 152,
+ 153,
+ 154,
+ 155,
+ 156,
+ 157,
+ 158,
+ 159,
+ 160,
+ 161,
+ 162,
+ 163,
+ 164,
+ 165,
+ 166,
+ 167,
+ 168,
+ 169,
+ 170,
+ 171,
+ 172,
+ 173,
+ 174,
+ 175,
+ 176,
+ 177,
+ 178,
+ 179,
+ 180,
+ 181,
+ 182,
+ 183,
+ 184,
+ 185,
+ 186,
+ 187,
+ 188,
+ 189,
+ 190,
+ 191,
+ 192,
+ 193,
+ 194,
+ 195,
+ 196,
+ 197,
+ 198,
+ 199,
+ 200,
+ 201,
+ 202,
+ 203,
+ 204,
+ 205,
+ 206,
+ 207,
+ 208,
+ 209,
+ 210,
+ 211,
+ 212,
+ 213,
+ 214,
+ 215,
+ 216,
+ 217,
+ 218,
+ 219,
+ 220,
+ 221,
+ 222,
+ 223,
+ 224,
+ 225,
+ 226,
+ 227,
+ 228,
+ 229,
+ 230,
+ 231,
+ 232,
+ 233,
+ 234,
+ 235,
+ 236,
+ 237,
+ 238,
+ 239,
+ 240,
+ 241,
+ 242,
+ 243,
+ 244,
+ 245,
+ 246,
+ 247,
+ 248,
+ 249,
+ 250,
+ 251,
+ 252,
+ 253,
+ 254,
+ 255,
+ 256,
+ 257,
+ 258,
+ 259,
+ 260,
+ 261,
+ 262,
+ 263,
+ 264,
+ 265,
+ 266,
+ 267,
+ 268,
+ 269,
+ 270,
+ 271,
+ 272,
+ 273,
+ 274,
+ 275,
+ 276,
+ 277,
+ 278,
+ 279,
+ 280,
+ 281,
+ 282,
+ 283,
+ 284,
+ 285,
+ 286,
+ 287,
+ 288,
+ 289,
+ 290,
+ 291,
+ 292,
+ 293,
+ 294,
+ 295,
+ 296,
+ 297,
+ 298,
+ 299,
+ 300,
+ 301,
+ 302,
+ 303,
+ 304,
+ 305,
+ 306,
+ 307,
+ 308,
+ 309,
+ 310,
+ 311,
+ 312,
+ 313,
+ 314,
+ 315,
+ 316,
+ 317,
+ 318
+ ],
+ "raw": [
+ "1993-01",
+ "1993-02",
+ "1993-03",
+ "1993-04",
+ "1993-05",
+ "1993-06",
+ "1993-07",
+ "1993-08",
+ "1993-09",
+ "1993-10",
+ "1993-11",
+ "1993-12",
+ "1994-01",
+ "1994-02",
+ "1994-03",
+ "1994-04",
+ "1994-05",
+ "1994-06",
+ "1994-07",
+ "1994-08",
+ "1994-09",
+ "1994-10",
+ "1994-11",
+ "1994-12",
+ "1995-01",
+ "1995-02",
+ "1995-03",
+ "1995-04",
+ "1995-05",
+ "1995-06",
+ "1995-07",
+ "1995-08",
+ "1995-09",
+ "1995-10",
+ "1995-11",
+ "1995-12",
+ "1996-01",
+ "1996-02",
+ "1996-03",
+ "1996-04",
+ "1996-05",
+ "1996-06",
+ "1996-07",
+ "1996-08",
+ "1996-09",
+ "1996-10",
+ "1996-11",
+ "1996-12",
+ "1997-01",
+ "1997-02",
+ "1997-03",
+ "1997-04",
+ "1997-05",
+ "1997-06",
+ "1997-07",
+ "1997-08",
+ "1997-09",
+ "1997-10",
+ "1997-11",
+ "1997-12",
+ "1998-01",
+ "1998-02",
+ "1998-03",
+ "1998-04",
+ "1998-05",
+ "1998-06",
+ "1998-07",
+ "1998-08",
+ "1998-09",
+ "1998-10",
+ "1998-11",
+ "1998-12",
+ "1999-01",
+ "1999-02",
+ "1999-03",
+ "1999-04",
+ "1999-05",
+ "1999-06",
+ "1999-07",
+ "1999-08",
+ "1999-09",
+ "1999-10",
+ "1999-11",
+ "1999-12",
+ "2000-01",
+ "2000-02",
+ "2000-03",
+ "2000-04",
+ "2000-05",
+ "2000-06",
+ "2000-07",
+ "2000-08",
+ "2000-09",
+ "2000-10",
+ "2000-11",
+ "2000-12",
+ "2001-01",
+ "2001-02",
+ "2001-03",
+ "2001-04",
+ "2001-05",
+ "2001-06",
+ "2001-07",
+ "2001-08",
+ "2001-09",
+ "2001-10",
+ "2001-11",
+ "2001-12",
+ "2002-01",
+ "2002-02",
+ "2002-03",
+ "2002-04",
+ "2002-05",
+ "2002-06",
+ "2002-07",
+ "2002-08",
+ "2002-09",
+ "2002-10",
+ "2002-11",
+ "2002-12",
+ "2003-01",
+ "2003-02",
+ "2003-03",
+ "2003-04",
+ "2003-05",
+ "2003-06",
+ "2003-07",
+ "2003-08",
+ "2003-09",
+ "2003-10",
+ "2003-11",
+ "2003-12",
+ "2004-01",
+ "2004-02",
+ "2004-03",
+ "2004-04",
+ "2004-05",
+ "2004-06",
+ "2004-07",
+ "2004-08",
+ "2004-09",
+ "2004-10",
+ "2004-11",
+ "2004-12",
+ "2005-01",
+ "2005-02",
+ "2005-03",
+ "2005-04",
+ "2005-05",
+ "2005-06",
+ "2005-07",
+ "2005-08",
+ "2005-09",
+ "2005-10",
+ "2005-11",
+ "2005-12",
+ "2006-01",
+ "2006-02",
+ "2006-03",
+ "2006-04",
+ "2006-05",
+ "2006-06",
+ "2006-07",
+ "2006-08",
+ "2006-09",
+ "2006-10",
+ "2006-11",
+ "2006-12",
+ "2007-01",
+ "2007-02",
+ "2007-03",
+ "2007-04",
+ "2007-05",
+ "2007-06",
+ "2007-07",
+ "2007-08",
+ "2007-09",
+ "2007-10",
+ "2007-11",
+ "2007-12",
+ "2008-01",
+ "2008-02",
+ "2008-03",
+ "2008-04",
+ "2008-05",
+ "2008-06",
+ "2008-07",
+ "2008-08",
+ "2008-09",
+ "2008-10",
+ "2008-11",
+ "2008-12",
+ "2009-01",
+ "2009-02",
+ "2009-03",
+ "2009-04",
+ "2009-05",
+ "2009-06",
+ "2009-07",
+ "2009-08",
+ "2009-09",
+ "2009-10",
+ "2009-11",
+ "2009-12",
+ "2010-01",
+ "2010-02",
+ "2010-03",
+ "2010-04",
+ "2010-05",
+ "2010-06",
+ "2010-07",
+ "2010-08",
+ "2010-09",
+ "2010-10",
+ "2010-11",
+ "2010-12",
+ "2011-01",
+ "2011-02",
+ "2011-03",
+ "2011-04",
+ "2011-05",
+ "2011-06",
+ "2011-07",
+ "2011-08",
+ "2011-09",
+ "2011-10",
+ "2011-11",
+ "2011-12",
+ "2012-01",
+ "2012-02",
+ "2012-03",
+ "2012-04",
+ "2012-05",
+ "2012-06",
+ "2012-07",
+ "2012-08",
+ "2012-09",
+ "2012-10",
+ "2012-11",
+ "2012-12",
+ "2013-01",
+ "2013-02",
+ "2013-03",
+ "2013-04",
+ "2013-05",
+ "2013-06",
+ "2013-07",
+ "2013-08",
+ "2013-09",
+ "2013-10",
+ "2013-11",
+ "2013-12",
+ "2014-01",
+ "2014-02",
+ "2014-03",
+ "2014-04",
+ "2014-05",
+ "2014-06",
+ "2014-07",
+ "2014-08",
+ "2014-09",
+ "2014-10",
+ "2014-11",
+ "2014-12",
+ "2015-01",
+ "2015-02",
+ "2015-03",
+ "2015-04",
+ "2015-05",
+ "2015-06",
+ "2015-07",
+ "2015-08",
+ "2015-09",
+ "2015-10",
+ "2015-11",
+ "2015-12",
+ "2016-01",
+ "2016-02",
+ "2016-03",
+ "2016-04",
+ "2016-05",
+ "2016-06",
+ "2016-07",
+ "2016-08",
+ "2016-09",
+ "2016-10",
+ "2016-11",
+ "2016-12",
+ "2017-01",
+ "2017-02",
+ "2017-03",
+ "2017-04",
+ "2017-05",
+ "2017-06",
+ "2017-07",
+ "2017-08",
+ "2017-09",
+ "2017-10",
+ "2017-11",
+ "2017-12",
+ "2018-01",
+ "2018-02",
+ "2018-03",
+ "2018-04",
+ "2018-05",
+ "2018-06",
+ "2018-07",
+ "2018-08",
+ "2018-09",
+ "2018-10",
+ "2018-11",
+ "2018-12",
+ "2019-01",
+ "2019-02",
+ "2019-03",
+ "2019-04",
+ "2019-05",
+ "2019-06",
+ "2019-07"
+ ]
+ },
+ "series": [
+ {
+ "label": "Total Private Construction Spending",
+ "type": "int",
+ "raw": [
+ 24043,
+ 22794,
+ 25667,
+ 27609,
+ 29839,
+ 31842,
+ 32303,
+ 33546,
+ 34208,
+ 33414,
+ 33469,
+ 29453,
+ 26978,
+ 25459,
+ 29769,
+ 32510,
+ 35038,
+ 36752,
+ 36702,
+ 37728,
+ 38086,
+ 36364,
+ 35572,
+ 30511,
+ 28780,
+ 27075,
+ 30706,
+ 32504,
+ 34739,
+ 36579,
+ 37048,
+ 37876,
+ 38393,
+ 37639,
+ 35934,
+ 31383,
+ 30074,
+ 28733,
+ 32567,
+ 35722,
+ 38727,
+ 41488,
+ 41271,
+ 42797,
+ 43138,
+ 43046,
+ 40693,
+ 34764,
+ 32641,
+ 31384,
+ 35236,
+ 37462,
+ 40780,
+ 42705,
+ 43731,
+ 44816,
+ 45560,
+ 45144,
+ 42127,
+ 36826,
+ 34903,
+ 33886,
+ 39498,
+ 42524,
+ 45481,
+ 49721,
+ 49067,
+ 50244,
+ 50142,
+ 49631,
+ 47250,
+ 41394,
+ 38582,
+ 38333,
+ 44097,
+ 45757,
+ 48405,
+ 52055,
+ 52451,
+ 52743,
+ 52725,
+ 52728,
+ 51770,
+ 45824,
+ 42073,
+ 42589,
+ 48318,
+ 50472,
+ 54147,
+ 56460,
+ 55573,
+ 57345,
+ 56317,
+ 56280,
+ 54279,
+ 47579,
+ 44444,
+ 43361,
+ 49170,
+ 52269,
+ 55363,
+ 58992,
+ 58811,
+ 59210,
+ 56769,
+ 57150,
+ 54208,
+ 48589,
+ 45273,
+ 44475,
+ 49396,
+ 53283,
+ 55009,
+ 57801,
+ 58494,
+ 57953,
+ 55157,
+ 56023,
+ 53552,
+ 48023,
+ 45473,
+ 44620,
+ 49996,
+ 53195,
+ 56286,
+ 60076,
+ 62216,
+ 63234,
+ 61767,
+ 62813,
+ 60115,
+ 55584,
+ 50975,
+ 50449,
+ 57796,
+ 61177,
+ 65027,
+ 69039,
+ 72170,
+ 74001,
+ 70662,
+ 70856,
+ 67585,
+ 61438,
+ 58078,
+ 58577,
+ 65871,
+ 70376,
+ 73607,
+ 78304,
+ 81197,
+ 83376,
+ 80997,
+ 82520,
+ 78573,
+ 71177,
+ 66681,
+ 66525,
+ 74662,
+ 77512,
+ 79855,
+ 82366,
+ 82427,
+ 82813,
+ 78179,
+ 77338,
+ 71972,
+ 65567,
+ 60764,
+ 60761,
+ 67919,
+ 71702,
+ 74841,
+ 78183,
+ 78959,
+ 79880,
+ 75872,
+ 77161,
+ 69889,
+ 62947,
+ 58840,
+ 58319,
+ 62048,
+ 65400,
+ 67302,
+ 69059,
+ 69931,
+ 69057,
+ 66560,
+ 67726,
+ 60020,
+ 54351,
+ 47331,
+ 46281,
+ 49049,
+ 50090,
+ 50195,
+ 51830,
+ 52378,
+ 52976,
+ 50849,
+ 51733,
+ 46219,
+ 42719,
+ 36427,
+ 36263,
+ 39853,
+ 42630,
+ 43337,
+ 45602,
+ 44692,
+ 45523,
+ 44032,
+ 45456,
+ 42104,
+ 39373,
+ 32395,
+ 32542,
+ 36687,
+ 39577,
+ 41656,
+ 45331,
+ 45398,
+ 47689,
+ 46881,
+ 47827,
+ 44189,
+ 41754,
+ 38523,
+ 38555,
+ 42310,
+ 45691,
+ 48165,
+ 50356,
+ 50957,
+ 52392,
+ 52363,
+ 54749,
+ 50204,
+ 46880,
+ 41977,
+ 41754,
+ 46358,
+ 49899,
+ 52858,
+ 55444,
+ 57629,
+ 58992,
+ 59069,
+ 61237,
+ 57496,
+ 54929,
+ 50773,
+ 50366,
+ 55865,
+ 59904,
+ 62361,
+ 64718,
+ 64878,
+ 65013,
+ 65863,
+ 68035,
+ 63578,
+ 60142,
+ 55540,
+ 56126,
+ 63334,
+ 67904,
+ 72237,
+ 76694,
+ 76535,
+ 76424,
+ 76751,
+ 76520,
+ 71750,
+ 67110,
+ 61320,
+ 62987,
+ 71078,
+ 73825,
+ 77402,
+ 81578,
+ 81997,
+ 82872,
+ 82310,
+ 83052,
+ 80671,
+ 75331,
+ 68533,
+ 69629,
+ 77375,
+ 80170,
+ 83985,
+ 86363,
+ 86473,
+ 86557,
+ 85526,
+ 85141,
+ 83076,
+ 76476,
+ 70537,
+ 74001,
+ 80544,
+ 84183,
+ 89212,
+ 90007,
+ 91200,
+ 88911,
+ 88674,
+ 86712,
+ 81863,
+ 74311,
+ 69541,
+ 70054,
+ 77678,
+ 80577,
+ 83633,
+ 85813,
+ 86812
+ ]
+ }
+ ]
+} \ No newline at end of file
diff --git a/datasets/construction/construction.png b/datasets/construction/construction.png
new file mode 100644
index 0000000..6296f67
--- /dev/null
+++ b/datasets/construction/construction.png
Binary files differ
diff --git a/datasets/construction/convert.py b/datasets/construction/convert.py
new file mode 100644
index 0000000..0abde23
--- /dev/null
+++ b/datasets/construction/convert.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Dataset conversion script
+
+Author: G.J.J. van den Burg
+
+"""
+
+import argparse
+import json
+import xlrd
+
+MONTHS = {
+ "Jan": 1,
+ "Feb": 2,
+ "Mar": 3,
+ "Apr": 4,
+ "May": 5,
+ "Jun": 6,
+ "Jul": 7,
+ "Aug": 8,
+ "Sep": 9,
+ "Oct": 10,
+ "Nov": 11,
+ "Dec": 12,
+}
+
+
+def format_date(datestr):
+ """ expects: mmm-yyx with x an extraneous character or empty """
+ mmm, yyx = datestr.split("-")
+ midx = MONTHS[mmm]
+ if len(yyx) == 3:
+ yy = yyx[:2]
+ elif len(yyx) == 2:
+ yy = yyx
+ else:
+ raise ValueError
+
+ # this will break in 71 years
+ if yy.startswith("9"):
+ yyyy = 1900 + int(yy)
+ else:
+ yyyy = 2000 + int(yy)
+ return f"{yyyy}-{midx:02}"
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("input_file", help="File to convert")
+ parser.add_argument("output_file", help="File to write to")
+ return parser.parse_args()
+
+
+def main():
+ args = parse_args()
+
+ wb = xlrd.open_workbook(args.input_file)
+ ws = wb.sheet_by_index(0)
+ header = ws.row(3)
+ assert header[0].value == "Date"
+
+ by_month = {}
+ ridx = 4
+ while True:
+ # stop if date cell is empty
+ if ws.row(ridx)[0].ctype == xlrd.XL_CELL_EMPTY:
+ break
+
+ date_value = ws.row(ridx)[0].value
+ construct_value = ws.row(ridx)[1].value
+
+ date = format_date(date_value)
+ construct = int(construct_value)
+
+ by_month[date] = construct
+ ridx += 1
+
+ name = "construction"
+ longname = "US Construction Spending"
+ time = sorted(by_month.keys())
+ time_fmt = "%Y-%m"
+ values = [by_month[t] for t in time]
+
+ series = [
+ {
+ "label": "Total Private Construction Spending",
+ "type": "int",
+ "raw": values,
+ }
+ ]
+
+ data = {
+ "name": name,
+ "longname": longname,
+ "n_obs": len(time),
+ "n_dim": len(series),
+ "time": {
+ "type": "string",
+ "format": time_fmt,
+ "index": list(range(len(time))),
+ "raw": time,
+ },
+ "series": series,
+ }
+
+ with open(args.output_file, "w") as fp:
+ json.dump(data, fp, indent="\t")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/datasets/construction/privtime.xls b/datasets/construction/privtime.xls
new file mode 100644
index 0000000..ecc1fec
--- /dev/null
+++ b/datasets/construction/privtime.xls
Binary files differ