diff options
Diffstat (limited to 'datasets/shanghai_license')
| -rw-r--r-- | datasets/shanghai_license/.gitignore | 1 | ||||
| -rw-r--r-- | datasets/shanghai_license/README.md | 29 | ||||
| -rw-r--r-- | datasets/shanghai_license/Shanghai_license_plate_price_-_Sheet3.csv | 205 | ||||
| -rw-r--r-- | datasets/shanghai_license/convert.py | 89 | ||||
| -rw-r--r-- | datasets/shanghai_license/shanghai_license.json | 637 | ||||
| -rw-r--r-- | datasets/shanghai_license/shanghai_license.png | bin | 0 -> 21139 bytes |
6 files changed, 961 insertions, 0 deletions
diff --git a/datasets/shanghai_license/.gitignore b/datasets/shanghai_license/.gitignore new file mode 100644 index 0000000..a52cbb0 --- /dev/null +++ b/datasets/shanghai_license/.gitignore @@ -0,0 +1 @@ +old/ diff --git a/datasets/shanghai_license/README.md b/datasets/shanghai_license/README.md new file mode 100644 index 0000000..f4c7026 --- /dev/null +++ b/datasets/shanghai_license/README.md @@ -0,0 +1,29 @@ +# Shanghai License Plate Applicants + +Source: +[Kaggle](https://www.kaggle.com/bogof666/shanghai-car-license-plate-auction-price). +Data licensed under [CC0: Public +Domain](https://creativecommons.org/publicdomain/zero/1.0/), so we can +redistribute it as part of this repository. + +There seems to be a clear sudden growth in the number of applicants. + +Note: according to [this discussion on +Kaggle](https://www.kaggle.com/bogof666/shanghai-car-license-plate-auction-price/discussion/73140), +the record for 2008-02 is missing because the license plates for January and +February were auctioned off simultaneously in January. As this represents an +uneven measurement and a missing value, we choose to split the observation for +January and February 2008 in two, dividing the amount equally between the +months. An alternative would be to introduce a missing value in 2008-02, but +since many of the algorithms we wish to evaluate are not able to handle +missing values (and any imputation method would be incorrect), we believe this +is a reasonable way to deal with this issue. 
+ +To obtain the ``shanghai_license.json`` file from the +``Shanghai_license_plate_price_-_Sheet3.csv`` file, simply run: + +``` +$ python convert.py Shanghai_license_plate_price_-_Sheet3.csv shanghai_license.json +``` + + diff --git a/datasets/shanghai_license/Shanghai_license_plate_price_-_Sheet3.csv b/datasets/shanghai_license/Shanghai_license_plate_price_-_Sheet3.csv new file mode 100644 index 0000000..18863d7 --- /dev/null +++ b/datasets/shanghai_license/Shanghai_license_plate_price_-_Sheet3.csv @@ -0,0 +1,205 @@ +Date,Total number of license issued,lowest price ,avg price,Total number of applicants
+Jan-02,1400,13600,14735,3718
+Feb-02,1800,13100,14057,4590
+Mar-02,2000,14300,14662,5190
+Apr-02,2300,16000,16334,4806
+May-02,2350,17800,18357,4665
+Jun-02,2800,19600,20178,4502
+Jul-02,3000,19800,20904,3774
+Aug-02,3000,21000,21601,4640
+Sep-02,3200,23600,24040,4393
+Oct-02,3200,26400,27040,4661
+Nov-02,3200,30800,31721,4021
+Dec-02,3600,27800,27848,3525
+Jan-03,3000,18800,24267,9442
+Feb-03,3000,23800,25254,12030
+Mar-03,3000,28800,29551,11219
+Apr-03,3300,34100,34845,8794
+May-03,3800,35000,36903,14634
+Jun-03,5500,36100,37667,15507
+Jul-03,6000,36900,38269,11929
+Aug-03,4500,38500,39369,9315
+Sep-03,6650,28800,38728,8532
+Oct-03,4500,32800,34842,9383
+Nov-03,5042,33100,34284,9849
+Dec-03,4776,37100,38054,10491
+Jan-04,5000,38000,39516,8663
+Feb-04,4800,39600,40053,10156
+Mar-04,4800,43000,43333,9950
+Apr-04,5500,44200,45492,8150
+May-04,6527,10800,34226,8114
+Jun-04,6233,17800,21001,19233
+Jul-04,6600,21800,23544,14464
+Aug-04,6800,25100,25991,15506
+Sep-04,6640,29300,30033,10634
+Oct-04,6600,28000,29768,9519
+Nov-04,6600,26000,27620,9188
+Dec-04,5500,29300,30282,9005
+Jan-05,5500,28500,32520,6208
+Feb-05,3800,31700,32425,8949
+Mar-05,4000,34300,34684,9117
+Apr-05,5000,36800,37355,8113
+May-05,5833,35000,35661,9673
+Jun-05,5690,37000,37479,8409
+Jul-05,6326,37900,38378,8777
+Aug-05,6829,25000,35905,7520
+Sep-05,6700,26500,28927,10972
+Oct-05,6000,25200,26385,11167
+Nov-05,5700,29800,30320,13633
+Dec-05,5700,35200,36749,8351
+Jan-06,5000,26900,31220,5907
+Feb-06,3800,34200,34887,12367
+Mar-06,4500,38500,38932,8904
+Apr-06,5000,37500,38326,7888
+May-06,4500,37700,38139,8301
+Jun-06,4500,39500,39752,8478
+Jul-06,5500,39600,39966,8966
+Aug-06,6200,39900,40459,9190
+Sep-06,6500,37000,41601,7064
+Oct-06,6500,36300,37899,11237
+Nov-06,6000,37800,38460,110234
+Dec-06,6500,39800,40518,9477
+Jan-07,6000,38500,40974,6587
+Feb-07,3500,39100,40473,5056
+Mar-07,4000,41100,41573,10168
+Apr-07,5500,43300,43623,10523
+May-07,6000,44500,44853,10273
+Jun-07,6000,47200,47711,11478
+Jul-07,5500,45200,46581,10327
+Aug-07,8000,46500,46897,12943
+Sep-07,8500,48600,49631,10561
+Oct-07,7500,50500,51000,10715
+Nov-07,7500,53800,54317,10596
+Dec-07,7500,50000,56042,10356
+Jan-08,16000,8100,23370,20539
+Mar-08,9300,31300,32169,63534
+Apr-08,9000,37300,37659,37072
+May-08,8200,34400,36047,26341
+Jun-08,7700,33900,34947,21208
+Jul-08,6800,33800,34491,16783
+Aug-08,6000,35900,36460,13451
+Sep-08,6500,29300,31788,11002
+Oct-08,5000,32600,33224,11882
+Nov-08,5500,21800,24351,10170
+Dec-08,4500,31000,31665,16801
+Jan-09,5200,28600,29399,16544
+Feb-09,5200,33000,33394,16848
+Mar-09,6000,26600,27552,18575
+Apr-09,6500,28300,28724,17654
+May-09,7200,28500,29100,16471
+Jun-09,8000,30000,30363,17433
+Jul-09,8000,32100,32522,17220
+Aug-09,8000,36000,36231,18750
+Sep-09,8500,27200,29500,14906
+Oct-09,8000,33900,34402,22006
+Nov-09,8000,34900,35317,21902
+Dec-09,8000,36900,37593,18577
+Jan-10,8000,37800,38311,18975
+Feb-10,7500,38300,38620,18810
+Mar-10,8000,39600,39882,17704
+Apr-10,8500,41000,41637,17313
+May-10,8500,41900,42262,16324
+Jun-10,9200,39200,40380,16252
+Jul-10,9000,38400,39362,13389
+Aug-10,9000,39800,40169,16855
+Sep-10,9000,41800,42180,15198
+Oct-10,9000,43000,43271,14941
+Nov-10,8500,44900,45291,13429
+Dec-10,9000,10400,15970,11224
+Jan-11,8000,38300,38771,30675
+Feb-11,7500,44200,44627,25104
+Mar-11,8000,46200,46657,25014
+Apr-11,8000,46300,47399,22326
+May-11,9000,47400,47700,25708
+Jun-11,9000,48500,48855,22474
+Jul-11,9000,50900,51174,21852
+Aug-11,9000,51000,52228,21544
+Sep-11,9500,52200,52622,22268
+Oct-11,9000,53800,54008,19415
+Nov-11,9000,45700,47635,20050
+Dec-11,8500,51000,51437,26531
+Jan-12,8000,52800,53195,24354
+Feb-12,8000,55400,55632,23391
+Mar-12,8000,58300,58625,24897
+Apr-12,8500,61000,61626,22706
+May-12,9300,64000,64367,24230
+Jun-12,9500,55800,58227,24774
+Jul-12,9500,57700,58271,26526
+Aug-12,9500,62100,62559,21425
+Sep-12,9500,65700,66425,19114
+Oct-12,9500,65200,66708,19921
+Nov-12,9500,66400,66946,19120
+Dec-12,9500,68900,69346,18244
+Jan-13,9000,75000,75332,20857
+Feb-13,9000,83300,83571,24651
+Mar-13,9000,90800,91898,23589
+Apr-13,11000,83900,84101,26174
+May-13,9000,80700,80803,22224
+Jun-13,9000,77600,77823,21482
+Jul-13,9000,76300,76465,21811
+Aug-13,9000,74700,74939,22650
+Sep-13,9000,73400,73492,35154
+Oct-13,10000,82300,83723,28887
+Nov-13,8500,75500,75717,38220
+Dec-13,8500,76000,76093,39625
+Jan-14,8100,73500,73501,41946
+Feb-14,7400,73200,73357,45758
+Mar-14,7400,73800,73872,61853
+Apr-14,8200,74000,74113,94241
+May-14,7400,74400,74503,114121
+Jun-14,7400,73800,73896,135677
+Jul-14,7400,74600,74680,136098
+Aug-14,7400,73600,73785,121550
+Sep-14,8300,73800,73875,122219
+Oct-14,7400,74000,74075,105532
+Nov-14,7400,73500,73633,95595
+Dec-14,7447,73600,73687,96972
+Jan-15,7990,74000,74216,98203
+Feb-15,7653,76500,76618,103224
+Mar-15,7406,74600,74830,132690
+Apr-15,8288,80600,80759,152298
+May-15,7482,79000,79099,156007
+Jun-15,7441,80000,80020,172205
+Jul-15,7531,83100,83171,166302
+Aug-15,7454,82600,82642,166939
+Sep-15,8727,82100,82172,165765
+Oct-15,7763,85300,85424,170995
+Nov-15,7514,84600,84703,169159
+Dec-15,7698,84500,84572,179133
+Jan-16,9409,82200,82352,187533
+Feb-16,8363,83200,83244,196470
+Mar-16,8310,83100,83148,221109
+Apr-16,11829,85100,85127,256897
+May-16,11598,85000,85058,277889
+Jun-16,11546,84400,84483,275438
+Jul-16,11475,87200,87235,240750
+Aug-16,11549,86900,86946,251188
+Sep-16,12889,86500,86523,229544
+Oct-16,11621,88300,88359,213212
+Nov-16,11549,88600,88665,215424
+Dec-16,12261,88300,88412,219882
+Jan-17,12215,87600,87685,232101
+Feb-17,10157,88200,88240,251717
+Mar-17,10356,87800,87916,262010
+Apr-17,12196,89800,89850,252273
+May-17,10316,90100,90209,270197
+Jun-17,10312,89400,89532,244349
+Jul-17,10325,92200,92250,269189
+Aug-17,10558,91600,91629,256083
+Sep-17,12413,91300,91415,250566
+Oct-17,11388,93500,93540,244868
+Nov-17,11002,93100,93130,226911
+Dec-17,12147,92800,92848,228148
+Jan-18,12183,87900,87936,226316
+Feb-18,11098,87600,87660,220831
+Mar-18,9855,88100,88176,217056
+Apr-18,11916,86900,87089,204980
+May-18,10216,89000,89018,198627
+Jun-18,10775,87800,87900,209672
+Jul-18,10395,88300,88380,202337
+Aug-18,10402,88300,88365,192755
+Sep-18,12712,87300,87410,189142
+Oct-18,10728,88000,88070,181861
+Nov-18,11766,87300,87374,177355
+Dec-18,12850,87400,87508,165442
+Jan-19,12832,89500,89565,168614
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Dataset conversion script

Reads the license plate CSV file (first column: date as MMM-YY, last column:
total number of applicants) and writes the number of applicants as a
single-series JSON time series.

Author: Gertjan van den Burg

"""

import json
import argparse


# Three-letter month abbreviations as they appear in the CSV date column.
MONTHS = {
    "Jan": 1,
    "Feb": 2,
    "Mar": 3,
    "Apr": 4,
    "May": 5,
    "Jun": 6,
    "Jul": 7,
    "Aug": 8,
    "Sep": 9,
    "Oct": 10,
    "Nov": 11,
    "Dec": 12,
}


def reformat_time(mmmyy):
    """ From MMM-YY to %Y-%m

    Parameters
    ----------
    mmmyy : str
        Date such as ``"Jan-08"``: a three-letter month abbreviation and a
        two-digit year separated by a dash.

    Returns
    -------
    str
        The same month formatted as ``"2008-01"``.

    Raises
    ------
    ValueError
        If the string is not of the form MMM-YY or the month abbreviation is
        not recognised.
    """
    mmm, yy = mmmyy.split("-")
    if mmm not in MONTHS:
        # Fail with a readable message instead of the TypeError that
        # formatting ``None`` with %i would otherwise produce.
        raise ValueError("Unknown month %r in date %r" % (mmm, mmmyy))
    # Two-digit years are offset by 2000 (the CSV covers Jan-02 to Jan-19).
    Y = int(yy) + 2000
    m = MONTHS[mmm]
    return "%i-%02i" % (Y, m)


def split_merged_month(time, values, merged, missing):
    """ Split one observation over two consecutive months, in place.

    The observation labelled ``merged`` is divided between ``merged`` and a
    newly inserted ``missing`` entry directly after it. The split is done in
    integers so the series stays of type int and the total is preserved;
    when the total is odd the first month receives the extra unit.

    Parameters
    ----------
    time : list of str
        Time labels; ``missing`` is inserted after ``merged``.
    values : list of int
        Observations aligned with ``time``; modified in the same way.
    merged : str
        Label of the observation that covers both months.
    missing : str
        Label of the month that has no record of its own.

    Raises
    ------
    ValueError
        If ``merged`` does not occur in ``time``.
    """
    idx = time.index(merged)
    half, remainder = divmod(values[idx], 2)
    # True division (``/ 2``) would turn an odd count into a float such as
    # 10269.5, which contradicts the "int" type declared for the series.
    values[idx] = half + remainder
    time.insert(idx + 1, missing)
    values.insert(idx + 1, half)


def parse_args():
    """ Parse the command line: input CSV path and output JSON path. """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="File to convert")
    parser.add_argument("output_file", help="File to write to")
    return parser.parse_args()


def main():
    """ Convert the CSV file given on the command line to the JSON format. """
    # Imported here so that the helpers above can be imported (e.g. for
    # testing) without the third-party clevercsv package being installed.
    import clevercsv

    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(
            fp, delimiter=",", quotechar="", escapechar=""
        )
        rows = list(reader)

    # Drop the header row.
    rows.pop(0)

    time = [reformat_time(r[0]) for r in rows]
    # The number of applicants is the last column of the CSV.
    values = [int(r[-1]) for r in rows]

    # Manually split Jan-08 into two, see readme for details.
    split_merged_month(time, values, "2008-01", "2008-02")

    name = "shanghai_license"
    longname = "Shanghai License"
    time_fmt = "%Y-%m"
    series = [{"label": "No. of Applicants", "type": "int", "raw": values}]

    data = {
        "name": name,
        "longname": longname,
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": time_fmt,
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }

    with open(args.output_file, "w") as fp:
        json.dump(data, fp, indent="\t")


if __name__ == "__main__":
    main()
186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204 + ], + "raw": [ + "2002-01", + "2002-02", + "2002-03", + "2002-04", + "2002-05", + "2002-06", + "2002-07", + "2002-08", + "2002-09", + "2002-10", + "2002-11", + "2002-12", + "2003-01", + "2003-02", + "2003-03", + "2003-04", + "2003-05", + "2003-06", + "2003-07", + "2003-08", + "2003-09", + "2003-10", + "2003-11", + "2003-12", + "2004-01", + "2004-02", + "2004-03", + "2004-04", + "2004-05", + "2004-06", + "2004-07", + "2004-08", + "2004-09", + "2004-10", + "2004-11", + "2004-12", + "2005-01", + "2005-02", + "2005-03", + "2005-04", + "2005-05", + "2005-06", + "2005-07", + "2005-08", + "2005-09", + "2005-10", + "2005-11", + "2005-12", + "2006-01", + "2006-02", + "2006-03", + "2006-04", + "2006-05", + "2006-06", + "2006-07", + "2006-08", + "2006-09", + "2006-10", + "2006-11", + "2006-12", + "2007-01", + "2007-02", + "2007-03", + "2007-04", + "2007-05", + "2007-06", + "2007-07", + "2007-08", + "2007-09", + "2007-10", + "2007-11", + "2007-12", + "2008-01", + "2008-02", + "2008-03", + "2008-04", + "2008-05", + "2008-06", + "2008-07", + "2008-08", + "2008-09", + "2008-10", + "2008-11", + "2008-12", + "2009-01", + "2009-02", + "2009-03", + "2009-04", + "2009-05", + "2009-06", + "2009-07", + "2009-08", + "2009-09", + "2009-10", + "2009-11", + "2009-12", + "2010-01", + "2010-02", + "2010-03", + "2010-04", + "2010-05", + "2010-06", + "2010-07", + "2010-08", + "2010-09", + "2010-10", + "2010-11", + "2010-12", + "2011-01", + "2011-02", + "2011-03", + "2011-04", + "2011-05", + "2011-06", + "2011-07", + "2011-08", + "2011-09", + "2011-10", + "2011-11", + "2011-12", + "2012-01", + "2012-02", + "2012-03", + "2012-04", + "2012-05", + "2012-06", + "2012-07", + "2012-08", + "2012-09", + "2012-10", + "2012-11", + "2012-12", + "2013-01", + "2013-02", + "2013-03", + "2013-04", + "2013-05", + "2013-06", + "2013-07", + "2013-08", + "2013-09", + "2013-10", + 
"2013-11", + "2013-12", + "2014-01", + "2014-02", + "2014-03", + "2014-04", + "2014-05", + "2014-06", + "2014-07", + "2014-08", + "2014-09", + "2014-10", + "2014-11", + "2014-12", + "2015-01", + "2015-02", + "2015-03", + "2015-04", + "2015-05", + "2015-06", + "2015-07", + "2015-08", + "2015-09", + "2015-10", + "2015-11", + "2015-12", + "2016-01", + "2016-02", + "2016-03", + "2016-04", + "2016-05", + "2016-06", + "2016-07", + "2016-08", + "2016-09", + "2016-10", + "2016-11", + "2016-12", + "2017-01", + "2017-02", + "2017-03", + "2017-04", + "2017-05", + "2017-06", + "2017-07", + "2017-08", + "2017-09", + "2017-10", + "2017-11", + "2017-12", + "2018-01", + "2018-02", + "2018-03", + "2018-04", + "2018-05", + "2018-06", + "2018-07", + "2018-08", + "2018-09", + "2018-10", + "2018-11", + "2018-12", + "2019-01" + ] + }, + "series": [ + { + "label": "No. of Applicants", + "type": "int", + "raw": [ + 3718, + 4590, + 5190, + 4806, + 4665, + 4502, + 3774, + 4640, + 4393, + 4661, + 4021, + 3525, + 9442, + 12030, + 11219, + 8794, + 14634, + 15507, + 11929, + 9315, + 8532, + 9383, + 9849, + 10491, + 8663, + 10156, + 9950, + 8150, + 8114, + 19233, + 14464, + 15506, + 10634, + 9519, + 9188, + 9005, + 6208, + 8949, + 9117, + 8113, + 9673, + 8409, + 8777, + 7520, + 10972, + 11167, + 13633, + 8351, + 5907, + 12367, + 8904, + 7888, + 8301, + 8478, + 8966, + 9190, + 7064, + 11237, + 110234, + 9477, + 6587, + 5056, + 10168, + 10523, + 10273, + 11478, + 10327, + 12943, + 10561, + 10715, + 10596, + 10356, + 10269.5, + 10269.5, + 63534, + 37072, + 26341, + 21208, + 16783, + 13451, + 11002, + 11882, + 10170, + 16801, + 16544, + 16848, + 18575, + 17654, + 16471, + 17433, + 17220, + 18750, + 14906, + 22006, + 21902, + 18577, + 18975, + 18810, + 17704, + 17313, + 16324, + 16252, + 13389, + 16855, + 15198, + 14941, + 13429, + 11224, + 30675, + 25104, + 25014, + 22326, + 25708, + 22474, + 21852, + 21544, + 22268, + 19415, + 20050, + 26531, + 24354, + 23391, + 24897, + 22706, + 24230, + 24774, + 
26526, + 21425, + 19114, + 19921, + 19120, + 18244, + 20857, + 24651, + 23589, + 26174, + 22224, + 21482, + 21811, + 22650, + 35154, + 28887, + 38220, + 39625, + 41946, + 45758, + 61853, + 94241, + 114121, + 135677, + 136098, + 121550, + 122219, + 105532, + 95595, + 96972, + 98203, + 103224, + 132690, + 152298, + 156007, + 172205, + 166302, + 166939, + 165765, + 170995, + 169159, + 179133, + 187533, + 196470, + 221109, + 256897, + 277889, + 275438, + 240750, + 251188, + 229544, + 213212, + 215424, + 219882, + 232101, + 251717, + 262010, + 252273, + 270197, + 244349, + 269189, + 256083, + 250566, + 244868, + 226911, + 228148, + 226316, + 220831, + 217056, + 204980, + 198627, + 209672, + 202337, + 192755, + 189142, + 181861, + 177355, + 165442, + 168614 + ] + } + ] +}
\ No newline at end of file diff --git a/datasets/shanghai_license/shanghai_license.png b/datasets/shanghai_license/shanghai_license.png Binary files differnew file mode 100644 index 0000000..574fc24 --- /dev/null +++ b/datasets/shanghai_license/shanghai_license.png |
