diff options
| -rw-r--r-- | app/static/js/makeChart.js | 12 | ||||
| -rw-r--r-- | app/utils/dataset_schema.json | 130 | ||||
| -rw-r--r-- | app/utils/datasets.py | 93 | ||||
| -rw-r--r-- | poetry.lock | 31 | ||||
| -rw-r--r-- | pyproject.toml | 1 |
5 files changed, 205 insertions, 62 deletions
diff --git a/app/static/js/makeChart.js b/app/static/js/makeChart.js index 85d743b..1a0283d 100644 --- a/app/static/js/makeChart.js +++ b/app/static/js/makeChart.js @@ -3,10 +3,12 @@ function preprocessData(data) { var n = 0; - data.forEach(function(d) { - d.X = n++; - d.Y = d.value; - }); + cleanData = []; + for (i=0; i<data.values[0].raw.length; i++) { + d = data.values[0].raw[i]; + cleanData.push({"X": n++, "Y": d}); + } + return cleanData; } function scaleAndAxis(data, width, height) { @@ -50,7 +52,7 @@ function noZoom() { function baseChart(selector, data, clickFunction, annotations, annotationFunction) { // preprocess the data - preprocessData(data); + data = preprocessData(data); var divWidth = 1000; var divHeight = 480; diff --git a/app/utils/dataset_schema.json b/app/utils/dataset_schema.json new file mode 100644 index 0000000..2aec504 --- /dev/null +++ b/app/utils/dataset_schema.json @@ -0,0 +1,130 @@ +{ + "definitions": {}, + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://example.com/root.json", + "type": "object", + "title": "Dataset Schema", + "default": null, + "required": [ + "name", + "n_obs", + "n_dim", + "series" + ], + "properties": { + "name": { + "$id": "#/properties/name", + "type": "string", + "title": "The Name Schema", + "default": "", + "pattern": "^(.*)$" + }, + "n_obs": { + "$id": "#/properties/n_obs", + "type": "integer", + "title": "The N_obs Schema", + "default": 0 + }, + "n_dim": { + "$id": "#/properties/n_dim", + "type": "integer", + "title": "The N_dim Schema", + "default": 0 + }, + "demo": { + "$id": "#/properties/demo", + "type": "object", + "title": "The Demo Schema", + "properties": { + "true_CPs": { + "$id": "#/properties/demo/properties/true_CPs", + "type": "array", + "items": { + "$id": "#/properties/demo/properties/true_CPs/items", + "type": "integer", + "title": "The Items Schema", + "default": null + } + } + } + }, + "time": { + "$id": "#/properties/time", + "type": "object", + "title": "The Time Schema", + "default": null, + "required": [ + "type", + "format", + "raw" + ], + "properties": { + "type": { + "$id": "#/properties/time/properties/type", + "type": "string", + "title": "The Type Schema", + "default": "", + "pattern": "^(.*)$" + }, + "format": { + "$id": "#/properties/time/properties/format", + "type": "string", + "title": "The Format Schema", + "default": "", + "pattern": "^(.*)$" + }, + "raw": { + "$id": "#/properties/time/properties/raw", + "type": "array", + "title": "The Raw Schema", + "items": { + "$id": "#/properties/time/properties/raw/items", + "title": "The Items Schema", + "default": "" + } + } + } + }, + "series": { + "$id": "#/properties/series", + "type": "array", + "title": "The Series Schema", + "items": { + "$id": "#/properties/series/items", + "type": "object", + "title": "The Variable Schema", + "default": null, + "properties": { + "label": { + "$id": "#/properties/series/items/properties/label", + "type": "string", + "title": "The Label Schema", + "default": "", + "pattern": "^(.*)$" + }, + "type": { + "$id": "#/properties/series/items/properties/type", + "type": "string", + "title": "The Type Schema", + "default": "", + "pattern": "^(.*)$" + }, + "raw": { + "$id": "#/properties/series/items/properties/raw", + "type": "array", + "title": "The Raw Schema", + "items": { + "$id": "#/properties/series/items/properties/raw/items", + "title": "The Items Schema", + "default": 0 + } + } + }, + "required": [ + "type", + "raw" + ] + } + } + } +} diff --git a/app/utils/datasets.py b/app/utils/datasets.py index 1fef85f..16647f0 100644 --- a/app/utils/datasets.py +++ b/app/utils/datasets.py @@ -3,28 +3,9 @@ """ Dataset handling -The dataset model is a JSON object as follows: - - { - "name": "name of the dataset", - "n_obs": number of observations, - "n_dim": number of dimensions, - "series": { - "V1": { - "type": "float", - "raw": [list of observations] - }, - "V2": { - "type": "int", - "raw": [list of observations] - }, - "V3": { - "type": "category", - "levels": ["A", "B", "C"], - "raw": [list of observations] - } - } - } +The dataset model is defined in the adjacent 'dataset_schema.json' file, which +is a JSONSchema schema definition. It can be easily edited at +www.jsonschema.net or yapi.demo.qunar.com/editor/ Missing values must be denoted by 'NaN' (this is understood by the JSON decoder). @@ -35,56 +16,56 @@ Author: Gertjan van den Burg import hashlib import json +import jsonschema import logging import os -import re from flask import current_app LOGGER = logging.getLogger(__file__) +def load_schema(): + pth = os.path.abspath(__file__) + basedir = os.path.dirname(pth) + schema_file = os.path.join(basedir, "dataset_schema.json") + if not os.path.exists(schema_file): + raise FileNotFoundError(schema_file) + with open(schema_file, "rb") as fp: + schema = json.load(fp) + return schema + + def validate_dataset(filename): - """ Validate a dataset uploaded to the webapp - Return None on success and a string error on failure - """ + if not os.path.exists(filename): + return "File not found." - with open(filename, "rb") as fid: + with open(filename, "rb") as fp: try: - data = json.load(fid) + data = json.load(fp) except json.JSONDecodeError as err: return "JSON decoding error: %s" % err.msg - required_keys = ["name", "n_obs", "n_dim", "series"] - for key in required_keys: - if not key in data: - return "Required key missing: %s" % key + try: + schema = load_schema() + except FileNotFoundError: + return "Schema file not found." - if not re.fullmatch("\w+", data["name"]): - return "Name can only contain characters in the set [a-zA-Z0-9_]" + try: + jsonschema.validate(instance=data, schema=schema) + except jsonschema.ValidationError as err: + return "JSONSchema validation error: %s" % err.msg if len(data["series"]) != data["n_dim"]: return "Number of dimensions and number of series don't match" - required_keys = ["type", "raw"] - for idx, var in enumerate(data["series"]): - if not var == "V%i" % (idx + 1): - return "Unexpected variable name, expected 'V<int>', got %s" % var - vardict = data["series"][var] - for key in required_keys: - if not key in vardict: - return "Key '%s' missing for variable '%s'" % (key, var) - if vardict["type"] == "category": - if not "levels" in vardict: - return ( - "Variable '%s' has categorical type but 'levels' is missing" - % (var) - ) - if not len(vardict["raw"]) == data["n_obs"]: - return ( - "Length of data for variable '%s' not equal to n_obs = %i" - % (var, data["n_obs"]) - ) + if "time" in data.keys(): + if len(data["time"]["raw"]) != data["n_obs"]: + return "Number of time points doesn't match number of observations" + + for var in data["series"]: + if len(var["raw"]) != data["n_obs"]: + return "Number of observations doesn't match for %s" % var["label"] return None @@ -98,7 +79,7 @@ def get_name_from_dataset(filename): def dataset_is_demo(filename): with open(filename, "rb") as fid: data = json.load(fid) - return "demo" in data + return "demo" in data.keys() def get_demo_true_cps(name): @@ -153,5 +134,7 @@ def load_data_for_chart(name, known_md5): return None with open(target_filename, "rb") as fid: data = json.load(fid) - chart_data = [{"value": x} for x in data["series"]["V1"]["raw"]] + + chart_data = {"time": data["time"] if "time" in data else None, "values": + data["series"]} return {"chart_data": chart_data} diff --git a/poetry.lock b/poetry.lock index 2ce758e..48b0d66 100644 --- a/poetry.lock +++ b/poetry.lock @@ -29,7 +29,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "1.3.0" [[package]] -category = "dev" +category = "main" description = "Classes Without Boilerplate" name = "attrs" optional = false @@ -237,6 +237,20 @@ MarkupSafe = ">=0.23" [[package]] category = "main" +description = "An implementation of JSON Schema validation for Python" +name = "jsonschema" +optional = false +python-versions = "*" +version = "3.0.1" + +[package.dependencies] +attrs = ">=17.4.0" +pyrsistent = ">=0.14.0" +setuptools = "*" +six = ">=1.11.0" + +[[package]] +category = "main" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." name = "mako" optional = false @@ -314,6 +328,17 @@ python-versions = "*" version = "0.9.3" [[package]] +category = "main" +description = "Persistent/Functional/Immutable data structures" +name = "pyrsistent" +optional = false +python-versions = "*" +version = "0.15.2" + +[package.dependencies] +six = "*" + +[[package]] category = "dev" description = "pytest: simple powerful testing with Python" name = "pytest" @@ -391,7 +416,7 @@ python-versions = "*" version = "2.2.1" [metadata] -content-hash = "0055b739d3afaaf6ffa9ca7720fc6869f5cb4d0b338beb28aa3d8c2b6f1e4bd9" +content-hash = "4322d6cbe5b122bfc42ba5f35a3d465631c8ec8eb78dddadb17b01156e5805fa" python-versions = "^3.7" [metadata.hashes] @@ -418,6 +443,7 @@ gunicorn = ["aa8e0b40b4157b36a5df5e599f45c9c76d6af43845ba3b3b0efe2c70473c2471", idna = ["c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", "ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"] itsdangerous = ["321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", "b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749"] jinja2 = ["74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd", "f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"] +jsonschema = ["0c0a81564f181de3212efa2d17de1910f8732fa1b71c42266d983cd74304e20d", "a5f6559964a3851f59040d3b961de5e68e70971afb88ba519d27e6a039efff1a"] mako = ["4e02fde57bd4abb5ec400181e4c314f56ac3e49ba4fb8b0d50bba18cb27d25ae"] markdown = ["2e50876bcdd74517e7b71f3e7a76102050edec255b3983403f1a63e7c8a41e7a", "56a46ac655704b91e5b7e6326ce43d5ef72411376588afa1dd90e881b83c7e8c"] markupsafe = ["00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", "09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", "09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", "1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", "24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", "29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", "43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", "46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", "500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", "535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", "62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", "6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", "717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", "79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", "7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", "88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", "8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", "98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", "9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", "9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", "ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", "b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", "b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", "b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", "ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", "c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", "cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", "e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"] @@ -427,6 +453,7 @@ py = ["64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", "dc639 pycparser = ["a988718abfad80b6b157acce7bf130a30876d27603738ac39f140993246b25b3"] pyjwt = ["5c6eca3c2940464d106b99ba83b00c6add741c9becaec087fb7ccdefea71350e", "8d59a976fb773f3e6a39c85636357c4f0e242707394cadadd9814f5cbaa20e96"] pymysql = ["3943fbbbc1e902f41daf7f9165519f140c4451c179380677e6a848587042561a", "d8c059dcd81dedb85a9f034d5e22dcb4442c0b201908bede99e306d65ea7c8e7"] +pyrsistent = ["16692ee739d42cf5e39cef8d27649a8c1fdb7aa99887098f1460057c5eb75c3a"] pytest = ["3f193df1cfe1d1609d4c583838bea3d532b18d6160fd3f55c9447fdca30848ec", "e246cf173c01169b9617fc07264b7b1316e78d7a650055235d6d897bc80d9660"] python-dateutil = ["7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", "c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e"] python-editor = ["1bf6e860a8ad52a14c3ee1252d5dc25b2030618ed80c022598f00176adc8367d", "51fda6bcc5ddbbb7063b2af7509e43bd84bfc32a4ff71349ec7847713882327b", "5f98b069316ea1c2ed3f67e7f5df6c0d8f10b689964a4a811ff64f0106819ec8", "c3da2053dbab6b29c94e43c486ff67206eafbe7eb52dbec7390b5e2fb05aac77", "ea87e17f6ec459e780e4221f295411462e0d0810858e055fc514684350a2f522"] diff --git a/pyproject.toml b/pyproject.toml index 8544bc6..f26df38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ gunicorn = "^19.9" pymysql = "^0.9.3" cryptography = "^2.6" markdown = "^3.1" +jsonschema = "^3.0" [tool.poetry.dev-dependencies] pytest = "^3.0" |
