From 3a2404010f8c0fdb3a9e9940202f59b84cb2791f Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 10 Jun 2019 14:34:48 +0100 Subject: Validate data according to a schema It became clear that a formal schema would make it easier to validate data. This is now added and the code is updated to work with this schema. --- app/static/js/makeChart.js | 12 ++-- app/utils/dataset_schema.json | 130 ++++++++++++++++++++++++++++++++++++++++++ app/utils/datasets.py | 93 ++++++++++++------------------ 3 files changed, 175 insertions(+), 60 deletions(-) create mode 100644 app/utils/dataset_schema.json (limited to 'app') diff --git a/app/static/js/makeChart.js b/app/static/js/makeChart.js index 85d743b..1a0283d 100644 --- a/app/static/js/makeChart.js +++ b/app/static/js/makeChart.js @@ -3,10 +3,12 @@ function preprocessData(data) { var n = 0; - data.forEach(function(d) { - d.X = n++; - d.Y = d.value; - }); + cleanData = []; + for (i=0; i', got %s" % var - vardict = data["series"][var] - for key in required_keys: - if not key in vardict: - return "Key '%s' missing for variable '%s'" % (key, var) - if vardict["type"] == "category": - if not "levels" in vardict: - return ( - "Variable '%s' has categorical type but 'levels' is missing" - % (var) - ) - if not len(vardict["raw"]) == data["n_obs"]: - return ( - "Length of data for variable '%s' not equal to n_obs = %i" - % (var, data["n_obs"]) - ) + if "time" in data.keys(): + if len(data["time"]["raw"]) != data["n_obs"]: + return "Number of time points doesn't match number of observations" + + for var in data["series"]: + if len(var["raw"]) != data["n_obs"]: + return "Number of observations doesn't match for %s" % var["label"] return None @@ -98,7 +79,7 @@ def get_name_from_dataset(filename): def dataset_is_demo(filename): with open(filename, "rb") as fid: data = json.load(fid) - return "demo" in data + return "demo" in data.keys() def get_demo_true_cps(name): @@ -153,5 +134,7 @@ def load_data_for_chart(name, known_md5): return None with open(target_filename, "rb") as fid: data = json.load(fid) - chart_data = [{"value": x} for x in data["series"]["V1"]["raw"]] + + chart_data = {"time": data["time"] if "time" in data else None, "values": + data["series"]} return {"chart_data": chart_data} -- cgit v1.2.3