aboutsummaryrefslogtreecommitdiff
path: root/app/admin
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2019-05-22 14:52:06 -0400
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2019-05-22 14:52:06 -0400
commit44337a3ba728777ec197397aee8c7f9056699bff (patch)
tree7b1b4161c6acc0d27702ec33939e7d0ff42a828a /app/admin
parentBugfix for registration email restriction (diff)
downloadAnnotateChange-44337a3ba728777ec197397aee8c7f9056699bff.tar.gz
AnnotateChange-44337a3ba728777ec197397aee8c7f9056699bff.zip
Move dealing with datasets to util
Diffstat (limited to 'app/admin')
-rw-r--r--app/admin/datasets.py101
-rw-r--r--app/admin/forms.py2
-rw-r--r--app/admin/routes.py2
3 files changed, 2 insertions, 103 deletions
diff --git a/app/admin/datasets.py b/app/admin/datasets.py
deleted file mode 100644
index 078535f..0000000
--- a/app/admin/datasets.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Dataset handling
-
-The dataset model is a JSON object as follows:
-
- {
- "name": "name of the dataset",
- "n_obs": number of observations,
- "n_dim": number of dimensions,
- "series": {
- "V1": {
- "type": "float",
- "raw": [list of observations]
- },
- "V2": {
- "type": "int",
- "raw": [list of observations]
- },
- "V3": {
- "type": "category",
- "levels": ["A", "B", "C"],
- "raw": [list of observations]
- }
- }
- }
-
-Missing values must be denoted by 'NaN' (this is understood by the JSON
-decoder).
-
-Author: Gertjan van den Burg
-
-"""
-
-import re
-import json
-import hashlib
-
-
-def validate_dataset(filename):
- """ Validate a dataset uploaded to the webapp
- Return None on success and a string error on failure
- """
-
- with open(filename, "rb") as fid:
- try:
- data = json.load(fid)
- except json.JSONDecodeError as err:
- return "JSON decoding error: %s" % err.msg
-
- required_keys = ["name", "n_obs", "n_dim", "series"]
- for key in required_keys:
- if not key in data:
- return "Required key missing: %s" % key
-
- if not re.fullmatch("\w+", data["name"]):
- return "Name can only contain characters in the set [a-zA-Z0-9_]"
-
- if len(data["series"]) != data["n_dim"]:
- return "Number of dimensions and number of series don't match"
-
- required_keys = ["type", "raw"]
- for idx, var in enumerate(data["series"]):
- if not var == "V%i" % (idx + 1):
- return "Unexpected variable name, expected 'V<int>', got %s" % var
- vardict = data["series"][var]
- for key in required_keys:
- if not key in vardict:
- return "Key '%s' missing for variable '%s'" % (key, var)
- if vardict["type"] == "category":
- if not "levels" in vardict:
- return (
- "Variable '%s' has categorical type but 'levels' is missing"
- % (var)
- )
- if not len(vardict["raw"]) == data["n_obs"]:
- return (
- "Length of data for variable '%s' not equal to n_obs = %i"
- % (var, data["n_obs"])
- )
-
- return None
-
-
-def get_name_from_dataset(filename):
- with open(filename, "rb") as fid:
- data = json.load(fid)
- return data["name"]
-
-
-def md5sum(filename):
- """ Compute the MD5 hash for a given filename """
- blocksize = 65536
- hasher = hashlib.md5()
- with open(filename, "rb") as fid:
- buf = fid.read(blocksize)
- while len(buf) > 0:
- hasher.update(buf)
- buf = fid.read(blocksize)
- return hasher.hexdigest()
diff --git a/app/admin/forms.py b/app/admin/forms.py
index 1f13626..1ac9333 100644
--- a/app/admin/forms.py
+++ b/app/admin/forms.py
@@ -12,7 +12,7 @@ from wtforms.validators import ValidationError, InputRequired, NumberRange
from werkzeug.utils import secure_filename
from app.models import Dataset
-from app.admin.datasets import validate_dataset, get_name_from_dataset
+from app.utils.datasets import validate_dataset, get_name_from_dataset
class AdminAutoAssignForm(FlaskForm):
diff --git a/app/admin/routes.py b/app/admin/routes.py
index bed31e2..5c86fad 100644
--- a/app/admin/routes.py
+++ b/app/admin/routes.py
@@ -8,7 +8,6 @@ from werkzeug.utils import secure_filename
from app import db
from app.admin import bp
-from app.admin.datasets import get_name_from_dataset, md5sum
from app.decorators import admin_required
from app.admin.forms import (
AdminAutoAssignForm,
@@ -19,6 +18,7 @@ from app.admin.forms import (
AdminSelectDatasetForm,
)
from app.models import User, Dataset, Task, Annotation
+from app.utils.datasets import get_name_from_dataset, md5sum, dataset_is_demo
from app.utils.tasks import generate_auto_assign_tasks
from app.main.datasets import load_data_for_chart