diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-03-19 14:51:04 +0000 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-03-19 14:51:04 +0000 |
| commit | 20cc8eab24191902394316519f4889e55bdec1c6 (patch) | |
| tree | 60c408804444a265f0eaac34519e8f8c9537ff26 /app/admin | |
| parent | Add instance directories (diff) | |
| download | AnnotateChange-20cc8eab24191902394316519f4889e55bdec1c6.tar.gz AnnotateChange-20cc8eab24191902394316519f4889e55bdec1c6.zip | |
Allow admin to add datasets
Diffstat (limited to 'app/admin')
| -rw-r--r-- | app/admin/datasets.py | 97 | ||||
| -rw-r--r-- | app/admin/forms.py | 43 | ||||
| -rw-r--r-- | app/admin/routes.py | 45 |
3 files changed, 181 insertions, 4 deletions
def validate_dataset(filename):
    """Validate a dataset file uploaded to the webapp.

    The file must contain a JSON object with the keys ``name``, ``n_obs``,
    ``n_dim`` and ``series``.  ``series`` maps variable names ``V1``, ``V2``,
    ... (in that order) to objects holding at least ``type`` and ``raw``;
    categorical variables must also provide ``levels``.

    Parameters
    ----------
    filename : str
        Path of the JSON file to check.

    Returns
    -------
    None or str
        ``None`` when the dataset is valid, otherwise a human-readable
        description of the first problem found.

    Raises
    ------
    OSError
        If the file cannot be opened; I/O problems are not reported as
        validation errors.
    """
    try:
        with open(filename, "rb") as fid:
            data = json.load(fid)
    except json.JSONDecodeError as err:
        return "JSON decoding error: %s" % err.msg
    except UnicodeDecodeError as err:
        # json.load decodes the raw bytes itself; undecodable input is
        # raised as UnicodeDecodeError rather than JSONDecodeError.
        return "JSON decoding error: %s" % err.reason

    # Everything below indexes into the document, so reject inputs of the
    # wrong JSON type with an error string instead of a TypeError.
    if not isinstance(data, dict):
        return "Dataset must be a JSON object"

    for key in ("name", "n_obs", "n_dim", "series"):
        if key not in data:
            return "Required key missing: %s" % key

    if not isinstance(data["name"], str):
        return "Key 'name' must be a string"

    series = data["series"]
    if not isinstance(series, dict):
        return "Key 'series' must be a JSON object"

    if len(series) != data["n_dim"]:
        return "Number of dimensions and number of series don't match"

    # Variables must be named V1..Vn in the order they appear in the file.
    for idx, var in enumerate(series, start=1):
        if var != "V%i" % idx:
            return "Unexpected variable name, expected 'V<int>', got %s" % var
        vardict = series[var]
        if not isinstance(vardict, dict):
            return "Variable '%s' must be a JSON object" % var
        for key in ("type", "raw"):
            if key not in vardict:
                return "Key '%s' missing for variable '%s'" % (key, var)
        if vardict["type"] == "category" and "levels" not in vardict:
            return (
                "Variable '%s' has categorical type but 'levels' is missing"
                % (var)
            )
        raw = vardict["raw"]
        if not isinstance(raw, list):
            return "Key 'raw' for variable '%s' must be a list" % var
        if len(raw) != data["n_obs"]:
            # %s rather than %i so a non-numeric n_obs cannot crash the
            # formatting of the error message itself.
            return (
                "Length of data for variable '%s' not equal to n_obs = %s"
                % (var, data["n_obs"])
            )

    return None
get_name_from_dataset(filename): + with open(filename, "rb") as fid: + data = json.load(fid) + return data["name"] + + +def md5sum(filename): + """ Compute the MD5 hash for a given filename """ + blocksize = 65536 + hasher = hashlib.md5() + with open(filename, "rb") as fid: + buf = fid.read(blocksize) + while len(buf) > 0: + hasher.update(buf) + buf = fid.read(blocksize) + return hasher.hexdigest() diff --git a/app/admin/forms.py b/app/admin/forms.py index b33b1c6..6b39e2e 100644 --- a/app/admin/forms.py +++ b/app/admin/forms.py @@ -1,12 +1,51 @@ # -*- coding: utf-8 -*- +import os + +from flask import current_app from flask_wtf import FlaskForm +from flask_wtf.file import FileField, FileRequired + +from wtforms import StringField, SubmitField +from wtforms.validators import DataRequired, ValidationError + +from werkzeug.utils import secure_filename -from wtforms import StringField, PasswordField, BooleanField, SubmitField -from wtforms.validators import DataRequired, ValidationError, Email, EqualTo +from app.models import Dataset +from app.admin.datasets import validate_dataset, get_name_from_dataset class AdminAssignTaskForm(FlaskForm): username = StringField("Username", validators=[DataRequired()]) dataset = StringField("Dataset", validators=[DataRequired()]) submit = SubmitField("Assign") + + +class AdminAddDatasetForm(FlaskForm): + file_ = FileField("File", validators=[FileRequired()]) + submit = SubmitField("Upload") + + def validate_file_(self, field): + filename = secure_filename(field.data.filename) + if not "." 
@bp.route("/add", methods=("GET", "POST"))
@admin_required
def add_dataset():
    """Handle the admin "Add Dataset" page.

    On GET, or on a POST whose form does not validate, render the upload
    form.  On a valid POST the uploaded file is expected to be present in
    the temporary directory already (presumably saved there by the form's
    file validator -- confirm against ``AdminAddDatasetForm``).  It is then
    moved to ``<DATASET_DIR>/<name>.json``, where ``name`` is read from the
    dataset itself, and a ``Dataset`` row holding the name and the file's
    MD5 sum is committed.

    Returns
    -------
    A rendered template, or a redirect back to this page after every
    handled POST outcome (success or failure), with the outcome reported
    through ``flash``.
    """
    tmp_dir = os.path.join(
        current_app.instance_path, current_app.config["TEMP_DIR"]
    )
    dataset_dir = os.path.join(
        current_app.instance_path, current_app.config["DATASET_DIR"]
    )
    form = AdminAddDatasetForm()
    if not form.validate_on_submit():
        return render_template(
            "admin/add.html", title="Add Dataset", form=form
        )

    temp_filename = os.path.join(
        tmp_dir, secure_filename(form.file_.data.filename)
    )
    if not os.path.exists(temp_filename):
        flash("Internal error: temporary dataset disappeared.")
        return redirect(url_for("admin.add_dataset"))

    name = get_name_from_dataset(temp_filename)

    # The name comes from the uploaded file's contents, so it must not be
    # able to steer the target path outside the dataset directory (e.g.
    # "../x" or an absolute path).
    target_filename = None
    if isinstance(name, str):
        candidate = os.path.join(dataset_dir, name + ".json")
        candidate_dir = os.path.dirname(os.path.abspath(candidate))
        if candidate_dir == os.path.abspath(dataset_dir):
            target_filename = candidate
    if target_filename is None:
        os.unlink(temp_filename)
        flash("Invalid dataset name: %r" % (name,))
        return redirect(url_for("admin.add_dataset"))

    if os.path.exists(target_filename):
        flash("Internal error: file already exists!")
        return redirect(url_for("admin.add_dataset"))

    try:
        os.rename(temp_filename, target_filename)
    except OSError:
        # os.rename raises on failure (e.g. a cross-device move); report it
        # to the admin instead of letting the request fail.
        flash("Internal error: file moving failed")
        return redirect(url_for("admin.add_dataset"))

    dataset = Dataset(name=name, md5sum=md5sum(target_filename))
    db.session.add(dataset)
    db.session.commit()
    # flash()'s second positional argument is the message category, so the
    # name has to be interpolated into the message before the call.
    flash("Dataset %r added successfully." % name)
    return redirect(url_for("admin.add_dataset"))
