aboutsummaryrefslogtreecommitdiff
path: root/app/admin
diff options
context:
space:
mode:
Diffstat (limited to 'app/admin')
-rw-r--r-- app/admin/datasets.py 97
-rw-r--r-- app/admin/forms.py 43
-rw-r--r-- app/admin/routes.py 45
3 files changed, 181 insertions, 4 deletions
diff --git a/app/admin/datasets.py b/app/admin/datasets.py
new file mode 100644
index 0000000..f86fa33
--- /dev/null
+++ b/app/admin/datasets.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+
+"""
+Dataset handling
+
+The dataset model is a JSON object as follows:
+
+ {
+ "name": "name of the dataset",
+ "n_obs": number of observations,
+ "n_dim": number of dimensions,
+ "series": {
+ "V1": {
+ "type": "float",
+ "raw": [list of observations]
+ },
+ "V2": {
+ "type": "int",
+ "raw": [list of observations]
+ },
+ "V3": {
+ "type": "category",
+ "levels": ["A", "B", "C"],
+ "raw": [list of observations]
+ }
+ }
+ }
+
+Missing values must be denoted by 'NaN' (this is understood by the JSON
+decoder).
+
+Author: Gertjan van den Burg
+
+"""
+
+import json
+import hashlib
+
+
def validate_dataset(filename):
    """Validate a dataset uploaded to the webapp.

    The file must hold a JSON object with the keys "name", "n_obs",
    "n_dim" and "series". "series" must map the variable names "V1",
    "V2", ... (in that order) to objects that contain at least "type" and
    "raw". Variables of type "category" must also provide "levels", and
    every "raw" list must have exactly ``n_obs`` entries.

    Parameters
    ----------
    filename : str
        Path of the JSON file to check.

    Returns
    -------
    None or str
        None on success, otherwise a human-readable error message.
    """
    with open(filename, "rb") as fid:
        try:
            data = json.load(fid)
        except json.JSONDecodeError as err:
            return "JSON decoding error: %s" % err.msg
        except UnicodeDecodeError as err:
            # The file is read as bytes, so an invalid encoding surfaces
            # here rather than as a JSONDecodeError.
            return "JSON decoding error: %s" % err

    # Valid JSON is not necessarily an object (could be a list, number, ...).
    if not isinstance(data, dict):
        return "Dataset must be a JSON object"

    for key in ("name", "n_obs", "n_dim", "series"):
        if key not in data:
            return "Required key missing: %s" % key

    series = data["series"]
    if not isinstance(series, dict):
        return "The 'series' entry must be a JSON object"

    if len(series) != data["n_dim"]:
        return "Number of dimensions and number of series don't match"

    # Variables must be named V1, V2, ... in the order they appear in the file.
    for idx, var in enumerate(series, start=1):
        if var != "V%i" % idx:
            return "Unexpected variable name, expected 'V<int>', got %s" % var
        vardict = series[var]
        if not isinstance(vardict, dict):
            return "Variable '%s' must be a JSON object" % var
        for key in ("type", "raw"):
            if key not in vardict:
                return "Key '%s' missing for variable '%s'" % (key, var)
        if vardict["type"] == "category" and "levels" not in vardict:
            return (
                "Variable '%s' has categorical type but 'levels' is missing"
                % (var)
            )
        if not isinstance(vardict["raw"], list):
            return "Key 'raw' for variable '%s' must be a list" % var
        if len(vardict["raw"]) != data["n_obs"]:
            # %s instead of %i so a non-integer n_obs yields a message
            # rather than a TypeError while formatting.
            return (
                "Length of data for variable '%s' not equal to n_obs = %s"
                % (var, data["n_obs"])
            )

    return None
+
+
def get_name_from_dataset(filename):
    """Return the value stored under "name" in the JSON dataset file.

    The file is parsed without validation, so a malformed file or a
    missing "name" key raises the underlying exception.
    """
    with open(filename, "rb") as handle:
        return json.load(handle)["name"]
+
+
def md5sum(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is read in 64 KiB chunks so it never has to be held in
    memory as a whole.
    """
    chunk_size = 65536
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
diff --git a/app/admin/forms.py b/app/admin/forms.py
index b33b1c6..6b39e2e 100644
--- a/app/admin/forms.py
+++ b/app/admin/forms.py
@@ -1,12 +1,51 @@
# -*- coding: utf-8 -*-
+import os
+
+from flask import current_app
from flask_wtf import FlaskForm
+from flask_wtf.file import FileField, FileRequired
+
+from wtforms import StringField, SubmitField
+from wtforms.validators import DataRequired, ValidationError
+
+from werkzeug.utils import secure_filename
-from wtforms import StringField, PasswordField, BooleanField, SubmitField
-from wtforms.validators import DataRequired, ValidationError, Email, EqualTo
+from app.models import Dataset
+from app.admin.datasets import validate_dataset, get_name_from_dataset
class AdminAssignTaskForm(FlaskForm):
    """Admin form with a required username and dataset field."""

    # Both text fields are mandatory (DataRequired).
    username = StringField("Username", validators=[DataRequired()])
    dataset = StringField("Dataset", validators=[DataRequired()])
    submit = SubmitField("Assign")
+
+
class AdminAddDatasetForm(FlaskForm):
    """Admin form for uploading a new dataset as a JSON file."""

    file_ = FileField("File", validators=[FileRequired()])
    submit = SubmitField("Upload")

    def validate_file_(self, field):
        """Check the uploaded file and keep it in the temporary directory.

        The upload must have a ".json" extension, pass ``validate_dataset``
        and carry a name that no existing ``Dataset`` row uses. On success
        the file stays in the temporary directory; on a dataset or
        duplicate-name failure it is deleted again.

        Raises
        ------
        ValidationError
            If any of the checks above fails.
        """
        filename = secure_filename(field.data.filename)
        _, dot, extension = filename.rpartition(".")
        if not dot or extension.lower() != "json":
            raise ValidationError("The file should be a '.json' file.")

        upload_dir = os.path.join(
            current_app.instance_path, current_app.config["TEMP_DIR"]
        )
        temp_filename = os.path.join(upload_dir, filename)

        # The content checks work on a file path, so save the upload first.
        field.data.save(temp_filename)

        error = validate_dataset(temp_filename)
        if error is not None:
            os.unlink(temp_filename)
            raise ValidationError("Error validating dataset: %s" % error)

        name = get_name_from_dataset(temp_filename)
        if Dataset.query.filter_by(name=name).first() is not None:
            os.unlink(temp_filename)
            raise ValidationError(
                "A dataset with the name '%s' already exists." % name
            )
diff --git a/app/admin/routes.py b/app/admin/routes.py
index e667445..8d40665 100644
--- a/app/admin/routes.py
+++ b/app/admin/routes.py
@@ -1,11 +1,16 @@
# -*- coding: utf-8 -*-
-from flask import render_template, flash, redirect, url_for
+import os
+
+from flask import render_template, flash, redirect, url_for, current_app
+
+from werkzeug.utils import secure_filename
from app import db
from app.admin import bp
+from app.admin.datasets import get_name_from_dataset, md5sum
from app.admin.decorators import admin_required
-from app.admin.forms import AdminAssignTaskForm
+from app.admin.forms import AdminAssignTaskForm, AdminAddDatasetForm
from app.models import User, Dataset, Task
@@ -39,6 +44,42 @@ def assign():
"admin/assign.html", title="Assign Task", form=form, tasks=tasks
)
+
@bp.route("/add", methods=("GET", "POST"))
@admin_required
def add_dataset():
    """Admin view for uploading a new dataset.

    On a valid POST the file that form validation left in the temporary
    directory is moved to the dataset directory as ``<name>.json`` and a
    ``Dataset`` row with its MD5 checksum is stored. Every outcome of a
    valid submission flashes a message and redirects back to this page;
    otherwise the upload form is rendered.
    """
    tmp_dir = os.path.join(
        current_app.instance_path, current_app.config["TEMP_DIR"]
    )
    dataset_dir = os.path.join(
        current_app.instance_path, current_app.config["DATASET_DIR"]
    )
    form = AdminAddDatasetForm()
    if form.validate_on_submit():
        # Form validation saved the upload under this name in tmp_dir.
        temp_filename = os.path.join(
            tmp_dir, secure_filename(form.file_.data.filename)
        )
        if not os.path.exists(temp_filename):
            flash("Internal error: temporary dataset disappeared.")
            return redirect(url_for("admin.add_dataset"))

        name = get_name_from_dataset(temp_filename)
        # NOTE(review): `name` comes from the uploaded JSON and is used
        # verbatim in the target path; confirm it cannot contain path
        # separators or sanitise it consistently wherever datasets are
        # loaded by name.
        target_filename = os.path.join(dataset_dir, name + ".json")
        if os.path.exists(target_filename):
            # Do not leave the rejected upload behind in the temp directory.
            os.unlink(temp_filename)
            flash("Internal error: file already exists!")
            return redirect(url_for("admin.add_dataset"))

        os.rename(temp_filename, target_filename)
        if not os.path.exists(target_filename):
            flash("Internal error: file moving failed")
            return redirect(url_for("admin.add_dataset"))

        dataset = Dataset(name=name, md5sum=md5sum(target_filename))
        db.session.add(dataset)
        db.session.commit()
        # flash() takes (message, category): format the message here
        # instead of passing `name` as the category.
        flash("Dataset %r added successfully." % name)
        return redirect(url_for("admin.add_dataset"))
    return render_template("admin/add.html", title="Add Dataset", form=form)
+
+
@bp.route("/", methods=("GET",))
@admin_required
def index():