diff options
| -rw-r--r-- | Dockerfile | 2 | ||||
| -rw-r--r-- | README.md | 120 | ||||
| -rw-r--r-- | annotate_change.py | 4 | ||||
| -rw-r--r-- | annotatechange_wide.png | bin | 0 -> 85214 bytes | |||
| -rw-r--r-- | app/__init__.py | 4 | ||||
| -rw-r--r-- | app/admin/__init__.py | 4 | ||||
| -rw-r--r-- | app/admin/forms.py | 4 | ||||
| -rw-r--r-- | app/admin/routes.py | 4 | ||||
| -rw-r--r-- | app/auth/__init__.py | 4 | ||||
| -rw-r--r-- | app/auth/email.py | 4 | ||||
| -rw-r--r-- | app/auth/forms.py | 4 | ||||
| -rw-r--r-- | app/auth/routes.py | 4 | ||||
| -rw-r--r-- | app/cli.py | 4 | ||||
| -rw-r--r-- | app/decorators.py | 4 | ||||
| -rw-r--r-- | app/email.py | 4 | ||||
| -rw-r--r-- | app/errors/__init__.py | 4 | ||||
| -rw-r--r-- | app/errors/handlers.py | 4 | ||||
| -rw-r--r-- | app/main/__init__.py | 4 | ||||
| -rw-r--r-- | app/main/datasets.py | 4 | ||||
| -rw-r--r-- | app/main/demo.py | 4 | ||||
| -rw-r--r-- | app/main/email.py | 4 | ||||
| -rw-r--r-- | app/main/forms.py | 4 | ||||
| -rw-r--r-- | app/main/routes.py | 4 | ||||
| -rw-r--r-- | app/models.py | 4 | ||||
| -rw-r--r-- | app/utils/datasets.py | 9 | ||||
| -rw-r--r-- | app/utils/tasks.py | 4 | ||||
| -rw-r--r-- | boot.sh | 2 | ||||
| -rw-r--r-- | config.py | 4 | ||||
| -rwxr-xr-x | flask.sh | 2 |
29 files changed, 199 insertions, 28 deletions
@@ -1,3 +1,5 @@ +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> + FROM python:3.7-alpine RUN apk add gcc musl-dev libffi-dev openssl-dev libxslt-dev build-base @@ -1,33 +1,105 @@ # AnnotateChange -## Implementation Notes +Welcome to the repository of the "AnnotateChange" application. This +application was created to collect annotations of time series data in order to +construct the [Turing Change Point +Dataset](https://github.com/alan-turing-institute/TCPD) (TCPD). The TCPD is a +dataset of real-world time series used to evaluate change point detection +algorithms. For the change point detection benchmark that was created using +this dataset, see the [Turing Change Point Detection +Benchmark](https://github.com/alan-turing-institute/TCPDBench) repository. -* Missing values are skipped, so that gaps occur in the graph. X-values are - however counted continuously, to ensure that this gap has nonzero width. - Thus, when a change point is selected by an annotator after such a gap, its - location can be found by retrieving the observation at the index *while - including missing values*. This is in contrast to the approach where missing - values are removed before the index is used to retrieve the data point. +Any work that uses this repository should cite our paper: [**Van den Burg & +Williams - An Evaluation of Change Point Detection Algorithms +(2020)**](https://arxiv.org/abs/2003.06222). You can use the following BibTeX +entry: -* Task assignment flow: tasks assignment is handled upon login, completion of - the demo, and completion of an annotation. +```bib +@article{vandenburg2020evaluation, + title={An Evaluation of Change Point Detection Algorithms}, + author={{Van den Burg}, G. J. J. and Williams, C. K. 
I.}, + journal={arXiv preprint arXiv:2003.06222}, + year={2020} +} +``` - General rules: +Here's a screenshot of what the application looks like during the annotation +process: - - [x] Users don't get a task assigned if they still have an unfinished task - - [x] Users don't get a task assigned if there are no more datasets to - annotate - - [x] Users don't get a task assigned if they have reached their maximum. - - [x] Users never get assigned the same dataset more than once - - [x] Tasks are assigned on the fly, at the moment that a user requests a - dataset to annotate. +<p align="center"> +<img height="500px" src="./annotatechange_wide.png" alt="screenshot of +AnnotateChange" /> +</p> - Handled at login: +Some of the features of AnnotateChange include: - - [x] When a user logs in, a task that was previously assigned that is not - finished should be removed to avoid duplication, unless this task was - assigned by the admin. +* Admin panel to add/remove datasets, add/remove annotation tasks, add/remove + users, and inspect incoming annotations. - It may be easier to remove assigning a *specific* dataset and instead give - the user the option to "annotate again". If they click that, they get - assigned a task on the fly. This will remove a lot of the difficulties. +* Basic user management: authentication, email confirmation, forgotten + password, automatic log out after inactivity, etc. Users are only allowed to + register using an email address from an approved domain. + +* Task assignment of time series to user is done on the fly, ensuring no user + ever annotates the same dataset twice, and prioritising datasets that are + close to a desired number of annotations. + +* Interactive graph of a time series that supports pan and zoom, support for + multidimensional time series. + +* Mandatory "demo" to onboard the user to change point annotation. + +* Backup of annotations to the admin via email. 
+ +* Time series datasets are verified upon upload according to a strict schema. + +## Notes + +This codebase is provided "as is". If you find any problems, please raise an +issue [on GitHub](https://github.com/alan-turing-institute/annotatechange). + +The code is licensed under the [MIT License](./LICENSE). + +This code was written by [Gertjan van den Burg](https://gertjan.dev) with +helpful comments provided by [Chris +Williams](https://homepages.inf.ed.ac.uk/ckiw/). + +## Some implementation details + +Below are some thoughts that may help make sense of the codebase. + +* AnnotateChange is a web application built on the Flask framework. See [this + excellent + tutorial](https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-world) + for an introduction to Flask. The [flask.sh](./flask.sh) shell script loads + the appropriate environment variables and runs the application in a virtual + environment managed by Poetry. + +* The application handles user management and is centered around the idea of a + "task" which links a particular user to a particular time series to + annotate. + +* An admin role is available, and the admin user can manually assign and + delete tasks as well as add/delete users, datasets, etc. The admin user is + created using the [cli](./app/cli.py). + +* All datasets must adhere to a specific dataset schema (see + [utils/dataset_schema.json](utils/dataset_schema.json)). + +* Annotations are stored in the database using 0-based indexing. Tasks are + assigned on the fly when a user requests a time series to annotate (see + [utils/tasks.py](utils/tasks.py)). + +* Users can only begin annotating when they have successfully passed the + introduction. + +* Configuration of the app is done through environment variables, see the + [.env.example](.env.example) file for an example. + +* [Poetry](https://python-poetry.org/) is used for dependency management. 
+ +* Docker is used for deployment (see the deployment documentation in + [docs](docs)), and [Traefik](https://containo.us/traefik/) is used for SSL, + etc. + +* The time series graph is plotted using [d3.js](https://d3js.org/). diff --git a/annotate_change.py b/annotate_change.py index 0aa4765..d8a953d 100644 --- a/annotate_change.py +++ b/annotate_change.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from app import create_app, db, cli app = create_app() diff --git a/annotatechange_wide.png b/annotatechange_wide.png Binary files differ new file mode 100644 index 0000000..413a7b7 --- /dev/null +++ b/annotatechange_wide.png diff --git a/app/__init__.py b/app/__init__.py index 32fd0c7..5105c0a 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + __version__ = "0.4.5" import logging diff --git a/app/admin/__init__.py b/app/admin/__init__.py index 737dcc6..a6304ab 100644 --- a/app/admin/__init__.py +++ b/app/admin/__init__.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from flask import Blueprint bp = Blueprint("admin", __name__, url_prefix="/admin") diff --git a/app/admin/forms.py b/app/admin/forms.py index 5ff5361..ad066fc 100644 --- a/app/admin/forms.py +++ b/app/admin/forms.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. 
van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import os from flask import current_app diff --git a/app/admin/routes.py b/app/admin/routes.py index a6225ce..4ae1aef 100644 --- a/app/admin/routes.py +++ b/app/admin/routes.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import clevercsv import io import os diff --git a/app/auth/__init__.py b/app/auth/__init__.py index c938693..712c39e 100644 --- a/app/auth/__init__.py +++ b/app/auth/__init__.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from flask import Blueprint bp = Blueprint('auth', __name__) diff --git a/app/auth/email.py b/app/auth/email.py index 581c9ce..3510938 100644 --- a/app/auth/email.py +++ b/app/auth/email.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from flask import current_app, render_template from app.email import send_email diff --git a/app/auth/forms.py b/app/auth/forms.py index 14f390f..ae07a4f 100644 --- a/app/auth/forms.py +++ b/app/auth/forms.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from flask import current_app, flash from flask_wtf import FlaskForm diff --git a/app/auth/routes.py b/app/auth/routes.py index 57bd605..62d8dff 100644 --- a/app/auth/routes.py +++ b/app/auth/routes.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. 
van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import datetime import markdown import textwrap @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import getpass from email_validator import validate_email diff --git a/app/decorators.py b/app/decorators.py index 9fbf1f4..ee81af6 100644 --- a/app/decorators.py +++ b/app/decorators.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from functools import wraps from flask import current_app, request, redirect, flash, url_for diff --git a/app/email.py b/app/email.py index 7b97364..76c750c 100644 --- a/app/email.py +++ b/app/email.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from threading import Thread from flask import current_app diff --git a/app/errors/__init__.py b/app/errors/__init__.py index 8a85dca..e0044bc 100644 --- a/app/errors/__init__.py +++ b/app/errors/__init__.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from flask import Blueprint bp = Blueprint("errors", __name__) diff --git a/app/errors/handlers.py b/app/errors/handlers.py index 6c2b1e7..96354c9 100644 --- a/app/errors/handlers.py +++ b/app/errors/handlers.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. 
van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from flask import render_template from app import db from app.errors import bp diff --git a/app/main/__init__.py b/app/main/__init__.py index 2612509..bfceafa 100644 --- a/app/main/__init__.py +++ b/app/main/__init__.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from flask import Blueprint bp = Blueprint('main', __name__) diff --git a/app/main/datasets.py b/app/main/datasets.py index 7245e51..8959b0d 100644 --- a/app/main/datasets.py +++ b/app/main/datasets.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import os import json import logging diff --git a/app/main/demo.py b/app/main/demo.py index 29687d6..2ee8171 100644 --- a/app/main/demo.py +++ b/app/main/demo.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import datetime import logging import markdown diff --git a/app/main/email.py b/app/main/email.py index c10e234..093b4f6 100644 --- a/app/main/email.py +++ b/app/main/email.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import json from flask import current_app, render_template diff --git a/app/main/forms.py b/app/main/forms.py index dcdffd3..af9c4c4 100644 --- a/app/main/forms.py +++ b/app/main/forms.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. 
van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + from flask_wtf import FlaskForm from wtforms import SubmitField diff --git a/app/main/routes.py b/app/main/routes.py index 8d2aa6e..69e3937 100644 --- a/app/main/routes.py +++ b/app/main/routes.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import datetime import logging diff --git a/app/models.py b/app/models.py index 1ebdc53..0219a22 100644 --- a/app/models.py +++ b/app/models.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + import datetime import jwt import time diff --git a/app/utils/datasets.py b/app/utils/datasets.py index db2c514..d348b47 100644 --- a/app/utils/datasets.py +++ b/app/utils/datasets.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + """ Dataset handling @@ -10,8 +14,6 @@ www.jsonschema.net or yapi.demo.qunar.com/editor/ Missing values must be denoted by 'NaN' (this is understood by the JSON decoder). -Author: Gertjan van den Burg - """ import hashlib @@ -76,7 +78,6 @@ def validate_dataset(filename): if None in data["time"]["raw"]: return "Null is not supported in time axis. Use 'NaN' instead." - has_missing = False for var in data["series"]: if len(var["raw"]) != data["n_obs"]: @@ -85,7 +86,7 @@ def validate_dataset(filename): return "Null is not supported in series. Use 'NaN' instead." has_missing = has_missing or any(map(math.isnan, var["raw"])) - # this doesn't happen in any dataset yet, so let's not implement it until + # this doesn't happen in any dataset yet, so let's not implement it until # we need it. 
if data["n_dim"] > 1 and has_missing: return "Missing values are not yet supported for multidimensional data" diff --git a/app/utils/tasks.py b/app/utils/tasks.py index 8356876..c082160 100644 --- a/app/utils/tasks.py +++ b/app/utils/tasks.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + """Utilities for task assignment """ @@ -1,5 +1,7 @@ #!/bin/sh +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> + # Activate the virtual environment poetry shell @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> +# License: See LICENSE file +# Copyright: 2020 (c) The Alan Turing Institute + """Configuration for the AnnotateChange app Almost all configuration options are expected to be supplied through @@ -1,5 +1,7 @@ #!/bin/bash +# Author: G.J.J. van den Burg <gvandenburg@turing.ac.uk> + export $(grep -v '^#' .env.development | xargs -d '\n') echo "FLASK_APP = ${FLASK_APP}" |
