diff options
Diffstat (limited to 'app/main')
| -rw-r--r-- | app/main/__init__.py | 2 | ||||
| -rw-r--r-- | app/main/demo.py | 439 | ||||
| -rw-r--r-- | app/main/forms.py | 8 | ||||
| -rw-r--r-- | app/main/routes.py | 31 |
4 files changed, 463 insertions, 17 deletions
diff --git a/app/main/__init__.py b/app/main/__init__.py index 2cb605e..2612509 100644 --- a/app/main/__init__.py +++ b/app/main/__init__.py @@ -4,6 +4,6 @@ from flask import Blueprint bp = Blueprint('main', __name__) -from app.main import routes +from app.main import routes, demo diff --git a/app/main/demo.py b/app/main/demo.py new file mode 100644 index 0000000..a126fd8 --- /dev/null +++ b/app/main/demo.py @@ -0,0 +1,439 @@ +# -*- coding: utf-8 -*- + +import datetime +import logging +import markdown +import textwrap + +from flask import ( + render_template, + flash, + url_for, + redirect, + request, + session, + abort, +) +from flask_login import current_user + +from app import db +from app.decorators import login_required +from app.models import Annotation, Dataset, Task +from app.main import bp +from app.main.forms import NextForm +from app.main.routes import RUBRIC +from app.utils.datasets import load_data_for_chart, get_demo_true_cps + +LOGGER = logging.getLogger(__name__) + +# textwrap.dedent is used mostly for code formatting. +DEMO_DATA = { + 1: { + "dataset": {"name": "demo_100"}, + "learn": { + "text": markdown.markdown( + textwrap.dedent( + """ + Welcome to AnnotateChange, an annotation app for change point + detection. + + Our goal with AnnotateChange is to create a dataset of + human-annotated time series to use in the development and + evaluation of change point algorithms. + + We really appreciate that you've agreed to help us with this! + Without your help this project would not be possible. + + In the next few pages, we'll introduce you to the problem of + change point detection. We'll look at a few datasets and see + different types of changes that can occur. + + Thanks again for your help!""" + ) + ) + }, + "annotate": { + "text": RUBRIC + + markdown.markdown( + textwrap.dedent( + """ + Click "Submit" when you have finished marking the change points + or "No change points" when you believe there are none. You can + reset the graph with the "Reset" button.""" + ) + ) + }, + "evaluate": { + "text": markdown.markdown( + textwrap.dedent( + """ + This first example has **one** change point. Not all datasets + that you'll encounter in this program have exactly one change + point. It is up to you to see whether a time series contains a + change point or not, and if it does, to see if there is more + than one. + + Don't worry if you weren't exactly correct on the first try. + The goal of this introduction is to familiarise yourself with + time series data and with change point detection in + particular.""" + ) + ) + }, + }, + 2: { + "dataset": {"name": "demo_200"}, + "learn": { + "text": markdown.markdown( + textwrap.dedent( + """ + In the previous example, you've seen a relatively simple + dataset where a *step change* occurred at a certain point in + time. A step change is one of the simplest types of change + points that can occur. + + Click "Continue" to move on to the next example.""" + ) + ) + }, + "annotate": {"text": RUBRIC}, + "evaluate": { + "text": markdown.markdown( + textwrap.dedent( + """ + The dataset in the previous example shows again a time series + with step changes, but here there are **two** change points. + This is important to keep in mind, as there can be more than + one change point in a dataset.""" + ) + ) + }, + }, + 3: { + "dataset": {"name": "demo_300"}, + "learn": { + "text": markdown.markdown( + textwrap.dedent( + """ + In the previous examples we've introduced *step changes*. + However, these are not the only types of change points that + can occur, as we'll see in the next example.""" + ) + ) + }, + "annotate": {"text": RUBRIC}, + "evaluate": { + "text": markdown.markdown( + textwrap.dedent( + """ + This time series shows an example where a change occurs in the + **variance** of the data. At the change point the variance of + the noise changes abruptly from a relatively low noise variance + to a high noise variance. This is another type of change point + that can occur.""" + ) + ) + }, + }, + 4: { + "dataset": {"name": "demo_400"}, + "learn": { + "text": markdown.markdown( + textwrap.dedent( + """ + So far we have seen two types of change points: step changes + (also known as mean shift) and variance changes.""" + ) + ) + }, + "annotate": {"text": RUBRIC}, + "evaluate": { + "text": markdown.markdown( + textwrap.dedent( + """ + Remember that it's also possible for there to be *no change + points* in a dataset. It can sometimes be difficult to tell + whether a dataset has change points or not. In that case, it's + important to remember that we are looking for points where the + behaviour of the time series changes *abruptly*.""" + ) + ) + }, + }, + 5: { + "dataset": {"name": "demo_500"}, + "learn": { + "text": markdown.markdown( + textwrap.dedent( + """ + Change points mark places in the time series where the + behaviour changes *abruptly*. While **outliers** are data + points that do not adhere to the prevailing behaviour of the + time series, they are not generally considered change points + because the behaviour of the time series before and after the + outlier is the same. """ + ) + ) + }, + "annotate": {"text": RUBRIC}, + "evaluate": { + "text": markdown.markdown( + textwrap.dedent( + """ + Outliers are quite common in real-world time series data, and + not all change point detection methods are robust against these + observations. + + Note that short periods that consist of several consecutive + outlying data points could be considered an abrupt change in + behaviour of the time series. If you see this, use your + intuition to guide you.""" + ) + ) + }, + }, + 6: { + "dataset": {"name": "demo_600"}, + "learn": { + "text": markdown.markdown( + textwrap.dedent( + """ + So far we've seen *step changes*, *variance changes*, and time + series with *outliers*. Can you think of another type of change + that can occur?""" + ) + ) + }, + "annotate": {"text": RUBRIC}, + "evaluate": { + "text": markdown.markdown( + textwrap.dedent( + """ + What we see here is a change in *trend*. For these types of + changes it's not always easy to figure out exactly where the + change occurs, so it's harder to get it exactly right. Use + your intuition and keep in mind that it is normal for the + observations to be noisy.""" + ) + ) + }, + }, + 7: { + "dataset": {"name": "demo_700"}, + "learn": { + "text": markdown.markdown( + textwrap.dedent( + """ + It is not uncommon for time series data from the real world to + display **seasonal variation**, for instance because certain + days of the week are more busy than others. Seasonality can + make it harder to find the change points in the dataset (if + there are any at all). Try to follow the pattern of + seasonality, and check whether the pattern changes in one of + the ways we've seen previously.""" + ) + ) + }, + "annotate": {"text": RUBRIC}, + "evaluate": { + "text": markdown.markdown( + textwrap.dedent( + """ + As you can see from this example, changes in seasonality can + occur as well. We expect that these changes are quite rare, + but it's nevertheless good to be aware of them.""" + ) + ) + }, + }, +} + + +def redirect_user(demo_id, phase_id): + last_demo_id = max(DEMO_DATA.keys()) + demo_last_phase_id = 3 + if demo_id == last_demo_id and phase_id == demo_last_phase_id: + # User is introduced. + if current_user.is_introduced: + return redirect(url_for("main.index")) + + current_user.is_introduced = True + db.session.commit() + # TODO: Assign real tasks to the user here. + return redirect(url_for("main.index")) + elif phase_id == demo_last_phase_id: + demo_id += 1 + phase_id = 1 + return redirect( + url_for("main.demo", demo_id=demo_id, phase_id=phase_id) + ) + else: + phase_id += 1 + return redirect( + url_for("main.demo", demo_id=demo_id, phase_id=phase_id) + ) + + +def process_annotations(demo_id): + annotation = request.get_json() + if annotation["identifier"] != demo_id: + LOGGER.error( + "User %s returned a task id in the demo that wasn't the demo id." + % current_user.username + ) + flash( + "An internal error occurred, the administrator has been notified.", + "error", + ) + return redirect(url_for("main.index")) + + if annotation["changepoints"] is None: + retval = [] + else: + retval = [cp["x"] for cp in annotation["changepoints"]] + + # If the user is already introduced, we assume that their demo annotations + # are already in the database, and thus we don't put them back in (because + # we want the original ones). + if current_user.is_introduced: + return retval + + dataset = Dataset.query.filter_by( + name=DEMO_DATA[demo_id]["dataset"]["name"] + ).first() + task = Task.query.filter_by( + annotator_id=current_user.id, dataset_id=dataset.id + ).first() + # this happens if the user returns to the same demo page, but hasn't + # completed the full demo yet. Same as above, not updating because we want + # the originals. + if not task is None: + return retval + + # Create a new task + task = Task(annotator_id=current_user.id, dataset_id=dataset.id) + task.done = False + task.annotated_on = None + db.session.add(task) + db.session.commit() + if annotation["changepoints"] is None: + ann = Annotation(cp_index=None, task_id=task.id) + db.session.add(ann) + db.session.commit() + else: + for cp in annotation["changepoints"]: + ann = Annotation(cp_index=cp["x"], task_id=task.id) + db.session.add(ann) + db.session.commit() + + # mark task as done + task.done = True + task.annotated_on = datetime.datetime.utcnow() + db.session.commit() + + return retval + + +def demo_learn(demo_id, form): + demo_data = DEMO_DATA[demo_id]["learn"] + return render_template( + "demo/learn.html", + title="Introduction – %i" % demo_id, + text=demo_data["text"], + form=form, + ) + + +def demo_annotate(demo_id): + demo_data = DEMO_DATA[demo_id]["annotate"] + dataset = Dataset.query.filter_by( + name=DEMO_DATA[demo_id]["dataset"]["name"] + ).first() + if dataset is None: + LOGGER.error( + "Demo requested unavailable dataset: %s" + % demo_data["dataset"]["name"] + ) + flash( + "An internal error occured. The administrator has been notified. We apologise for the inconvenience, please try again later.", + "error", + ) + return redirect(url_for("main.index")) + chart_data = load_data_for_chart(dataset.name, dataset.md5sum) + return render_template( + "annotate/index.html", + title="Introduction – %i" % demo_id, + data=chart_data, + rubric=demo_data["text"], + identifier=demo_id, + ) + + +def demo_evaluate(demo_id, phase_id, form): + demo_data = DEMO_DATA[demo_id]["evaluate"] + user_changepoints = session.get("user_changepoints", "__UNK__") + if user_changepoints == "__UNK__": + flash( + "The previous step of the demo was not completed successfully. Please try again.", + "error", + ) + return redirect( + url_for("main.demo", demo_id=demo_id, phase_id=phase_id - 1) + ) + dataset = Dataset.query.filter_by( + name=DEMO_DATA[demo_id]["dataset"]["name"] + ).first() + chart_data = load_data_for_chart(dataset.name, dataset.md5sum) + true_changepoints = get_demo_true_cps(dataset.name) + if true_changepoints is None: + flash( + "An internal error occurred, the administrator has been notified. We apologise for the inconvenience, please try again later.", + "error", + ) + return redirect(url_for("main.index")) + annotations_true = [dict(index=x) for x in true_changepoints] + annotations_user = [dict(index=x) for x in user_changepoints] + return render_template( + "demo/evaluate.html", + title="Introduction – %i" % demo_id, + data=chart_data, + annotations_user=annotations_user, + annotations_true=annotations_true, + text=demo_data["text"], + form=form, + ) + + +@bp.route( + "/introduction/", + defaults={"demo_id": 1, "phase_id": 1}, + methods=("GET", "POST"), +) +@bp.route( + "/introduction/<int:demo_id>/", + defaults={"phase_id": 1}, + methods=("GET", "POST"), +) +@bp.route( + "/introduction/<int:demo_id>/<int:phase_id>", methods=("GET", "POST") +) +@login_required +def demo(demo_id, phase_id): + form = NextForm() + + if request.method == "POST": + if form.validate_on_submit(): + return redirect_user(demo_id, phase_id) + else: + user_changepoints = process_annotations(demo_id) + session["user_changepoints"] = user_changepoints + return url_for("main.demo", demo_id=demo_id, phase_id=phase_id + 1) + + if phase_id == 1: + return demo_learn(demo_id, form) + elif phase_id == 2: + return demo_annotate(demo_id) + elif phase_id == 3: + return demo_evaluate(demo_id, phase_id, form) + else: + abort(404) diff --git a/app/main/forms.py b/app/main/forms.py new file mode 100644 index 0000000..dcdffd3 --- /dev/null +++ b/app/main/forms.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- + +from flask_wtf import FlaskForm + +from wtforms import SubmitField + +class NextForm(FlaskForm): + submit = SubmitField("Continue") diff --git a/app/main/routes.py b/app/main/routes.py index d249c5c..11de2f9 100644 --- a/app/main/routes.py +++ b/app/main/routes.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import datetime +import logging from flask import render_template, flash, url_for, redirect, request from flask_login import current_user @@ -11,19 +12,13 @@ from app.main import bp from app.models import Annotation, Task from app.utils.datasets import load_data_for_chart +logger = logging.getLogger(__name__) + RUBRIC = """ -<i>Please mark all the points in the time series where an <b>abrupt change</b> -in - the behaviour of the series occurs.</i> -<br> -If there are no such points, please click the <u>no changepoints</u> button. -When you're ready, please click the <u>submit</u> button. -<br> -<br> -<b>Note:</b> You can zoom and pan the graph if needed. +Please mark the points in the time series where an <b>abrupt change</b> in + the behaviour of the series occurs. The goal is to define segments of the time + series that are separated by places where these abrupt changes occur. <br> -<br> -Thank you! """ @@ -35,7 +30,7 @@ def index(): if current_user.is_authenticated: user_id = current_user.id tasks = Task.query.filter_by(annotator_id=user_id).all() - tasks_done = [t for t in tasks if t.done] + tasks_done = [t for t in tasks if t.done and not t.dataset.is_demo] tasks_todo = [t for t in tasks if not t.done] return render_template( "index.html", @@ -48,14 +43,14 @@ def index(): @bp.route("/annotate/<int:task_id>", methods=("GET", "POST")) @login_required -def task(task_id): +def annotate(task_id): if request.method == "POST": # record post time now = datetime.datetime.utcnow() # get the json from the client annotation = request.get_json() - if annotation["task"] != task_id: + if annotation["identifier"] != task_id: flash("Internal error: task id doesn't match.", "error") return redirect(url_for(task_id=task_id)) @@ -100,10 +95,14 @@ def task(task_id): flash("It's not possible to edit annotations at the moment.") return redirect(url_for("main.index")) data = load_data_for_chart(task.dataset.name, task.dataset.md5sum) + if data is None: + flash( + "An internal error occurred loading this dataset, the admin has been notified. Please try again later. We apologise for the inconvenience." + ) return render_template( "annotate/index.html", - title="Annotate %s" % task.dataset.name, - task=task, + title=task.dataset.name.title(), + identifier=task.id, data=data, rubric=RUBRIC, ) |
