diff options
Diffstat (limited to 'app/main/demo.py')
| -rw-r--r-- | app/main/demo.py | 439 |
1 files changed, 439 insertions, 0 deletions
# -*- coding: utf-8 -*-

"""Introduction ("demo") views for AnnotateChange.

The introduction is a sequence of demos, keyed by integer id in
``DEMO_DATA``.  Each demo has three phases that are served by the ``demo``
view:

1. *learn*    -- explanatory text (``demo_learn``),
2. *annotate* -- the user marks change points (``demo_annotate``),
3. *evaluate* -- the user's marks are shown next to the true change points
   (``demo_evaluate``).
"""

import datetime
import logging
import markdown
import textwrap

from flask import (
    render_template,
    flash,
    url_for,
    redirect,
    request,
    session,
    abort,
)
from flask_login import current_user

from app import db
from app.decorators import login_required
from app.models import Annotation, Dataset, Task
from app.main import bp
from app.main.forms import NextForm
from app.main.routes import RUBRIC
from app.utils.datasets import load_data_for_chart, get_demo_true_cps

LOGGER = logging.getLogger(__name__)


def _md(text):
    """Dedent ``text`` and render it from Markdown to HTML.

    textwrap.dedent is used mostly for code formatting: it lets the texts
    below be indented along with the surrounding literal.
    """
    return markdown.markdown(textwrap.dedent(text))


# Per-demo content.  Keys are the demo ids used in the URL; each entry names
# the dataset to show and holds the HTML text for each of the three phases.
DEMO_DATA = {
    1: {
        "dataset": {"name": "demo_100"},
        "learn": {
            "text": _md(
                """
                Welcome to AnnotateChange, an annotation app for change point
                detection.

                Our goal with AnnotateChange is to create a dataset of
                human-annotated time series to use in the development and
                evaluation of change point algorithms.

                We really appreciate that you've agreed to help us with this!
                Without your help this project would not be possible.

                In the next few pages, we'll introduce you to the problem of
                change point detection. We'll look at a few datasets and see
                different types of changes that can occur.

                Thanks again for your help!"""
            )
        },
        "annotate": {
            "text": RUBRIC
            + _md(
                """
                Click "Submit" when you have finished marking the change points
                or "No change points" when you believe there are none. You can
                reset the graph with the "Reset" button."""
            )
        },
        "evaluate": {
            "text": _md(
                """
                This first example has **one** change point. Not all datasets
                that you'll encounter in this program have exactly one change
                point. It is up to you to see whether a time series contains a
                change point or not, and if it does, to see if there is more
                than one.

                Don't worry if you weren't exactly correct on the first try.
                The goal of this introduction is to familiarise yourself with
                time series data and with change point detection in
                particular."""
            )
        },
    },
    2: {
        "dataset": {"name": "demo_200"},
        "learn": {
            "text": _md(
                """
                In the previous example, you've seen a relatively simple
                dataset where a *step change* occurred at a certain point in
                time. A step change is one of the simplest types of change
                points that can occur.

                Click "Continue" to move on to the next example."""
            )
        },
        "annotate": {"text": RUBRIC},
        "evaluate": {
            "text": _md(
                """
                The dataset in the previous example shows again a time series
                with step changes, but here there are **two** change points.
                This is important to keep in mind, as there can be more than
                one change point in a dataset."""
            )
        },
    },
    3: {
        "dataset": {"name": "demo_300"},
        "learn": {
            "text": _md(
                """
                In the previous examples we've introduced *step changes*.
                However, these are not the only types of change points that
                can occur, as we'll see in the next example."""
            )
        },
        "annotate": {"text": RUBRIC},
        "evaluate": {
            "text": _md(
                """
                This time series shows an example where a change occurs in the
                **variance** of the data. At the change point the variance of
                the noise changes abruptly from a relatively low noise variance
                to a high noise variance. This is another type of change point
                that can occur."""
            )
        },
    },
    4: {
        "dataset": {"name": "demo_400"},
        "learn": {
            "text": _md(
                """
                So far we have seen two types of change points: step changes
                (also known as mean shift) and variance changes."""
            )
        },
        "annotate": {"text": RUBRIC},
        "evaluate": {
            "text": _md(
                """
                Remember that it's also possible for there to be *no change
                points* in a dataset. It can sometimes be difficult to tell
                whether a dataset has change points or not. In that case, it's
                important to remember that we are looking for points where the
                behaviour of the time series changes *abruptly*."""
            )
        },
    },
    5: {
        "dataset": {"name": "demo_500"},
        "learn": {
            "text": _md(
                """
                Change points mark places in the time series where the
                behaviour changes *abruptly*. While **outliers** are data
                points that do not adhere to the prevailing behaviour of the
                time series, they are not generally considered change points
                because the behaviour of the time series before and after the
                outlier is the same. """
            )
        },
        "annotate": {"text": RUBRIC},
        "evaluate": {
            "text": _md(
                """
                Outliers are quite common in real-world time series data, and
                not all change point detection methods are robust against these
                observations.

                Note that short periods that consist of several consecutive
                outlying data points could be considered an abrupt change in
                behaviour of the time series. If you see this, use your
                intuition to guide you."""
            )
        },
    },
    6: {
        "dataset": {"name": "demo_600"},
        "learn": {
            "text": _md(
                """
                So far we've seen *step changes*, *variance changes*, and time
                series with *outliers*. Can you think of another type of change
                that can occur?"""
            )
        },
        "annotate": {"text": RUBRIC},
        "evaluate": {
            "text": _md(
                """
                What we see here is a change in *trend*. For these types of
                changes it's not always easy to figure out exactly where the
                change occurs, so it's harder to get it exactly right. Use
                your intuition and keep in mind that it is normal for the
                observations to be noisy."""
            )
        },
    },
    7: {
        "dataset": {"name": "demo_700"},
        "learn": {
            "text": _md(
                """
                It is not uncommon for time series data from the real world to
                display **seasonal variation**, for instance because certain
                days of the week are more busy than others. Seasonality can
                make it harder to find the change points in the dataset (if
                there are any at all). Try to follow the pattern of
                seasonality, and check whether the pattern changes in one of
                the ways we've seen previously."""
            )
        },
        "annotate": {"text": RUBRIC},
        "evaluate": {
            "text": _md(
                """
                As you can see from this example, changes in seasonality can
                occur as well. We expect that these changes are quite rare,
                but it's nevertheless good to be aware of them."""
            )
        },
    },
}


def _get_demo_dataset(demo_id):
    """Look up the Dataset row for demo ``demo_id``.

    Returns the first Dataset whose name matches the demo's configured
    dataset name, or None (after logging an error) when there is no such row.
    """
    name = DEMO_DATA[demo_id]["dataset"]["name"]
    dataset = Dataset.query.filter_by(name=name).first()
    if dataset is None:
        LOGGER.error("Demo requested unavailable dataset: %s", name)
    return dataset


def redirect_user(demo_id, phase_id):
    """Redirect to the step that follows (``demo_id``, ``phase_id``).

    * Not on the last phase: go to the next phase of the same demo.
    * Last phase, but not the last demo: go to phase 1 of the next demo.
    * Last phase of the last demo: flag the current user as introduced (if
      not flagged already) and go to the index page.
    """
    last_demo_id = max(DEMO_DATA)
    demo_last_phase_id = 3

    if phase_id != demo_last_phase_id:
        return redirect(
            url_for("main.demo", demo_id=demo_id, phase_id=phase_id + 1)
        )

    if demo_id != last_demo_id:
        return redirect(
            url_for("main.demo", demo_id=demo_id + 1, phase_id=1)
        )

    # The user finished the final phase of the final demo.
    if not current_user.is_introduced:
        current_user.is_introduced = True
        db.session.commit()
        # TODO: Assign real tasks to the user here.
    return redirect(url_for("main.index"))


def process_annotations(demo_id):
    """Read the annotations posted as JSON for demo ``demo_id``.

    The JSON body must be an object with the keys ``"identifier"`` and
    ``"changepoints"``; otherwise the request is aborted with HTTP 400.
    ``"changepoints"`` is either None or a list of objects with an ``"x"``
    entry.

    Returns the list of ``"x"`` values (empty when ``"changepoints"`` is
    None).  If the posted identifier differs from ``demo_id``, an error is
    logged and flashed and a redirect response to the index page is returned
    instead of a list.

    The annotations are stored in the database only the first time: nothing
    is written when the user is already introduced or already has a task for
    the demo dataset.
    """
    annotation = request.get_json()
    if (
        not isinstance(annotation, dict)
        or "identifier" not in annotation
        or "changepoints" not in annotation
    ):
        # Malformed request body: answer with a client error rather than
        # failing later with a TypeError/KeyError.
        abort(400)

    if annotation["identifier"] != demo_id:
        LOGGER.error(
            "User %s returned a task id in the demo that wasn't the demo id.",
            current_user.username,
        )
        flash(
            "An internal error occurred, the administrator has been notified.",
            "error",
        )
        return redirect(url_for("main.index"))

    changepoints = annotation["changepoints"]
    if changepoints is None:
        retval = []
    else:
        retval = [cp["x"] for cp in changepoints]

    # If the user is already introduced, we assume that their demo annotations
    # are already in the database, and thus we don't put them back in (because
    # we want the original ones).
    if current_user.is_introduced:
        return retval

    dataset = _get_demo_dataset(demo_id)
    if dataset is None:
        # Nothing to attach the annotations to (already logged by the
        # helper); still hand the change points back to the caller.
        return retval

    task = Task.query.filter_by(
        annotator_id=current_user.id, dataset_id=dataset.id
    ).first()
    # this happens if the user returns to the same demo page, but hasn't
    # completed the full demo yet. Same as above, not updating because we want
    # the originals.
    if task is not None:
        return retval

    # Create a new task
    task = Task(annotator_id=current_user.id, dataset_id=dataset.id)
    task.done = False
    task.annotated_on = None
    db.session.add(task)
    # Commit first: the annotations below reference task.id.
    db.session.commit()

    if changepoints is None:
        # "No change points" is recorded as one annotation without an index.
        db.session.add(Annotation(cp_index=None, task_id=task.id))
    else:
        for cp in changepoints:
            db.session.add(Annotation(cp_index=cp["x"], task_id=task.id))

    # mark task as done, and store it together with its annotations
    task.done = True
    task.annotated_on = datetime.datetime.utcnow()
    db.session.commit()

    return retval


def demo_learn(demo_id, form):
    """Render the "learn" phase (explanatory text) of demo ``demo_id``."""
    demo_data = DEMO_DATA[demo_id]["learn"]
    return render_template(
        "demo/learn.html",
        title="Introduction – %i" % demo_id,
        text=demo_data["text"],
        form=form,
    )


def demo_annotate(demo_id):
    """Render the "annotate" phase of demo ``demo_id``.

    If the demo's dataset is not in the database, an error is logged and
    flashed and the user is redirected to the index page.
    """
    demo_data = DEMO_DATA[demo_id]["annotate"]
    dataset = _get_demo_dataset(demo_id)
    if dataset is None:
        flash(
            "An internal error occurred. The administrator has been notified. We apologise for the inconvenience, please try again later.",
            "error",
        )
        return redirect(url_for("main.index"))
    chart_data = load_data_for_chart(dataset.name, dataset.md5sum)
    return render_template(
        "annotate/index.html",
        title="Introduction – %i" % demo_id,
        data=chart_data,
        rubric=demo_data["text"],
        identifier=demo_id,
    )


def demo_evaluate(demo_id, phase_id, form):
    """Render the "evaluate" phase of demo ``demo_id``.

    Shows the change points stored in the session under
    ``"user_changepoints"`` next to the true change points of the demo
    dataset.  If the session holds no change points, the user is sent back
    to the previous phase.  If the dataset or its true change points are
    unavailable, the user is redirected to the index page.
    """
    demo_data = DEMO_DATA[demo_id]["evaluate"]
    user_changepoints = session.get("user_changepoints", "__UNK__")
    if user_changepoints == "__UNK__":
        flash(
            "The previous step of the demo was not completed successfully. Please try again.",
            "error",
        )
        return redirect(
            url_for("main.demo", demo_id=demo_id, phase_id=phase_id - 1)
        )

    dataset = _get_demo_dataset(demo_id)
    if dataset is None:
        flash(
            "An internal error occurred, the administrator has been notified. We apologise for the inconvenience, please try again later.",
            "error",
        )
        return redirect(url_for("main.index"))

    chart_data = load_data_for_chart(dataset.name, dataset.md5sum)
    true_changepoints = get_demo_true_cps(dataset.name)
    if true_changepoints is None:
        # The message below promises a notification, so leave a log record.
        LOGGER.error(
            "No true change points available for demo dataset: %s",
            dataset.name,
        )
        flash(
            "An internal error occurred, the administrator has been notified. We apologise for the inconvenience, please try again later.",
            "error",
        )
        return redirect(url_for("main.index"))

    annotations_true = [dict(index=x) for x in true_changepoints]
    annotations_user = [dict(index=x) for x in user_changepoints]
    return render_template(
        "demo/evaluate.html",
        title="Introduction – %i" % demo_id,
        data=chart_data,
        annotations_user=annotations_user,
        annotations_true=annotations_true,
        text=demo_data["text"],
        form=form,
    )


@bp.route(
    "/introduction/",
    defaults={"demo_id": 1, "phase_id": 1},
    methods=("GET", "POST"),
)
@bp.route(
    "/introduction/<int:demo_id>/",
    defaults={"phase_id": 1},
    methods=("GET", "POST"),
)
@bp.route(
    "/introduction/<int:demo_id>/<int:phase_id>", methods=("GET", "POST")
)
@login_required
def demo(demo_id, phase_id):
    """Serve one phase of one demo of the introduction.

    GET renders the phase (1: learn, 2: annotate, 3: evaluate).  POST either
    comes from the "next" form, in which case the user is redirected to the
    following step, or carries annotations, which are processed and stored in
    the session for the evaluate phase.

    Unknown demo ids and unknown phase ids result in HTTP 404.
    """
    if demo_id not in DEMO_DATA:
        abort(404)

    form = NextForm()

    if request.method == "POST":
        if form.validate_on_submit():
            return redirect_user(demo_id, phase_id)

        result = process_annotations(demo_id)
        if not isinstance(result, list):
            # Identifier mismatch: process_annotations produced a response
            # instead of change points. Pass it on rather than storing it
            # in the session.
            return result
        session["user_changepoints"] = result
        # NOTE(review): the body of this response is a bare URL, not a
        # redirect -- presumably the annotation page's script navigates to
        # it; confirm against the annotate template.
        return url_for("main.demo", demo_id=demo_id, phase_id=phase_id + 1)

    if phase_id == 1:
        return demo_learn(demo_id, form)
    elif phase_id == 2:
        return demo_annotate(demo_id)
    elif phase_id == 3:
        return demo_evaluate(demo_id, phase_id, form)
    else:
        abort(404)
