aboutsummaryrefslogtreecommitdiff
path: root/app/main
diff options
context:
space:
mode:
Diffstat (limited to 'app/main')
-rw-r--r--app/main/__init__.py2
-rw-r--r--app/main/demo.py439
-rw-r--r--app/main/forms.py8
-rw-r--r--app/main/routes.py31
4 files changed, 463 insertions, 17 deletions
diff --git a/app/main/__init__.py b/app/main/__init__.py
index 2cb605e..2612509 100644
--- a/app/main/__init__.py
+++ b/app/main/__init__.py
@@ -4,6 +4,6 @@ from flask import Blueprint
bp = Blueprint('main', __name__)
-from app.main import routes
+from app.main import routes, demo
diff --git a/app/main/demo.py b/app/main/demo.py
new file mode 100644
index 0000000..a126fd8
--- /dev/null
+++ b/app/main/demo.py
@@ -0,0 +1,439 @@
+# -*- coding: utf-8 -*-
+
+import datetime
+import logging
+import markdown
+import textwrap
+
+from flask import (
+ render_template,
+ flash,
+ url_for,
+ redirect,
+ request,
+ session,
+ abort,
+)
+from flask_login import current_user
+
+from app import db
+from app.decorators import login_required
+from app.models import Annotation, Dataset, Task
+from app.main import bp
+from app.main.forms import NextForm
+from app.main.routes import RUBRIC
+from app.utils.datasets import load_data_for_chart, get_demo_true_cps
+
+LOGGER = logging.getLogger(__name__)
+
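+# Content of the introductory demo, keyed by demo id. Each entry names the
+# dataset to display and holds the text for the three phases of that demo
+# ("learn", "annotate", "evaluate"). The texts are written as Markdown and
+# passed through markdown.markdown; textwrap.dedent is used mostly so the
+# triple-quoted strings can be indented along with the surrounding code.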
+DEMO_DATA = {
+ 1: {
+ "dataset": {"name": "demo_100"},
+ "learn": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ Welcome to AnnotateChange, an annotation app for change point
+ detection.
+
+ Our goal with AnnotateChange is to create a dataset of
+ human-annotated time series to use in the development and
+ evaluation of change point algorithms.
+
+ We really appreciate that you've agreed to help us with this!
+ Without your help this project would not be possible.
+
+ In the next few pages, we'll introduce you to the problem of
+ change point detection. We'll look at a few datasets and see
+ different types of changes that can occur.
+
+ Thanks again for your help!"""
+ )
+ )
+ },
+ "annotate": {
+ "text": RUBRIC
+ + markdown.markdown(
+ textwrap.dedent(
+ """
+ Click "Submit" when you have finished marking the change points
+ or "No change points" when you believe there are none. You can
+ reset the graph with the "Reset" button."""
+ )
+ )
+ },
+ "evaluate": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ This first example has **one** change point. Not all datasets
+ that you'll encounter in this program have exactly one change
+ point. It is up to you to see whether a time series contains a
+ change point or not, and if it does, to see if there is more
+ than one.
+
+ Don't worry if you weren't exactly correct on the first try.
+ The goal of this introduction is to familiarise yourself with
+ time series data and with change point detection in
+ particular."""
+ )
+ )
+ },
+ },
+ 2: {
+ "dataset": {"name": "demo_200"},
+ "learn": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ In the previous example, you've seen a relatively simple
+ dataset where a *step change* occurred at a certain point in
+ time. A step change is one of the simplest types of change
+ points that can occur.
+
+ Click "Continue" to move on to the next example."""
+ )
+ )
+ },
+ "annotate": {"text": RUBRIC},
+ "evaluate": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ The dataset in the previous example shows again a time series
+ with step changes, but here there are **two** change points.
+ This is important to keep in mind, as there can be more than
+ one change point in a dataset."""
+ )
+ )
+ },
+ },
+ 3: {
+ "dataset": {"name": "demo_300"},
+ "learn": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ In the previous examples we've introduced *step changes*.
+ However, these are not the only types of change points that
+ can occur, as we'll see in the next example."""
+ )
+ )
+ },
+ "annotate": {"text": RUBRIC},
+ "evaluate": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ This time series shows an example where a change occurs in the
+ **variance** of the data. At the change point the variance of
+ the noise changes abruptly from a relatively low noise variance
+ to a high noise variance. This is another type of change point
+ that can occur."""
+ )
+ )
+ },
+ },
+ 4: {
+ "dataset": {"name": "demo_400"},
+ "learn": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ So far we have seen two types of change points: step changes
+ (also known as mean shift) and variance changes."""
+ )
+ )
+ },
+ "annotate": {"text": RUBRIC},
+ "evaluate": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ Remember that it's also possible for there to be *no change
+ points* in a dataset. It can sometimes be difficult to tell
+ whether a dataset has change points or not. In that case, it's
+ important to remember that we are looking for points where the
+ behaviour of the time series changes *abruptly*."""
+ )
+ )
+ },
+ },
+ 5: {
+ "dataset": {"name": "demo_500"},
+ "learn": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ Change points mark places in the time series where the
+ behaviour changes *abruptly*. While **outliers** are data
+ points that do not adhere to the prevailing behaviour of the
+ time series, they are not generally considered change points
+ because the behaviour of the time series before and after the
+ outlier is the same. """
+ )
+ )
+ },
+ "annotate": {"text": RUBRIC},
+ "evaluate": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ Outliers are quite common in real-world time series data, and
+ not all change point detection methods are robust against these
+ observations.
+
+ Note that short periods that consist of several consecutive
+ outlying data points could be considered an abrupt change in
+ behaviour of the time series. If you see this, use your
+ intuition to guide you."""
+ )
+ )
+ },
+ },
+ 6: {
+ "dataset": {"name": "demo_600"},
+ "learn": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ So far we've seen *step changes*, *variance changes*, and time
+ series with *outliers*. Can you think of another type of change
+ that can occur?"""
+ )
+ )
+ },
+ "annotate": {"text": RUBRIC},
+ "evaluate": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ What we see here is a change in *trend*. For these types of
+ changes it's not always easy to figure out exactly where the
+ change occurs, so it's harder to get it exactly right. Use
+ your intuition and keep in mind that it is normal for the
+ observations to be noisy."""
+ )
+ )
+ },
+ },
+ 7: {
+ "dataset": {"name": "demo_700"},
+ "learn": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ It is not uncommon for time series data from the real world to
+ display **seasonal variation**, for instance because certain
+ days of the week are more busy than others. Seasonality can
+ make it harder to find the change points in the dataset (if
+ there are any at all). Try to follow the pattern of
+ seasonality, and check whether the pattern changes in one of
+ the ways we've seen previously."""
+ )
+ )
+ },
+ "annotate": {"text": RUBRIC},
+ "evaluate": {
+ "text": markdown.markdown(
+ textwrap.dedent(
+ """
+ As you can see from this example, changes in seasonality can
+ occur as well. We expect that these changes are quite rare,
+ but it's nevertheless good to be aware of them."""
+ )
+ )
+ },
+ },
+}
+
+
+def redirect_user(demo_id, phase_id):
+    """Redirect to whatever follows step (demo_id, phase_id) of the demo.
+
+    Every demo has three phases. From phase 1 or 2 the user moves to the next
+    phase of the same demo; from phase 3 they move to phase 1 of the next
+    demo. After phase 3 of the last demo in DEMO_DATA the user is flagged as
+    introduced (if not already) and sent to the index page.
+    """
+    final_phase = 3
+    final_demo = max(DEMO_DATA)
+
+    if phase_id != final_phase:
+        # Still inside the current demo: advance one phase.
+        next_url = url_for(
+            "main.demo", demo_id=demo_id, phase_id=phase_id + 1
+        )
+        return redirect(next_url)
+
+    if demo_id != final_demo:
+        # Finished this demo: start the next one at its first phase.
+        next_url = url_for("main.demo", demo_id=demo_id + 1, phase_id=1)
+        return redirect(next_url)
+
+    # Last phase of the last demo: the introduction is complete.
+    if not current_user.is_introduced:
+        current_user.is_introduced = True
+        db.session.commit()
+        # TODO: Assign real tasks to the user here.
+    return redirect(url_for("main.index"))
+
+
+def process_annotations(demo_id):
+    """Read the annotation posted for a demo and store it on first submission.
+
+    The request body must be a JSON object with an "identifier" entry equal to
+    ``demo_id`` and a "changepoints" entry that is either ``None`` (the user
+    marked no change points) or a list of objects that each have an "x" entry.
+
+    The annotation is only written to the database when the user is not yet
+    introduced and has no task for this demo dataset; otherwise the originally
+    stored annotations are kept untouched.
+
+    Returns:
+        The list of "x" values of the submitted change points (empty when
+        "changepoints" is ``None``).
+
+    Aborts with 400 when the body is not a usable JSON object or the
+    identifier does not match, and with 500 when the demo dataset is missing.
+    The function always returns a list, so the caller can safely put the
+    result in the session.
+    """
+    annotation = request.get_json()
+    if not isinstance(annotation, dict) or "changepoints" not in annotation:
+        abort(400)
+
+    if annotation.get("identifier") != demo_id:
+        LOGGER.error(
+            "User %s returned a task id in the demo that wasn't the demo id.",
+            current_user.username,
+        )
+        flash(
+            "An internal error occurred, the administrator has been notified.",
+            "error",
+        )
+        abort(400)
+
+    changepoints = annotation["changepoints"]
+    if changepoints is None:
+        retval = []
+    else:
+        retval = [cp["x"] for cp in changepoints]
+
+    # If the user is already introduced, we assume that their demo annotations
+    # are already in the database, and thus we don't put them back in (because
+    # we want the original ones).
+    if current_user.is_introduced:
+        return retval
+
+    dataset_name = DEMO_DATA[demo_id]["dataset"]["name"]
+    dataset = Dataset.query.filter_by(name=dataset_name).first()
+    if dataset is None:
+        LOGGER.error("Demo requested unavailable dataset: %s", dataset_name)
+        abort(500)
+
+    task = Task.query.filter_by(
+        annotator_id=current_user.id, dataset_id=dataset.id
+    ).first()
+    # This happens if the user returns to the same demo page, but hasn't
+    # completed the full demo yet. Same as above, not updating because we want
+    # the originals.
+    if task is not None:
+        return retval
+
+    # Create the finished task and its annotations in a single transaction, so
+    # a failure cannot leave behind a task without annotations (which the
+    # check above would then treat as already annotated).
+    task = Task(annotator_id=current_user.id, dataset_id=dataset.id)
+    task.done = True
+    task.annotated_on = datetime.datetime.utcnow()
+    db.session.add(task)
+    db.session.flush()  # assigns task.id without committing yet
+
+    # "No change points" is recorded as one annotation with a null index.
+    cp_indices = [None] if changepoints is None else retval
+    for cp_index in cp_indices:
+        db.session.add(Annotation(cp_index=cp_index, task_id=task.id))
+    db.session.commit()
+
+    return retval
+
+
+def demo_learn(demo_id, form):
+    """Render the "learn" page of demo ``demo_id``.
+
+    The page shows the demo's "learn" text from DEMO_DATA together with
+    ``form``, which is passed to the template unchanged.
+    """
+    learn_text = DEMO_DATA[demo_id]["learn"]["text"]
+    page_title = "Introduction – %i" % demo_id
+    return render_template(
+        "demo/learn.html", title=page_title, text=learn_text, form=form
+    )
+
+
+def demo_annotate(demo_id):
+    """Render the annotation page for the dataset of demo ``demo_id``.
+
+    Looks up the demo's dataset by name and renders the regular annotation
+    template with the demo's rubric text, using ``demo_id`` as the identifier
+    the page reports back. If the dataset is not in the database, the problem
+    is logged and the user is redirected to the index page with an error
+    message.
+    """
+    demo_data = DEMO_DATA[demo_id]["annotate"]
+    # The dataset name lives next to the phase entries, not inside them.
+    dataset_name = DEMO_DATA[demo_id]["dataset"]["name"]
+    dataset = Dataset.query.filter_by(name=dataset_name).first()
+    if dataset is None:
+        LOGGER.error("Demo requested unavailable dataset: %s", dataset_name)
+        flash(
+            "An internal error occured. The administrator has been notified. We apologise for the inconvenience, please try again later.",
+            "error",
+        )
+        return redirect(url_for("main.index"))
+    chart_data = load_data_for_chart(dataset.name, dataset.md5sum)
+    return render_template(
+        "annotate/index.html",
+        title="Introduction – %i" % demo_id,
+        data=chart_data,
+        rubric=demo_data["text"],
+        identifier=demo_id,
+    )
+
+
+def demo_evaluate(demo_id, phase_id, form):
+    """Render the page comparing the user's change points with the true ones.
+
+    The user's change points are read from the session entry
+    "user_changepoints". If that entry is absent the user is sent back to the
+    previous phase. If the demo dataset or its true change points cannot be
+    loaded, the user is redirected to the index page with an error message.
+    """
+    demo_data = DEMO_DATA[demo_id]["evaluate"]
+    # A string sentinel distinguishes "nothing stored" from a stored empty
+    # list, which is a valid answer (no change points).
+    user_changepoints = session.get("user_changepoints", "__UNK__")
+    if user_changepoints == "__UNK__":
+        flash(
+            "The previous step of the demo was not completed successfully. Please try again.",
+            "error",
+        )
+        return redirect(
+            url_for("main.demo", demo_id=demo_id, phase_id=phase_id - 1)
+        )
+    dataset_name = DEMO_DATA[demo_id]["dataset"]["name"]
+    dataset = Dataset.query.filter_by(name=dataset_name).first()
+    if dataset is None:
+        # Without this check the attribute access below would raise.
+        LOGGER.error("Demo requested unavailable dataset: %s", dataset_name)
+        flash(
+            "An internal error occurred, the administrator has been notified. We apologise for the inconvenience, please try again later.",
+            "error",
+        )
+        return redirect(url_for("main.index"))
+    chart_data = load_data_for_chart(dataset.name, dataset.md5sum)
+    true_changepoints = get_demo_true_cps(dataset.name)
+    if true_changepoints is None:
+        flash(
+            "An internal error occurred, the administrator has been notified. We apologise for the inconvenience, please try again later.",
+            "error",
+        )
+        return redirect(url_for("main.index"))
+    annotations_true = [dict(index=x) for x in true_changepoints]
+    annotations_user = [dict(index=x) for x in user_changepoints]
+    return render_template(
+        "demo/evaluate.html",
+        title="Introduction – %i" % demo_id,
+        data=chart_data,
+        annotations_user=annotations_user,
+        annotations_true=annotations_true,
+        text=demo_data["text"],
+        form=form,
+    )
+
+
+@bp.route(
+    "/introduction/",
+    defaults={"demo_id": 1, "phase_id": 1},
+    methods=("GET", "POST"),
+)
+@bp.route(
+    "/introduction/<int:demo_id>/",
+    defaults={"phase_id": 1},
+    methods=("GET", "POST"),
+)
+@bp.route(
+    "/introduction/<int:demo_id>/<int:phase_id>", methods=("GET", "POST")
+)
+@login_required
+def demo(demo_id, phase_id):
+    """Serve one step of the introduction: phase ``phase_id`` of ``demo_id``.
+
+    GET renders the page for the phase: 1 = learn, 2 = annotate, 3 = evaluate.
+    Any other phase, or a demo id that is not in DEMO_DATA, gives a 404.
+
+    POST is used in two ways. A valid NextForm submission moves the user on
+    to the next step. Any other POST is treated as a submitted annotation: the
+    change points are stored in the session and the URL of the next phase is
+    returned as the response body.
+    """
+    # Reject unknown demos up front so the DEMO_DATA lookups in the helpers
+    # below cannot raise KeyError.
+    if demo_id not in DEMO_DATA:
+        abort(404)
+
+    form = NextForm()
+
+    if request.method == "POST":
+        if form.validate_on_submit():
+            return redirect_user(demo_id, phase_id)
+
+        user_changepoints = process_annotations(demo_id)
+        if not isinstance(user_changepoints, list):
+            # Anything that is not a list of change points is a response
+            # produced while rejecting the submission; pass it on instead of
+            # storing it in the session.
+            return user_changepoints
+        session["user_changepoints"] = user_changepoints
+        # NOTE(review): a plain URL string is returned here, not a redirect --
+        # presumably the annotation page's script navigates to it; confirm
+        # against the annotate template.
+        return url_for("main.demo", demo_id=demo_id, phase_id=phase_id + 1)
+
+    if phase_id == 1:
+        return demo_learn(demo_id, form)
+    elif phase_id == 2:
+        return demo_annotate(demo_id)
+    elif phase_id == 3:
+        return demo_evaluate(demo_id, phase_id, form)
+    else:
+        abort(404)
diff --git a/app/main/forms.py b/app/main/forms.py
new file mode 100644
index 0000000..dcdffd3
--- /dev/null
+++ b/app/main/forms.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+
+from flask_wtf import FlaskForm
+
+from wtforms import SubmitField
+
+class NextForm(FlaskForm):
+ """Form that consists of a single submit button labelled "Continue"."""
+ submit = SubmitField("Continue")
diff --git a/app/main/routes.py b/app/main/routes.py
index d249c5c..11de2f9 100644
--- a/app/main/routes.py
+++ b/app/main/routes.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import datetime
+import logging
from flask import render_template, flash, url_for, redirect, request
from flask_login import current_user
@@ -11,19 +12,13 @@ from app.main import bp
from app.models import Annotation, Task
from app.utils.datasets import load_data_for_chart
+logger = logging.getLogger(__name__)
+
RUBRIC = """
-<i>Please mark all the points in the time series where an <b>abrupt change</b>
-in
- the behaviour of the series occurs.</i>
-<br>
-If there are no such points, please click the <u>no changepoints</u> button.
-When you're ready, please click the <u>submit</u> button.
-<br>
-<br>
-<b>Note:</b> You can zoom and pan the graph if needed.
+Please mark the points in the time series where an <b>abrupt change</b> in
+ the behaviour of the series occurs. The goal is to define segments of the time
+ series that are separated by places where these abrupt changes occur.
<br>
-<br>
-Thank you!
"""
@@ -35,7 +30,7 @@ def index():
if current_user.is_authenticated:
user_id = current_user.id
tasks = Task.query.filter_by(annotator_id=user_id).all()
- tasks_done = [t for t in tasks if t.done]
+ tasks_done = [t for t in tasks if t.done and not t.dataset.is_demo]
tasks_todo = [t for t in tasks if not t.done]
return render_template(
"index.html",
@@ -48,14 +43,14 @@ def index():
@bp.route("/annotate/<int:task_id>", methods=("GET", "POST"))
@login_required
-def task(task_id):
+def annotate(task_id):
if request.method == "POST":
# record post time
now = datetime.datetime.utcnow()
# get the json from the client
annotation = request.get_json()
- if annotation["task"] != task_id:
+ if annotation["identifier"] != task_id:
flash("Internal error: task id doesn't match.", "error")
return redirect(url_for(task_id=task_id))
@@ -100,10 +95,14 @@ def task(task_id):
flash("It's not possible to edit annotations at the moment.")
return redirect(url_for("main.index"))
data = load_data_for_chart(task.dataset.name, task.dataset.md5sum)
+ if data is None:
+ flash(
+ "An internal error occurred loading this dataset, the admin has been notified. Please try again later. We apologise for the inconvenience."
+ )
return render_template(
"annotate/index.html",
- title="Annotate %s" % task.dataset.name,
- task=task,
+ title=task.dataset.name.title(),
+ identifier=task.id,
data=data,
rubric=RUBRIC,
)