diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-06-03 15:19:28 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-06-03 15:19:28 +0100 |
| commit | 94c3656bea0ff16b826f1adf34a3ada9084c7f8f (patch) | |
| tree | b9e64849ea5dd15ab94fb3b8f79a2080fa050d1e | |
| parent | Initial version of demo (diff) | |
| download | AnnotateChange-94c3656bea0ff16b826f1adf34a3ada9084c7f8f.tar.gz AnnotateChange-94c3656bea0ff16b826f1adf34a3ada9084c7f8f.zip | |
Rewrite the task assignment flow
With the demo in place, we're rewriting the
task assignment flow such that users only
get a task assigned when:
1. They finish the demo
2. They finish a task
3. They log in again.
This way we can better balance the datasets
and we won't have datasets that don't get
enough annotations because some users didn't
finish tasks they were assigned.
| -rw-r--r-- | app/admin/forms.py | 14 | ||||
| -rw-r--r-- | app/admin/routes.py | 41 | ||||
| -rw-r--r-- | app/auth/routes.py | 26 | ||||
| -rw-r--r-- | app/main/demo.py | 15 | ||||
| -rw-r--r-- | app/main/routes.py | 17 | ||||
| -rw-r--r-- | app/templates/admin/manage.html | 11 | ||||
| -rw-r--r-- | app/utils/tasks.py | 124 | ||||
| -rw-r--r-- | config.py | 4 |
8 files changed, 108 insertions, 144 deletions
diff --git a/app/admin/forms.py b/app/admin/forms.py index bd1dce6..5ff5361 100644 --- a/app/admin/forms.py +++ b/app/admin/forms.py @@ -6,8 +6,8 @@ from flask import current_app from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired -from wtforms import SubmitField, SelectField, IntegerField -from wtforms.validators import ValidationError, InputRequired, NumberRange +from wtforms import SubmitField, SelectField +from wtforms.validators import ValidationError, InputRequired from werkzeug.utils import secure_filename @@ -15,16 +15,6 @@ from app.models import Dataset from app.utils.datasets import validate_dataset, get_name_from_dataset -class AdminAutoAssignForm(FlaskForm): - max_per_user = IntegerField( - "Maximum Tasks per User", [NumberRange(min=0, max=10)], default=5 - ) - num_per_dataset = IntegerField( - "Tasks per Dataset", [NumberRange(min=1, max=20)], default=10 - ) - submit = SubmitField("Submit") - - class AdminManageTaskForm(FlaskForm): username = SelectField( "Username", coerce=int, validators=[InputRequired()] diff --git a/app/admin/routes.py b/app/admin/routes.py index f0d7817..d244a77 100644 --- a/app/admin/routes.py +++ b/app/admin/routes.py @@ -10,7 +10,6 @@ from app import db from app.admin import bp from app.decorators import admin_required from app.admin.forms import ( - AdminAutoAssignForm, AdminManageTaskForm, AdminAddDatasetForm, AdminManageDatasetsForm, @@ -24,54 +23,38 @@ from app.utils.datasets import ( dataset_is_demo, load_data_for_chart, ) -from app.utils.tasks import generate_auto_assign_tasks @bp.route("/manage/tasks", methods=("GET", "POST")) @admin_required def manage_tasks(): - form_auto = AdminAutoAssignForm() - user_list = [(u.id, u.username) for u in User.query.all()] dataset_list = [ (d.id, d.name) for d in Dataset.query.order_by(Dataset.name).all() ] - form_manual = AdminManageTaskForm() - form_manual.username.choices = user_list - form_manual.dataset.choices = dataset_list - - if 
form_auto.validate_on_submit() and form_auto.submit.data: - max_per_user = form_auto.max_per_user.data - num_per_dataset = form_auto.num_per_dataset.data + form = AdminManageTaskForm() + form.username.choices = user_list + form.dataset.choices = dataset_list - for task, error in generate_auto_assign_tasks( - max_per_user, num_per_dataset - ): - if task is None: - flash(error, "error") - return redirect(url_for("admin.manage_tasks")) - db.session.add(task) - db.session.commit() - flash("Automatic task assignment successful.", "success") - elif form_manual.validate_on_submit(): - user = User.query.filter_by(id=form_manual.username.data).first() + if form.validate_on_submit(): + user = User.query.filter_by(id=form.username.data).first() if user is None: flash("User does not exist.", "error") return redirect(url_for("admin.manage_tasks")) - dataset = Dataset.query.filter_by(id=form_manual.dataset.data).first() + dataset = Dataset.query.filter_by(id=form.dataset.data).first() if dataset is None: flash("Dataset does not exist.", "error") return redirect(url_for("admin.manage_tasks")) action = None - if form_manual.assign.data: + if form.assign.data: action = "assign" - elif form_manual.delete.data: + elif form.delete.data: action = "delete" else: flash( - "Internal error: no button is true but form_manual was submitted.", + "Internal error: no button is true but form was submitted.", "error", ) return redirect(url_for("admin.manage_tasks")) @@ -107,11 +90,7 @@ def manage_tasks(): .all() ) return render_template( - "admin/manage.html", - title="Assign Task", - form_auto=form_auto, - form_manual=form_manual, - tasks=tasks, + "admin/manage.html", title="Assign Task", form=form, tasks=tasks ) diff --git a/app/auth/routes.py b/app/auth/routes.py index 27b0de0..bc5e9b3 100644 --- a/app/auth/routes.py +++ b/app/auth/routes.py @@ -20,7 +20,8 @@ from app.auth.email import ( send_password_reset_email, send_email_confirmation_email, ) -from app.models import User +from app.models 
import User, Task +from app.utils.tasks import generate_user_task @bp.route("/login", methods=("GET", "POST")) @@ -32,13 +33,36 @@ def login(): flash("Invalid username or password", "error") return redirect(url_for("auth.login")) login_user(user, remember=form.remember_me.data) + # record last_active time current_user.last_active = datetime.datetime.utcnow() db.session.commit() + + # redirect if not confirmed yet if not user.is_confirmed: return redirect(url_for("auth.not_confirmed")) + + # Get the next page from the request (default to index) next_page = request.args.get("next") if not next_page or url_parse(next_page).netloc != "": next_page = url_for("main.index") + + # redirect if not introduced yet + if not user.is_introduced: + return redirect(url_for("main.index")) + + # assign task if no remaining and not at maximum. + remaining = Task.query.filter_by( + annotator_id=user.id, done=False + ).all() + if remaining: + return redirect(next_page) + + task = generate_user_task(user) + if task is None: + return redirect(next_page) + + db.session.add(task) + db.session.commit() return redirect(next_page) return render_template("auth/login.html", title="Sign In", form=form) diff --git a/app/main/demo.py b/app/main/demo.py index a126fd8..a5dbc0f 100644 --- a/app/main/demo.py +++ b/app/main/demo.py @@ -23,6 +23,7 @@ from app.main import bp from app.main.forms import NextForm from app.main.routes import RUBRIC from app.utils.datasets import load_data_for_chart, get_demo_true_cps +from app.utils.tasks import generate_user_task LOGGER = logging.getLogger(__name__) @@ -253,13 +254,21 @@ def redirect_user(demo_id, phase_id): last_demo_id = max(DEMO_DATA.keys()) demo_last_phase_id = 3 if demo_id == last_demo_id and phase_id == demo_last_phase_id: - # User is introduced. 
+ # User is already introduced (happens if they redo the demo) if current_user.is_introduced: return redirect(url_for("main.index")) + # mark user as introduced current_user.is_introduced = True db.session.commit() - # TODO: Assign real tasks to the user here. + + # assign a task to the user + task = generate_user_task(current_user) + if task is None: + return redirect(url_for("main.index")) + db.session.add(task) + db.session.commit() + return redirect(url_for("main.index")) elif phase_id == demo_last_phase_id: demo_id += 1 @@ -352,7 +361,7 @@ def demo_annotate(demo_id): if dataset is None: LOGGER.error( "Demo requested unavailable dataset: %s" - % demo_data["dataset"]["name"] + % DEMO_DATA[demo_id]["dataset"]["name"] ) flash( "An internal error occured. The administrator has been notified. We apologise for the inconvenience, please try again later.", diff --git a/app/main/routes.py b/app/main/routes.py index 11de2f9..a0033b1 100644 --- a/app/main/routes.py +++ b/app/main/routes.py @@ -11,6 +11,7 @@ from app.decorators import login_required from app.main import bp from app.models import Annotation, Task from app.utils.datasets import load_data_for_chart +from app.utils.tasks import generate_user_task logger = logging.getLogger(__name__) @@ -31,7 +32,9 @@ def index(): user_id = current_user.id tasks = Task.query.filter_by(annotator_id=user_id).all() tasks_done = [t for t in tasks if t.done and not t.dataset.is_demo] - tasks_todo = [t for t in tasks if not t.done] + tasks_todo = [ + t for t in tasks if (not t.done) and (not t.dataset.is_demo) + ] return render_template( "index.html", title="Home", @@ -78,8 +81,18 @@ def annotate(task_id): task.done = True task.annotated_on = now db.session.commit() - flash("Your annotation has been recorded, thank you!", "success") + + # assign a new task if necessary + task = generate_user_task(current_user) + if task is None: + return url_for("main.index") + db.session.add(task) + db.session.commit() + flash( + "A new dataset has 
been assigned for you to annotate. Thanks for your help!", + "info", + ) return url_for("main.index") task = Task.query.filter_by(id=task_id).first() diff --git a/app/templates/admin/manage.html b/app/templates/admin/manage.html index e0c247a..5c10e02 100644 --- a/app/templates/admin/manage.html +++ b/app/templates/admin/manage.html @@ -2,17 +2,10 @@ {% import 'bootstrap/wtf.html' as wtf %} {% block app_content %} -<h1>Manage Tasks Automatically</h1> +<h1>Manage Tasks</h1> <div class="row"> <div class="col-md-4"> - {{ wtf.quick_form(form_auto, button_map={'assign': 'success'}) }} - </div> -</div> - -<h1>Manage Tasks Manually</h1> -<div class="row"> - <div class="col-md-4"> - {{ wtf.quick_form(form_manual, button_map={'assign': 'primary', 'delete': 'danger'}) }} + {{ wtf.quick_form(form, button_map={'assign': 'primary', 'delete': 'danger'}) }} </div> </div> <br> diff --git a/app/utils/tasks.py b/app/utils/tasks.py index a5398eb..4297420 100644 --- a/app/utils/tasks.py +++ b/app/utils/tasks.py @@ -4,100 +4,56 @@ """ -import random from flask import current_app from app.models import User, Dataset, Task -def generate_auto_assign_tasks(max_per_user, num_per_dataset): - """Automatically generate random tasks - - This function generates random tasks for the users based on the desired - number of tasks per dataset and the maximum number of tasks per user. The - return value is a tuple (Task, error) where Task is None if an error - occurred. +def generate_user_task(user): """ + Generate new task for a given user. 
+ + This function assigns tasks to a given user and ensures that: + + 1) datasets that are nearly annotated with the desired number of + datasets get priority + 2) users never are given more tasks than max_per_user + 3) users never get the same dataset twice - # create a dictionary of user/num available tasks - available_users = {} - for user in User.query.all(): - user_tasks = Task.query.filter_by(annotator_id=user.id).all() - if len(user_tasks) < max_per_user: - available_users[user] = max_per_user - len(user_tasks) - - if not available_users: - error = ( - "All users already have at least %i tasks assigned to them." - % max_per_user - ) - yield (None, error) - - # create a dictionary of dataset/num tasks desired - datasets_tbd = {} - for dataset in Dataset.query.all(): - dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all() - if len(dataset_tasks) < num_per_dataset: - datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks) - - if not datasets_tbd: - error = ( - "All datasets already have at least the desired number (%i) of tasks." 
- % num_per_dataset - ) - yield (None, error) - - # shuffle the dataset list - datasets = list(datasets_tbd.keys()) - random.shuffle(datasets) - for dataset in datasets: - available = [u for u, v in available_users.items() if v > 0] - tbd = min(len(available), datasets_tbd[dataset]) - - # select a random set of users - selected_users = random.sample(available, tbd) - for user in selected_users: - task = Task(annotator_id=user.id, dataset_id=dataset.id) - yield (task, None) - available_users[user] -= 1 - datasets_tbd[dataset] -= 1 - - if any((datasets_tbd[d] > 0 for d in datasets)): - yield ( - None, - "Insufficient users available for the desired number of tasks per dataset.", - ) - - -def create_initial_user_tasks(user, max_per_user=None, num_per_dataset=None): - """Generate initial tasks for a given user """ - if max_per_user is None: - max_per_user = current_app.config["TASKS_MAX_PER_USER"] - if num_per_dataset is None: - num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"] + max_per_user = current_app.config["TASKS_MAX_PER_USER"] + num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"] user_tasks = Task.query.filter_by(annotator_id=user.id).all() - if len(user_tasks) >= max_per_user: - yield None - available_user = max_per_user - len(user_tasks) + user_tasks = [t for t in user_tasks if not t.dataset.is_demo] + n_user_tasks = len(user_tasks) + if n_user_tasks >= max_per_user: + return None - datasets_tbd = {} - for dataset in Dataset.query.all(): + potential_datasets = [] + for dataset in Dataset.query.filter_by(is_demo=False).all(): dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all() - if len(dataset_tasks) < num_per_dataset: - datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks) - if not datasets_tbd: - yield None - - # shuffle the dataset list - datasets = list(datasets_tbd.keys()) - random.shuffle(datasets) - for dataset in datasets: - task = Task(annotator_id=user.id, dataset_id=dataset.id) - yield task - available_user 
-= 1 - datasets_tbd[dataset] -= 1 - if available_user == 0: - break + + # check that this dataset needs more annotations + n_needed = num_per_dataset - len(dataset_tasks) + if n_needed <= 0: + continue + + # check that this dataset is not already assigned to the user + task = Task.query.filter_by( + dataset_id=dataset.id, annotator_id=user.id + ).first() + if not task is None: + continue + potential_datasets.append((n_needed, dataset)) + + if len(potential_datasets) == 0: + return None + + # sort datasets so that the ones who need the least are at the front. + potential_datasets.sort() + + _, dataset = potential_datasets[0] + task = Task(annotator_id=user.id, dataset_id=dataset.id) + return task @@ -48,8 +48,8 @@ class Config(object): TEMP_DIR = "tmp" # task distribution settings - TASKS_MAX_PER_USER = 5 - TASKS_NUM_PER_DATASET = 10 + TASKS_MAX_PER_USER =int(os.environ.get("TASKS_MAX_PER_USER")) or 5 + TASKS_NUM_PER_DATASET = int(os.environ.get("TASKS_NUM_PER_DATASET")) or 10 # user emails allowed USER_EMAIL_DOMAINS = os.environ.get("USER_EMAIL_DOMAINS") or "" |
