diff options
| -rw-r--r-- | app/admin/routes.py | 55 | ||||
| -rw-r--r-- | app/auth/routes.py | 7 | ||||
| -rw-r--r-- | app/utils/tasks.py | 103 | ||||
| -rw-r--r-- | config.py | 4 |
4 files changed, 123 insertions, 46 deletions
diff --git a/app/admin/routes.py b/app/admin/routes.py index 9658266..859f8ac 100644 --- a/app/admin/routes.py +++ b/app/admin/routes.py @@ -18,6 +18,7 @@ from app.admin.forms import ( AdminManageDatasetsForm, ) from app.models import User, Dataset, Task, Annotation +from app.utils.tasks import generate_auto_assign_tasks @bp.route("/manage/tasks", methods=("GET", "POST")) @@ -38,53 +39,15 @@ def manage_tasks(): max_per_user = form_auto.max_per_user.data num_per_dataset = form_auto.num_per_dataset.data - available_users = {} - for user in User.query.all(): - user_tasks = Task.query.filter_by(annotator_id=user.id).all() - if len(user_tasks) < max_per_user: - available_users[user] = max_per_user - len(user_tasks) - - if not available_users: - flash( - "All users already have at least %i tasks assigned to them." - % max_per_user, - "error", - ) - return redirect(url_for("admin.manage_tasks")) - - datasets_tbd = {} - for dataset in Dataset.query.all(): - dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all() - if len(dataset_tasks) < num_per_dataset: - datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks) - - if not datasets_tbd: - flash( - "All datasets have at least the desired number (%i) of assigned tasks." - % num_per_dataset, - "info", - ) - return redirect(url_for("admin.manage_tasks")) - - datasets = list(datasets_tbd.keys()) - random.shuffle(datasets) - for dataset in datasets: - available = [u for u, v in available_users.items() if v > 0] - tbd = min(len(available), datasets_tbd[dataset]) - selected_users = random.sample(available, tbd) - for user in selected_users: - task = Task(annotator_id=user.id, dataset_id=dataset.id) - db.session.add(task) - db.session.commit() - available_users[user] -= 1 - datasets_tbd[dataset] -= 1 - if any((datasets_tbd[d] > 0 for d in datasets)): - flash( - "Insufficient users available for the desired number of tasks per dataset.", - "info", - ) + for task, error in generate_auto_assign_tasks( + max_per_user, num_per_dataset + ): + if task is None: + flash(error, "error") + return redirect(url_for("admin.manage_tasks")) + db.session.add(task) + db.session.commit() flash("Automatic task assignment successful.", "success") - elif form_manual.validate_on_submit(): user = User.query.filter_by(id=form_manual.username.data).first() if user is None: diff --git a/app/auth/routes.py b/app/auth/routes.py index c5c175c..73cbd6a 100644 --- a/app/auth/routes.py +++ b/app/auth/routes.py @@ -22,6 +22,7 @@ from app.auth.email import ( send_password_reset_email, send_email_confirmation_email, ) +from app.utils.tasks import create_initial_user_tasks @bp.route("/login", methods=("GET", "POST")) @@ -122,7 +123,13 @@ def confirm_email(token): else: user.is_confirmed = True db.session.commit() + for task in create_initial_user_tasks(user): + if task is None: + break + db.session.add(task) + db.session.commit() flash("Account confirmed successfully. Thank you!", "success") + return redirect(url_for("auth.login")) return redirect(url_for("main.index")) diff --git a/app/utils/tasks.py b/app/utils/tasks.py new file mode 100644 index 0000000..a5398eb --- /dev/null +++ b/app/utils/tasks.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +"""Utilities for task assignment + +""" + +import random + +from flask import current_app + +from app.models import User, Dataset, Task + + +def generate_auto_assign_tasks(max_per_user, num_per_dataset): + """Automatically generate random tasks + + This function generates random tasks for the users based on the desired + number of tasks per dataset and the maximum number of tasks per user. The + return value is a tuple (Task, error) where Task is None if an error + occurred. + """ + + # create a dictionary of user/num available tasks + available_users = {} + for user in User.query.all(): + user_tasks = Task.query.filter_by(annotator_id=user.id).all() + if len(user_tasks) < max_per_user: + available_users[user] = max_per_user - len(user_tasks) + + if not available_users: + error = ( + "All users already have at least %i tasks assigned to them." + % max_per_user + ) + yield (None, error) + + # create a dictionary of dataset/num tasks desired + datasets_tbd = {} + for dataset in Dataset.query.all(): + dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all() + if len(dataset_tasks) < num_per_dataset: + datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks) + + if not datasets_tbd: + error = ( + "All datasets already have at least the desired number (%i) of tasks." + % num_per_dataset + ) + yield (None, error) + + # shuffle the dataset list + datasets = list(datasets_tbd.keys()) + random.shuffle(datasets) + for dataset in datasets: + available = [u for u, v in available_users.items() if v > 0] + tbd = min(len(available), datasets_tbd[dataset]) + + # select a random set of users + selected_users = random.sample(available, tbd) + for user in selected_users: + task = Task(annotator_id=user.id, dataset_id=dataset.id) + yield (task, None) + available_users[user] -= 1 + datasets_tbd[dataset] -= 1 + + if any((datasets_tbd[d] > 0 for d in datasets)): + yield ( + None, + "Insufficient users available for the desired number of tasks per dataset.", + ) + + +def create_initial_user_tasks(user, max_per_user=None, num_per_dataset=None): + """Generate initial tasks for a given user + """ + if max_per_user is None: + max_per_user = current_app.config["TASKS_MAX_PER_USER"] + if num_per_dataset is None: + num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"] + + user_tasks = Task.query.filter_by(annotator_id=user.id).all() + if len(user_tasks) >= max_per_user: + yield None + available_user = max_per_user - len(user_tasks) + + datasets_tbd = {} + for dataset in Dataset.query.all(): + dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all() + if len(dataset_tasks) < num_per_dataset: + datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks) + if not datasets_tbd: + yield None + + # shuffle the dataset list + datasets = list(datasets_tbd.keys()) + random.shuffle(datasets) + for dataset in datasets: + task = Task(annotator_id=user.id, dataset_id=dataset.id) + yield task + available_user -= 1 + datasets_tbd[dataset] -= 1 + if available_user == 0: + break @@ -46,3 +46,7 @@ class Config(object): # these should be used relative to the instance path DATASET_DIR = "datasets" TEMP_DIR = "tmp" + + # task distribution settings + TASKS_MAX_PER_USER = 5 + TASKS_NUM_PER_DATASET = 10 |
