# -*- coding: utf-8 -*-
#
# Origin: commit 47b38ea7f9e8c4e57d5463dc68271819b823d023
# Author: Gertjan van den Burg
# Date:   Mon, 1 Apr 2019 15:58:14 +0100
# Subject: Move task assignment to separate module
#          Also, add automatic assign on user signup
# File:   app/utils/tasks.py

"""Utilities for task assignment

"""

import random

from flask import current_app

from app.models import User, Dataset, Task


def _datasets_needing_tasks(num_per_dataset):
    """Return a dict mapping each under-served dataset to its task shortfall.

    A dataset is included only when it currently has fewer than
    ``num_per_dataset`` tasks; the value is the number of tasks still needed
    to reach that target.
    """
    shortfall = {}
    for dataset in Dataset.query.all():
        # count() lets the database do the counting instead of loading every
        # task row only to take its length.
        assigned = Task.query.filter_by(dataset_id=dataset.id).count()
        if assigned < num_per_dataset:
            shortfall[dataset] = num_per_dataset - assigned
    return shortfall


def generate_auto_assign_tasks(max_per_user, num_per_dataset):
    """Automatically generate random tasks

    This generator creates random tasks for the users based on the desired
    number of tasks per dataset and the maximum number of tasks per user.

    Each yielded item is a tuple ``(task, error)``. On success ``task`` is a
    new Task instance and ``error`` is None; on failure ``task`` is None and
    ``error`` is a message string. The generator stops right after yielding
    a "nothing to do" error. The "insufficient users" error, if any, is
    yielded last, after all tasks that could be created.

    The Task objects are only constructed here; nothing in this function adds
    them to a database session.

    NOTE(review): users are sampled without checking whether they already
    have a task for the dataset in question, so a user may presumably be
    given the same dataset twice -- confirm whether that is acceptable.
    """

    # create a dictionary of user/num available tasks
    available_users = {}
    for user in User.query.all():
        assigned = Task.query.filter_by(annotator_id=user.id).count()
        if assigned < max_per_user:
            available_users[user] = max_per_user - assigned

    if not available_users:
        error = (
            "All users already have at least %i tasks assigned to them."
            % max_per_user
        )
        yield (None, error)
        # Stop here: without this the generator would carry on and report
        # further errors for the same condition.
        return

    # create a dictionary of dataset/num tasks desired
    datasets_tbd = _datasets_needing_tasks(num_per_dataset)

    if not datasets_tbd:
        error = (
            "All datasets already have at least the desired number (%i) of tasks."
            % num_per_dataset
        )
        yield (None, error)
        return

    # shuffle the dataset list
    datasets = list(datasets_tbd)
    random.shuffle(datasets)
    for dataset in datasets:
        # only users with remaining capacity are eligible for this dataset
        available = [u for u, v in available_users.items() if v > 0]
        tbd = min(len(available), datasets_tbd[dataset])

        # select a random set of users (distinct users, no replacement)
        for user in random.sample(available, tbd):
            task = Task(annotator_id=user.id, dataset_id=dataset.id)
            yield (task, None)
            available_users[user] -= 1
            datasets_tbd[dataset] -= 1

    if any(remaining > 0 for remaining in datasets_tbd.values()):
        yield (
            None,
            "Insufficient users available for the desired number of tasks per dataset.",
        )


def create_initial_user_tasks(user, max_per_user=None, num_per_dataset=None):
    """Generate initial tasks for a given user

    Yields new Task instances for ``user``, one per randomly ordered dataset
    that still needs tasks, until the user reaches ``max_per_user`` tasks.
    A single ``None`` is yielded (and the generator stops) when the user is
    already at or above the limit, or when no dataset needs more tasks.

    When ``max_per_user`` or ``num_per_dataset`` is None, the value is read
    from the application config keys ``TASKS_MAX_PER_USER`` and
    ``TASKS_NUM_PER_DATASET`` respectively.

    The Task objects are only constructed here; nothing in this function adds
    them to a database session.

    NOTE(review): there is no check for an existing task linking this user to
    a dataset, so a duplicate is presumably possible -- confirm against the
    callers.
    """
    if max_per_user is None:
        max_per_user = current_app.config["TASKS_MAX_PER_USER"]
    if num_per_dataset is None:
        num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"]

    assigned = Task.query.filter_by(annotator_id=user.id).count()
    if assigned >= max_per_user:
        yield None
        # Stop here: otherwise the remaining capacity is <= 0 and the loop
        # below would hand out a task for every dataset, since its stop
        # condition is only reached by counting down to zero.
        return
    available_user = max_per_user - assigned

    datasets_tbd = _datasets_needing_tasks(num_per_dataset)
    if not datasets_tbd:
        yield None
        return

    # shuffle the dataset list
    datasets = list(datasets_tbd)
    random.shuffle(datasets)
    for dataset in datasets:
        task = Task(annotator_id=user.id, dataset_id=dataset.id)
        yield task
        available_user -= 1
        datasets_tbd[dataset] -= 1
        if available_user <= 0:
            break