diff options
Diffstat (limited to 'app/utils')
| -rw-r--r-- | app/utils/tasks.py | 124 |
1 files changed, 40 insertions, 84 deletions
diff --git a/app/utils/tasks.py b/app/utils/tasks.py index a5398eb..4297420 100644 --- a/app/utils/tasks.py +++ b/app/utils/tasks.py @@ -4,100 +4,56 @@ """ -import random from flask import current_app from app.models import User, Dataset, Task -def generate_auto_assign_tasks(max_per_user, num_per_dataset): - """Automatically generate random tasks - - This function generates random tasks for the users based on the desired - number of tasks per dataset and the maximum number of tasks per user. The - return value is a tuple (Task, error) where Task is None if an error - occurred. +def generate_user_task(user): """ + Generate new task for a given user. + + This function assigns tasks to a given user and ensures that: + + 1) datasets that are nearly annotated with the desired number of + datasets get priority + 2) users never are given more tasks than max_per_user + 3) users never get the same dataset twice - # create a dictionary of user/num available tasks - available_users = {} - for user in User.query.all(): - user_tasks = Task.query.filter_by(annotator_id=user.id).all() - if len(user_tasks) < max_per_user: - available_users[user] = max_per_user - len(user_tasks) - - if not available_users: - error = ( - "All users already have at least %i tasks assigned to them." - % max_per_user - ) - yield (None, error) - - # create a dictionary of dataset/num tasks desired - datasets_tbd = {} - for dataset in Dataset.query.all(): - dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all() - if len(dataset_tasks) < num_per_dataset: - datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks) - - if not datasets_tbd: - error = ( - "All datasets already have at least the desired number (%i) of tasks." - % num_per_dataset - ) - yield (None, error) - - # shuffle the dataset list - datasets = list(datasets_tbd.keys()) - random.shuffle(datasets) - for dataset in datasets: - available = [u for u, v in available_users.items() if v > 0] - tbd = min(len(available), datasets_tbd[dataset]) - - # select a random set of users - selected_users = random.sample(available, tbd) - for user in selected_users: - task = Task(annotator_id=user.id, dataset_id=dataset.id) - yield (task, None) - available_users[user] -= 1 - datasets_tbd[dataset] -= 1 - - if any((datasets_tbd[d] > 0 for d in datasets)): - yield ( - None, - "Insufficient users available for the desired number of tasks per dataset.", - ) - - -def create_initial_user_tasks(user, max_per_user=None, num_per_dataset=None): - """Generate initial tasks for a given user """ - if max_per_user is None: - max_per_user = current_app.config["TASKS_MAX_PER_USER"] - if num_per_dataset is None: - num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"] + max_per_user = current_app.config["TASKS_MAX_PER_USER"] + num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"] user_tasks = Task.query.filter_by(annotator_id=user.id).all() - if len(user_tasks) >= max_per_user: - yield None - available_user = max_per_user - len(user_tasks) + user_tasks = [t for t in user_tasks if not t.dataset.is_demo] + n_user_tasks = len(user_tasks) + if n_user_tasks >= max_per_user: + return None - datasets_tbd = {} - for dataset in Dataset.query.all(): + potential_datasets = [] + for dataset in Dataset.query.filter_by(is_demo=False).all(): dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all() - if len(dataset_tasks) < num_per_dataset: - datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks) - if not datasets_tbd: - yield None - - # shuffle the dataset list - datasets = list(datasets_tbd.keys()) - random.shuffle(datasets) - for dataset in datasets: - task = Task(annotator_id=user.id, dataset_id=dataset.id) - yield task - available_user -= 1 - datasets_tbd[dataset] -= 1 - if available_user == 0: - break + + # check that this dataset needs more annotations + n_needed = num_per_dataset - len(dataset_tasks) + if n_needed <= 0: + continue + + # check that this dataset is not already assigned to the user + task = Task.query.filter_by( + dataset_id=dataset.id, annotator_id=user.id + ).first() + if not task is None: + continue + potential_datasets.append((n_needed, dataset)) + + if len(potential_datasets) == 0: + return None + + # sort datasets so that the ones who need the least are at the front. + potential_datasets.sort() + + _, dataset = potential_datasets[0] + task = Task(annotator_id=user.id, dataset_id=dataset.id) + return task |
