about summary refs log tree commit diff
path: root/app/utils/tasks.py
diff options
context:
space:
mode:
Diffstat (limited to 'app/utils/tasks.py')
-rw-r--r-- app/utils/tasks.py 124
1 files changed, 40 insertions, 84 deletions
diff --git a/app/utils/tasks.py b/app/utils/tasks.py
index a5398eb..4297420 100644
--- a/app/utils/tasks.py
+++ b/app/utils/tasks.py
@@ -4,100 +4,56 @@
"""
-import random
from flask import current_app
from app.models import User, Dataset, Task
-def generate_auto_assign_tasks(max_per_user, num_per_dataset):
- """Automatically generate random tasks
-
- This function generates random tasks for the users based on the desired
- number of tasks per dataset and the maximum number of tasks per user. The
- return value is a tuple (Task, error) where Task is None if an error
- occurred.
+def generate_user_task(user):
"""
+ Generate new task for a given user.
+
+ This function assigns tasks to a given user and ensures that:
+
+ 1) datasets that are closest to the desired number of
+ annotations get priority
+ 2) users are never given more tasks than max_per_user
+ 3) users never get the same dataset twice
- # create a dictionary of user/num available tasks
- available_users = {}
- for user in User.query.all():
- user_tasks = Task.query.filter_by(annotator_id=user.id).all()
- if len(user_tasks) < max_per_user:
- available_users[user] = max_per_user - len(user_tasks)
-
- if not available_users:
- error = (
- "All users already have at least %i tasks assigned to them."
- % max_per_user
- )
- yield (None, error)
-
- # create a dictionary of dataset/num tasks desired
- datasets_tbd = {}
- for dataset in Dataset.query.all():
- dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all()
- if len(dataset_tasks) < num_per_dataset:
- datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks)
-
- if not datasets_tbd:
- error = (
- "All datasets already have at least the desired number (%i) of tasks."
- % num_per_dataset
- )
- yield (None, error)
-
- # shuffle the dataset list
- datasets = list(datasets_tbd.keys())
- random.shuffle(datasets)
- for dataset in datasets:
- available = [u for u, v in available_users.items() if v > 0]
- tbd = min(len(available), datasets_tbd[dataset])
-
- # select a random set of users
- selected_users = random.sample(available, tbd)
- for user in selected_users:
- task = Task(annotator_id=user.id, dataset_id=dataset.id)
- yield (task, None)
- available_users[user] -= 1
- datasets_tbd[dataset] -= 1
-
- if any((datasets_tbd[d] > 0 for d in datasets)):
- yield (
- None,
- "Insufficient users available for the desired number of tasks per dataset.",
- )
-
-
-def create_initial_user_tasks(user, max_per_user=None, num_per_dataset=None):
- """Generate initial tasks for a given user
"""
- if max_per_user is None:
- max_per_user = current_app.config["TASKS_MAX_PER_USER"]
- if num_per_dataset is None:
- num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"]
+ max_per_user = current_app.config["TASKS_MAX_PER_USER"]
+ num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"]
user_tasks = Task.query.filter_by(annotator_id=user.id).all()
- if len(user_tasks) >= max_per_user:
- yield None
- available_user = max_per_user - len(user_tasks)
+ user_tasks = [t for t in user_tasks if not t.dataset.is_demo]
+ n_user_tasks = len(user_tasks)
+ if n_user_tasks >= max_per_user:
+ return None
- datasets_tbd = {}
- for dataset in Dataset.query.all():
+ potential_datasets = []
+ for dataset in Dataset.query.filter_by(is_demo=False).all():
dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all()
- if len(dataset_tasks) < num_per_dataset:
- datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks)
- if not datasets_tbd:
- yield None
-
- # shuffle the dataset list
- datasets = list(datasets_tbd.keys())
- random.shuffle(datasets)
- for dataset in datasets:
- task = Task(annotator_id=user.id, dataset_id=dataset.id)
- yield task
- available_user -= 1
- datasets_tbd[dataset] -= 1
- if available_user == 0:
- break
+
+ # check that this dataset needs more annotations
+ n_needed = num_per_dataset - len(dataset_tasks)
+ if n_needed <= 0:
+ continue
+
+ # check that this dataset is not already assigned to the user
+ task = Task.query.filter_by(
+ dataset_id=dataset.id, annotator_id=user.id
+ ).first()
+ if not task is None:
+ continue
+ potential_datasets.append((n_needed, dataset))
+
+ if len(potential_datasets) == 0:
+ return None
+
+ # sort datasets so that those needing the fewest annotations come first.
+ potential_datasets.sort()
+
+ _, dataset = potential_datasets[0]
+ task = Task(annotator_id=user.id, dataset_id=dataset.id)
+ return task