1 files changed, 40 insertions, 84 deletions
diff --git a/app/utils/tasks.py b/app/utils/tasks.py
index a5398eb..4297420 100644
--- a/app/utils/tasks.py
+++ b/app/utils/tasks.py
@@ -4,100 +4,56 @@
 
 """
 
-import random
 
 from flask import current_app
 
 from app.models import User, Dataset, Task
 
 
-def generate_auto_assign_tasks(max_per_user, num_per_dataset):
-    """Automatically generate random tasks
-
-    This function generates random tasks for the users based on the desired 
-    number of tasks per dataset and the maximum number of tasks per user. The 
-    return value is a tuple (Task, error) where Task is None if an error 
-    occurred.
+def generate_user_task(user):
     """
+    Generate new task for a given user.
+
+    This function assigns tasks to a given user and ensures that:
+
+        1) datasets that are nearly annotated with the desired number of 
+        datasets get priority
+        2) users never are given more tasks than max_per_user
+        3) users never get the same dataset twice
 
-    # create a dictionary of user/num available tasks
-    available_users = {}
-    for user in User.query.all():
-        user_tasks = Task.query.filter_by(annotator_id=user.id).all()
-        if len(user_tasks) < max_per_user:
-            available_users[user] = max_per_user - len(user_tasks)
-
-    if not available_users:
-        error = (
-            "All users already have at least %i tasks assigned to them."
-            % max_per_user
-        )
-        yield (None, error)
-
-    # create a dictionary of dataset/num tasks desired
-    datasets_tbd = {}
-    for dataset in Dataset.query.all():
-        dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all()
-        if len(dataset_tasks) < num_per_dataset:
-            datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks)
-
-    if not datasets_tbd:
-        error = (
-            "All datasets already have at least the desired number (%i) of tasks."
-            % num_per_dataset
-        )
-        yield (None, error)
-
-    # shuffle the dataset list
-    datasets = list(datasets_tbd.keys())
-    random.shuffle(datasets)
-    for dataset in datasets:
-        available = [u for u, v in available_users.items() if v > 0]
-        tbd = min(len(available), datasets_tbd[dataset])
-
-        # select a random set of users
-        selected_users = random.sample(available, tbd)
-        for user in selected_users:
-            task = Task(annotator_id=user.id, dataset_id=dataset.id)
-            yield (task, None)
-            available_users[user] -= 1
-            datasets_tbd[dataset] -= 1
-
-    if any((datasets_tbd[d] > 0 for d in datasets)):
-        yield (
-            None,
-            "Insufficient users available for the desired number of tasks per dataset.",
-        )
-
-
-def create_initial_user_tasks(user, max_per_user=None, num_per_dataset=None):
-    """Generate initial tasks for a given user
     """
-    if max_per_user is None:
-        max_per_user = current_app.config["TASKS_MAX_PER_USER"]
-    if num_per_dataset is None:
-        num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"]
+    max_per_user = current_app.config["TASKS_MAX_PER_USER"]
+    num_per_dataset = current_app.config["TASKS_NUM_PER_DATASET"]
 
     user_tasks = Task.query.filter_by(annotator_id=user.id).all()
-    if len(user_tasks) >= max_per_user:
-        yield None
-    available_user = max_per_user - len(user_tasks)
+    user_tasks = [t for t in user_tasks if not t.dataset.is_demo]
+    n_user_tasks = len(user_tasks)
+    if n_user_tasks >= max_per_user:
+        return None
 
-    datasets_tbd = {}
-    for dataset in Dataset.query.all():
+    potential_datasets = []
+    for dataset in Dataset.query.filter_by(is_demo=False).all():
         dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all()
-        if len(dataset_tasks) < num_per_dataset:
-            datasets_tbd[dataset] = num_per_dataset - len(dataset_tasks)
-    if not datasets_tbd:
-        yield None
-
-    # shuffle the dataset list
-    datasets = list(datasets_tbd.keys())
-    random.shuffle(datasets)
-    for dataset in datasets:
-        task = Task(annotator_id=user.id, dataset_id=dataset.id)
-        yield task
-        available_user -= 1
-        datasets_tbd[dataset] -= 1
-        if available_user == 0:
-            break
+
+        # check that this dataset needs more annotations
+        n_needed = num_per_dataset - len(dataset_tasks)
+        if n_needed <= 0:
+            continue
+
+        # check that this dataset is not already assigned to the user
+        task = Task.query.filter_by(
+            dataset_id=dataset.id, annotator_id=user.id
+        ).first()
+        if not task is None:
+            continue
+        potential_datasets.append((n_needed, dataset))
+
+    if len(potential_datasets) == 0:
+        return None
+
+    # sort datasets so that the ones who need the least are at the front.
+    potential_datasets.sort()
+
+    _, dataset = potential_datasets[0]
+    task = Task(annotator_id=user.id, dataset_id=dataset.id)
+    return task