aboutsummaryrefslogtreecommitdiff
path: root/app
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2019-09-16 12:23:02 +0100
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2019-09-16 12:23:02 +0100
commit7836612090cae6e568886980d4846b03690d66f5 (patch)
treef0cb016470039afdb54c2e79046ff64171ed9afe /app
parentMinor text changes to demo (diff)
downloadAnnotateChange-7836612090cae6e568886980d4846b03690d66f5.tar.gz
AnnotateChange-7836612090cae6e568886980d4846b03690d66f5.zip
Revise task assignment to randomly assign and allow extras
This commit changes task assignment to randomly assign tasks for datasets that still need annotations, with a bias towards datasets that are close to reaching our desired number of annotations. Users are further allowed to keep annotating datasets beyond that point if they want to.
Diffstat (limited to 'app')
-rw-r--r--app/utils/tasks.py34
1 file changed, 28 insertions, 6 deletions
diff --git a/app/utils/tasks.py b/app/utils/tasks.py
index 61155de..b0f5c27 100644
--- a/app/utils/tasks.py
+++ b/app/utils/tasks.py
@@ -4,6 +4,7 @@
"""
+import random
from flask import current_app
@@ -38,14 +39,13 @@ def generate_user_task(user):
if n_user_tasks >= max_per_user:
return None
+ # collect datasets that can be potentially assigned to the user
potential_datasets = []
for dataset in Dataset.query.filter_by(is_demo=False).all():
dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all()
# check that this dataset needs more annotations
n_needed = num_per_dataset - len(dataset_tasks)
- if n_needed <= 0:
- continue
# check that this dataset is not already assigned to the user
task = Task.query.filter_by(
@@ -55,13 +55,35 @@ def generate_user_task(user):
continue
potential_datasets.append((n_needed, dataset))
- # don't assign a dataset if there are no more datasets to annotate
+ # don't assign a dataset if there are no more datasets to annotate (user
+ # has done all)
if len(potential_datasets) == 0:
return None
- # sort datasets so that the ones who need the least are at the front.
- potential_datasets.sort(key=lambda x: x[0])
+ # First try assigning a random dataset that still needs annotations
+ dataset = None
+ need_annotations = [d for n, d in potential_datasets if n > 0]
+
+ # Weights are set to prioritize datasets that need fewer annotations to
+ # reach our goal (num_per_dataset), with a small chance of selecting
+ # another dataset.
+ weights = [
+ (num_per_dataset - n + 0.01) for n, d in potential_datasets if n > 0
+ ]
+ if need_annotations:
+ dataset = random.choices(need_annotations, weights=weights)[0]
+ else:
+ # if there are no such datasets, then this user is requesting
+ # additional annotations after all datasets have our desired coverage
+ # of num_per_dataset. Assign a random dataset that has the least excess
+ # annotations (thus causing even distribution).
+ max_nonpos = max((n for n, d in potential_datasets if n <= 0))
+ extra = [d for n, d in potential_datasets if n == max_nonpos]
+ if extra:
+ dataset = random.choice(extra)
+
+ if dataset is None:
+ return None
- _, dataset = potential_datasets[0]
task = Task(annotator_id=user.id, dataset_id=dataset.id)
return task