| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-09-16 12:23:02 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-09-16 12:23:02 +0100 |
| commit | 7836612090cae6e568886980d4846b03690d66f5 (patch) | |
| tree | f0cb016470039afdb54c2e79046ff64171ed9afe /app | |
| parent | Minor text changes to demo (diff) | |
| download | AnnotateChange-7836612090cae6e568886980d4846b03690d66f5.tar.gz AnnotateChange-7836612090cae6e568886980d4846b03690d66f5.zip | |
Revise task assignment to randomly assign and allow extras
This commit changes the task assignment to randomly
assign datasets that still need annotations, with a
bias towards those that are close to reaching the
desired number of annotations per dataset. Users are
also allowed to keep annotating datasets if they want
to, even after that target has been reached.
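In short, the new selection is a weighted random draw over the datasets that still need annotations, with a fallback to the least over-annotated dataset once every dataset has reached the target. The standalone sketch below illustrates that logic; the `pick_dataset` helper, the example `NUM_PER_DATASET` value, and the `(n_needed, dataset)` pairs are made up for illustration, while the `random.choices`/`random.choice` calls and the `+ 0.01` weighting mirror the change in the diff further down.

```python
import random

NUM_PER_DATASET = 3  # illustrative target number of annotations per dataset


def pick_dataset(candidates):
    """Pick one dataset from a list of (n_needed, dataset) pairs.

    Datasets that still need annotations are drawn at random, weighted so
    that nearly complete ones are most likely to be picked. Once every
    dataset has reached the target, fall back to a uniform choice among the
    datasets with the least excess annotations.
    """
    need = [(n, d) for n, d in candidates if n > 0]
    if need:
        # weight = annotations already collected + epsilon, so datasets close
        # to the target are favoured but untouched ones keep a small chance
        weights = [NUM_PER_DATASET - n + 0.01 for n, _ in need]
        return random.choices([d for _, d in need], weights=weights)[0]

    # every dataset is at or past the target: spread extras evenly by picking
    # among the datasets with the smallest surplus
    least_excess = max(n for n, _ in candidates if n <= 0)
    return random.choice([d for n, d in candidates if n == least_excess])


# "beta" needs only one more annotation, so it is the most likely pick.
print(pick_dataset([(3, "alpha"), (1, "beta"), (2, "gamma")]))
```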
Diffstat (limited to 'app')
| -rw-r--r-- | app/utils/tasks.py | 34 |
1 file changed, 28 insertions, 6 deletions
```diff
diff --git a/app/utils/tasks.py b/app/utils/tasks.py
index 61155de..b0f5c27 100644
--- a/app/utils/tasks.py
+++ b/app/utils/tasks.py
@@ -4,6 +4,7 @@
 
 """
 
+import random
 
 from flask import current_app
 
@@ -38,14 +39,13 @@ def generate_user_task(user):
     if n_user_tasks >= max_per_user:
         return None
 
+    # collect datasets that can be potentially assigned to the user
     potential_datasets = []
     for dataset in Dataset.query.filter_by(is_demo=False).all():
         dataset_tasks = Task.query.filter_by(dataset_id=dataset.id).all()
 
         # check that this dataset needs more annotations
         n_needed = num_per_dataset - len(dataset_tasks)
-        if n_needed <= 0:
-            continue
 
         # check that this dataset is not already assigned to the user
         task = Task.query.filter_by(
@@ -55,13 +55,35 @@ def generate_user_task(user):
             continue
         potential_datasets.append((n_needed, dataset))
 
-    # don't assign a dataset if there are no more datasets to annotate
+    # don't assign a dataset if there are no more datasets to annotate (user
+    # has done all)
     if len(potential_datasets) == 0:
         return None
 
-    # sort datasets so that the ones who need the least are at the front.
-    potential_datasets.sort(key=lambda x: x[0])
+    # First try assigning a random dataset that still needs annotations
+    dataset = None
+    need_annotations = [d for n, d in potential_datasets if n > 0]
+
+    # Weights are set to prioritize datasets that need fewer annotations to
+    # reach our goal (num_per_dataset), with a small chance of selecting
+    # another dataset.
+    weights = [
+        (num_per_dataset - n + 0.01) for n, d in potential_datasets if n > 0
+    ]
+    if need_annotations:
+        dataset = random.choices(need_annotations, weights=weights)[0]
+    else:
+        # if there are no such datasets, then this user is requesting
+        # additional annotations after all datasets have our desired coverage
+        # of num_per_dataset. Assign a random dataset that has the least excess
+        # annotations (thus causing even distribution).
+        max_nonpos = max((n for n, d in potential_datasets if n <= 0))
+        extra = [d for n, d in potential_datasets if n == max_nonpos]
+        if extra:
+            dataset = random.choice(extra)
+
+    if dataset is None:
+        return None
 
-    _, dataset = potential_datasets[0]
     task = Task(annotator_id=user.id, dataset_id=dataset.id)
     return task
```
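For a rough sense of how strong the bias is, a small simulation (the dataset names and counts here are purely illustrative, not part of the commit) counts how often each dataset would be drawn with the `num_per_dataset - n_needed + 0.01` weights:

```python
import random
from collections import Counter

# Hypothetical example: three datasets with 3, 1, and 2 annotations still
# needed out of a target of 3 per dataset.
num_per_dataset = 3
n_needed = {"alpha": 3, "beta": 1, "gamma": 2}

weights = [num_per_dataset - n + 0.01 for n in n_needed.values()]
draws = random.choices(list(n_needed), weights=weights, k=10_000)
print(Counter(draws))
# Expected proportions: beta ~66%, gamma ~33%, alpha well under 1%.
```

The small 0.01 offset keeps datasets with zero annotations selectable; with a weight of exactly zero they would never be drawn at all.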
