about | summary | refs | log | tree | commit | diff
path: root/datasets
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-05-04 22:56:12 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-05-04 22:56:12 +0100
commit: 4654fe77c06b6f6396b349eee07d9eb3a374d18d (patch)
tree: 6feda1301932566798fc8f73e9b2de7364a8f34c /datasets
parent: Make robocall script more robust (diff)
download: TCPD-4654fe77c06b6f6396b349eee07d9eb3a374d18d.tar.gz
          TCPD-4654fe77c06b6f6396b349eee07d9eb3a374d18d.zip
Address rounding differences in bee_waggle_6
To construct this time series from the original data, we compute the sine and cosine of the head angle of the bee. On different systems this can result in slight differences in the data due to rounding. This commit adds the checksum of a known version of the dataset with a different rounding than the original one and prints a message to the user when the original checksum is not matched exactly.
Diffstat (limited to 'datasets')
-rw-r--r-- datasets/bee_waggle_6/get_bee_waggle_6.py 45
1 file changed, 34 insertions, 11 deletions
diff --git a/datasets/bee_waggle_6/get_bee_waggle_6.py b/datasets/bee_waggle_6/get_bee_waggle_6.py
index 6f80042..1a2033b 100644
--- a/datasets/bee_waggle_6/get_bee_waggle_6.py
+++ b/datasets/bee_waggle_6/get_bee_waggle_6.py
@@ -18,6 +18,7 @@ import json
import math
import os
import zipfile
+import sys
from functools import wraps
from urllib.request import urlretrieve
@@ -26,8 +27,11 @@ ZIP_URL = "https://web.archive.org/web/20191114130815if_/https://www.cc.gatech.e
MD5_ZIP = "039843dc15c72fd5450eeb11c6e5599c"
MD5_JSON = "4f03feafecb3be0b069b3cb0d6b17d4f"
-# alternative checksum for small rounding errors
-MD5_JSON_2 = "71311783488ee5f1122545d24c15429b"
+# known alternative checksums for small rounding errors
+MD5_JSON_X = [
+ "71311783488ee5f1122545d24c15429b",
+ "3632e004b540de5c3eb049fb5591d044",
+]
NAME_ZIP = "psslds.zip"
NAME_JSON = "bee_waggle_6.json"
@@ -50,7 +54,7 @@ def check_md5sum(filename, checksum):
return h == checksum
-def validate(checksum, alternative_checksum=None):
+def validate(checksum, alt_checksums=None):
"""Decorator that validates the target file."""
def validate_decorator(func):
@@ -61,18 +65,37 @@ def validate(checksum, alternative_checksum=None):
return
if (
os.path.exists(target)
- and alternative_checksum
- and check_md5sum(target, alternative_checksum)
+ and alt_checksums
+ and any(check_md5sum(target, c) for c in alt_checksums)
):
+ print(
+ "Note: Matched alternative checksum for %s. "
+ "This indicates that small differences exist compared to "
+ "the original version of this time series, likely due to "
+ "rounding differences. Usually this is nothing to "
+ "worry about." % target,
+ file=sys.stderr,
+ )
return
out = func(*args, **kwargs)
if not os.path.exists(target):
raise FileNotFoundError("Target file expected at: %s" % target)
- if not (check_md5sum(target, checksum) or (
- alternative_checksum
- and check_md5sum(target, alternative_checksum)
- )):
- raise ValidationError(target)
+ if not (
+ check_md5sum(target, checksum)
+ or (
+ alt_checksums
+ and any(check_md5sum(target, c) for c in alt_checksums)
+ )
+ ):
+ print(
+ "Warning: Generated dataset %s didn't match a "
+ "known checksum. This is likely due to "
+ "rounding differences caused by "
+ "different system architectures. Minor differences in "
+ "algorithm performance can occur for this dataset. "
+ % target,
+ file=sys.stderr,
+ )
return out
return wrapper
@@ -85,7 +108,7 @@ def download_zip(target_path=None):
urlretrieve(ZIP_URL, target_path)
-@validate(MD5_JSON, MD5_JSON_2)
+@validate(MD5_JSON, MD5_JSON_X)
def write_json(zip_path, target_path=None):
with zipfile.ZipFile(zip_path) as thezip:
with thezip.open("psslds/zips/data/sequence6/btf/ximage.btf") as fp: