diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-05-04 22:56:12 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-05-04 22:56:12 +0100 |
| commit | 4654fe77c06b6f6396b349eee07d9eb3a374d18d (patch) | |
| tree | 6feda1301932566798fc8f73e9b2de7364a8f34c | |
| parent | Make robocall script more robust (diff) | |
| download | TCPD-4654fe77c06b6f6396b349eee07d9eb3a374d18d.tar.gz TCPD-4654fe77c06b6f6396b349eee07d9eb3a374d18d.zip | |
Address rounding differences in bee_waggle_6
To construct this time series from the original data
we compute the sine and cosine of the head angle
of the bee. On different systems this can result in
slight differences in the data due to rounding. This
commit adds the checksum of a known version of the
dataset with a different rounding than the original
one and prints a message to the user when the
checksum is not matched exactly.
| -rw-r--r-- | datasets/bee_waggle_6/get_bee_waggle_6.py | 45 |
1 files changed, 34 insertions, 11 deletions
diff --git a/datasets/bee_waggle_6/get_bee_waggle_6.py b/datasets/bee_waggle_6/get_bee_waggle_6.py index 6f80042..1a2033b 100644 --- a/datasets/bee_waggle_6/get_bee_waggle_6.py +++ b/datasets/bee_waggle_6/get_bee_waggle_6.py @@ -18,6 +18,7 @@ import json import math import os import zipfile +import sys from functools import wraps from urllib.request import urlretrieve @@ -26,8 +27,11 @@ ZIP_URL = "https://web.archive.org/web/20191114130815if_/https://www.cc.gatech.e MD5_ZIP = "039843dc15c72fd5450eeb11c6e5599c" MD5_JSON = "4f03feafecb3be0b069b3cb0d6b17d4f" -# alternative checksum for small rounding errors -MD5_JSON_2 = "71311783488ee5f1122545d24c15429b" +# known alternative checksums for small rounding errors +MD5_JSON_X = [ + "71311783488ee5f1122545d24c15429b", + "3632e004b540de5c3eb049fb5591d044", +] NAME_ZIP = "psslds.zip" NAME_JSON = "bee_waggle_6.json" @@ -50,7 +54,7 @@ def check_md5sum(filename, checksum): return h == checksum -def validate(checksum, alternative_checksum=None): +def validate(checksum, alt_checksums=None): """Decorator that validates the target file.""" def validate_decorator(func): @@ -61,18 +65,37 @@ def validate(checksum, alternative_checksum=None): return if ( os.path.exists(target) - and alternative_checksum - and check_md5sum(target, alternative_checksum) + and alt_checksums + and any(check_md5sum(target, c) for c in alt_checksums) ): + print( + "Note: Matched alternative checksum for %s. " + "This indicates that small differences exist compared to " + "the original version of this time series, likely due to " + "rounding differences. Usually this is nothing to " + "worry about." 
% target, + file=sys.stderr, + ) return out = func(*args, **kwargs) if not os.path.exists(target): raise FileNotFoundError("Target file expected at: %s" % target) - if not (check_md5sum(target, checksum) or ( - alternative_checksum - and check_md5sum(target, alternative_checksum) - )): - raise ValidationError(target) + if not ( + check_md5sum(target, checksum) + or ( + alt_checksums + and any(check_md5sum(target, c) for c in alt_checksums) + ) + ): + print( + "Warning: Generated dataset %s didn't match a " + "known checksum. This is likely due to " + "rounding differences caused by " + "different system architectures. Minor differences in " + "algorithm performance can occur for this dataset. " + % target, + file=sys.stderr, + ) return out return wrapper @@ -85,7 +108,7 @@ def download_zip(target_path=None): urlretrieve(ZIP_URL, target_path) -@validate(MD5_JSON, MD5_JSON_2) +@validate(MD5_JSON, MD5_JSON_X) def write_json(zip_path, target_path=None): with zipfile.ZipFile(zip_path) as thezip: with thezip.open("psslds/zips/data/sequence6/btf/ximage.btf") as fp: |
