about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com>, 2020-03-17 14:52:05 +0000
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com>, 2020-03-17 14:52:05 +0000
commit: 04dd3af22613fc3eaf8cdc64ed354d9105423643 (patch)
tree: 7144c5d6fce6bb43ee168402d4a3a11333ce0bc4
parent: Add global_co2 to repository (diff)
download: TCPD-04dd3af22613fc3eaf8cdc64ed354d9105423643.tar.gz
download: TCPD-04dd3af22613fc3eaf8cdc64ed354d9105423643.zip
Allow small rounding errors for bee_waggle_6
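For this dataset we compute two of the features ourselves, so small rounding errors can occur on different systems and change the MD5 checksum of the generated JSON file. This fix accepts a second, alternative checksum for bee_waggle_6.json so that the file produced with these rounding differences also validates, at least on Travis.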
-rw-r--r-- checksums.json 5
-rw-r--r-- datasets/bee_waggle_6/get_bee_waggle_6.py 19
-rw-r--r-- utils/check_checksums.py 10
3 files changed, 27 insertions, 7 deletions
diff --git a/checksums.json b/checksums.json
index 11eeab9..a092d6b 100644
--- a/checksums.json
+++ b/checksums.json
@@ -3,7 +3,10 @@
"checksums": {
"apple.json": "22edb48471bd3711f7a6e15de6413643",
"bank.json": "5207135ea53fc6fa2a8119908da73abf",
- "bee_waggle_6.json": "4f03feafecb3be0b069b3cb0d6b17d4f",
+ "bee_waggle_6.json": [
+ "4f03feafecb3be0b069b3cb0d6b17d4f",
+ "71311783488ee5f1122545d24c15429b"
+ ],
"bitcoin.json": "f90ff14ed1fc0c3d47d4394d25cbce93",
"brent_spot.json": "79892116ef8a0aa16e2450123655b31d",
"businv.json": "d2ab178da17b2e659a10a102a4b9f332",
diff --git a/datasets/bee_waggle_6/get_bee_waggle_6.py b/datasets/bee_waggle_6/get_bee_waggle_6.py
index b68f378..6f80042 100644
--- a/datasets/bee_waggle_6/get_bee_waggle_6.py
+++ b/datasets/bee_waggle_6/get_bee_waggle_6.py
@@ -26,6 +26,8 @@ ZIP_URL = "https://web.archive.org/web/20191114130815if_/https://www.cc.gatech.e
MD5_ZIP = "039843dc15c72fd5450eeb11c6e5599c"
MD5_JSON = "4f03feafecb3be0b069b3cb0d6b17d4f"
+# alternative checksum for small rounding errors
+MD5_JSON_2 = "71311783488ee5f1122545d24c15429b"
NAME_ZIP = "psslds.zip"
NAME_JSON = "bee_waggle_6.json"
@@ -48,7 +50,7 @@ def check_md5sum(filename, checksum):
return h == checksum
-def validate(checksum):
+def validate(checksum, alternative_checksum=None):
"""Decorator that validates the target file."""
def validate_decorator(func):
@@ -57,10 +59,19 @@ def validate(checksum):
target = kwargs.get("target_path", None)
if os.path.exists(target) and check_md5sum(target, checksum):
return
+ if (
+ os.path.exists(target)
+ and alternative_checksum
+ and check_md5sum(target, alternative_checksum)
+ ):
+ return
out = func(*args, **kwargs)
if not os.path.exists(target):
raise FileNotFoundError("Target file expected at: %s" % target)
- if not check_md5sum(target, checksum):
+ if not (check_md5sum(target, checksum) or (
+ alternative_checksum
+ and check_md5sum(target, alternative_checksum)
+ )):
raise ValidationError(target)
return out
@@ -74,7 +85,7 @@ def download_zip(target_path=None):
urlretrieve(ZIP_URL, target_path)
-@validate(MD5_JSON)
+@validate(MD5_JSON, MD5_JSON_2)
def write_json(zip_path, target_path=None):
with zipfile.ZipFile(zip_path) as thezip:
with thezip.open("psslds/zips/data/sequence6/btf/ximage.btf") as fp:
@@ -138,7 +149,7 @@ def parse_args():
choices=["collect", "clean"],
help="Action to perform",
default="collect",
- nargs='?'
+ nargs="?",
)
return parser.parse_args()
diff --git a/utils/check_checksums.py b/utils/check_checksums.py
index 01dcd99..0d66400 100644
--- a/utils/check_checksums.py
+++ b/utils/check_checksums.py
@@ -73,8 +73,14 @@ def main():
if not fname in data_files:
raise FileNotFoundError("Missing data file: %s" % fname)
md5 = md5sum(data_files[fname])
- if not md5 == checksums[fname]:
- raise ValueError(
+ if isinstance(checksums[fname], list):
+ if not md5 in checksums[fname]:
+ raise ValueError(
+ "Checksums don't match for file: %s" % (data_files[fname])
+ )
+ else:
+ if not md5 == checksums[fname]:
+ raise ValueError(
"Checksums don't match for file: %s" % (data_files[fname])
)