diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-08-17 20:44:42 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-08-17 20:44:42 +0100 |
| commit | 9480e00023b1315609000cf256c59425e9efdccd (patch) | |
| tree | 53259b8ed39a6e9d9b39cb894a2e45780c666f42 /datasets/homeruns | |
| parent | Add retries to the get_measles script (diff) | |
| download | TCPD-9480e00023b1315609000cf256c59425e9efdccd.tar.gz TCPD-9480e00023b1315609000cf256c59425e9efdccd.zip | |
Add retries to all download scripts
Diffstat (limited to 'datasets/homeruns')
| -rw-r--r-- | datasets/homeruns/get_homeruns.py | 17 |
1 files changed, 16 insertions, 1 deletions
```diff
diff --git a/datasets/homeruns/get_homeruns.py b/datasets/homeruns/get_homeruns.py
index 6093484..dab616c 100644
--- a/datasets/homeruns/get_homeruns.py
+++ b/datasets/homeruns/get_homeruns.py
@@ -17,9 +17,12 @@ import clevercsv
 import hashlib
 import json
 import os
+import sys
+import time
 
 from functools import wraps
 from urllib.request import urlretrieve
+from urllib.error import URLError
 
 # Original source of the batting csv file
 CSV_URL = "https://web.archive.org/web/20191128150525if_/https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/242285f8f5e8981327cf50c07355fb034833ce4a/core/Batting.csv"
@@ -70,7 +73,19 @@ def validate(checksum):
 
 @validate(MD5_CSV)
 def download_csv(target_path=None):
-    urlretrieve(CSV_URL, target_path)
+    count = 0
+    while count < 5:
+        count += 1
+        try:
+            urlretrieve(CSV_URL, target_path)
+            return
+        except URLError as err:
+            print(
+                "Error occurred (%r) when trying to download csv. Retrying in 5 seconds"
+                % err,
+                sys.stderr,
+            )
+            time.sleep(5)
 
 
 def read_csv(csv_file):
```
