aboutsummaryrefslogtreecommitdiff
path: root/datasets/homeruns
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com>, 2020-08-17 20:44:42 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com>, 2020-08-17 20:44:42 +0100
commit: 9480e00023b1315609000cf256c59425e9efdccd (patch)
tree: 53259b8ed39a6e9d9b39cb894a2e45780c666f42 /datasets/homeruns
parent: Add retries to the get_measles script (diff)
download: TCPD-9480e00023b1315609000cf256c59425e9efdccd.tar.gz
download: TCPD-9480e00023b1315609000cf256c59425e9efdccd.zip
Add retries to all download scripts
Diffstat (limited to 'datasets/homeruns')
-rw-r--r-- datasets/homeruns/get_homeruns.py | 17
1 files changed, 16 insertions, 1 deletions
diff --git a/datasets/homeruns/get_homeruns.py b/datasets/homeruns/get_homeruns.py
index 6093484..dab616c 100644
--- a/datasets/homeruns/get_homeruns.py
+++ b/datasets/homeruns/get_homeruns.py
@@ -17,9 +17,12 @@ import clevercsv
import hashlib
import json
import os
+import sys
+import time
from functools import wraps
from urllib.request import urlretrieve
+from urllib.error import URLError
# Original source of the batting csv file
CSV_URL = "https://web.archive.org/web/20191128150525if_/https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/242285f8f5e8981327cf50c07355fb034833ce4a/core/Batting.csv"
@@ -70,7 +73,19 @@ def validate(checksum):
@validate(MD5_CSV)
def download_csv(target_path=None):
-    urlretrieve(CSV_URL, target_path)
+    count = 0  # number of download attempts made so far
+    while count < 5:  # after 5 failed attempts the loop ends and the function returns without raising
+        count += 1
+        try:
+            urlretrieve(CSV_URL, target_path)
+            return  # download succeeded: stop retrying
+        except URLError as err:
+            print(
+                "Error occurred (%r) when trying to download csv. Retrying in 5 seconds"
+                % err,
+                file=sys.stderr,  # must be the keyword: a positional arg is printed to stdout as a second value
+            )
+            time.sleep(5)
def read_csv(csv_file):