aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-10-28 11:40:26 +0100
committer Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-10-28 11:40:26 +0100
commit 07fe6c3ee7fa8955253afbc276e483e55254f9db (patch)
tree 660b907bdc4ee2cb370985b9b75f0761a24216d4
parent Merge branch 'master' into feature/tandfonline (diff)
parent Merge branch 'bugfix/content_type' (diff)
download paper2remarkable-07fe6c3ee7fa8955253afbc276e483e55254f9db.tar.gz
paper2remarkable-07fe6c3ee7fa8955253afbc276e483e55254f9db.zip
Merge branch 'master' into feature/tandfonline
-rw-r--r-- paper2remarkable/utils.py 22
-rw-r--r-- tests/test_ui.py 5
2 files changed, 27 insertions, 0 deletions
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index 5b7ba2c..09082a5 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -112,6 +112,28 @@ def get_content_type_with_retry(url, tries=5, cookiejar=None):
continue
return res.headers.get("Content-Type", None)
+ # In rare cases, a HEAD request fails but a GET request does work. So here
+ # we try to get the content type from a GET request.
+ count = 0
+ jar = {} if cookiejar is None else cookiejar
+ while count < tries:
+ count += 1
+ error = False
+ try:
+ res = requests.get(
+ url, headers=HEADERS, cookies=jar, allow_redirects=True
+ )
+ except requests.exceptions.ConnectionError:
+ error = True
+ if error or not res.ok:
+ logger.warning(
+ "(%i/%i) Error getting headers for %s. Retrying in 5 seconds."
+ % (count, tries, url)
+ )
+ time.sleep(5)
+ continue
+ return res.headers.get("Content-Type", None)
+
def follow_redirects(url):
"""Follow redirects from the URL (at most 100)"""
diff --git a/tests/test_ui.py b/tests/test_ui.py
index 835f594..1cca0cd 100644
--- a/tests/test_ui.py
+++ b/tests/test_ui.py
@@ -101,6 +101,11 @@ class TestUI(unittest.TestCase):
"https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf",
),
(
+ PdfUrl,
+ "https://publications.aston.ac.uk/id/eprint/38334/1/5th_Artificial_Neural_Networks.pdf",
+ "https://publications.aston.ac.uk/id/eprint/38334/1/5th_Artificial_Neural_Networks.pdf",
+ ),
+ (
JMLR,
"https://www.jmlr.org/papers/volume17/14-526/14-526.pdf",
"https://www.jmlr.org/papers/volume17/14-526/14-526.pdf",