about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2021-01-08 00:03:41 +0000
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2021-01-08 00:03:41 +0000
commit: d3c89a318871c49bd19c55424c8f90e7ed2be700 (patch)
tree: defeb9f49f1d28bff5b81fca010f28f14eb3c163
parent: Merge branch 'bugfix/merge_config' (diff)
download: paper2remarkable-d3c89a318871c49bd19c55424c8f90e7ed2be700.tar.gz
          paper2remarkable-d3c89a318871c49bd19c55424c8f90e7ed2be700.zip
Shorten utility function
-rw-r--r-- paper2remarkable/utils.py 58
1 file changed, 20 insertions, 38 deletions
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index 09082a5..2432916 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -89,50 +89,32 @@ def get_page_with_retry(url, tries=5, cookiejar=None, return_text=False):
def get_content_type_with_retry(url, tries=5, cookiejar=None):
- count = 0
if cookiejar is None:
jar = requests.cookies.RequestsCookieJar()
else:
jar = cookiejar
- while count < tries:
- count += 1
- error = False
- try:
- res = requests.head(
- url, headers=HEADERS, cookies=jar, allow_redirects=True
- )
- except requests.exceptions.ConnectionError:
- error = True
- if error or not res.ok:
- logger.warning(
- "(%i/%i) Error getting headers for %s. Retrying in 5 seconds."
- % (count, tries, url)
- )
- time.sleep(5)
- continue
- return res.headers.get("Content-Type", None)
+
+ msg = "(%i/%i) Error getting content type for %s. Retrying in 5 seconds."
# In rare cases, a HEAD request fails but a GET request does work. So here
- # we try to get the content type from a GET request.
- count = 0
- jar = {} if cookiejar is None else cookiejar
- while count < tries:
- count += 1
- error = False
- try:
- res = requests.get(
- url, headers=HEADERS, cookies=jar, allow_redirects=True
- )
- except requests.exceptions.ConnectionError:
- error = True
- if error or not res.ok:
- logger.warning(
- "(%i/%i) Error getting headers for %s. Retrying in 5 seconds."
- % (count, tries, url)
- )
- time.sleep(5)
- continue
- return res.headers.get("Content-Type", None)
+ # we try both
+ ops = [requests.head, requests.get]
+ kwargs = dict(headers=HEADERS, cookies=jar, allow_redirects=True)
+ for op in ops:
+ count = 0
+ while count < tries:
+ count += 1
+ error = False
+ try:
+ res = op(url, **kwargs)
+ except requests.exceptions.ConnectionError:
+ error = True
+ if error or not res.ok:
+ logger.warning(msg % (count, tries, url))
+ time.sleep(5)
+ continue
+ return res.headers.get("Content-Type", None)
+ return None
def follow_redirects(url):