diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2021-01-08 00:03:41 +0000 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2021-01-08 00:03:41 +0000 |
| commit | d3c89a318871c49bd19c55424c8f90e7ed2be700 (patch) | |
| tree | defeb9f49f1d28bff5b81fca010f28f14eb3c163 | |
| parent | Merge branch 'bugfix/merge_config' (diff) | |
| download | paper2remarkable-d3c89a318871c49bd19c55424c8f90e7ed2be700.tar.gz paper2remarkable-d3c89a318871c49bd19c55424c8f90e7ed2be700.zip | |
Shorten utility function
| -rw-r--r-- | paper2remarkable/utils.py | 58 |
1 file changed, 20 insertions, 38 deletions
```diff
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index 09082a5..2432916 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -89,50 +89,32 @@ def get_page_with_retry(url, tries=5, cookiejar=None, return_text=False):
 
 
 def get_content_type_with_retry(url, tries=5, cookiejar=None):
-    count = 0
     if cookiejar is None:
         jar = requests.cookies.RequestsCookieJar()
     else:
         jar = cookiejar
-    while count < tries:
-        count += 1
-        error = False
-        try:
-            res = requests.head(
-                url, headers=HEADERS, cookies=jar, allow_redirects=True
-            )
-        except requests.exceptions.ConnectionError:
-            error = True
-        if error or not res.ok:
-            logger.warning(
-                "(%i/%i) Error getting headers for %s. Retrying in 5 seconds."
-                % (count, tries, url)
-            )
-            time.sleep(5)
-            continue
-        return res.headers.get("Content-Type", None)
+
+    msg = "(%i/%i) Error getting content type for %s. Retrying in 5 seconds."
 
     # In rare cases, a HEAD request fails but a GET request does work. So here
-    # we try to get the content type from a GET request.
-    count = 0
-    jar = {} if cookiejar is None else cookiejar
-    while count < tries:
-        count += 1
-        error = False
-        try:
-            res = requests.get(
-                url, headers=HEADERS, cookies=jar, allow_redirects=True
-            )
-        except requests.exceptions.ConnectionError:
-            error = True
-        if error or not res.ok:
-            logger.warning(
-                "(%i/%i) Error getting headers for %s. Retrying in 5 seconds."
-                % (count, tries, url)
-            )
-            time.sleep(5)
-            continue
-        return res.headers.get("Content-Type", None)
+    # we try both
+    ops = [requests.head, requests.get]
+    kwargs = dict(headers=HEADERS, cookies=jar, allow_redirects=True)
+    for op in ops:
+        count = 0
+        while count < tries:
+            count += 1
+            error = False
+            try:
+                res = op(url, **kwargs)
+            except requests.exceptions.ConnectionError:
+                error = True
+            if error or not res.ok:
+                logger.warning(msg % (count, tries, url))
+                time.sleep(5)
+                continue
+            return res.headers.get("Content-Type", None)
+    return None
 
 
 def follow_redirects(url):
```
