about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2019-03-11 15:22:45 +0000
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2019-03-11 15:22:45 +0000
commit: 39b04d5db26daaf566eb33fc8da267c82939966c (patch)
tree: 66272fa465d1d55c647e1e79ecc81770537f5bdf
parent: Added support for ACM urls (diff)
download: paper2remarkable-39b04d5db26daaf566eb33fc8da267c82939966c.tar.gz
paper2remarkable-39b04d5db26daaf566eb33fc8da267c82939966c.zip
Formatting and doc improvements
-rw-r--r-- README.md 2
-rwxr-xr-x arxiv2remarkable.py 12
2 files changed, 10 insertions, 4 deletions
diff --git a/README.md b/README.md
index 77c44ce..3b7be2a 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ following sources:
The script takes the source and:
-1. Downloads it if necessary
+1. Downloads the pdf if necessary
2. Removes the arXiv timestamp
3. Crops the pdf to remove unnecessary borders
4. Shrinks the pdf file to reduce the filesize
diff --git a/arxiv2remarkable.py b/arxiv2remarkable.py
index 9525329..42a1392 100755
--- a/arxiv2remarkable.py
+++ b/arxiv2remarkable.py
@@ -167,7 +167,9 @@ def get_page_with_retry(url):
def retry(url, count):
if count < 5:
- logger.info("Caught error for url %s. Retrying in 5 seconds." % url)
+ logger.info(
+ "Caught error for url %s. Retrying in 5 seconds." % url
+ )
time.sleep(5)
else:
exception("Failed to download url: %s" % url)
@@ -277,11 +279,13 @@ def shrink_pdf(filepath, gs_path="gs"):
def get_paper_info_arxiv(url):
+ """ Extract the paper's authors, title, and publication year """
logger.info("Getting paper info from arXiv")
page = get_page_with_retry(url)
soup = bs4.BeautifulSoup(page, "html.parser")
authors = [
- x["content"] for x in soup.find_all("meta", {"name": "citation_author"})
+ x["content"]
+ for x in soup.find_all("meta", {"name": "citation_author"})
]
authors = [x.split(",")[0].strip() for x in authors]
title = soup.find_all("meta", {"name": "citation_title"})[0]["content"]
@@ -430,7 +434,9 @@ def main():
mode = "acm_url"
elif valid_url(args.input):
if args.filename is None:
- exception("Filename must be provided with pdf url (use --filename)")
+ exception(
+ "Filename must be provided with pdf url (use --filename)"
+ )
mode = "pdf_url"
else:
exception("Input not a valid url, arxiv url, or existing file.")