diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-03-11 15:22:45 +0000 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-03-11 15:22:45 +0000 |
| commit | 39b04d5db26daaf566eb33fc8da267c82939966c (patch) | |
| tree | 66272fa465d1d55c647e1e79ecc81770537f5bdf | |
| parent | Added support for ACM urls (diff) | |
| download | paper2remarkable-39b04d5db26daaf566eb33fc8da267c82939966c.tar.gz paper2remarkable-39b04d5db26daaf566eb33fc8da267c82939966c.zip | |
Formatting and doc improvements
| -rw-r--r-- | README.md | 2 | ||||
| -rwxr-xr-x | arxiv2remarkable.py | 12 |
2 files changed, 10 insertions, 4 deletions
@@ -11,7 +11,7 @@ following sources:
 
 The script takes the source and:
 
-1. Downloads it if necessary
+1. Downloads the pdf if necessary
 2. Removes the arXiv timestamp
 3. Crops the pdf to remove unnecessary borders
 4. Shrinks the pdf file to reduce the filesize
diff --git a/arxiv2remarkable.py b/arxiv2remarkable.py
index 9525329..42a1392 100755
--- a/arxiv2remarkable.py
+++ b/arxiv2remarkable.py
@@ -167,7 +167,9 @@ def get_page_with_retry(url):
 
     def retry(url, count):
         if count < 5:
-            logger.info("Caught error for url %s. Retrying in 5 seconds." % url)
+            logger.info(
+                "Caught error for url %s. Retrying in 5 seconds." % url
+            )
             time.sleep(5)
         else:
             exception("Failed to download url: %s" % url)
@@ -277,11 +279,13 @@ def shrink_pdf(filepath, gs_path="gs"):
 
 
 def get_paper_info_arxiv(url):
+    """ Extract the paper's authors, title, and publication year """
     logger.info("Getting paper info from arXiv")
     page = get_page_with_retry(url)
     soup = bs4.BeautifulSoup(page, "html.parser")
     authors = [
-        x["content"] for x in soup.find_all("meta", {"name": "citation_author"})
+        x["content"]
+        for x in soup.find_all("meta", {"name": "citation_author"})
     ]
     authors = [x.split(",")[0].strip() for x in authors]
     title = soup.find_all("meta", {"name": "citation_title"})[0]["content"]
@@ -430,7 +434,9 @@ def main():
         mode = "acm_url"
     elif valid_url(args.input):
         if args.filename is None:
-            exception("Filename must be provided with pdf url (use --filename)")
+            exception(
+                "Filename must be provided with pdf url (use --filename)"
+            )
         mode = "pdf_url"
     else:
         exception("Input not a valid url, arxiv url, or existing file.")
