aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md57
-rwxr-xr-xarxiv2remarkable.py63
2 files changed, 89 insertions, 31 deletions
diff --git a/README.md b/README.md
index 4687012..00c45c4 100644
--- a/README.md
+++ b/README.md
@@ -1,39 +1,50 @@
# arxiv2remarkable.py
-This script takes an URL to an arXiv paper, and:
+This script makes it as easy to get a PDF on your reMarkable from any of the
+following sources:
-1. Downloads it
+- an arXiv url (either ``arxiv.org/abs/...`` or ``arxiv.org/pdf/...``)
+- a url to a PDF file
+- a local file.
+
+The script takes the source and:
+
+1. Downloads it if necessary
2. Removes the arXiv timestamp
3. Crops the pdf to remove unnecessary borders
-4. Shrinks the pdf to reduce filesize
-5. Generates a nice filename based on author/title/year of the paper
-6. Uploads it to the reMarkable
+4. Shrinks the pdf file to reduce the filesize
+5. Generates a nice filename based on author/title/year of the paper (arXiv
+ only)
+6. Uploads it to your reMarkable using ``rMapi``.
Optionally, you can download a paper but not have it uploaded to the
-reMarkable (using the ``-n`` switch), or provide an existing pdf file (to use
-only steps 2 - 6).
+reMarkable using the ``-n`` switch. Also, the ``--filename`` parameter to the
+script can be used to provide an explicit filename for the file on the reMarkable.
Here's the full help of the script:
```bash
-[arxiv2remarkable] $ python arxiv2remarkable.py -h
-usage: arxiv2remarkable.py [-h] [-v] [-n] [-d] [--rmapi RMAPI]
- [--pdfcrop PDFCROP] [--pdftk PDFTK] [--gs GS]
+usage: arxiv2remarkable.py [-h] [-v] [-n] [-d] [--filename FILENAME]
+ [--rmapi RMAPI] [--pdfcrop PDFCROP] [--pdftk PDFTK]
+ [--gs GS]
input
positional arguments:
- input url to an arxiv paper or existing pdf file
+ input url to an arxiv paper, url to pdf, or existing pdf file
optional arguments:
- -h, --help show this help message and exit
- -v, --verbose be verbose (default: False)
- -n, --no-upload don't upload to the reMarkable, save the output in
- current working dir (default: False)
- -d, --debug debug mode, doesn't upload to reMarkable (default: False)
- --rmapi RMAPI path to rmapi executable (default: rmapi)
- --pdfcrop PDFCROP path to pdfcrop executable (default: pdfcrop)
- --pdftk PDFTK path to pdftk executable (default: pdftk)
- --gs GS path to gs executable (default: gs)
+ -h, --help show this help message and exit
+ -v, --verbose be verbose (default: False)
+ -n, --no-upload don't upload to the reMarkable, save the output in
+ current working dir (default: False)
+ -d, --debug debug mode, doesn't upload to reMarkable (default:
+ False)
+ --filename FILENAME Filename to use for the file on reMarkable (default:
+ None)
+ --rmapi RMAPI path to rmapi executable (default: rmapi)
+ --pdfcrop PDFCROP path to pdfcrop executable (default: pdfcrop)
+ --pdftk PDFTK path to pdftk executable (default: pdftk)
+ --gs GS path to gs executable (default: gs)
```
And here's an example with verbose mode enabled that shows everything the
@@ -70,7 +81,13 @@ The script also needs the following Python packages:
- [BeautifulSoup4](https://pypi.org/project/beautifulsoup4/)
- [requests](https://pypi.org/project/requests/)
- [loguru](https://pypi.org/project/loguru/)
+- [PyPDF2](https://github.com/mstamy2/PyPDF2)
+
+You can use this line:
+```bash
+pip install --user bs4 requests loguru PyPDF2
+```
# Notes
diff --git a/arxiv2remarkable.py b/arxiv2remarkable.py
index 8c7b0ea..5ba357c 100755
--- a/arxiv2remarkable.py
+++ b/arxiv2remarkable.py
@@ -18,6 +18,7 @@ License: MIT
"""
+import PyPDF2
import argparse
import bs4
import os
@@ -28,6 +29,7 @@ import subprocess
import sys
import tempfile
import time
+import urllib.parse
from loguru import logger
@@ -44,7 +46,7 @@ def exception(msg):
raise SystemExit(1)
-def validate_url(url):
+def arxiv_url(url):
"""Check if the url is to an arXiv page.
->>> validate_url("https://arxiv.org/abs/1811.11242")
+>>> arxiv_url("https://arxiv.org/abs/1811.11242")
@@ -68,7 +70,23 @@ def validate_url(url):
return not m is None
-def get_urls(url):
+def valid_url(url):
+ try:
+ result = urllib.parse.urlparse(url)
+ return all([result.scheme, result.netloc, result.path])
+    except (ValueError, AttributeError):
+ return False
+
+
+def check_file_is_pdf(filename):
+ try:
+        with open(filename, "rb") as fh: PyPDF2.PdfFileReader(fh)
+ return True
+ except PyPDF2.utils.PdfReadError:
+ return False
+
+
+def get_arxiv_urls(url):
"""Get the pdf and abs url from any given url """
if re.match("https?://arxiv.org/abs/\d{4}\.\d{5}(v\d+)?", url):
abs_url = url
@@ -242,6 +260,11 @@ def parse_args():
action="store_true",
)
parser.add_argument(
+ "--filename",
+ help="Filename to use for the file on reMarkable",
+ default=None,
+ )
+ parser.add_argument(
"--rmapi", help="path to rmapi executable", default="rmapi"
)
parser.add_argument(
@@ -252,7 +275,7 @@ def parse_args():
)
parser.add_argument("--gs", help="path to gs executable", default="gs")
parser.add_argument(
- "input", help="url to an arxiv paper or existing pdf file"
+ "input", help="url to an arxiv paper, url to pdf, or existing pdf file"
)
return parser.parse_args()
@@ -261,8 +284,16 @@ def parse_args():
def main():
args = parse_args()
- if not (os.path.exists(args.input) or validate_url(args.input)):
- exception("Input not a file or arXiv url.")
+ if os.path.exists(args.input):
+ mode = "local_file"
+ elif arxiv_url(args.input):
+ mode = "arxiv_url"
+ elif valid_url(args.input):
+ if args.filename is None:
+ exception("Filename must be provided with pdf url (use --filename)")
+ mode = "pdf_url"
+ else:
+ exception("Input not a valid url, arxiv url, or existing file.")
if not args.verbose:
logger.remove(0)
@@ -270,18 +301,28 @@ def main():
start_wd = os.getcwd()
with tempfile.TemporaryDirectory() as working_dir:
- if os.path.exists(args.input):
+ if mode == "local_file":
shutil.copy(args.input, working_dir)
filename = os.path.basename(args.input)
- clean_filename = filename
+ clean_filename = args.filename if args.filename else filename
os.chdir(working_dir)
- if validate_url(args.input):
- pdf_url, abs_url = get_urls(args.input)
+ if mode == "arxiv_url":
+ pdf_url, abs_url = get_arxiv_urls(args.input)
filename = "paper.pdf"
download_url(pdf_url, filename)
- paper_info = get_paper_info(abs_url)
- clean_filename = generate_filename(paper_info)
+ if args.filename:
+ clean_filename = args.filename
+ else:
+ paper_info = get_paper_info(abs_url)
+ clean_filename = generate_filename(paper_info)
+
+ if mode == "pdf_url":
+ filename = "paper.pdf"
+ download_url(args.input, filename)
+ if not check_file_is_pdf(filename):
+ exception("Input url doesn't point to valid pdf file.")
+ clean_filename = args.filename
dearxived = dearxiv(filename, pdftk_path=args.pdftk)
cropped = crop_pdf(dearxived, pdfcrop_path=args.pdfcrop)