about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- paper2remarkable/providers/_base.py 8
-rw-r--r-- paper2remarkable/ui.py 18
-rw-r--r-- paper2remarkable/utils.py 14
-rw-r--r-- tests/test_providers.py 11
4 files changed, 43 insertions, 8 deletions
diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index bdc9558..52e3b0e 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -15,7 +15,7 @@ import tempfile
from ._info import Informer
from ..pdf_ops import crop_pdf, center_pdf, blank_pdf, shrink_pdf
-from ..utils import assert_file_is_pdf, download_url, upload_to_remarkable
+from ..utils import assert_file_is_pdf, download_url, upload_to_remarkable, follow_redirects
from ..log import Logger
logger = Logger()
@@ -82,7 +82,13 @@ class Provider(metaclass=abc.ABCMeta):
download_url(pdf_url, filename)
def run(self, src, filename=None):
+ # needed with library use
+ src = follow_redirects(src)
+
+ # extract page and pdf file urls
abs_url, pdf_url = self.get_abs_pdf_urls(src)
+
+ # generate nice filename if needed
clean_filename = filename or self.informer.get_filename(abs_url)
tmp_filename = "paper.pdf"
diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py
index 5323996..2a30e7f 100644
--- a/paper2remarkable/ui.py
+++ b/paper2remarkable/ui.py
@@ -12,8 +12,8 @@ import argparse
from . import __version__
-from .providers import providers
-from .utils import exception
+from .providers import providers, LocalFile
+from .utils import exception, follow_redirects
def parse_args():
@@ -78,8 +78,7 @@ def parse_args():
default="rmapi",
)
parser.add_argument(
- "input",
- help="URL to a paper or the path of a local PDF file",
+ "input", help="URL to a paper or the path of a local PDF file"
)
return parser.parse_args()
@@ -87,7 +86,16 @@ def parse_args():
def main():
args = parse_args()
- provider = next((p for p in providers if p.validate(args.input)), None)
+ if LocalFile.validate(args.input):
+ # input is a local file
+ provider = LocalFile
+ else:
+ # input is a url
+ url = args.input
+ # follow all redirects of the url
+ url = follow_redirects(url)
+ provider = next((p for p in providers if p.validate(url)), None)
+
if provider is None:
exception("Input not valid, no provider can handle this source.")
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index a313ffe..1b6718e 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -95,6 +95,20 @@ def get_page_with_retry(url, tries=5):
return res.content
+def follow_redirects(url):
+ """Follow redirects from the URL (at most 10)"""
+ it = 0
+ while it < 10:
+ req = requests.head(url, allow_redirects=False)
+ if req.status_code == 200:
+ break
+ if not "Location" in req.headers:
+ break
+ url = req.headers["Location"]
+ it += 1
+ return url
+
+
def upload_to_remarkable(filepath, remarkable_dir="/", rmapi_path="rmapi"):
logger.info("Starting upload to reMarkable")
diff --git a/tests/test_providers.py b/tests/test_providers.py
index bb793b3..143fc78 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -35,7 +35,7 @@ def md5sum(filename):
return hasher.hexdigest()
-class Tests(unittest.TestCase):
+class TestProviders(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.original_dir = os.getcwd()
@@ -48,13 +48,20 @@ class Tests(unittest.TestCase):
os.chdir(self.original_dir)
shutil.rmtree(self.test_dir)
- def test_arxiv(self):
+ def test_arxiv_1(self):
prov = Arxiv(upload=False, verbose=VERBOSE)
url = "https://arxiv.org/abs/1811.11242v1"
exp_filename = "Burg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2018.pdf"
filename = prov.run(url)
self.assertEqual(exp_filename, os.path.basename(filename))
+ def test_arxiv_2(self):
+ prov = Arxiv(upload=False, verbose=VERBOSE)
+ url = "http://arxiv.org/abs/arXiv:1908.03213"
+ exp_filename = "Ecker_et_al_-_Gravitational_Waves_From_Holographic_Neutron_Star_Mergers_2019.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp_filename, os.path.basename(filename))
+
def test_pmc(self):
prov = PubMed(upload=False, verbose=VERBOSE)
url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/"