aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2020-04-04 17:29:23 +0100
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2020-04-04 17:29:23 +0100
commit9e262c7e15f13920524deda1e9354069a8dbf5f1 (patch)
treed0509e42636103a18eab7c790ac964c4548e9f07
parentMerge branch 'bugfix/springer_redirect' (diff)
parentFix provider selection for redirected urls (diff)
downloadpaper2remarkable-9e262c7e15f13920524deda1e9354069a8dbf5f1.tar.gz
paper2remarkable-9e262c7e15f13920524deda1e9354069a8dbf5f1.zip
Merge branch 'bugfix/choose_provider'
-rw-r--r--paper2remarkable/exceptions.py22
-rw-r--r--paper2remarkable/ui.py71
-rw-r--r--tests/test_ui.py203
3 files changed, 278 insertions, 18 deletions
diff --git a/paper2remarkable/exceptions.py b/paper2remarkable/exceptions.py
index 5ea9a78..94e9470 100644
--- a/paper2remarkable/exceptions.py
+++ b/paper2remarkable/exceptions.py
@@ -116,3 +116,25 @@ class NoPDFToolError(Error):
)
msg += GH_MSG
return msg
+
+class UnidentifiedSourceError(Error):
+ """Exception raised when the input is neither a local file nor a url """
+
+ def __str__(self):
+ msg = (
+ "ERROR: Couldn't figure out what source you mean. If it's a "
+ "local file, please make sure it exists."
+ )
+ msg += GH_MSG
+ return msg
+
+class InvalidURLError(Error):
+ """Exception raised when no provider can handle a url source """
+
+ def __str__(self):
+ msg = (
+ "ERROR: Input URL is not valid, no provider can handle "
+ "this source."
+ )
+ msg += GH_MSG
+ return msg
diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py
index bfb3647..bf57552 100644
--- a/paper2remarkable/ui.py
+++ b/paper2remarkable/ui.py
@@ -13,6 +13,7 @@ import sys
from . import __version__, GITHUB_URL
+from .exceptions import UnidentifiedSourceError, InvalidURLError
from .providers import providers, LocalFile
from .utils import follow_redirects, is_url
@@ -118,9 +119,58 @@ def exception(msg):
raise SystemExit(1)
+def choose_provider(cli_input):
+ """Choose the provider to use for the given source
+
+ This function first tries to check if the input is a local file, by
+ checking if the path exists. Next, it checks if the input is a "valid" url
+ using a regex test. If it is, the registered provider classes are checked
+ to see which provider can handle this url.
+
+ Returns
+ -------
+ provider : class
+ The class of the provider than can handle the source. A subclass of the
+ Provider abc.
+
+ new_input : str
+ The updated input to the provider. This only has an effect for the url
+ providers, where this will be the url after following all redirects.
+
+ cookiejar : dict or requests.RequestsCookieJar
+ Cookies picked up when following redirects. These are needed for some
+ providers to ensure later requests have the right cookie settings.
+
+ Raises
+ ------
+ UnidentifiedSourceError
+ Raised when the input is neither an existing local file nor a valid url
+
+ InvalidURLError
+ Raised when the input *is* a valid url, but no provider can handle it.
+
+ """
+ provider = cookiejar = None
+ if LocalFile.validate(cli_input):
+ # input is a local file
+ new_input = cli_input
+ provider = LocalFile
+ elif is_url(cli_input):
+ # input is a url
+ new_input, cookiejar = follow_redirects(cli_input)
+ provider = next((p for p in providers if p.validate(new_input)), None)
+ else:
+ # not a proper URL or non-existent file
+ raise UnidentifiedSourceError
+
+ if provider is None:
+ raise InvalidURLError
+
+ return provider, new_input, cookiejar
+
+
def main():
args = parse_args()
- cookiejar = None
if args.center and args.right:
exception("Can't center and right align at the same time!")
@@ -131,22 +181,7 @@ def main():
if args.right and args.no_crop:
exception("Can't right align and not crop at the same time!")
- if LocalFile.validate(args.input):
- # input is a local file
- provider = LocalFile
- elif is_url(args.input):
- # input is a url
- url, cookiejar = follow_redirects(args.input)
- provider = next((p for p in providers if p.validate(url)), None)
- else:
- # not a proper URL or non-existent file
- exception(
- "Couldn't figure out what source you mean. If it's a "
- "local file, make sure it exists."
- )
-
- if provider is None:
- exception("Input not valid, no provider can handle this source.")
+ provider, new_input, cookiejar = choose_provider(args.input)
prov = provider(
verbose=args.verbose,
@@ -165,4 +200,4 @@ def main():
cookiejar=cookiejar,
)
- prov.run(args.input, filename=args.filename)
+ prov.run(new_input, filename=args.filename)
diff --git a/tests/test_ui.py b/tests/test_ui.py
new file mode 100644
index 0000000..fc362a0
--- /dev/null
+++ b/tests/test_ui.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Unit tests for command line interface
+
+This file is part of paper2remarkable.
+
+"""
+
+import os
+import shutil
+import tempfile
+import unittest
+
+from paper2remarkable.exceptions import (
+ InvalidURLError,
+ UnidentifiedSourceError,
+)
+from paper2remarkable.providers import (
+ ACM,
+ Arxiv,
+ CiteSeerX,
+ HTML,
+ JMLR,
+ LocalFile,
+ NBER,
+ NeurIPS,
+ OpenReview,
+ PMLR,
+ PdfUrl,
+ PubMed,
+ Springer,
+)
+from paper2remarkable.ui import choose_provider
+
+
+class TestUI(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.original_dir = os.getcwd()
+
+ def setUp(self):
+ self.test_dir = tempfile.mkdtemp()
+ os.chdir(self.test_dir)
+
+ def tearDown(self):
+ os.chdir(self.original_dir)
+ shutil.rmtree(self.test_dir)
+
+ def test_choose_provider_1(self):
+ tests = [
+ (
+ Arxiv,
+ "https://arxiv.org/abs/1811.11242v1",
+ "https://arxiv.org/abs/1811.11242v1",
+ ),
+ (
+ Arxiv,
+ "http://arxiv.org/abs/arXiv:1908.03213",
+ "https://arxiv.org/abs/1908.03213",
+ ),
+ (
+ Arxiv,
+ "https://arxiv.org/abs/math/0309285",
+ "https://arxiv.org/abs/math/0309285",
+ ),
+ (
+ Arxiv,
+ "https://arxiv.org/pdf/physics/0605197v1.pdf",
+ "https://arxiv.org/pdf/physics/0605197v1.pdf",
+ ),
+ (
+ PubMed,
+ "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/",
+ "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/",
+ ),
+ (
+ ACM,
+ "https://dl.acm.org/citation.cfm?id=3025626",
+ "https://dl.acm.org/doi/10.1145/3025453.3025626",
+ ),
+ (
+ ACM,
+ "https://dl.acm.org/doi/pdf/10.1145/3219819.3220081?download=true",
+ "https://dl.acm.org/doi/pdf/10.1145/3219819.3220081?download=true&",
+ ),
+ (
+ OpenReview,
+ "http://openreview.net/forum?id=S1x4ghC9tQ",
+ "https://openreview.net/forum?id=S1x4ghC9tQ",
+ ),
+ (
+ Springer,
+ "https://link.springer.com/article/10.1007/s10618-019-00631-5",
+ "https://link.springer.com/article/10.1007/s10618-019-00631-5",
+ ),
+ (
+ PdfUrl,
+ "https://gertjanvandenburg.com/papers/VandenBurg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2019.pdf",
+ "https://gertjanvandenburg.com/papers/VandenBurg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2019.pdf",
+ ),
+ (
+ JMLR,
+ "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf",
+ "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf",
+ ),
+ (
+ JMLR,
+ "http://www.jmlr.org/papers/v10/xu09a.html",
+ "http://www.jmlr.org/papers/v10/xu09a.html",
+ ),
+ (
+ PMLR,
+ "http://proceedings.mlr.press/v97/behrmann19a.html",
+ "http://proceedings.mlr.press/v97/behrmann19a.html",
+ ),
+ (
+ PMLR,
+ "http://proceedings.mlr.press/v15/maaten11b/maaten11b.pdf",
+ "http://proceedings.mlr.press/v15/maaten11b/maaten11b.pdf",
+ ),
+ (
+ PMLR,
+ "http://proceedings.mlr.press/v48/melnyk16.pdf",
+ "http://proceedings.mlr.press/v48/melnyk16.pdf",
+ ),
+ (
+ PMLR,
+ "http://proceedings.mlr.press/v48/zhangf16.html",
+ "http://proceedings.mlr.press/v48/zhangf16.html",
+ ),
+ (
+ NBER,
+ "https://www.nber.org/papers/w26752",
+ "https://www.nber.org/papers/w26752",
+ ),
+ (
+ NBER,
+ "https://www.nber.org/papers/w19152.pdf",
+ "https://www.nber.org/papers/w19152.pdf",
+ ),
+ (
+ NeurIPS,
+ "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf",
+ "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf",
+ ),
+ (
+ NeurIPS,
+ "https://papers.nips.cc/paper/7796-middle-out-decoding",
+ "https://papers.nips.cc/paper/7796-middle-out-decoding",
+ ),
+ (
+ CiteSeerX,
+ "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548",
+ "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548",
+ ),
+ (
+ CiteSeerX,
+ "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf",
+ "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf",
+ ),
+ (
+ HTML,
+ "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines",
+ "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines"
+ ),
+ (
+ HTML,
+ "https://www.nature.com/articles/d41586-020-00176-4",
+ "https://www.nature.com/articles/d41586-020-00176-4"
+ ),
+ ]
+ for exp_prov, url, exp_url in tests:
+ prov, new_url, jar = choose_provider(url)
+ with self.subTest(url=url):
+ self.assertEqual(exp_url, new_url)
+ self.assertEqual(prov, exp_prov)
+
+ def test_choose_provider_2(self):
+ local_filename = "test.pdf"
+ with open(local_filename, "w") as fp:
+ fp.write(
+ "%PDF-1.1\n%¥±ë\n\n1 0 obj\n << /Type /Catalog\n /Pages 2 0 R\n >>\nendobj\n\n2 0 obj\n << /Type /Pages\n /Kids [3 0 R]\n /Count 1\n /MediaBox [0 0 300 144]\n >>\nendobj\n\n3 0 obj\n << /Type /Page\n /Parent 2 0 R\n /Resources\n << /Font\n << /F1\n << /Type /Font\n /Subtype /Type1\n /BaseFont /Times-Roman\n >>\n >>\n >>\n /Contents 4 0 R\n >>\nendobj\n\n4 0 obj\n << /Length 55 >>\nstream\n BT\n /F1 18 Tf\n 0 0 Td\n (Hello World) Tj\n ET\nendstream\nendobj\n\nxref\n0 5\n0000000000 65535 f \n0000000018 00000 n \n0000000077 00000 n \n0000000178 00000 n \n0000000457 00000 n \ntrailer\n << /Root 1 0 R\n /Size 5\n >>\nstartxref\n565\n%%EOF"
+ )
+
+ prov, new_input, jar = choose_provider(local_filename)
+ self.assertEqual(prov, LocalFile)
+ self.assertEqual(new_input, local_filename)
+ self.assertIsNone(jar)
+
+ def test_choose_provider_3(self):
+ local_filename = "/tmp/abcdef.pdf"
+ with self.assertRaises(UnidentifiedSourceError):
+ choose_provider(local_filename)
+
+ def test_choose_provider_4(self):
+ url = "https://raw.githubusercontent.com/GjjvdBurg/paper2remarkable/master/README.md"
+ with self.assertRaises(InvalidURLError):
+ choose_provider(url)
+
+
+if __name__ == "__main__":
+ unittest.main()