about | summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com>, 2020-10-27 20:59:17 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com>, 2020-10-27 20:59:17 +0100
commit: 282de79f44e95b539c1788de8a71177b5a023557 (patch)
tree: 6a6a26e210dc32d4d6a5ed4d8bc0b581af9bbc0e /tests
parent: [WIP] Provider for Taylor and Francis Online (diff)
parent: Bump version and update changelog (diff)
download: paper2remarkable-282de79f44e95b539c1788de8a71177b5a023557.tar.gz
download: paper2remarkable-282de79f44e95b539c1788de8a71177b5a023557.zip
Merge branch 'master' into feature/tandfonline
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_arxiv.py 122
-rw-r--r-- tests/test_providers.py 185
-rw-r--r-- tests/test_ui.py 214
-rw-r--r-- tests/test_utils.py 21
4 files changed, 523 insertions, 19 deletions
diff --git a/tests/test_arxiv.py b/tests/test_arxiv.py
new file mode 100644
index 0000000..2cb84cf
--- /dev/null
+++ b/tests/test_arxiv.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Unit tests for arXiv provider
+
+This file is part of paper2remarkable.
+
+"""
+
+import os
+import re
+import shutil
+import tempfile
+import unittest
+
+from paper2remarkable.providers.arxiv import (
+ DEARXIV_TEXT_REGEX,
+ DEARXIV_URI_REGEX,
+ Arxiv,
+)
+
+
+class TestArxiv(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.original_dir = os.getcwd()
+
+ def setUp(self):
+ self.test_dir = tempfile.mkdtemp()
+ os.chdir(self.test_dir)
+
+ def tearDown(self):
+ os.chdir(self.original_dir)
+ shutil.rmtree(self.test_dir)
+
+ def test_text_regex_1(self):
+ key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019"
+ m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_text_regex_2(self):
+ key = b"arXiv:1908.03213v1 [astro-ph.HE] 8 Aug 2019"
+ m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_text_regex_3(self):
+ key = b"arXiv:physics/0605197v1 [physics.data-an] 23 May 2006"
+ m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_text_regex_4(self):
+ key = b"arXiv:math/0309285v2 [math.NA] 9 Apr 2004"
+ m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_uri_regex_1(self):
+ key = b"http://arxiv.org/abs/physics/0605197v1"
+ m = re.fullmatch(DEARXIV_URI_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_uri_regex_2(self):
+ key = b"https://arxiv.org/abs/1101.0028v3"
+ m = re.fullmatch(DEARXIV_URI_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_stamp_removed_1(self):
+ url = "https://arxiv.org/pdf/1703.06103.pdf"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(b"arXiv:1703.06103v4 [stat.ML] 26 Oct 2017", data)
+
+ def test_stamp_removed_2(self):
+ url = "https://arxiv.org/abs/2003.06222"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(b"arXiv:2003.06222v1 [stat.ML] 13 Mar 2020", data)
+
+ def test_stamp_removed_3(self):
+ url = "https://arxiv.org/abs/physics/0605197v1"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(
+ b"arXiv:physics/0605197v1 [physics.data-an] 23 May 2006", data
+ )
+ self.assertNotIn(
+ b"/URI (http://arxiv.org/abs/physics/0605197v1)", data
+ )
+
+ def test_stamp_removed_4(self):
+ url = "https://arxiv.org/abs/math/0309285v2"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(b"arXiv:math/0309285v2 [math.NA] 9 Apr 2004", data)
+ self.assertNotIn(b"/URI (http://arXiv.org/abs/math/0309285v2)", data)
+
+ def test_stamp_removed_5(self):
+ url = "https://arxiv.org/abs/astro-ph/9207001v1"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(
+ b"/URI (http://arxiv.org/abs/astro-ph/9207001v1)", data
+ )
+ self.assertNotIn(b"arXiv:astro-ph/9207001v1 13 Jul 1992", data)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 3204768..4ee6773 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -7,7 +7,7 @@ __author__ = "G.J.J. van den Burg"
import hashlib
import os
-import re
+import pdfplumber
import shutil
import tempfile
import unittest
@@ -15,17 +15,22 @@ import unittest
from paper2remarkable.providers import (
ACM,
Arxiv,
+ CVF,
CiteSeerX,
+ HTML,
+ JMLR,
LocalFile,
+ NBER,
NeurIPS,
OpenReview,
PMLR,
PdfUrl,
PubMed,
+ SagePub,
+ SemanticScholar,
Springer,
TandFOnline,
)
-from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX
VERBOSE = False
@@ -41,18 +46,6 @@ def md5sum(filename):
return hasher.hexdigest()
-class TestArxiv(unittest.TestCase):
- def test_text_regex_1(self):
- key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019"
- m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
- self.assertIsNotNone(m)
-
- def test_text_regex_2(self):
- key = b"arXiv:1908.03213v1 [astro-ph.HE] 8 Aug 2019"
- m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
- self.assertIsNotNone(m)
-
-
class TestProviders(unittest.TestCase):
@classmethod
def setUpClass(cls):
@@ -67,7 +60,8 @@ class TestProviders(unittest.TestCase):
shutil.rmtree(self.test_dir)
def test_arxiv_1(self):
- prov = Arxiv(upload=False, verbose=VERBOSE)
+ # check with qpdf
+ prov = Arxiv(upload=False, verbose=VERBOSE, pdftk_path=None)
url = "https://arxiv.org/abs/1811.11242v1"
exp_filename = "Burg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2018.pdf"
filename = prov.run(url)
@@ -96,6 +90,20 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual(exp_filename, os.path.basename(filename))
+ def test_arxiv_5(self):
+ prov = Arxiv(upload=False, verbose=VERBOSE, qpdf_path=None)
+ url = "https://arxiv.org/abs/2002.11523"
+ exp_filename = "Ponomarev_Oseledets_Cichocki_-_Using_Reinforcement_Learning_in_the_Algorithmic_Trading_Problem_2020.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp_filename, os.path.basename(filename))
+
+ def test_arxiv_6(self):
+ prov = Arxiv(upload=False, verbose=VERBOSE)
+ url = "https://arxiv.org/pdf/1701.05517.pdf?source=post_page---------------------------"
+ exp_filename = "Salimans_et_al_-_PixelCNN_Improving_the_PixelCNN_With_Discretized_Logistic_Mixture_Likelihood_and_Other_Modifications_2017.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp_filename, os.path.basename(filename))
+
def test_pmc(self):
prov = PubMed(upload=False, verbose=VERBOSE)
url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/"
@@ -128,13 +136,20 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual(exp_filename, os.path.basename(filename))
- def test_springer(self):
+ def test_springer_1(self):
prov = Springer(upload=False, verbose=VERBOSE)
url = "https://link.springer.com/article/10.1007/s10618-019-00631-5"
exp_filename = "Mauw_Ramirez-Cruz_Trujillo-Rasua_-_Robust_Active_Attacks_on_Social_Graphs_2019.pdf"
filename = prov.run(url)
self.assertEqual(exp_filename, os.path.basename(filename))
+ def test_springer_2(self):
+ prov = Springer(upload=False, verbose=VERBOSE)
+ url = "https://link.springer.com/content/pdf/10.1007%2F11681878_14.pdf"
+ exp_filename = "Dwork_et_al_-_Calibrating_Noise_to_Sensitivity_in_Private_Data_Analysis_2006.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp_filename, os.path.basename(filename))
+
def test_local(self):
local_filename = "test.pdf"
with open(local_filename, "w") as fp:
@@ -145,11 +160,31 @@ class TestProviders(unittest.TestCase):
filename = prov.run(local_filename)
self.assertEqual("test_.pdf", os.path.basename(filename))
- def test_pdfurl(self):
+ def test_pdfurl_1(self):
+ prov = PdfUrl(upload=False, verbose=VERBOSE)
+ url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf"
+ filename = prov.run(url)
+ self.assertEqual("14-526.pdf", os.path.basename(filename))
+
+ def test_pdfurl_2(self):
prov = PdfUrl(upload=False, verbose=VERBOSE)
+ url = "https://www.manuelrigger.at/preprints/NoREC.pdf"
+ filename = prov.run(url)
+ self.assertEqual("NoREC.pdf", os.path.basename(filename))
+
+ def test_jmlr_1(self):
+ prov = JMLR(upload=False, verbose=VERBOSE)
url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf"
- filename = prov.run(url, filename="test.pdf")
- self.assertEqual("test.pdf", os.path.basename(filename))
+ exp = "Burg_Groenen_-_GenSVM_a_Generalized_Multiclass_Support_Vector_Machine_2016.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_jmlr_2(self):
+ prov = JMLR(upload=False, verbose=VERBOSE)
+ url = "http://www.jmlr.org/papers/v10/xu09a.html"
+ exp = "Xu_Zhang_-_Refinement_of_Reproducing_Kernels_2009.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
def test_pmlr_1(self):
prov = PMLR(upload=False, verbose=VERBOSE)
@@ -179,6 +214,20 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual(exp, os.path.basename(filename))
+ def test_nber_1(self):
+ prov = NBER(upload=False, verbose=VERBOSE)
+ url = "https://www.nber.org/papers/w26752"
+ exp = "Bhattacharya_Packalen_-_Stagnation_and_Scientific_Incentives_2020.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_nber_2(self):
+ prov = NBER(upload=False, verbose=VERBOSE)
+ url = "https://www.nber.org/papers/w19152.pdf"
+ exp = "Herbst_Schorfheide_-_Sequential_Monte_Carlo_Sampling_for_DSGE_Models_2013.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
def test_neurips_1(self):
prov = NeurIPS(upload=False, verbose=VERBOSE)
url = "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf"
@@ -193,6 +242,20 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual(exp, os.path.basename(filename))
+ def test_neurips_3(self):
+ prov = NeurIPS(upload=False, verbose=VERBOSE)
+ url = "http://papers.neurips.cc/paper/5433-combinatorial-pure-exploration-of-multi-armed-bandits"
+ exp = "Chen_et_al_-_Combinatorial_Pure_Exploration_of_Multi-Armed_Bandits_2014.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_neurips_4(self):
+ prov = NeurIPS(upload=False, verbose=VERBOSE)
+ url = "http://papers.neurips.cc/paper/7368-on-the-dimensionality-of-word-embedding.pdf"
+ exp = "Yin_Shen_-_On_the_Dimensionality_of_Word_Embedding_2018.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
def test_citeseerx_1(self):
prov = CiteSeerX(upload=False, verbose=VERBOSE)
url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548"
@@ -218,6 +281,90 @@ class TestProviders(unittest.TestCase):
prov = TandFOnline(upload=False, verbose=VERBOSE)
url = "https://www.tandfonline.com/doi/pdf/10.1080/03610918.2012.625790?scroll=top&needAccess=true"
exp = "Huskova_Marusiakova_-_M-Procedures_for_Detection_of_Changes_for_Dependent_Observations_2012.pdf"
+
+ def test_html_1(self):
+ prov = HTML(upload=False, verbose=VERBOSE)
+ url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines"
+ exp = "Getting_Your_Team_to_Do_More_Than_Meet_Deadlines.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_html_2(self):
+ prov = HTML(upload=False, verbose=VERBOSE)
+ url = "https://www.nature.com/articles/d41586-020-00176-4"
+ exp = "Isaac_Asimov_Centenary_of_the_Great_Explainer.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_html_3(self):
+ prov = HTML(upload=False, verbose=VERBOSE)
+ url = "https://conclave-team.github.io/conclave-site/"
+ # exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf"
+ # NOTE: Title differs between Readability.JS and readability-lxml, we
+ # assume that testing is done with Readability.JS
+ exp = "Conclave.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+ # this is a proxy test to check that all images are included
+ self.assertEqual(32, len(pdfplumber.open(filename).pages))
+
+ def test_html_4(self):
+ prov = HTML(upload=False, verbose=VERBOSE)
+ url = "https://sirupsen.com/2019/"
+ filename = prov.run(url)
+ # this is a proxy test to check that all images are included
+ self.assertEqual(4, len(pdfplumber.open(filename).pages))
+
+ def test_html_5(self):
+ prov = HTML(upload=False, verbose=VERBOSE)
+ url = "https://www.spiegel.de/panorama/london-tausende-rechtsextreme-demonstranten-wollen-statuen-schuetzen-a-2a1ed9b9-708a-40dc-a5ff-f312e97a60ca#"
+ filename = prov.run(url)
+ # this is a proxy test to check that all images are included
+ self.assertEqual(4, len(pdfplumber.open(filename).pages))
+
+ def test_semantic_scholar_1(self):
+ prov = SemanticScholar(upload=False, verbose=VERBOSE)
+ url = "https://pdfs.semanticscholar.org/1b01/dea77e9cbf049b4ee8b68dc4d43529d06299.pdf"
+ exp = "Dong_et_al_-_TableSense_Spreadsheet_Table_Detection_With_Convolutional_Neural_Networks_2019.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_semantic_scholar_2(self):
+ prov = SemanticScholar(upload=False, verbose=VERBOSE)
+ url = "https://www.semanticscholar.org/paper/Fast-Meta-Learning-for-Adaptive-Hierarchical-Design-Burg-Hero/90759dc4ab0ce8d3564044ef92a91080a4f3e55f"
+ exp = "Burg_Hero_-_Fast_Meta-Learning_for_Adaptive_Hierarchical_Classifier_Design_2017.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_sagepub_1(self):
+ prov = SagePub(upload=False, verbose=VERBOSE)
+ url = "https://journals.sagepub.com/doi/full/10.1177/0306312714535679"
+ exp = "Rekdal_-_Academic_Urban_Legends_2014.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_sagepub_2(self):
+ prov = SagePub(upload=False, verbose=VERBOSE)
+ url = "https://journals.sagepub.com/doi/pdf/10.1177/1352458517694432"
+ exp = "Kobelt_et_al_-_New_Insights_Into_the_Burden_and_Costs_of_Multiple_Sclerosis_in_Europe_2017.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_cvf_1(self):
+ prov = CVF(upload=False, verbose=VERBOSE)
+ url = "https://openaccess.thecvf.com/content_ICCV_2019/html/Muhammad_Goal-Driven_Sequential_Data_Abstraction_ICCV_2019_paper.html"
+ exp = (
+ "Muhammad_et_al_-_Goal-Driven_Sequential_Data_Abstraction_2019.pdf"
+ )
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_cvf_2(self):
+ prov = CVF(upload=False, verbose=VERBOSE)
+ url = "https://openaccess.thecvf.com/content_CVPR_2020/papers/Park_Seeing_the_World_in_a_Bag_of_Chips_CVPR_2020_paper.pdf"
+ exp = (
+ "Park_Holynski_Seitz_-_Seeing_the_World_in_a_Bag_of_Chips_2020.pdf"
+ )
filename = prov.run(url)
self.assertEqual(exp, os.path.basename(filename))
diff --git a/tests/test_ui.py b/tests/test_ui.py
new file mode 100644
index 0000000..835f594
--- /dev/null
+++ b/tests/test_ui.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Unit tests for command line interface
+
+This file is part of paper2remarkable.
+
+"""
+
+import os
+import shutil
+import tempfile
+import unittest
+
+from paper2remarkable.exceptions import (
+ InvalidURLError,
+ UnidentifiedSourceError,
+)
+from paper2remarkable.providers import (
+ ACM,
+ Arxiv,
+ CiteSeerX,
+ CVF,
+ HTML,
+ JMLR,
+ LocalFile,
+ NBER,
+ NeurIPS,
+ OpenReview,
+ PMLR,
+ PdfUrl,
+ PubMed,
+ Springer,
+)
+from paper2remarkable.ui import choose_provider
+
+
+class TestUI(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.original_dir = os.getcwd()
+
+ def setUp(self):
+ self.test_dir = tempfile.mkdtemp()
+ os.chdir(self.test_dir)
+
+ def tearDown(self):
+ os.chdir(self.original_dir)
+ shutil.rmtree(self.test_dir)
+
+ def test_choose_provider_1(self):
+ tests = [
+ (
+ Arxiv,
+ "https://arxiv.org/abs/1811.11242v1",
+ "https://arxiv.org/abs/1811.11242v1",
+ ),
+ (
+ Arxiv,
+ "http://arxiv.org/abs/arXiv:1908.03213",
+ "https://arxiv.org/abs/1908.03213",
+ ),
+ (
+ Arxiv,
+ "https://arxiv.org/abs/math/0309285",
+ "https://arxiv.org/abs/math/0309285",
+ ),
+ (
+ Arxiv,
+ "https://arxiv.org/pdf/physics/0605197v1.pdf",
+ "https://arxiv.org/pdf/physics/0605197v1.pdf",
+ ),
+ (
+ PubMed,
+ "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/",
+ "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/",
+ ),
+ (
+ ACM,
+ "https://dl.acm.org/citation.cfm?id=3025626",
+ "https://dl.acm.org/doi/10.1145/3025453.3025626",
+ ),
+ (
+ ACM,
+ "https://dl.acm.org/doi/pdf/10.1145/3219819.3220081?download=true",
+ "https://dl.acm.org/doi/pdf/10.1145/3219819.3220081?download=true&",
+ ),
+ (
+ OpenReview,
+ "http://openreview.net/forum?id=S1x4ghC9tQ",
+ "https://openreview.net/forum?id=S1x4ghC9tQ",
+ ),
+ (
+ Springer,
+ "https://link.springer.com/article/10.1007/s10618-019-00631-5",
+ "https://link.springer.com/article/10.1007/s10618-019-00631-5",
+ ),
+ (
+ PdfUrl,
+ "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf",
+ "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf",
+ ),
+ (
+ JMLR,
+ "https://www.jmlr.org/papers/volume17/14-526/14-526.pdf",
+ "https://www.jmlr.org/papers/volume17/14-526/14-526.pdf",
+ ),
+ (
+ JMLR,
+ "https://www.jmlr.org/papers/v10/xu09a.html",
+ "https://www.jmlr.org/papers/v10/xu09a.html",
+ ),
+ (
+ PMLR,
+ "http://proceedings.mlr.press/v97/behrmann19a.html",
+ "http://proceedings.mlr.press/v97/behrmann19a.html",
+ ),
+ (
+ PMLR,
+ "http://proceedings.mlr.press/v15/maaten11b/maaten11b.pdf",
+ "http://proceedings.mlr.press/v15/maaten11b/maaten11b.pdf",
+ ),
+ (
+ PMLR,
+ "http://proceedings.mlr.press/v48/melnyk16.pdf",
+ "http://proceedings.mlr.press/v48/melnyk16.pdf",
+ ),
+ (
+ PMLR,
+ "http://proceedings.mlr.press/v48/zhangf16.html",
+ "http://proceedings.mlr.press/v48/zhangf16.html",
+ ),
+ (
+ NBER,
+ "https://www.nber.org/papers/w26752",
+ "https://www.nber.org/papers/w26752",
+ ),
+ (
+ NBER,
+ "https://www.nber.org/papers/w19152.pdf",
+ "https://www.nber.org/system/files/working_papers/w19152/w19152.pdf",
+ ),
+ (
+ NeurIPS,
+ "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf",
+ "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf",
+ ),
+ (
+ NeurIPS,
+ "https://papers.nips.cc/paper/7796-middle-out-decoding",
+ "https://papers.nips.cc/paper/7796-middle-out-decoding",
+ ),
+ (
+ NeurIPS,
+ "http://papers.neurips.cc/paper/7368-on-the-dimensionality-of-word-embedding.pdf",
+ "http://papers.neurips.cc/paper/7368-on-the-dimensionality-of-word-embedding.pdf",
+ ),
+ (
+ CiteSeerX,
+ "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548",
+ "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548",
+ ),
+ (
+ CiteSeerX,
+ "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf",
+ "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf",
+ ),
+ (
+ HTML,
+ "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines",
+ "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines",
+ ),
+ (
+ HTML,
+ "https://www.nature.com/articles/d41586-020-00176-4",
+ "https://www.nature.com/articles/d41586-020-00176-4",
+ ),
+ (
+ CVF,
+ "https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html",
+ "https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html",
+ ),
+ ]
+ for exp_prov, url, exp_url in tests:
+ prov, new_url, jar = choose_provider(url)
+ with self.subTest(url=url):
+ self.assertEqual(exp_url, new_url)
+ self.assertEqual(prov, exp_prov)
+
+ def test_choose_provider_2(self):
+ local_filename = "test.pdf"
+ with open(local_filename, "w") as fp:
+ fp.write(
+ "%PDF-1.1\n%¥±ë\n\n1 0 obj\n << /Type /Catalog\n /Pages 2 0 R\n >>\nendobj\n\n2 0 obj\n << /Type /Pages\n /Kids [3 0 R]\n /Count 1\n /MediaBox [0 0 300 144]\n >>\nendobj\n\n3 0 obj\n << /Type /Page\n /Parent 2 0 R\n /Resources\n << /Font\n << /F1\n << /Type /Font\n /Subtype /Type1\n /BaseFont /Times-Roman\n >>\n >>\n >>\n /Contents 4 0 R\n >>\nendobj\n\n4 0 obj\n << /Length 55 >>\nstream\n BT\n /F1 18 Tf\n 0 0 Td\n (Hello World) Tj\n ET\nendstream\nendobj\n\nxref\n0 5\n0000000000 65535 f \n0000000018 00000 n \n0000000077 00000 n \n0000000178 00000 n \n0000000457 00000 n \ntrailer\n << /Root 1 0 R\n /Size 5\n >>\nstartxref\n565\n%%EOF"
+ )
+
+ prov, new_input, jar = choose_provider(local_filename)
+ self.assertEqual(prov, LocalFile)
+ self.assertEqual(new_input, local_filename)
+ self.assertIsNone(jar)
+
+ def test_choose_provider_3(self):
+ local_filename = "/tmp/abcdef.pdf"
+ with self.assertRaises(UnidentifiedSourceError):
+ choose_provider(local_filename)
+
+ def test_choose_provider_4(self):
+ url = "https://raw.githubusercontent.com/GjjvdBurg/paper2remarkable/master/README.md"
+ with self.assertRaises(InvalidURLError):
+ choose_provider(url)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..4c122e0
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import unittest
+
+from paper2remarkable.exceptions import NoPDFToolError
+from paper2remarkable.utils import check_pdftool
+
+
+class TestUtils(unittest.TestCase):
+ def test_check_pdftool(self):
+ # Needs a system with both pdftk and qpdf available
+ self.assertEqual(check_pdftool("pdftk", "qpdf"), "pdftk")
+ self.assertEqual(check_pdftool("pdftk_xyz", "qpdf"), "qpdf")
+ self.assertEqual(check_pdftool("pdftk", "qpdf_xyz"), "pdftk")
+ with self.assertRaises(NoPDFToolError):
+ check_pdftool("pdftk_xyz", "qpdf_xyz")
+
+
+if __name__ == "__main__":
+ unittest.main()