From 0456a377b3deef09a533b79224f4590e02372040 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 31 Jan 2020 13:17:58 +0000 Subject: [WIP] Initial commit of HTML provider --- tests/test_providers.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index e256eec..80f4662 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -16,6 +16,7 @@ from paper2remarkable.providers import ( ACM, Arxiv, CiteSeerX, + HTML, LocalFile, NeurIPS, OpenReview, @@ -206,6 +207,13 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_html_1(self): + prov = HTML(upload=False, verbose=VERBOSE) + url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines" + exp = "Getting_Your_Team_to_Do_More_Than_Meet_Deadlines.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From ce9c1333fcf761e322ad169df3969ca23d9938e7 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 4 Feb 2020 10:50:50 +0000 Subject: Add another test for the html provider --- tests/test_providers.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 80f4662..d0e3d40 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -214,6 +214,14 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_html_2(self): + prov = HTML(upload=False, verbose=VERBOSE) + url = "https://www.nature.com/articles/d41586-020-00176-4" + exp = "Isaac_Asimov_Centenary_of_the_Great_Explainer.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 0128dce1c10be8db965584aa387bf00040a3f018 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 19 Feb 2020 12:14:44 +0000 Subject: Add NBER provider --- tests/test_providers.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index d0e3d40..38f88b7 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -18,6 +18,7 @@ from paper2remarkable.providers import ( CiteSeerX, HTML, LocalFile, + NBER, NeurIPS, OpenReview, PMLR, @@ -179,6 +180,20 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_nber_1(self): + prov = NBER(upload=False, verbose=VERBOSE) + url = "https://www.nber.org/papers/w26752" + exp = "Bhattacharya_Packalen_-_Stagnation_and_Scientific_Incentives_2020.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_nber_2(self): + prov = NBER(upload=False, verbose=VERBOSE) + url = "https://www.nber.org/papers/w19152.pdf" + exp = "Herbst_Schorfheide_-_Sequential_Monte_Carlo_Sampling_for_DSGE_Models_2013.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + def test_neurips_1(self): prov = NeurIPS(upload=False, verbose=VERBOSE) url = "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf" -- cgit v1.2.3 From d43e1fbe10f18fdbac08aae414e605c8387cb19b Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 19 Feb 2020 12:38:03 +0000 Subject: Extract filename from url with pdf_url provider Fixes #25 --- tests/test_providers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index d0e3d40..82c8500 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -148,8 +148,8 @@ class TestProviders(unittest.TestCase): def test_pdfurl(self): prov = PdfUrl(upload=False, verbose=VERBOSE) url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf" - filename = prov.run(url, filename="test.pdf") - self.assertEqual("test.pdf", os.path.basename(filename)) + filename = prov.run(url) + self.assertEqual("14-526.pdf", os.path.basename(filename)) def test_pmlr_1(self): prov = PMLR(upload=False, verbose=VERBOSE) -- cgit v1.2.3 From de7fa6bf3b7c25eb2a2e07fce769b515bca92e7d Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 21 Feb 2020 16:23:01 +0000 Subject: Add provider for JMLR --- tests/test_providers.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 493a209..2bf7507 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -17,6 +17,7 @@ from paper2remarkable.providers import ( Arxiv, CiteSeerX, HTML, + JMLR, LocalFile, NBER, NeurIPS, @@ -152,6 +153,20 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual("14-526.pdf", os.path.basename(filename)) + def test_jmlr_1(self): + prov = JMLR(upload=False, verbose=VERBOSE) + url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf" + exp = "Burg_Groenen_-_GenSVM_a_Generalized_Multiclass_Support_Vector_Machine_2016.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_jmlr_2(self): + prov = JMLR(upload=False, verbose=VERBOSE) + url = "http://www.jmlr.org/papers/v10/xu09a.html" + exp = "Xu_Zhang_-_Refinement_of_Reproducing_Kernels_2009.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + def test_pmlr_1(self): prov = PMLR(upload=False, verbose=VERBOSE) url = "http://proceedings.mlr.press/v97/behrmann19a.html" @@ -237,6 +252,5 @@ class TestProviders(unittest.TestCase): self.assertEqual(exp, os.path.basename(filename)) - if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 01c294bccd10f8c430e1c959fbb5ebacea8f3c3a Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sat, 4 Apr 2020 14:53:26 +0100 Subject: Add unit test --- tests/test_providers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 2bf7507..e539949 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -69,7 +69,8 @@ class TestProviders(unittest.TestCase): shutil.rmtree(self.test_dir) def test_arxiv_1(self): - prov = Arxiv(upload=False, verbose=VERBOSE) + # check with qpdf + prov = Arxiv(upload=False, verbose=VERBOSE, pdftk_path=None) url = "https://arxiv.org/abs/1811.11242v1" exp_filename = "Burg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2018.pdf" filename = prov.run(url) -- cgit v1.2.3 From f24d1b3fdba482e69f7cfc7a6fb7ecabbcba069d Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sat, 4 Apr 2020 16:20:15 +0100 Subject: Move arXiv tests to a separate file --- tests/test_providers.py | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index e539949..e0239ed 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -7,7 +7,6 @@ __author__ = "G.J.J. van den Burg" import hashlib import os -import re import shutil import tempfile import unittest @@ -27,7 +26,6 @@ from paper2remarkable.providers import ( PubMed, Springer, ) -from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX VERBOSE = False @@ -43,18 +41,6 @@ def md5sum(filename): return hasher.hexdigest() -class TestArxiv(unittest.TestCase): - def test_text_regex_1(self): - key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019" - m = re.fullmatch(DEARXIV_TEXT_REGEX, key) - self.assertIsNotNone(m) - - def test_text_regex_2(self): - key = b"arXiv:1908.03213v1 [astro-ph.HE] 8 Aug 2019" - m = re.fullmatch(DEARXIV_TEXT_REGEX, key) - self.assertIsNotNone(m) - - class TestProviders(unittest.TestCase): @classmethod def setUpClass(cls): -- cgit v1.2.3 From fb825cab2e4681a6e6cae8cf32adeeb880a4910c Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 27 Apr 2020 17:33:13 +0100 Subject: Add unit test for this bug --- tests/test_providers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index e0239ed..0787792 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -7,6 +7,7 @@ __author__ = "G.J.J. van den Burg" import hashlib import os +import pdfplumber import shutil import tempfile import unittest @@ -238,6 +239,15 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_html_3(self): + prov = HTML(upload=False, verbose=VERBOSE) + url = "https://conclave-team.github.io/conclave-site/" + exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + # this is a proxy test to check that all images are included + self.assertEqual(32, len(pdfplumber.open(filename).pages)) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 58be7d78a105c0b0f871b339daa29cdf8f6557d4 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 28 Apr 2020 13:18:42 +0100 Subject: Add unit test for image urls with trailing slash --- tests/test_providers.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 0787792..a7f17ff 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -248,6 +248,14 @@ class TestProviders(unittest.TestCase): # this is a proxy test to check that all images are included self.assertEqual(32, len(pdfplumber.open(filename).pages)) + def test_html_4(self): + prov = HTML(upload=False, verbose=VERBOSE) + url = "https://sirupsen.com/2019/" + filename = prov.run(url) + # this is a proxy test to check that all images are included + self.assertEqual(4, len(pdfplumber.open(filename).pages)) + + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From f8c0e4b2c953d617ffea4a09a7373f697a5eb104 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sat, 2 May 2020 19:34:14 +0100 Subject: Various improvements to dearxiv functionality --- tests/test_providers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index a7f17ff..d2fdb0d 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -86,6 +86,13 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp_filename, os.path.basename(filename)) + def test_arxiv_5(self): + prov = Arxiv(upload=False, verbose=VERBOSE, qpdf_path=None) + url = "https://arxiv.org/abs/2002.11523" + exp_filename = "Ponomarev_Oseledets_Cichocki_-_Using_Reinforcement_Learning_in_the_Algorithmic_Trading_Problem_2020.pdf" + filename = prov.run(url) + self.assertEqual(exp_filename, os.path.basename(filename)) + def test_pmc(self): prov = PubMed(upload=False, verbose=VERBOSE) url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/" @@ -256,6 +263,5 @@ class TestProviders(unittest.TestCase): self.assertEqual(4, len(pdfplumber.open(filename).pages)) - if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 96de81d4158f7779132c9f7883c62bc3f15b6915 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 24 May 2020 20:45:44 +0100 Subject: Add test for pdf reading issue --- tests/test_providers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index d2fdb0d..fb75fbd 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -142,12 +142,18 @@ class TestProviders(unittest.TestCase): filename = prov.run(local_filename) self.assertEqual("test_.pdf", os.path.basename(filename)) - def test_pdfurl(self): + def test_pdfurl_1(self): prov = PdfUrl(upload=False, verbose=VERBOSE) url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf" filename = prov.run(url) self.assertEqual("14-526.pdf", os.path.basename(filename)) + def test_pdfurl_2(self): + prov = PdfUrl(upload=False, verbose=VERBOSE) + url = "https://www.manuelrigger.at/preprints/NoREC.pdf" + filename = prov.run(url) + self.assertEqual("NoREC.pdf", os.path.basename(filename)) + def test_jmlr_1(self): prov = JMLR(upload=False, verbose=VERBOSE) url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf" -- cgit v1.2.3 From ec000de563a32b4e757c9afde5a1b1b5ac80a511 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sat, 20 Jun 2020 22:42:10 +0100 Subject: Add support for using ReadabiliPy --- tests/test_providers.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index fb75fbd..ca6c1ae 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -268,6 +268,13 @@ class TestProviders(unittest.TestCase): # this is a proxy test to check that all images are included self.assertEqual(4, len(pdfplumber.open(filename).pages)) + def test_html_5(self): + prov = HTML(upload=False, verbose=VERBOSE) + url = "https://www.spiegel.de/panorama/london-tausende-rechtsextreme-demonstranten-wollen-statuen-schuetzen-a-2a1ed9b9-708a-40dc-a5ff-f312e97a60ca#" + filename = prov.run(url) + # this is a proxy test to check that all images are included + self.assertEqual(4, len(pdfplumber.open(filename).pages)) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From d5230d43d58c992212c89f3c221f72784a3a309d Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 5 Jul 2020 23:33:11 +0100 Subject: Add provider for Semantic Scholar --- tests/test_providers.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index fb75fbd..1a6f84f 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -26,6 +26,7 @@ from paper2remarkable.providers import ( PdfUrl, PubMed, Springer, + SemanticScholar ) VERBOSE = False @@ -268,6 +269,20 @@ class TestProviders(unittest.TestCase): # this is a proxy test to check that all images are included self.assertEqual(4, len(pdfplumber.open(filename).pages)) + def test_semantic_scholar_1(self): + prov = SemanticScholar(upload=False, verbose=VERBOSE) + url = "https://pdfs.semanticscholar.org/1b01/dea77e9cbf049b4ee8b68dc4d43529d06299.pdf" + exp = "Dong_et_al_-_TableSense_Spreadsheet_Table_Detection_With_Convolutional_Neural_Networks_2019.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_semantic_scholar_2(self): + prov = SemanticScholar(upload=False, verbose=VERBOSE) + url = "https://www.semanticscholar.org/paper/Fast-Meta-Learning-for-Adaptive-Hierarchical-Design-Burg-Hero/90759dc4ab0ce8d3564044ef92a91080a4f3e55f" + exp = "Burg_Hero_-_Fast_Meta-Learning_for_Adaptive_Hierarchical_Classifier_Design_2017.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 0a6a4ff3893474e33f71ef2d8a881cc360a29094 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 15 Jul 2020 16:23:12 +0100 Subject: Improve robustness of springer provider Adds support for downloading chapters --- tests/test_providers.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 1a6f84f..5c8a8e4 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -126,13 +126,20 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp_filename, os.path.basename(filename)) - def test_springer(self): + def test_springer_1(self): prov = Springer(upload=False, verbose=VERBOSE) url = "https://link.springer.com/article/10.1007/s10618-019-00631-5" exp_filename = "Mauw_Ramirez-Cruz_Trujillo-Rasua_-_Robust_Active_Attacks_on_Social_Graphs_2019.pdf" filename = prov.run(url) self.assertEqual(exp_filename, os.path.basename(filename)) + def test_springer_2(self): + prov = Springer(upload=False, verbose=VERBOSE) + url = "https://link.springer.com/content/pdf/10.1007%2F11681878_14.pdf" + exp_filename = "Dwork_et_al_-_Calibrating_Noise_to_Sensitivity_in_Private_Data_Analysis_2006.pdf" + filename = prov.run(url) + self.assertEqual(exp_filename, os.path.basename(filename)) + def test_local(self): local_filename = "test.pdf" with open(local_filename, "w") as fp: -- cgit v1.2.3 From 071b5a0f2958c34f1a189259346a8732a1110de2 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 15 Jul 2020 22:44:33 +0100 Subject: Add provider for SagePub --- tests/test_providers.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 5c8a8e4..ba1cc3a 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -25,8 +25,9 @@ from paper2remarkable.providers import ( PMLR, PdfUrl, PubMed, + SagePub, Springer, - SemanticScholar + SemanticScholar, ) VERBOSE = False @@ -290,6 +291,20 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_sagepub_1(self): + prov = SagePub(upload=False, verbose=VERBOSE) + url = "https://journals.sagepub.com/doi/full/10.1177/0306312714535679" + exp = "Rekdal_-_Academic_Urban_Legends_2014.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_sagepub_2(self): + prov = SagePub(upload=False, verbose=VERBOSE) + url = "https://journals.sagepub.com/doi/pdf/10.1177/1352458517694432" + exp = "Kobelt_et_al_-_New_Insights_Into_the_Burden_and_Costs_of_Multiple_Sclerosis_in_Europe_2017.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 25f372c69dfc846faebb4763ecc60e9e0750021b Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 25 Sep 2020 20:54:38 +0200 Subject: Improve support for Neurips provider (fixes #59) --- tests/test_providers.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index ba1cc3a..eeaef82 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -233,6 +233,20 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_neurips_3(self): + prov = NeurIPS(upload=False, verbose=VERBOSE) + url = "http://papers.neurips.cc/paper/5433-combinatorial-pure-exploration-of-multi-armed-bandits" + exp = "Chen_et_al_-_Combinatorial_Pure_Exploration_of_Multi-Armed_Bandits_2014.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_neurips_4(self): + prov = NeurIPS(upload=False, verbose=VERBOSE) + url = "http://papers.neurips.cc/paper/7368-on-the-dimensionality-of-word-embedding.pdf" + exp = "Yin_Shen_-_On_the_Dimensionality_of_Word_Embedding_2018.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + def test_citeseerx_1(self): prov = CiteSeerX(upload=False, verbose=VERBOSE) url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548" -- cgit v1.2.3 From 6338388cea254ba4c6090eb17a8942a13b7a2b1c Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 25 Sep 2020 22:25:41 +0200 Subject: Clean up readability providers This reorganizes the code a bit to ensure we only pull the HTML page once, and use the same readability provider for both the informer and the converter. --- tests/test_providers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index ca6c1ae..479fb84 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -255,7 +255,10 @@ class TestProviders(unittest.TestCase): def test_html_3(self): prov = HTML(upload=False, verbose=VERBOSE) url = "https://conclave-team.github.io/conclave-site/" - exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf" + #exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf" + # NOTE: Title differs between Readability.JS and readability-lxml, we + # assume that testing is done with Readability.JS + exp = "Conclave.pdf" filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) # this is a proxy test to check that all images are included -- cgit v1.2.3 From b77c06ad3deb27b90a91f468b0123923d217d53d Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 20 Oct 2020 22:11:18 +0200 Subject: Increase robustness for arXiv urls --- tests/test_providers.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 70d012a..12f748e 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -95,6 +95,13 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp_filename, os.path.basename(filename)) + def test_arxiv_6(self): + prov = Arxiv(upload=False, verbose=VERBOSE) + url = "https://arxiv.org/pdf/1701.05517.pdf?source=post_page---------------------------" + exp_filename = "Salimans_et_al_-_PixelCNN_Improving_the_PixelCNN_with_Discretized_Logistic_Mixture_Likelihood_and_Other_Modifications_2017.pdf" + filename = prov.run(url) + self.assertEqual(exp_filename, os.path.basename(filename)) + def test_pmc(self): prov = PubMed(upload=False, verbose=VERBOSE) url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/" -- cgit v1.2.3 From 1f07867ec7aebb1b1aa6b806e35a46e73c034605 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 20 Oct 2020 22:15:43 +0200 Subject: fix typo --- tests/test_providers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 12f748e..b6cce59 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -98,7 +98,7 @@ class TestProviders(unittest.TestCase): def test_arxiv_6(self): prov = Arxiv(upload=False, verbose=VERBOSE) url = "https://arxiv.org/pdf/1701.05517.pdf?source=post_page---------------------------" - exp_filename = "Salimans_et_al_-_PixelCNN_Improving_the_PixelCNN_with_Discretized_Logistic_Mixture_Likelihood_and_Other_Modifications_2017.pdf" + exp_filename = "Salimans_et_al_-_PixelCNN_Improving_the_PixelCNN_With_Discretized_Logistic_Mixture_Likelihood_and_Other_Modifications_2017.pdf" filename = prov.run(url) self.assertEqual(exp_filename, os.path.basename(filename)) -- cgit v1.2.3 From 1e83f9f6537fa108d7a157daaaeb3dc06e80fdce Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 23 Oct 2020 12:24:50 +0200 Subject: Code formatting --- tests/test_providers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index b6cce59..546794c 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -285,8 +285,8 @@ class TestProviders(unittest.TestCase): def test_html_3(self): prov = HTML(upload=False, verbose=VERBOSE) url = "https://conclave-team.github.io/conclave-site/" - #exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf" - # NOTE: Title differs between Readability.JS and readability-lxml, we + # exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf" + # NOTE: Title differs between Readability.JS and readability-lxml, we # assume that testing is done with Readability.JS exp = "Conclave.pdf" filename = prov.run(url) @@ -337,6 +337,5 @@ class TestProviders(unittest.TestCase): self.assertEqual(exp, os.path.basename(filename)) - if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 14cacacf3fd7b78b287ec7e6b127bd24f0ea4f56 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 23 Oct 2020 17:36:20 +0200 Subject: Add CVF provider --- tests/test_providers.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tests/test_providers.py') diff --git a/tests/test_providers.py b/tests/test_providers.py index 546794c..e701234 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -16,6 +16,7 @@ from paper2remarkable.providers import ( ACM, Arxiv, CiteSeerX, + CVF, HTML, JMLR, LocalFile, @@ -336,6 +337,24 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_cvf_1(self): + prov = CVF(upload=False, verbose=VERBOSE) + url = "https://openaccess.thecvf.com/content_ICCV_2019/html/Muhammad_Goal-Driven_Sequential_Data_Abstraction_ICCV_2019_paper.html" + exp = ( + "Muhammad_et_al_-_Goal-Driven_Sequential_Data_Abstraction_2019.pdf" + ) + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_cvf_2(self): + prov = CVF(upload=False, verbose=VERBOSE) + url = "https://openaccess.thecvf.com/content_CVPR_2020/papers/Park_Seeing_the_World_in_a_Bag_of_Chips_CVPR_2020_paper.pdf" + exp = ( + "Park_Holynski_Seitz_-_Seeing_the_World_in_a_Bag_of_Chips_2020.pdf" + ) + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3