From ed9b8252a2361604331f7a275a7625b3de9017ff Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sat, 4 Apr 2020 17:20:47 +0100 Subject: Fix provider selection for redirected urls Some urls, such as the arXiv urls with the : in the identifier, didn't work when using the UI interface because the redirected url wasn't passed to the provider, but the original url was. This commit fixes that issue and adds unit tests for the provider selection function, hopefully making this more robust in the future. --- tests/test_ui.py | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 tests/test_ui.py (limited to 'tests/test_ui.py') diff --git a/tests/test_ui.py b/tests/test_ui.py new file mode 100644 index 0000000..fc362a0 --- /dev/null +++ b/tests/test_ui.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Unit tests for command line interface + +This file is part of paper2remarkable. + +""" + +import os +import shutil +import tempfile +import unittest + +from paper2remarkable.exceptions import ( + InvalidURLError, + UnidentifiedSourceError, +) +from paper2remarkable.providers import ( + ACM, + Arxiv, + CiteSeerX, + HTML, + JMLR, + LocalFile, + NBER, + NeurIPS, + OpenReview, + PMLR, + PdfUrl, + PubMed, + Springer, +) +from paper2remarkable.ui import choose_provider + + +class TestUI(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.original_dir = os.getcwd() + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + os.chdir(self.test_dir) + + def tearDown(self): + os.chdir(self.original_dir) + shutil.rmtree(self.test_dir) + + def test_choose_provider_1(self): + tests = [ + ( + Arxiv, + "https://arxiv.org/abs/1811.11242v1", + "https://arxiv.org/abs/1811.11242v1", + ), + ( + Arxiv, + "http://arxiv.org/abs/arXiv:1908.03213", + "https://arxiv.org/abs/1908.03213", + ), + ( + Arxiv, + "https://arxiv.org/abs/math/0309285", + "https://arxiv.org/abs/math/0309285", + ), + ( + Arxiv, 
+ "https://arxiv.org/pdf/physics/0605197v1.pdf", + "https://arxiv.org/pdf/physics/0605197v1.pdf", + ), + ( + PubMed, + "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/", + "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/", + ), + ( + ACM, + "https://dl.acm.org/citation.cfm?id=3025626", + "https://dl.acm.org/doi/10.1145/3025453.3025626", + ), + ( + ACM, + "https://dl.acm.org/doi/pdf/10.1145/3219819.3220081?download=true", + "https://dl.acm.org/doi/pdf/10.1145/3219819.3220081?download=true&", + ), + ( + OpenReview, + "http://openreview.net/forum?id=S1x4ghC9tQ", + "https://openreview.net/forum?id=S1x4ghC9tQ", + ), + ( + Springer, + "https://link.springer.com/article/10.1007/s10618-019-00631-5", + "https://link.springer.com/article/10.1007/s10618-019-00631-5", + ), + ( + PdfUrl, + "https://gertjanvandenburg.com/papers/VandenBurg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2019.pdf", + "https://gertjanvandenburg.com/papers/VandenBurg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2019.pdf", + ), + ( + JMLR, + "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf", + "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf", + ), + ( + JMLR, + "http://www.jmlr.org/papers/v10/xu09a.html", + "http://www.jmlr.org/papers/v10/xu09a.html", + ), + ( + PMLR, + "http://proceedings.mlr.press/v97/behrmann19a.html", + "http://proceedings.mlr.press/v97/behrmann19a.html", + ), + ( + PMLR, + "http://proceedings.mlr.press/v15/maaten11b/maaten11b.pdf", + "http://proceedings.mlr.press/v15/maaten11b/maaten11b.pdf", + ), + ( + PMLR, + "http://proceedings.mlr.press/v48/melnyk16.pdf", + "http://proceedings.mlr.press/v48/melnyk16.pdf", + ), + ( + PMLR, + "http://proceedings.mlr.press/v48/zhangf16.html", + "http://proceedings.mlr.press/v48/zhangf16.html", + ), + ( + NBER, + "https://www.nber.org/papers/w26752", + "https://www.nber.org/papers/w26752", + ), + ( + NBER, + "https://www.nber.org/papers/w19152.pdf", + 
"https://www.nber.org/papers/w19152.pdf", + ), + ( + NeurIPS, + "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf", + "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf", + ), + ( + NeurIPS, + "https://papers.nips.cc/paper/7796-middle-out-decoding", + "https://papers.nips.cc/paper/7796-middle-out-decoding", + ), + ( + CiteSeerX, + "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548", + "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548", + ), + ( + CiteSeerX, + "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf", + "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf", + ), + ( + HTML, + "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines", + "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines" + ), + ( + HTML, + "https://www.nature.com/articles/d41586-020-00176-4", + "https://www.nature.com/articles/d41586-020-00176-4" + ), + ] + for exp_prov, url, exp_url in tests: + prov, new_url, jar = choose_provider(url) + with self.subTest(url=url): + self.assertEqual(exp_url, new_url) + self.assertEqual(prov, exp_prov) + + def test_choose_provider_2(self): + local_filename = "test.pdf" + with open(local_filename, "w") as fp: + fp.write( + "%PDF-1.1\n%¥±ë\n\n1 0 obj\n << /Type /Catalog\n /Pages 2 0 R\n >>\nendobj\n\n2 0 obj\n << /Type /Pages\n /Kids [3 0 R]\n /Count 1\n /MediaBox [0 0 300 144]\n >>\nendobj\n\n3 0 obj\n << /Type /Page\n /Parent 2 0 R\n /Resources\n << /Font\n << /F1\n << /Type /Font\n /Subtype /Type1\n /BaseFont /Times-Roman\n >>\n >>\n >>\n /Contents 4 0 R\n >>\nendobj\n\n4 0 obj\n << /Length 55 >>\nstream\n BT\n /F1 18 Tf\n 0 0 Td\n (Hello World) Tj\n ET\nendstream\nendobj\n\nxref\n0 5\n0000000000 65535 f \n0000000018 00000 n \n0000000077 00000 n \n0000000178 00000 n \n0000000457 00000 n \ntrailer\n << /Root 1 0 R\n /Size 5\n 
>>\nstartxref\n565\n%%EOF" + ) + + prov, new_input, jar = choose_provider(local_filename) + self.assertEqual(prov, LocalFile) + self.assertEqual(new_input, local_filename) + self.assertIsNone(jar) + + def test_choose_provider_3(self): + local_filename = "/tmp/abcdef.pdf" + with self.assertRaises(UnidentifiedSourceError): + choose_provider(local_filename) + + def test_choose_provider_4(self): + url = "https://raw.githubusercontent.com/GjjvdBurg/paper2remarkable/master/README.md" + with self.assertRaises(InvalidURLError): + choose_provider(url) + + +if __name__ == "__main__": + unittest.main() -- cgit v1.2.3 From b8452034ed3a503e06e58f524ac322a4ab0203bb Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sat, 4 Apr 2020 22:43:11 +0100 Subject: Code formatting --- tests/test_ui.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tests/test_ui.py') diff --git a/tests/test_ui.py b/tests/test_ui.py index fc362a0..11ed87a 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -160,15 +160,15 @@ class TestUI(unittest.TestCase): "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf", ), ( - HTML, - "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines", - "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines" - ), - ( - HTML, - "https://www.nature.com/articles/d41586-020-00176-4", - "https://www.nature.com/articles/d41586-020-00176-4" - ), + HTML, + "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines", + "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines", + ), + ( + HTML, + "https://www.nature.com/articles/d41586-020-00176-4", + "https://www.nature.com/articles/d41586-020-00176-4", + ), ] for exp_prov, url, exp_url in tests: prov, new_url, jar = choose_provider(url) -- cgit v1.2.3 From 7cc0b6e320e45b9ce442425a04ac4708fb3df077 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 11 May 2020 17:32:21 +0100 
Subject: Allow underscore in urls --- tests/test_ui.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tests/test_ui.py') diff --git a/tests/test_ui.py b/tests/test_ui.py index 11ed87a..7ae1e79 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -99,6 +99,11 @@ class TestUI(unittest.TestCase): "https://gertjanvandenburg.com/papers/VandenBurg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2019.pdf", "https://gertjanvandenburg.com/papers/VandenBurg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2019.pdf", ), + ( + PdfUrl, + "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", + "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", + ), ( JMLR, "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf", -- cgit v1.2.3 From a5522a9cc39b61d0d26705f99279381dcb9e7f9f Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 24 May 2020 21:07:46 +0100 Subject: Remove extra pdfurl test This test seems to fail repeatedly on Travis, for no clear reason (it works locally). Since we have another PdfUrl test I don't think it's necessary to have this one too, so I'll remove it. 
--- tests/test_ui.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tests/test_ui.py') diff --git a/tests/test_ui.py b/tests/test_ui.py index 7ae1e79..7ab5099 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -94,11 +94,6 @@ class TestUI(unittest.TestCase): "https://link.springer.com/article/10.1007/s10618-019-00631-5", "https://link.springer.com/article/10.1007/s10618-019-00631-5", ), - ( - PdfUrl, - "https://gertjanvandenburg.com/papers/VandenBurg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2019.pdf", - "https://gertjanvandenburg.com/papers/VandenBurg_Nazabal_Sutton_-_Wrangling_Messy_CSV_Files_by_Detecting_Row_and_Type_Patterns_2019.pdf", - ), ( PdfUrl, "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", -- cgit v1.2.3 From b56d376ff87cfc7fc599f40e13338a3c1a489877 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 11 Aug 2020 22:18:30 +0100 Subject: Fix failing unit test --- tests/test_ui.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tests/test_ui.py') diff --git a/tests/test_ui.py b/tests/test_ui.py index 7ab5099..5747eb9 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -101,13 +101,13 @@ class TestUI(unittest.TestCase): ), ( JMLR, - "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf", - "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf", + "https://www.jmlr.org/papers/volume17/14-526/14-526.pdf", + "https://www.jmlr.org/papers/volume17/14-526/14-526.pdf", ), ( JMLR, - "http://www.jmlr.org/papers/v10/xu09a.html", - "http://www.jmlr.org/papers/v10/xu09a.html", + "https://www.jmlr.org/papers/v10/xu09a.html", + "https://www.jmlr.org/papers/v10/xu09a.html", ), ( PMLR, -- cgit v1.2.3 From 25f372c69dfc846faebb4763ecc60e9e0750021b Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 25 Sep 2020 20:54:38 +0200 Subject: Improve support for Neurips provider (fixes #59) --- tests/test_ui.py | 7 ++++++- 1 file 
changed, 6 insertions(+), 1 deletion(-) (limited to 'tests/test_ui.py') diff --git a/tests/test_ui.py b/tests/test_ui.py index 5747eb9..61b371d 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -98,7 +98,7 @@ class TestUI(unittest.TestCase): PdfUrl, "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", - ), + ), ( JMLR, "https://www.jmlr.org/papers/volume17/14-526/14-526.pdf", @@ -149,6 +149,11 @@ class TestUI(unittest.TestCase): "https://papers.nips.cc/paper/7796-middle-out-decoding", "https://papers.nips.cc/paper/7796-middle-out-decoding", ), + ( + NeurIPS, + "http://papers.neurips.cc/paper/7368-on-the-dimensionality-of-word-embedding.pdf", + "http://papers.neurips.cc/paper/7368-on-the-dimensionality-of-word-embedding.pdf", + ), ( CiteSeerX, "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548", -- cgit v1.2.3 From fcd8d3cd1d94780315a82655ce6b9571534c0a7d Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 20 Oct 2020 22:00:58 +0200 Subject: Updates to NBER provider after site updates --- tests/test_ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/test_ui.py') diff --git a/tests/test_ui.py b/tests/test_ui.py index 61b371d..97ec44d 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -137,7 +137,7 @@ class TestUI(unittest.TestCase): ( NBER, "https://www.nber.org/papers/w19152.pdf", - "https://www.nber.org/papers/w19152.pdf", + "https://www.nber.org/system/files/working_papers/w19152/w19152.pdf", ), ( NeurIPS, -- cgit v1.2.3 From 14cacacf3fd7b78b287ec7e6b127bd24f0ea4f56 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 23 Oct 2020 17:36:20 +0200 Subject: Add CVF provider --- tests/test_ui.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tests/test_ui.py') diff --git a/tests/test_ui.py b/tests/test_ui.py index 97ec44d..835f594 100644 
--- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -20,6 +20,7 @@ from paper2remarkable.providers import ( ACM, Arxiv, CiteSeerX, + CVF, HTML, JMLR, LocalFile, @@ -174,6 +175,11 @@ class TestUI(unittest.TestCase): "https://www.nature.com/articles/d41586-020-00176-4", "https://www.nature.com/articles/d41586-020-00176-4", ), + ( + CVF, + "https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html", + "https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html", + ), ] for exp_prov, url, exp_url in tests: prov, new_url, jar = choose_provider(url) -- cgit v1.2.3