From b77c06ad3deb27b90a91f468b0123923d217d53d Mon Sep 17 00:00:00 2001
From: Gertjan van den Burg
Date: Tue, 20 Oct 2020 22:11:18 +0200
Subject: Increase robustness for arXiv urls

---
 paper2remarkable/providers/arxiv.py | 2 ++
 tests/test_providers.py             | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index 0385f94..317452e 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -45,6 +45,8 @@ class Arxiv(Provider):
 
     def get_abs_pdf_urls(self, url):
         """Get the pdf and abs url from any given arXiv url """
+        if '?' in url:
+            url = url[:url.index('?')]
         if re.match(self.re_abs_1, url) or re.match(self.re_abs_2, url):
             abs_url = url
             pdf_url = url.replace("abs", "pdf") + ".pdf"
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 70d012a..12f748e 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -95,6 +95,13 @@ class TestProviders(unittest.TestCase):
         filename = prov.run(url)
         self.assertEqual(exp_filename, os.path.basename(filename))
 
+    def test_arxiv_6(self):
+        prov = Arxiv(upload=False, verbose=VERBOSE)
+        url = "https://arxiv.org/pdf/1701.05517.pdf?source=post_page---------------------------"
+        exp_filename = "Salimans_et_al_-_PixelCNN_Improving_the_PixelCNN_with_Discretized_Logistic_Mixture_Likelihood_and_Other_Modifications_2017.pdf"
+        filename = prov.run(url)
+        self.assertEqual(exp_filename, os.path.basename(filename))
+
     def test_pmc(self):
         prov = PubMed(upload=False, verbose=VERBOSE)
         url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/"
-- 
cgit v1.2.3


From 1f07867ec7aebb1b1aa6b806e35a46e73c034605 Mon Sep 17 00:00:00 2001
From: Gertjan van den Burg
Date: Tue, 20 Oct 2020 22:15:43 +0200
Subject: fix typo

---
 tests/test_providers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_providers.py b/tests/test_providers.py
index 12f748e..b6cce59 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -98,7 +98,7 @@ class TestProviders(unittest.TestCase):
     def test_arxiv_6(self):
         prov = Arxiv(upload=False, verbose=VERBOSE)
         url = "https://arxiv.org/pdf/1701.05517.pdf?source=post_page---------------------------"
-        exp_filename = "Salimans_et_al_-_PixelCNN_Improving_the_PixelCNN_with_Discretized_Logistic_Mixture_Likelihood_and_Other_Modifications_2017.pdf"
+        exp_filename = "Salimans_et_al_-_PixelCNN_Improving_the_PixelCNN_With_Discretized_Logistic_Mixture_Likelihood_and_Other_Modifications_2017.pdf"
         filename = prov.run(url)
         self.assertEqual(exp_filename, os.path.basename(filename))
 
-- 
cgit v1.2.3