diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-10-20 23:07:29 +0200 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-10-20 23:07:29 +0200 |
| commit | 53231c48bcef4c824c18437ed95e8d3e0573d319 (patch) | |
| tree | 1a73f1dc49bdf34d3d7f6ae8894ff20ad9c05e11 | |
| parent | Merge branch 'feature/multiple_sources' into master (diff) | |
| parent | Merge branch 'master' into bugfix/arxiv (diff) | |
| download | paper2remarkable-53231c48bcef4c824c18437ed95e8d3e0573d319.tar.gz paper2remarkable-53231c48bcef4c824c18437ed95e8d3e0573d319.zip | |
Merge branch 'bugfix/arxiv' into master
| -rw-r--r-- | paper2remarkable/providers/arxiv.py | 2 | ||||
| -rw-r--r-- | tests/test_providers.py | 7 |
2 files changed, 9 insertions, 0 deletions
```diff
diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index 0385f94..317452e 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -45,6 +45,8 @@ class Arxiv(Provider):
 
     def get_abs_pdf_urls(self, url):
         """Get the pdf and abs url from any given arXiv url """
+        if '?' in url:
+            url = url[:url.index('?')]
         if re.match(self.re_abs_1, url) or re.match(self.re_abs_2, url):
             abs_url = url
             pdf_url = url.replace("abs", "pdf") + ".pdf"
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 70d012a..b6cce59 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -95,6 +95,13 @@ class TestProviders(unittest.TestCase):
         filename = prov.run(url)
         self.assertEqual(exp_filename, os.path.basename(filename))
 
+    def test_arxiv_6(self):
+        prov = Arxiv(upload=False, verbose=VERBOSE)
+        url = "https://arxiv.org/pdf/1701.05517.pdf?source=post_page---------------------------"
+        exp_filename = "Salimans_et_al_-_PixelCNN_Improving_the_PixelCNN_With_Discretized_Logistic_Mixture_Likelihood_and_Other_Modifications_2017.pdf"
+        filename = prov.run(url)
+        self.assertEqual(exp_filename, os.path.basename(filename))
+
     def test_pmc(self):
         prov = PubMed(upload=False, verbose=VERBOSE)
         url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/"
```
