about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- paper2remarkable/providers/arxiv.py 18
-rw-r--r-- tests/test_providers.py 16
2 files changed, 29 insertions, 5 deletions
diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index 282eb09..913e015 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -33,8 +33,11 @@ class ArxivInformer(Informer):
class Arxiv(Provider):
- re_abs = "https?://arxiv.org/abs/\d{4}\.\d{4,5}(v\d+)?"
- re_pdf = "https?://arxiv.org/pdf/\d{4}\.\d{4,5}(v\d+)?\.pdf"
+ re_abs_1 = r"https?://arxiv\.org/abs/\d{4}\.\d{4,5}(v\d+)?"
+ re_pdf_1 = r"https?://arxiv\.org/pdf/\d{4}\.\d{4,5}(v\d+)?\.pdf"
+ # Old-style ids are archive[.subject]/YYMMNNN, e.g. hep-th/9901001 or math.GT/0309136.
+ re_abs_2 = r"https?://arxiv\.org/abs/[\w.-]+/\d{7}(v\d+)?"
+ re_pdf_2 = r"https?://arxiv\.org/pdf/[\w.-]+/\d{7}(v\d+)?\.pdf"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -45,10 +48,10 @@ class Arxiv(Provider):
def get_abs_pdf_urls(self, url):
"""Get the pdf and abs url from any given arXiv url """
- if re.match(self.re_abs, url):
+ if re.match(self.re_abs_1, url) or re.match(self.re_abs_2, url):
abs_url = url
pdf_url = url.replace("abs", "pdf") + ".pdf"
- elif re.match(self.re_pdf, url):
+ elif re.match(self.re_pdf_1, url) or re.match(self.re_pdf_2, url):
abs_url = url[:-4].replace("pdf", "abs")
pdf_url = url
else:
@@ -57,7 +60,12 @@ class Arxiv(Provider):
def validate(src):
"""Check if the url is to an arXiv page. """
- return re.match(Arxiv.re_abs, src) or re.match(Arxiv.re_pdf, src)
+ return (
+ re.match(Arxiv.re_abs_1, src)
+ or re.match(Arxiv.re_pdf_1, src)
+ or re.match(Arxiv.re_abs_2, src)
+ or re.match(Arxiv.re_pdf_2, src)
+ )
def dearxiv(self, input_file):
"""Remove the arXiv timestamp from a pdf"""
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 9d1882d..e256eec 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -79,6 +79,22 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual(exp_filename, os.path.basename(filename))
+ def test_arxiv_3(self):
+ prov = Arxiv(upload=False, verbose=VERBOSE)
+ url = "https://arxiv.org/abs/math/0309285"
+ exp_filename = "Jackson_et_al_-_An_Algorithm_for_Optimal_Partitioning_of_Data_on_an_Interval_2003.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp_filename, os.path.basename(filename))
+
+ def test_arxiv_4(self):
+ prov = Arxiv(upload=False, verbose=VERBOSE)
+ url = "https://arxiv.org/pdf/physics/0605197v1.pdf"
+ exp_filename = (
+ "Knuth_-_Optimal_Data-Based_Binning_for_Histograms_2006.pdf"
+ )
+ filename = prov.run(url)
+ self.assertEqual(exp_filename, os.path.basename(filename))
+
def test_pmc(self):
prov = PubMed(upload=False, verbose=VERBOSE)
url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3474301/"