diff options
| -rw-r--r-- | paper2remarkable/providers/arxiv.py | 10 | ||||
| -rw-r--r-- | tests/test_providers.py | 22 |
2 files changed, 23 insertions, 9 deletions
diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index e022658..1fd1795 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -19,6 +19,10 @@ from ..log import Logger
 
 logger = Logger()
 
+DEARXIV_TEXT_REGEX = (
+    b"arXiv:\d{4}\.\d{4,5}v\d+\s+\[[\w\-]+\.\w+\]\s+\d{1,2}\s\w{3}\s\d{4}"
+)
+
 
 class ArxivInformer(Informer):
     pass
@@ -73,11 +77,7 @@ class Arxiv(Provider):
         with open(uncompress_file, "rb") as fid:
             data = fid.read()
             # Remove the text element
-            data = re.sub(
-                b"\(arXiv:\d{4}\.\d{4,5}v\d+\s+\[\w+\.\w+\]\s+\d{1,2}\s\w{3}\s\d{4}\)Tj",
-                b"()Tj",
-                data,
-            )
+            data = re.sub(b"\(" + DEARXIV_TEXT_REGEX + b"\)Tj", b"()Tj", data)
             # Remove the URL element
             data = re.sub(
                 b"<<\\n\/URI \(http://arxiv\.org/abs/\d{4}\.\d{4,5}v\d+\)\\n\/S /URI\\n>>\\n",
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 143fc78..1479967 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -5,11 +5,12 @@ __author__ = "G.J.J. van den Burg"
 
 """Tests"""
 
-import unittest
-import tempfile
 import hashlib
-import shutil
 import os
+import re
+import shutil
+import tempfile
+import unittest
 
 from paper2remarkable.providers import (
     ACM,
@@ -20,8 +21,9 @@ from paper2remarkable.providers import (
     PubMed,
     Springer,
 )
+from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX
 
-VERBOSE = True
+VERBOSE = False
 
 
 def md5sum(filename):
@@ -35,6 +37,18 @@ def md5sum(filename):
     return hasher.hexdigest()
 
 
+class TestArxiv(unittest.TestCase):
+    def test_text_regex_1(self):
+        key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019"
+        m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
+        self.assertIsNotNone(m)
+
+    def test_text_regex_2(self):
+        key = b"arXiv:1908.03213v1 [astro-ph.HE] 8 Aug 2019"
+        m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
+        self.assertIsNotNone(m)
+
+
 class TestProviders(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
