diff options
Diffstat (limited to 'tests/test_arxiv.py')
| -rw-r--r-- | tests/test_arxiv.py | 35 |
1 files changed, 34 insertions, 1 deletions
diff --git a/tests/test_arxiv.py b/tests/test_arxiv.py
index beb9baa..08ea2c4 100644
--- a/tests/test_arxiv.py
+++ b/tests/test_arxiv.py
@@ -7,13 +7,28 @@ This file is part of paper2remarkable.
 
 """
 
+import os
 import re
+import shutil
+import tempfile
 import unittest
 
-from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX
+from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX, Arxiv
 
 
 class TestArxiv(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.original_dir = os.getcwd()
+
+    def setUp(self):
+        self.test_dir = tempfile.mkdtemp()
+        os.chdir(self.test_dir)
+
+    def tearDown(self):
+        os.chdir(self.original_dir)
+        shutil.rmtree(self.test_dir)
+
     def test_text_regex_1(self):
         key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019"
         m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
@@ -24,6 +39,24 @@ class TestArxiv(unittest.TestCase):
         m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
         self.assertIsNotNone(m)
 
+    def test_stamp_removed_1(self):
+        url = "https://arxiv.org/pdf/1703.06103.pdf"
+        prov = Arxiv(upload=False)
+        filename = prov.run(url, filename="./target.pdf")
+        prov.uncompress_pdf(filename, "unc.pdf")
+        with open("unc.pdf", "rb") as fp:
+            data = fp.read()
+        self.assertNotIn(b"arXiv:1703.06103v4 [stat.ML] 26 Oct 2017", data)
+
+    def test_stamp_removed_2(self):
+        url = "https://arxiv.org/abs/2003.06222"
+        prov = Arxiv(upload=False)
+        filename = prov.run(url, filename="./target.pdf")
+        prov.uncompress_pdf(filename, "unc.pdf")
+        with open("unc.pdf", "rb") as fp:
+            data = fp.read()
+        self.assertNotIn(b"arXiv:2003.06222v1 [stat.ML] 13 Mar 2020", data)
+
 
 if __name__ == "__main__":
     unittest.main()
