diff options
Diffstat (limited to 'tests/test_arxiv.py')
| -rw-r--r-- | tests/test_arxiv.py | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/tests/test_arxiv.py b/tests/test_arxiv.py new file mode 100644 index 0000000..2cb84cf --- /dev/null +++ b/tests/test_arxiv.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Unit tests for arXiv provider + +This file is part of paper2remarkable. + +""" + +import os +import re +import shutil +import tempfile +import unittest + +from paper2remarkable.providers.arxiv import ( + DEARXIV_TEXT_REGEX, + DEARXIV_URI_REGEX, + Arxiv, +) + + +class TestArxiv(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.original_dir = os.getcwd() + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + os.chdir(self.test_dir) + + def tearDown(self): + os.chdir(self.original_dir) + shutil.rmtree(self.test_dir) + + def test_text_regex_1(self): + key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019" + m = re.fullmatch(DEARXIV_TEXT_REGEX, key) + self.assertIsNotNone(m) + + def test_text_regex_2(self): + key = b"arXiv:1908.03213v1 [astro-ph.HE] 8 Aug 2019" + m = re.fullmatch(DEARXIV_TEXT_REGEX, key) + self.assertIsNotNone(m) + + def test_text_regex_3(self): + key = b"arXiv:physics/0605197v1 [physics.data-an] 23 May 2006" + m = re.fullmatch(DEARXIV_TEXT_REGEX, key) + self.assertIsNotNone(m) + + def test_text_regex_4(self): + key = b"arXiv:math/0309285v2 [math.NA] 9 Apr 2004" + m = re.fullmatch(DEARXIV_TEXT_REGEX, key) + self.assertIsNotNone(m) + + def test_uri_regex_1(self): + key = b"http://arxiv.org/abs/physics/0605197v1" + m = re.fullmatch(DEARXIV_URI_REGEX, key) + self.assertIsNotNone(m) + + def test_uri_regex_2(self): + key = b"https://arxiv.org/abs/1101.0028v3" + m = re.fullmatch(DEARXIV_URI_REGEX, key) + self.assertIsNotNone(m) + + def test_stamp_removed_1(self): + url = "https://arxiv.org/pdf/1703.06103.pdf" + prov = Arxiv(upload=False) + filename = prov.run(url, filename="./target.pdf") + prov.uncompress_pdf(filename, "unc.pdf") + with open("unc.pdf", "rb") as fp: + data = fp.read() + self.assertNotIn(b"arXiv:1703.06103v4 [stat.ML] 26 Oct 2017", data) + + def test_stamp_removed_2(self): + url = "https://arxiv.org/abs/2003.06222" + prov = Arxiv(upload=False) + filename = prov.run(url, filename="./target.pdf") + prov.uncompress_pdf(filename, "unc.pdf") + with open("unc.pdf", "rb") as fp: + data = fp.read() + self.assertNotIn(b"arXiv:2003.06222v1 [stat.ML] 13 Mar 2020", data) + + def test_stamp_removed_3(self): + url = "https://arxiv.org/abs/physics/0605197v1" + prov = Arxiv(upload=False) + filename = prov.run(url, filename="./target.pdf") + prov.uncompress_pdf(filename, "unc.pdf") + with open("unc.pdf", "rb") as fp: + data = fp.read() + self.assertNotIn( + b"arXiv:physics/0605197v1 [physics.data-an] 23 May 2006", data + ) + self.assertNotIn( + b"/URI (http://arxiv.org/abs/physics/0605197v1)", data + ) + + def test_stamp_removed_4(self): + url = "https://arxiv.org/abs/math/0309285v2" + prov = Arxiv(upload=False) + filename = prov.run(url, filename="./target.pdf") + prov.uncompress_pdf(filename, "unc.pdf") + with open("unc.pdf", "rb") as fp: + data = fp.read() + self.assertNotIn(b"arXiv:math/0309285v2 [math.NA] 9 Apr 2004", data) + self.assertNotIn(b"/URI (http://arXiv.org/abs/math/0309285v2)", data) + + def test_stamp_removed_5(self): + url = "https://arxiv.org/abs/astro-ph/9207001v1" + prov = Arxiv(upload=False) + filename = prov.run(url, filename="./target.pdf") + prov.uncompress_pdf(filename, "unc.pdf") + with open("unc.pdf", "rb") as fp: + data = fp.read() + self.assertNotIn( + b"/URI (http://arxiv.org/abs/astro-ph/9207001v1)", data + ) + self.assertNotIn(b"arXiv:astro-ph/9207001v1 13 Jul 1992", data) + + +if __name__ == "__main__": + unittest.main() |
