diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-04-08 21:15:48 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-04-08 21:15:48 +0100 |
| commit | 62d72c8c073376a036df66d872ffd6149374fd7b (patch) | |
| tree | c37ac1936af86c2b3c82af7fd96260ff0a09c51b /tests | |
| parent | Change wd back to initial directory (diff) | |
| download | paper2remarkable-62d72c8c073376a036df66d872ffd6149374fd7b.tar.gz paper2remarkable-62d72c8c073376a036df66d872ffd6149374fd7b.zip | |
Be more robust against spaces in pdf file
Spaces in the PDF file caused problems where the arXiv stamp was
not removed for some files. This commit adds tests for this and
fixes the issue.
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/test_arxiv.py | 35 |
1 files changed, 34 insertions, 1 deletions
diff --git a/tests/test_arxiv.py b/tests/test_arxiv.py index beb9baa..08ea2c4 100644 --- a/tests/test_arxiv.py +++ b/tests/test_arxiv.py @@ -7,13 +7,28 @@ This file is part of paper2remarkable. """ +import os import re +import shutil +import tempfile import unittest -from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX +from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX, Arxiv class TestArxiv(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.original_dir = os.getcwd() + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + os.chdir(self.test_dir) + + def tearDown(self): + os.chdir(self.original_dir) + shutil.rmtree(self.test_dir) + def test_text_regex_1(self): key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019" m = re.fullmatch(DEARXIV_TEXT_REGEX, key) @@ -24,6 +39,24 @@ class TestArxiv(unittest.TestCase): m = re.fullmatch(DEARXIV_TEXT_REGEX, key) self.assertIsNotNone(m) + def test_stamp_removed_1(self): + url = "https://arxiv.org/pdf/1703.06103.pdf" + prov = Arxiv(upload=False) + filename = prov.run(url, filename="./target.pdf") + prov.uncompress_pdf(filename, "unc.pdf") + with open("unc.pdf", "rb") as fp: + data = fp.read() + self.assertNotIn(b"arXiv:1703.06103v4 [stat.ML] 26 Oct 2017", data) + + def test_stamp_removed_2(self): + url = "https://arxiv.org/abs/2003.06222" + prov = Arxiv(upload=False) + filename = prov.run(url, filename="./target.pdf") + prov.uncompress_pdf(filename, "unc.pdf") + with open("unc.pdf", "rb") as fp: + data = fp.read() + self.assertNotIn(b"arXiv:2003.06222v1 [stat.ML] 13 Mar 2020", data) + if __name__ == "__main__": unittest.main() |
