about | summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-04-08 21:15:48 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-04-08 21:15:48 +0100
commit: 62d72c8c073376a036df66d872ffd6149374fd7b (patch)
tree: c37ac1936af86c2b3c82af7fd96260ff0a09c51b /tests
parent: Change wd back to initial directory (diff)
download: paper2remarkable-62d72c8c073376a036df66d872ffd6149374fd7b.tar.gz
paper2remarkable-62d72c8c073376a036df66d872ffd6149374fd7b.zip
Be more robust against spaces in pdf file
Spaces in the PDF file caused problems where the arXiv stamp was not removed for some files. This commit adds tests for this case and fixes the issue.
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_arxiv.py 35
1 files changed, 34 insertions, 1 deletions
diff --git a/tests/test_arxiv.py b/tests/test_arxiv.py
index beb9baa..08ea2c4 100644
--- a/tests/test_arxiv.py
+++ b/tests/test_arxiv.py
@@ -7,13 +7,28 @@ This file is part of paper2remarkable.
"""
+import os
import re
+import shutil
+import tempfile
import unittest
-from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX
+from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX, Arxiv
class TestArxiv(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.original_dir = os.getcwd()
+
+ def setUp(self):
+ self.test_dir = tempfile.mkdtemp()
+ os.chdir(self.test_dir)
+
+ def tearDown(self):
+ os.chdir(self.original_dir)
+ shutil.rmtree(self.test_dir)
+
def test_text_regex_1(self):
key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019"
m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
@@ -24,6 +39,24 @@ class TestArxiv(unittest.TestCase):
m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
self.assertIsNotNone(m)
+ def test_stamp_removed_1(self):
+ url = "https://arxiv.org/pdf/1703.06103.pdf"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(b"arXiv:1703.06103v4 [stat.ML] 26 Oct 2017", data)
+
+ def test_stamp_removed_2(self):
+ url = "https://arxiv.org/abs/2003.06222"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(b"arXiv:2003.06222v1 [stat.ML] 13 Mar 2020", data)
+
if __name__ == "__main__":
unittest.main()