blob: 08ea2c4398ed5ed3bdba352947b3c4077c67893e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Unit tests for arXiv provider
This file is part of paper2remarkable.
"""
import os
import re
import shutil
import tempfile
import unittest
from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX, Arxiv
class TestArxiv(unittest.TestCase):
    """Tests for the arXiv provider: stamp-text regex and stamp removal.

    Each test runs with a freshly created temporary directory as the current
    working directory, so files written through relative paths land in that
    directory, which is deleted again once the test finishes.
    """

    @classmethod
    def setUpClass(cls):
        """Record the working directory in effect before any test runs."""
        cls.original_dir = os.getcwd()

    def setUp(self):
        """Create a scratch directory and make it the working directory."""
        scratch = tempfile.mkdtemp()
        self.test_dir = scratch
        os.chdir(scratch)

    def tearDown(self):
        """Return to the recorded directory, then delete the scratch one."""
        # Step out first: the scratch directory is the current working
        # directory at this point.
        os.chdir(self.original_dir)
        shutil.rmtree(self.test_dir)

    def _run_and_uncompress(self, url):
        """Run the provider on *url* and return the uncompressed PDF bytes.

        Calls ``run`` on an ``Arxiv(upload=False)`` instance with
        ``./target.pdf`` as the filename, hands the returned name to
        ``uncompress_pdf`` together with ``unc.pdf``, and reads ``unc.pdf``
        back in binary mode.
        """
        provider = Arxiv(upload=False)
        produced = provider.run(url, filename="./target.pdf")
        provider.uncompress_pdf(produced, "unc.pdf")
        with open("unc.pdf", "rb") as handle:
            return handle.read()

    def test_text_regex_1(self):
        # Stamp whose category has no hyphen ("astro.HE").
        stamp = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019"
        self.assertIsNotNone(re.fullmatch(DEARXIV_TEXT_REGEX, stamp))

    def test_text_regex_2(self):
        # Stamp whose category contains a hyphen ("astro-ph.HE").
        stamp = b"arXiv:1908.03213v1 [astro-ph.HE] 8 Aug 2019"
        self.assertIsNotNone(re.fullmatch(DEARXIV_TEXT_REGEX, stamp))

    def test_stamp_removed_1(self):
        # Input given as a direct /pdf/ link.
        pdf_bytes = self._run_and_uncompress(
            "https://arxiv.org/pdf/1703.06103.pdf"
        )
        self.assertNotIn(
            b"arXiv:1703.06103v4 [stat.ML] 26 Oct 2017", pdf_bytes
        )

    def test_stamp_removed_2(self):
        # Input given as an /abs/ link.
        pdf_bytes = self._run_and_uncompress(
            "https://arxiv.org/abs/2003.06222"
        )
        self.assertNotIn(
            b"arXiv:2003.06222v1 [stat.ML] 13 Mar 2020", pdf_bytes
        )
if __name__ == "__main__":
    # Executed directly as a script: hand control to unittest's
    # command-line runner.
    unittest.main()
|