aboutsummaryrefslogtreecommitdiff
path: root/tests/test_arxiv.py
blob: e2179e9c21a98396e92dbe33c924de373fedc3d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Unit tests for arXiv provider

This file is part of paper2remarkable.

"""

import os
import re
import shutil
import tempfile
import unittest

from paper2remarkable.providers.arxiv import (
    DEARXIV_TEXT_REGEX,
    DEARXIV_URI_REGEX,
    Arxiv,
)


class TestArxiv(unittest.TestCase):
    """Tests for the arXiv provider's stamp regexes and stamp removal.

    Every test runs with a fresh temporary directory as the current
    working directory, so the relative paths used by the stamp tests
    (``./target.pdf`` and ``unc.pdf``) are created inside that scratch
    directory and removed together with it.
    """

    @classmethod
    def setUpClass(cls):
        """Remember the working directory the test run was started from."""
        cls.original_dir = os.getcwd()

    def setUp(self):
        """Create a scratch directory and make it the working directory."""
        self.test_dir = tempfile.mkdtemp()
        # Cleanups (unlike tearDown) still run when setUp fails part-way,
        # so the scratch directory cannot leak if os.chdir raises. They run
        # in LIFO order: first restore the original working directory, then
        # delete the scratch directory.
        self.addCleanup(shutil.rmtree, self.test_dir)
        os.chdir(self.test_dir)
        self.addCleanup(os.chdir, self.original_dir)

    def _assert_fullmatch(self, regex, key):
        """Assert that ``regex`` matches the whole of the bytes ``key``."""
        self.assertIsNotNone(
            re.fullmatch(regex, key),
            msg="pattern did not fully match %r" % (key,),
        )

    def _uncompressed_pdf_bytes(self, url):
        """Run the provider on ``url`` and return the uncompressed PDF bytes.

        The provider is created with ``upload=False`` and writes its result
        to ``./target.pdf``; that file is then passed through the provider's
        ``uncompress_pdf`` into ``unc.pdf`` so the test can search the raw
        bytes for stamp text.

        NOTE(review): presumably this fetches the paper from arxiv.org and
        therefore needs network access -- confirm against the provider.
        """
        prov = Arxiv(upload=False)
        filename = prov.run(url, filename="./target.pdf")
        prov.uncompress_pdf(filename, "unc.pdf")
        with open("unc.pdf", "rb") as fp:
            return fp.read()

    def test_text_regex_1(self):
        """New-style identifier with a dotted subject class."""
        key = b"arXiv:1908.03213v1 [astro.HE] 8 Aug 2019"
        self._assert_fullmatch(DEARXIV_TEXT_REGEX, key)

    def test_text_regex_2(self):
        """New-style identifier with a hyphenated subject class."""
        key = b"arXiv:1908.03213v1 [astro-ph.HE] 8 Aug 2019"
        self._assert_fullmatch(DEARXIV_TEXT_REGEX, key)

    def test_text_regex_3(self):
        """Old-style ``archive/number`` identifier with double spaces."""
        key = b"arXiv:physics/0605197v1  [physics.data-an]  23 May 2006"
        self._assert_fullmatch(DEARXIV_TEXT_REGEX, key)

    def test_text_regex_4(self):
        """Old-style identifier with a single-digit day in the date."""
        key = b"arXiv:math/0309285v2  [math.NA]  9 Apr 2004"
        self._assert_fullmatch(DEARXIV_TEXT_REGEX, key)

    def test_uri_regex_1(self):
        """Plain-http abstract URI with an old-style identifier."""
        key = b"http://arxiv.org/abs/physics/0605197v1"
        self._assert_fullmatch(DEARXIV_URI_REGEX, key)

    def test_uri_regex_2(self):
        """Https abstract URI with a new-style identifier."""
        key = b"https://arxiv.org/abs/1101.0028v3"
        self._assert_fullmatch(DEARXIV_URI_REGEX, key)

    def test_stamp_removed_1(self):
        """Stamp text is absent when the paper is given as a PDF URL."""
        url = "https://arxiv.org/pdf/1703.06103.pdf"
        data = self._uncompressed_pdf_bytes(url)
        self.assertNotIn(b"arXiv:1703.06103v4  [stat.ML]  26 Oct 2017", data)

    def test_stamp_removed_2(self):
        """Stamp text is absent when the paper is given as an abs URL."""
        url = "https://arxiv.org/abs/2003.06222"
        data = self._uncompressed_pdf_bytes(url)
        self.assertNotIn(b"arXiv:2003.06222v1  [stat.ML]  13 Mar 2020", data)

    def test_stamp_removed_3(self):
        """Both stamp text and stamp URI are absent (old-style physics id)."""
        url = "https://arxiv.org/abs/physics/0605197v1"
        data = self._uncompressed_pdf_bytes(url)
        self.assertNotIn(
            b"arXiv:physics/0605197v1  [physics.data-an]  23 May 2006", data
        )
        self.assertNotIn(
            b"/URI (http://arxiv.org/abs/physics/0605197v1)", data
        )

    def test_stamp_removed_4(self):
        """Both stamp text and stamp URI are absent (old-style math id)."""
        url = "https://arxiv.org/abs/math/0309285v2"
        data = self._uncompressed_pdf_bytes(url)
        self.assertNotIn(b"arXiv:math/0309285v2  [math.NA]  9 Apr 2004", data)
        # The host in this URI is spelled "arXiv.org" (capital X) on purpose:
        # the assertion targets that exact byte sequence.
        self.assertNotIn(b"/URI (http://arXiv.org/abs/math/0309285v2)", data)

    def test_stamp_removed_5(self):
        """Stamp URI is absent for an old-style astro-ph identifier."""
        url = "https://arxiv.org/abs/astro-ph/9207001v1"
        data = self._uncompressed_pdf_bytes(url)
        self.assertNotIn(
            b"/URI (http://arxiv.org/abs/astro-ph/9207001v1)", data
        )


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()