aboutsummaryrefslogtreecommitdiff
path: root/tests/test_arxiv.py
diff options
context:
space:
mode:
author Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-05-02 19:34:14 +0100
committer Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-05-02 19:34:14 +0100
commit f8c0e4b2c953d617ffea4a09a7373f697a5eb104 (patch)
tree d40a8aa0fb78ad552566c8cc889647636e17abeb /tests/test_arxiv.py
parent Add a comment regarding use without a remarkable (diff)
download paper2remarkable-f8c0e4b2c953d617ffea4a09a7373f697a5eb104.tar.gz
paper2remarkable-f8c0e4b2c953d617ffea4a09a7373f697a5eb104.zip
Various improvements to dearxiv functionality
Diffstat (limited to 'tests/test_arxiv.py')
-rw-r--r-- tests/test_arxiv.py 62
1 files changed, 61 insertions, 1 deletions
diff --git a/tests/test_arxiv.py b/tests/test_arxiv.py
index 08ea2c4..2cb84cf 100644
--- a/tests/test_arxiv.py
+++ b/tests/test_arxiv.py
@@ -13,7 +13,11 @@ import shutil
import tempfile
import unittest
-from paper2remarkable.providers.arxiv import DEARXIV_TEXT_REGEX, Arxiv
+from paper2remarkable.providers.arxiv import (
+ DEARXIV_TEXT_REGEX,
+ DEARXIV_URI_REGEX,
+ Arxiv,
+)
class TestArxiv(unittest.TestCase):
@@ -39,6 +43,26 @@ class TestArxiv(unittest.TestCase):
m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
self.assertIsNotNone(m)
+ def test_text_regex_3(self):
+ key = b"arXiv:physics/0605197v1 [physics.data-an] 23 May 2006"
+ m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_text_regex_4(self):
+ key = b"arXiv:math/0309285v2 [math.NA] 9 Apr 2004"
+ m = re.fullmatch(DEARXIV_TEXT_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_uri_regex_1(self):
+ key = b"http://arxiv.org/abs/physics/0605197v1"
+ m = re.fullmatch(DEARXIV_URI_REGEX, key)
+ self.assertIsNotNone(m)
+
+ def test_uri_regex_2(self):
+ key = b"https://arxiv.org/abs/1101.0028v3"
+ m = re.fullmatch(DEARXIV_URI_REGEX, key)
+ self.assertIsNotNone(m)
+
def test_stamp_removed_1(self):
url = "https://arxiv.org/pdf/1703.06103.pdf"
prov = Arxiv(upload=False)
@@ -57,6 +81,42 @@ class TestArxiv(unittest.TestCase):
data = fp.read()
self.assertNotIn(b"arXiv:2003.06222v1 [stat.ML] 13 Mar 2020", data)
+ def test_stamp_removed_3(self):
+ url = "https://arxiv.org/abs/physics/0605197v1"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(
+ b"arXiv:physics/0605197v1 [physics.data-an] 23 May 2006", data
+ )
+ self.assertNotIn(
+ b"/URI (http://arxiv.org/abs/physics/0605197v1)", data
+ )
+
+ def test_stamp_removed_4(self):
+ url = "https://arxiv.org/abs/math/0309285v2"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(b"arXiv:math/0309285v2 [math.NA] 9 Apr 2004", data)
+ self.assertNotIn(b"/URI (http://arXiv.org/abs/math/0309285v2)", data)
+
+ def test_stamp_removed_5(self):
+ url = "https://arxiv.org/abs/astro-ph/9207001v1"
+ prov = Arxiv(upload=False)
+ filename = prov.run(url, filename="./target.pdf")
+ prov.uncompress_pdf(filename, "unc.pdf")
+ with open("unc.pdf", "rb") as fp:
+ data = fp.read()
+ self.assertNotIn(
+ b"/URI (http://arxiv.org/abs/astro-ph/9207001v1)", data
+ )
+ self.assertNotIn(b"arXiv:astro-ph/9207001v1 13 Jul 1992", data)
+
if __name__ == "__main__":
unittest.main()