From 96de81d4158f7779132c9f7883c62bc3f15b6915 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 24 May 2020 20:45:44 +0100 Subject: Add test for pdf reading issue --- tests/test_providers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_providers.py b/tests/test_providers.py index d2fdb0d..fb75fbd 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -142,12 +142,18 @@ class TestProviders(unittest.TestCase): filename = prov.run(local_filename) self.assertEqual("test_.pdf", os.path.basename(filename)) - def test_pdfurl(self): + def test_pdfurl_1(self): prov = PdfUrl(upload=False, verbose=VERBOSE) url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf" filename = prov.run(url) self.assertEqual("14-526.pdf", os.path.basename(filename)) + def test_pdfurl_2(self): + prov = PdfUrl(upload=False, verbose=VERBOSE) + url = "https://www.manuelrigger.at/preprints/NoREC.pdf" + filename = prov.run(url) + self.assertEqual("NoREC.pdf", os.path.basename(filename)) + def test_jmlr_1(self): prov = JMLR(upload=False, verbose=VERBOSE) url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf" -- cgit v1.2.3 From 17a1e7392e0d08bf820252b90ee3509e59ff4bbf Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 24 May 2020 20:50:17 +0100 Subject: Always rewrite pdfs with GhostScript as a cleaning step --- paper2remarkable/providers/_base.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index 5ca3588..1625432 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -71,14 +71,13 @@ class Provider(metaclass=abc.ABCMeta): logger.disable() # Define the operations to run on the pdf. Providers can add others. - if no_crop: - self.operations = [] - elif center: - self.operations = [("center", self.center_pdf)] + self.operations = [("rewrite", self.rewrite_pdf)] + if center: + self.operations.append(("center", self.center_pdf)) elif right: - self.operations = [("right", self.right_pdf)] + self.operations.append(("right", self.right_pdf)) else: - self.operations = [("crop", self.crop_pdf)] + self.operations.append(("crop", self.crop_pdf)) if blank: self.operations.append(("blank", blank_pdf)) @@ -131,11 +130,14 @@ class Provider(metaclass=abc.ABCMeta): "%s failed to compress the PDF file." % self.pdftool ) - def rewrite_pdf(self, in_pdf, out_pdf): + def rewrite_pdf(self, in_pdf, out_pdf=None): """ Re-write the pdf using Ghostscript This helps avoid issues in dearxiv due to nested pdfs. """ + if out_pdf is None: + out_pdf = os.path.splitext(in_pdf)[0] + "-rewrite.pdf" + status = subprocess.call( [ self.gs_path, @@ -150,6 +152,7 @@ class Provider(metaclass=abc.ABCMeta): raise _CalledProcessError( "Failed to rewrite the pdf with GhostScript" ) + return out_pdf def uncompress_pdf(self, in_pdf, out_pdf): """ Uncompress a pdf file """ -- cgit v1.2.3