Enable both pdftk and qpdf

This adds a function that checks which PDF tool (pdftk or qpdf) is available, and moves the compress/uncompress code into the base class of the providers for cleaner code. A new exception, NoPDFToolError, is added for the case where neither PDF tool can be found.
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-04-04 14:53:15 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-04-04 14:53:15 +0100
commit: 8d5ce28ed6a4cf52ae10bf4bed197cd00c529218 (patch)
tree: 9d3f72dd3ec1d2f3be1ab62dcca969a95b82fba8
parent: Add the pdftk path back to the ui (diff)
download: paper2remarkable-8d5ce28ed6a4cf52ae10bf4bed197cd00c529218.tar.gz
paper2remarkable-8d5ce28ed6a4cf52ae10bf4bed197cd00c529218.zip
4 files changed, 87 insertions, 24 deletions
diff --git a/paper2remarkable/exceptions.py b/paper2remarkable/exceptions.py
index 66a329f..5ea9a78 100644
--- a/paper2remarkable/exceptions.py
+++ b/paper2remarkable/exceptions.py
@@ -100,3 +100,19 @@ class _CalledProcessError(Error):
         msg = "ERROR: {message}".format(message=self.message)
         msg += GH_MSG
         return msg
+
+
+class NoPDFToolError(Error):
+    """Exception raised when neither pdftk nor qpdf is found."""
+
+    def __init__(self):
+        pass
+
+    def __str__(self):
+        msg = (
+            "ERROR: Neither pdftk or qpdf could be found. Install "
+            "either of these or ensure that they can be found using "
+            "the --pdftk or --qpdf options."
+        )
+        msg += GH_MSG
+        return msg
diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index 1337201..0cab6b7 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -11,13 +11,16 @@ Copyright: 2019, G.J.J. van den Burg
 import abc
 import os
 import shutil
+import subprocess
 import tempfile
 import time
 
+from ..exceptions import _CalledProcessError
 from ._info import Informer
 from ..pdf_ops import prepare_pdf, blank_pdf, shrink_pdf
 from ..utils import (
     assert_file_is_pdf,
+    check_pdftool,
     download_url,
     upload_to_remarkable,
     follow_redirects,
@@ -58,6 +61,8 @@ class Provider(metaclass=abc.ABCMeta):
         self.informer = Informer()
         self.cookiejar = cookiejar
 
+        self.pdftool = check_pdftool(self.pdftk_path, self.qpdf_path)
+
         # wait time to not hit the server too frequently
         self.server_delay = 0
 
@@ -110,6 +115,47 @@ class Provider(metaclass=abc.ABCMeta):
         # This must exist so that the LocalFile provider can overwrite it
         download_url(pdf_url, filename, cookiejar=self.cookiejar)
 
+    def compress_pdf(self, in_pdf, out_pdf):
+        """Compress a pdf file, raising _CalledProcessError on failure."""
+        if self.pdftool == "pdftk":
+            status = subprocess.call(
+                [self.pdftk_path, in_pdf, "output", out_pdf, "compress"]
+            )
+        elif self.pdftool == "qpdf":
+            # TODO: the --no-warn option is only needed because when we remove
+            # the arXiv stamp we don't fix the length of the pdf object. This
+            # causes qpdf to raise a warning and give a nonzero exit status
+            # (3). Fixing the pdf object is the right approach, but this
+            # workaround is sufficient because qpdf repairs the file.
+            status = subprocess.call(
+                [
+                    self.qpdf_path,
+                    "--no-warn",
+                    "--stream-data=compress",
+                    in_pdf,
+                    out_pdf,
+                ]
+            )
+        if not (status == 0 or status == 3):
+            raise _CalledProcessError(
+                "%s failed to compress the PDF file." % self.pdftool
+            )
+
+    def uncompress_pdf(self, in_pdf, out_pdf):
+        """ Uncompress a pdf file """
+        if self.pdftool == "pdftk":
+            status = subprocess.call(
+                [self.pdftk_path, in_pdf, "output", out_pdf, "uncompress",]
+            )
+        elif self.pdftool == "qpdf":
+            status = subprocess.call(
+                [self.qpdf_path, "--stream-data=uncompress", in_pdf, out_pdf,]
+            )
+        if not status == 0:
+            raise _CalledProcessError(
+                "%s failed to uncompress the PDF file." % self.pdftool
+            )
+
     def run(self, src, filename=None):
         # follow_redirects here is needed with library use
         if os.path.exists(src):
diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index 06bfdec..74043ed 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -10,14 +10,10 @@ Copyright: 2019, G.J.J. van den Burg
 
 import os
 import re
-import subprocess
 
 from ._info import Informer
 from ._base import Provider
-from ..exceptions import (
-    URLResolutionError,
-    _CalledProcessError as CalledProcessError,
-)
+from ..exceptions import URLResolutionError
 from ..log import Logger
 
 logger = Logger()
@@ -71,20 +67,9 @@ class Arxiv(Provider):
         """Remove the arXiv timestamp from a pdf"""
         logger.info("Removing arXiv timestamp")
         basename = os.path.splitext(input_file)[0]
-        uncompress_file = basename + "_uncompress.pdf"
 
-        status = subprocess.call(
-            [
-                self.qpdf_path,
-                "--stream-data=uncompress",
-                input_file,
-                uncompress_file,
-            ]
-        )
-        if not status == 0:
-            raise CalledProcessError(
-                "qpdf failed to uncompress the PDF file."
-            )
+        uncompress_file = basename + "_uncompress.pdf"
+        self.uncompress_pdf(input_file, uncompress_file)
 
         with open(uncompress_file, "rb") as fid:
             data = fid.read()
@@ -102,10 +87,6 @@ class Arxiv(Provider):
             oid.write(data)
 
         output_file = basename + "_dearxiv.pdf"
-        status = subprocess.call(
-            [self.qpdf_path, "--stream-data=compress", removed_file, output_file]
-        )
-        if not status == 0:
-            raise CalledProcessError("qpdf failed to compress the PDF file.")
+        self.compress_pdf(removed_file, output_file)
 
         return output_file
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index f1447d9..9bfeec6 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -17,7 +17,7 @@ import time
 import unidecode
 
 from .log import Logger
-from .exceptions import FileTypeError, RemarkableError
+from .exceptions import FileTypeError, RemarkableError, NoPDFToolError
 
 HEADERS = {
     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
@@ -166,3 +166,23 @@ def is_url(string):
     string = string.strip(" ")
     match = regex.fullmatch(pattern, string)
     return match is not None
+
+
+def check_pdftool(pdftk_path, qpdf_path):
+    """Check whether we have pdftk or qpdf available"""
+    # fall back to 'false' when a path is unset (None or empty)
+    pdftk_path = pdftk_path or 'false'
+    qpdf_path = qpdf_path or 'false'
+
+    status = subprocess.call(
+        [pdftk_path], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+    )
+    if status == 0:
+        return "pdftk"
+    status = subprocess.call(
+        [qpdf_path, '--help'], stdout=subprocess.DEVNULL, 
+        stderr=subprocess.DEVNULL
+    )
+    if status == 0:
+        return "qpdf"
+    raise NoPDFToolError
author	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2020-04-04 14:53:15 +0100
committer	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2020-04-04 14:53:15 +0100
commit	8d5ce28ed6a4cf52ae10bf4bed197cd00c529218 (patch)
tree	9d3f72dd3ec1d2f3be1ab62dcca969a95b82fba8
parent	Add the pdftk path back to the ui (diff)
download	paper2remarkable-8d5ce28ed6a4cf52ae10bf4bed197cd00c529218.tar.gz paper2remarkable-8d5ce28ed6a4cf52ae10bf4bed197cd00c529218.zip