about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com>, 2020-04-04 14:53:15 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com>, 2020-04-04 14:53:15 +0100
commit: 8d5ce28ed6a4cf52ae10bf4bed197cd00c529218 (patch)
tree: 9d3f72dd3ec1d2f3be1ab62dcca969a95b82fba8
parent: Add the pdftk path back to the ui (diff)
download: paper2remarkable-8d5ce28ed6a4cf52ae10bf4bed197cd00c529218.tar.gz
paper2remarkable-8d5ce28ed6a4cf52ae10bf4bed197cd00c529218.zip
Enable both pdftk and qpdf
This adds a function that checks which pdf tool is available and moves the compress/uncompress code to the base class of the providers for cleaner code. A new exception is added in case neither pdf tool can be found.
-rw-r--r-- paper2remarkable/exceptions.py 16
-rw-r--r-- paper2remarkable/providers/_base.py 46
-rw-r--r-- paper2remarkable/providers/arxiv.py 27
-rw-r--r-- paper2remarkable/utils.py 22
4 files changed, 87 insertions, 24 deletions
diff --git a/paper2remarkable/exceptions.py b/paper2remarkable/exceptions.py
index 66a329f..5ea9a78 100644
--- a/paper2remarkable/exceptions.py
+++ b/paper2remarkable/exceptions.py
@@ -100,3 +100,19 @@ class _CalledProcessError(Error):
msg = "ERROR: {message}".format(message=self.message)
msg += GH_MSG
return msg
+
+
+class NoPDFToolError(Error):
+ """Exception raised when neither pdftk nor qpdf is found."""
+
+ def __init__(self):
+ pass
+
+ def __str__(self):
+ msg = (
+ "ERROR: Neither pdftk or qpdf could be found. Install "
+ "either of these or ensure that they can be found using "
+ "the --pdftk or --qpdf options."
+ )
+ msg += GH_MSG
+ return msg
diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index 1337201..0cab6b7 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -11,13 +11,16 @@ Copyright: 2019, G.J.J. van den Burg
import abc
import os
import shutil
+import subprocess
import tempfile
import time
+from ..exceptions import _CalledProcessError
from ._info import Informer
from ..pdf_ops import prepare_pdf, blank_pdf, shrink_pdf
from ..utils import (
assert_file_is_pdf,
+ check_pdftool,
download_url,
upload_to_remarkable,
follow_redirects,
@@ -58,6 +61,8 @@ class Provider(metaclass=abc.ABCMeta):
self.informer = Informer()
self.cookiejar = cookiejar
+ self.pdftool = check_pdftool(self.pdftk_path, self.qpdf_path)
+
# wait time to not hit the server too frequently
self.server_delay = 0
@@ -110,6 +115,47 @@ class Provider(metaclass=abc.ABCMeta):
# This must exist so that the LocalFile provider can overwrite it
download_url(pdf_url, filename, cookiejar=self.cookiejar)
+ def compress_pdf(self, in_pdf, out_pdf):
+ """ Compress a pdf file, raises _CalledProcessError on failure """
+ if self.pdftool == "pdftk":
+ status = subprocess.call(
+ [self.pdftk_path, in_pdf, "output", out_pdf, "compress"]
+ )
+ elif self.pdftool == "qpdf":
+ # TODO: the --no-warn option is only needed because when we remove
+ # the arXiv stamp we don't fix the length of the pdf object. This
+ # causes qpdf to raise a warning and give a nonzero exit status
+ # (3). Fixing the pdf object is the right approach, but this does
+ # work as qpdf fixes the file.
+ status = subprocess.call(
+ [
+ self.qpdf_path,
+ "--no-warn",
+ "--stream-data=compress",
+ in_pdf,
+ out_pdf,
+ ]
+ )
+ if not (status == 0 or status == 3):
+ raise _CalledProcessError(
+ "%s failed to compress the PDF file." % self.pdftool
+ )
+
+ def uncompress_pdf(self, in_pdf, out_pdf):
+ """ Uncompress a pdf file """
+ if self.pdftool == "pdftk":
+ status = subprocess.call(
+ [self.pdftk_path, in_pdf, "output", out_pdf, "uncompress",]
+ )
+ elif self.pdftool == "qpdf":
+ status = subprocess.call(
+ [self.qpdf_path, "--stream-data=uncompress", in_pdf, out_pdf,]
+ )
+ if not status == 0:
+ raise _CalledProcessError(
+ "%s failed to uncompress the PDF file." % self.pdftool
+ )
+
def run(self, src, filename=None):
# follow_redirects here is needed with library use
if os.path.exists(src):
diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index 06bfdec..74043ed 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -10,14 +10,10 @@ Copyright: 2019, G.J.J. van den Burg
import os
import re
-import subprocess
from ._info import Informer
from ._base import Provider
-from ..exceptions import (
- URLResolutionError,
- _CalledProcessError as CalledProcessError,
-)
+from ..exceptions import URLResolutionError
from ..log import Logger
logger = Logger()
@@ -71,20 +67,9 @@ class Arxiv(Provider):
"""Remove the arXiv timestamp from a pdf"""
logger.info("Removing arXiv timestamp")
basename = os.path.splitext(input_file)[0]
- uncompress_file = basename + "_uncompress.pdf"
- status = subprocess.call(
- [
- self.qpdf_path,
- "--stream-data=uncompress",
- input_file,
- uncompress_file,
- ]
- )
- if not status == 0:
- raise CalledProcessError(
- "qpdf failed to uncompress the PDF file."
- )
+ uncompress_file = basename + "_uncompress.pdf"
+ self.uncompress_pdf(input_file, uncompress_file)
with open(uncompress_file, "rb") as fid:
data = fid.read()
@@ -102,10 +87,6 @@ class Arxiv(Provider):
oid.write(data)
output_file = basename + "_dearxiv.pdf"
- status = subprocess.call(
- [self.qpdf_path, "--stream-data=compress", removed_file, output_file]
- )
- if not status == 0:
- raise CalledProcessError("qpdf failed to compress the PDF file.")
+ self.compress_pdf(removed_file, output_file)
return output_file
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index f1447d9..9bfeec6 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -17,7 +17,7 @@ import time
import unidecode
from .log import Logger
-from .exceptions import FileTypeError, RemarkableError
+from .exceptions import FileTypeError, RemarkableError, NoPDFToolError
HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
@@ -166,3 +166,23 @@ def is_url(string):
string = string.strip(" ")
match = regex.fullmatch(pattern, string)
return match is not None
+
+
+def check_pdftool(pdftk_path, qpdf_path):
+ """Check whether we have pdftk or qpdf available"""
+ # set defaults in case either is set to None or something
+ pdftk_path = pdftk_path or 'false'
+ qpdf_path = qpdf_path or 'false'
+
+ status = subprocess.call(
+ [pdftk_path], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+ )
+ if status == 0:
+ return "pdftk"
+ status = subprocess.call(
+ [qpdf_path, '--help'], stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL
+ )
+ if status == 0:
+ return "qpdf"
+ raise NoPDFToolError