diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-04-04 14:53:15 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-04-04 14:53:15 +0100 |
| commit | 8d5ce28ed6a4cf52ae10bf4bed197cd00c529218 (patch) | |
| tree | 9d3f72dd3ec1d2f3be1ab62dcca969a95b82fba8 | |
| parent | Add the pdftk path back to the ui (diff) | |
| download | paper2remarkable-8d5ce28ed6a4cf52ae10bf4bed197cd00c529218.tar.gz paper2remarkable-8d5ce28ed6a4cf52ae10bf4bed197cd00c529218.zip | |
Enable both pdftk and qpdf
This adds a function that checks which pdf tool is available
and moves the compress/uncompress code to the base class of
the providers for cleaner code. A new exception is added in
case neither pdf tool can be found.
| -rw-r--r-- | paper2remarkable/exceptions.py | 16 |
| -rw-r--r-- | paper2remarkable/providers/_base.py | 46 |
| -rw-r--r-- | paper2remarkable/providers/arxiv.py | 27 |
| -rw-r--r-- | paper2remarkable/utils.py | 22 |
4 files changed, 87 insertions, 24 deletions
diff --git a/paper2remarkable/exceptions.py b/paper2remarkable/exceptions.py index 66a329f..5ea9a78 100644 --- a/paper2remarkable/exceptions.py +++ b/paper2remarkable/exceptions.py @@ -100,3 +100,19 @@ class _CalledProcessError(Error): msg = "ERROR: {message}".format(message=self.message) msg += GH_MSG return msg + + +class NoPDFToolError(Error): + """Exception raised when neither pdftk or qpdf is found.""" + + def __init__(self): + pass + + def __str__(self): + msg = ( + "ERROR: Neither pdftk or qpdf could be found. Install " + "either of these or ensure that they can be found using " + "the --pdftk or --qpdf options." + ) + msg += GH_MSG + return msg diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index 1337201..0cab6b7 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -11,13 +11,16 @@ Copyright: 2019, G.J.J. van den Burg import abc import os import shutil +import subprocess import tempfile import time +from ..exceptions import _CalledProcessError from ._info import Informer from ..pdf_ops import prepare_pdf, blank_pdf, shrink_pdf from ..utils import ( assert_file_is_pdf, + check_pdftool, download_url, upload_to_remarkable, follow_redirects, @@ -58,6 +61,8 @@ class Provider(metaclass=abc.ABCMeta): self.informer = Informer() self.cookiejar = cookiejar + self.pdftool = check_pdftool(self.pdftk_path, self.qpdf_path) + # wait time to not hit the server too frequently self.server_delay = 0 @@ -110,6 +115,47 @@ class Provider(metaclass=abc.ABCMeta): # This must exist so that the LocalFile provider can overwrite it download_url(pdf_url, filename, cookiejar=self.cookiejar) + def compress_pdf(self, in_pdf, out_pdf): + """ Compress a pdf file, returns subprocess status """ + if self.pdftool == "pdftk": + status = subprocess.call( + [self.pdftk_path, in_pdf, "output", out_pdf, "compress"] + ) + elif self.pdftool == "qpdf": + # TODO: the --no-warn option is only needed because when we remove + 
# the arXiv stamp we don't fix the length of the pdf object. This + # causes qpdf to raise a warning and give a nonzero exit status + # (3). Fixing the pdf object is the right approach, but this does + # work as qpdf fixes the file. + status = subprocess.call( + [ + self.qpdf_path, + "--no-warn", + "--stream-data=compress", + in_pdf, + out_pdf, + ] + ) + if not (status == 0 or status == 3): + raise _CalledProcessError( + "%s failed to compress the PDF file." % self.pdftool + ) + + def uncompress_pdf(self, in_pdf, out_pdf): + """ Uncompress a pdf file """ + if self.pdftool == "pdftk": + status = subprocess.call( + [self.pdftk_path, in_pdf, "output", out_pdf, "uncompress",] + ) + elif self.pdftool == "qpdf": + status = subprocess.call( + [self.qpdf_path, "--stream-data=uncompress", in_pdf, out_pdf,] + ) + if not status == 0: + raise _CalledProcessError( + "%s failed to uncompress the PDF file." % self.pdftool + ) + def run(self, src, filename=None): # follow_redirects here is needed with library use if os.path.exists(src): diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py index 06bfdec..74043ed 100644 --- a/paper2remarkable/providers/arxiv.py +++ b/paper2remarkable/providers/arxiv.py @@ -10,14 +10,10 @@ Copyright: 2019, G.J.J. 
van den Burg import os import re -import subprocess from ._info import Informer from ._base import Provider -from ..exceptions import ( - URLResolutionError, - _CalledProcessError as CalledProcessError, -) +from ..exceptions import URLResolutionError from ..log import Logger logger = Logger() @@ -71,20 +67,9 @@ class Arxiv(Provider): """Remove the arXiv timestamp from a pdf""" logger.info("Removing arXiv timestamp") basename = os.path.splitext(input_file)[0] - uncompress_file = basename + "_uncompress.pdf" - status = subprocess.call( - [ - self.qpdf_path, - "--stream-data=uncompress", - input_file, - uncompress_file, - ] - ) - if not status == 0: - raise CalledProcessError( - "qpdf failed to uncompress the PDF file." - ) + uncompress_file = basename + "_uncompress.pdf" + self.uncompress_pdf(input_file, uncompress_file) with open(uncompress_file, "rb") as fid: data = fid.read() @@ -102,10 +87,6 @@ class Arxiv(Provider): oid.write(data) output_file = basename + "_dearxiv.pdf" - status = subprocess.call( - [self.qpdf_path, "--stream-data=compress", removed_file, output_file] - ) - if not status == 0: - raise CalledProcessError("qpdf failed to compress the PDF file.") + self.compress_pdf(removed_file, output_file) return output_file diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py index f1447d9..9bfeec6 100644 --- a/paper2remarkable/utils.py +++ b/paper2remarkable/utils.py @@ -17,7 +17,7 @@ import time import unidecode from .log import Logger -from .exceptions import FileTypeError, RemarkableError +from .exceptions import FileTypeError, RemarkableError, NoPDFToolError HEADERS = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " @@ -166,3 +166,23 @@ def is_url(string): string = string.strip(" ") match = regex.fullmatch(pattern, string) return match is not None + + +def check_pdftool(pdftk_path, qpdf_path): + """Check whether we have pdftk or qpdf available""" + # set defaults in case either is set to None or something + pdftk_path = 
pdftk_path or 'false' + qpdf_path = qpdf_path or 'false' + + status = subprocess.call( + [pdftk_path], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) + if status == 0: + return "pdftk" + status = subprocess.call( + [qpdf_path, '--help'], stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL + ) + if status == 0: + return "qpdf" + raise NoPDFToolError |
