From 0a72c6b0dcb047ca6bfc11ae876a33f26325a2ef Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 3 Mar 2020 11:02:45 +0000 Subject: Be more robust against missing pdftoppm --- paper2remarkable/crop.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/paper2remarkable/crop.py b/paper2remarkable/crop.py index 02c6757..dc4b31c 100644 --- a/paper2remarkable/crop.py +++ b/paper2remarkable/crop.py @@ -45,6 +45,17 @@ def find_offset_byte_line(line): return off +def check_pdftoppm(pth): + """Check that we can run the provided pdftoppm executable + """ + try: + subprocess.check_output([pth, "-v"], stderr=subprocess.DEVNULL) + except (subprocess.CalledProcessError, FileNotFoundError, PermissionError): + logger.info("pdftoppm not found, using pdfplumber instead (slower)") + return False + return True + + class Cropper(object): def __init__( self, input_file=None, output_file=None, pdftoppm_path="pdftoppm", @@ -55,6 +66,9 @@ class Cropper(object): if not output_file is None: self.output_file = os.path.abspath(output_file) + if pdftoppm_path and not check_pdftoppm(pdftoppm_path): + pdftoppm_path = None + self.pdftoppm_path = pdftoppm_path self.writer = PyPDF2.PdfFileWriter() @@ -116,15 +130,11 @@ class Cropper(object): def get_raw_bbox(self, filename, resolution=72): """Get the basic bounding box of a pdf file""" - # We try to use pdftoppm, but if it's not available or fails, we - # default to pdfplumber. - try: - bbox = self.get_raw_bbox_pdftoppm(filename, resolution=resolution) - except subprocess.CalledProcessError: - bbox = self.get_raw_bbox_pdfplumber( - filename, resolution=resolution - ) - return bbox + if self.pdftoppm_path is None: + box = self.get_raw_bbox_pdfplumber(filename, resolution=resolution) + else: + box = self.get_raw_bbox_pdftoppm(filename, resolution=resolution) + return box def get_raw_bbox_pdfplumber(self, filename, resolution=72): """Get the basic bounding box with pdfplumber""" -- cgit v1.2.3