diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-10-24 23:39:03 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-10-24 23:39:03 +0100 |
| commit | b0b3b177dd2ee5555fb5a6a68c529d5673df83bb (patch) | |
| tree | 85cfc27182c35b803c7e8c0e374026f44ba58106 | |
| parent | Remove old script as it's no longer needed (diff) | |
| download | paper2remarkable-b0b3b177dd2ee5555fb5a6a68c529d5673df83bb.tar.gz paper2remarkable-b0b3b177dd2ee5555fb5a6a68c529d5673df83bb.zip | |
Switch to a simple logger singleton
The logging module caused problems because one
of the pdf packages is built on a package
that makes extensive use of the info level of
the logging module, and this seemed like the
easiest solution.
| -rw-r--r-- | paper2remarkable/log.py | 56 | ||||
| -rw-r--r-- | paper2remarkable/pdf_ops.py | 21 | ||||
| -rw-r--r-- | paper2remarkable/providers/_base.py | 9 | ||||
| -rw-r--r-- | paper2remarkable/providers/_info.py | 10 | ||||
| -rw-r--r-- | paper2remarkable/providers/acm.py | 5 | ||||
| -rw-r--r-- | paper2remarkable/providers/arxiv.py | 6 | ||||
| -rw-r--r-- | paper2remarkable/providers/pdf_url.py | 1 | ||||
| -rw-r--r-- | paper2remarkable/utils.py | 15 |
8 files changed, 95 insertions, 28 deletions
diff --git a/paper2remarkable/log.py b/paper2remarkable/log.py new file mode 100644 index 0000000..bae1cbf --- /dev/null +++ b/paper2remarkable/log.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + +"""Just a simple logger + +Author: G.J.J. van den Burg +License: See LICENSE file. +Copyright: 2019, G.J.J. van den Burg + +""" + +# NOTE: I know about the logging module, but this was easier because one of the +# dependencies was using that and it became complicated. This one is obviously +# not thread-safe and is very simple. + +import datetime +import sys + + +class Singleton(type): + # https://stackoverflow.com/q/6760685 + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super(Singleton, cls).__call__( + *args, **kwargs + ) + return cls._instances[cls] + + +class Logger(metaclass=Singleton): + def __init__(self): + self.enabled = True + + def enable(self): + self.enabled = True + + def disable(self): + self.enabled = False + + def _log(self, msg, mode): + if not self.enabled: + return + if not mode in ("info", "warn"): + raise ValueError("Unknown logging mode: %s" % mode) + file = sys.stdout if mode == "info" else sys.stderr + now = datetime.datetime.now() + nowstr = now.strftime("%Y-%m-%d %H:%M:%S") + print("%s - %s - %s" % (nowstr, mode.upper(), msg), file=file) + file.flush() + + def info(self, msg): + self._log(msg, "info") + + def warning(self, msg): + self._log(msg, "warn") diff --git a/paper2remarkable/pdf_ops.py b/paper2remarkable/pdf_ops.py index d1eae40..8636017 100644 --- a/paper2remarkable/pdf_ops.py +++ b/paper2remarkable/pdf_ops.py @@ -10,27 +10,28 @@ Copyright: 2019, The Alan Turing Institute import PyPDF2 -import logging import os import subprocess from .crop import Cropper +from .log import Logger +logger = Logger() def crop_pdf(filepath, pdfcrop_path="pdfcrop"): """Crop the pdf file using Cropper """ - logging.info("Cropping pdf file") + logger.info("Cropping pdf file") cropped_file = 
os.path.splitext(filepath)[0] + "-crop.pdf" cropper = Cropper(filepath, cropped_file, pdfcrop_path=pdfcrop_path) status = cropper.crop(margins=15) if not status == 0: - logging.warning("Failed to crop the pdf file at: %s" % filepath) + logger.warning("Failed to crop the pdf file at: %s" % filepath) return filepath if not os.path.exists(cropped_file): - logging.warning( + logger.warning( "Can't find cropped file '%s' where expected." % cropped_file ) return filepath @@ -40,17 +41,17 @@ def crop_pdf(filepath, pdfcrop_path="pdfcrop"): def center_pdf(filepath, pdfcrop_path="pdfcrop"): """Center the pdf file on the reMarkable """ - logging.info("Centering pdf file") + logger.info("Centering pdf file") centered_file = os.path.splitext(filepath)[0] + "-center.pdf" cropper = Cropper(filepath, centered_file, pdfcrop_path=pdfcrop_path) status = cropper.center() if not status == 0: - logging.warning("Failed to center the pdf file at: %s" % filepath) + logger.warning("Failed to center the pdf file at: %s" % filepath) return filepath if not os.path.exists(centered_file): - logging.warning( + logger.warning( "Can't find centered file '%s' where expected." 
% centered_file ) return filepath @@ -60,7 +61,7 @@ def center_pdf(filepath, pdfcrop_path="pdfcrop"): def blank_pdf(filepath): """Add blank pages to PDF """ - logging.info("Adding blank pages") + logger.info("Adding blank pages") input_pdf = PyPDF2.PdfFileReader(filepath) output_pdf = PyPDF2.PdfFileWriter() for page in input_pdf.pages: @@ -76,7 +77,7 @@ def blank_pdf(filepath): def shrink_pdf(filepath, gs_path="gs"): """Shrink the PDF file size using Ghostscript """ - logging.info("Shrinking pdf file") + logger.info("Shrinking pdf file") output_file = os.path.splitext(filepath)[0] + "-shrink.pdf" status = subprocess.call( [ @@ -92,6 +93,6 @@ def shrink_pdf(filepath, gs_path="gs"): ] ) if not status == 0: - logging.warning("Failed to shrink the pdf file") + logger.warning("Failed to shrink the pdf file") return filepath return output_file diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index ca6ab70..5432d48 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -9,7 +9,6 @@ Copyright: 2019, G.J.J. van den Burg """ import abc -import logging import os import shutil import tempfile @@ -17,6 +16,9 @@ import tempfile from ._info import Informer from ..pdf_ops import crop_pdf, center_pdf, blank_pdf, shrink_pdf from ..utils import assert_file_is_pdf, download_url, upload_to_remarkable +from ..log import Logger + +logger = Logger() class Provider(metaclass=abc.ABCMeta): @@ -45,9 +47,8 @@ class Provider(metaclass=abc.ABCMeta): self.informer = Informer() # disable logging if requested - logging.basicConfig(level=logging.INFO) if not verbose: - logging.disable() + logger.disable() # Define the operations to run on the pdf. Providers can add others. 
self.operations = [("crop", self.crop_pdf)] @@ -58,7 +59,7 @@ class Provider(metaclass=abc.ABCMeta): self.operations.append(("blank", blank_pdf)) self.operations.append(("shrink", self.shrink_pdf)) - logging.info("Starting %s" % type(self).__name__) + logger.info("Starting %s" % type(self).__name__) @staticmethod @abc.abstractmethod diff --git a/paper2remarkable/providers/_info.py b/paper2remarkable/providers/_info.py index 04efcb1..9130e34 100644 --- a/paper2remarkable/providers/_info.py +++ b/paper2remarkable/providers/_info.py @@ -3,12 +3,14 @@ """Functionality for retrieving paper info """ -import logging import titlecase import unidecode import bs4 from ..utils import clean_string, get_page_with_retry +from ..log import Logger + +logger = Logger() class Informer: @@ -38,7 +40,7 @@ class Informer: The provided url must be to a HTMl page where this information can be found, not to the PDF file itself. """ - logging.info("Generating output filename") + logger.info("Generating output filename") # Retrieve the paper information self.get_info(abs_url) @@ -59,11 +61,11 @@ class Informer: name = authors + "_-_" + title + "_" + year + ".pdf" name = unidecode.unidecode(name) - logging.info("Created filename: %s" % name) + logger.info("Created filename: %s" % name) return name def get_info(self, url): - logging.info("Getting paper info") + logger.info("Getting paper info") page = get_page_with_retry(url) soup = bs4.BeautifulSoup(page, "html.parser") self.authors = self.authors or self.get_authors(soup) diff --git a/paper2remarkable/providers/acm.py b/paper2remarkable/providers/acm.py index e14efa7..a0d79bd 100644 --- a/paper2remarkable/providers/acm.py +++ b/paper2remarkable/providers/acm.py @@ -15,6 +15,9 @@ from ._base import Provider from ._info import Informer from .. 
import GITHUB_URL from ..utils import exception, get_page_with_retry +from ..log import Logger + +logger = Logger() class ACMInformer(Informer): @@ -26,7 +29,7 @@ class ACMInformer(Informer): def _format_year(self, soup_date): if not re.match("\d{2}/\d{2}/\d{4}", soup_date.strip()): - self.warn( + logger.warning( "Couldn't extract year from ACM page, please raise an " "issue on GitHub so it can be fixed: %s" % GITHUB_URL ) diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py index d950e47..e022658 100644 --- a/paper2remarkable/providers/arxiv.py +++ b/paper2remarkable/providers/arxiv.py @@ -11,11 +11,13 @@ Copyright: 2019, G.J.J. van den Burg import os import re import subprocess -import logging from ._info import Informer from ._base import Provider from ..utils import exception +from ..log import Logger + +logger = Logger() class ArxivInformer(Informer): @@ -52,7 +54,7 @@ class Arxiv(Provider): def dearxiv(self, input_file): """Remove the arXiv timestamp from a pdf""" - logging.info("Removing arXiv timestamp") + logger.info("Removing arXiv timestamp") basename = os.path.splitext(input_file)[0] uncompress_file = basename + "_uncompress.pdf" diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py index f28c742..dfc8646 100644 --- a/paper2remarkable/providers/pdf_url.py +++ b/paper2remarkable/providers/pdf_url.py @@ -12,7 +12,6 @@ import urllib from ._base import Provider from ._info import Informer - from ..utils import exception class PdfUrlInformer(Informer): diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py index 2bed231..d80c954 100644 --- a/paper2remarkable/utils.py +++ b/paper2remarkable/utils.py @@ -9,7 +9,6 @@ Copyright: 2019, G.J.J. van den Burg """ import PyPDF2 -import logging import requests import string import subprocess @@ -18,6 +17,7 @@ import time import unidecode from . 
import GITHUB_URL +from .log import Logger HEADERS = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " @@ -26,6 +26,8 @@ HEADERS = { } +logger = Logger() + def exception(msg): print("ERROR: " + msg, file=sys.stderr) print("Error occurred. Exiting.", file=sys.stderr) @@ -37,6 +39,7 @@ def exception(msg): raise SystemExit(1) + def clean_string(s): """ Clean a string by replacing accented characters with equivalents and keeping only the allowed characters (ascii letters, digits, underscore, @@ -64,7 +67,7 @@ def assert_file_is_pdf(filename): def download_url(url, filename): """Download the content of an url and save it to a filename """ - logging.info("Downloading file at url: %s" % url) + logger.info("Downloading file at url: %s" % url) content = get_page_with_retry(url) with open(filename, "wb") as fid: fid.write(content) @@ -80,18 +83,18 @@ def get_page_with_retry(url, tries=5): except requests.exceptions.ConnectionError: error = True if error or not res.ok: - logging.warning( + logger.warning( "(%i/%i) Error getting url %s. Retrying in 5 seconds." % (count, tries, url) ) time.sleep(5) continue - logging.info("Downloading url: %s" % url) + logger.info("Downloading url: %s" % url) return res.content def upload_to_remarkable(filepath, remarkable_dir="/", rmapi_path="rmapi"): - logging.info("Starting upload to reMarkable") + logger.info("Starting upload to reMarkable") # Create the reMarkable dir if it doesn't exist remarkable_dir = remarkable_dir.rstrip("/") @@ -112,4 +115,4 @@ def upload_to_remarkable(filepath, remarkable_dir="/", rmapi_path="rmapi"): ) if not status == 0: exception("Uploading file %s to reMarkable failed" % filepath) - logging.info("Upload successful.") + logger.info("Upload successful.") |
