Add support for a configuration file

author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-12-28 00:25:31 +0000
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-12-28 00:25:31 +0000
commit: 421d8de29d17d9390cae1f56bfc98667158a8096 (patch)
tree: fc1ddf31c5a0b36c43d0fef83402e309c20d409a
parent: Bump version and update changelog and readme (diff)
download: paper2remarkable-421d8de29d17d9390cae1f56bfc98667158a8096.tar.gz
paper2remarkable-421d8de29d17d9390cae1f56bfc98667158a8096.zip
7 files changed, 165 insertions, 79 deletions
diff --git a/config.example.yml b/config.example.yml
new file mode 100644
index 0000000..b50b88e
--- /dev/null
+++ b/config.example.yml
@@ -0,0 +1,23 @@
+---
+core:
+  crop: 'left'          # or: 'none', 'center', 'right'
+  blank: false          # or: true
+  upload: true          # or: false
+  verbose: true         # or: false
+  experimental: true    # or: false
+
+# System settings are all optional, but can be used if executables are not on 
+# the PATH.
+system:
+  gs: /usr/bin/gs
+
+# Settings for styling HTML sources
+html:
+  css: |
+    @page { size: 702px 936px; margin: 1in; }
+    img { display: block; margin: 0 auto; text-align: center; max-width: 70%; }
+    h1,h2,h3 { font-family: 'Montserrat'; }
+    p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; }
+
+  font_urls:
+    - https://fonts.googleapis.com/css2?family=Montserrat&display=swap
diff --git a/docs/man.md b/docs/man.md
index db7d600..a6115a9 100644
--- a/docs/man.md
+++ b/docs/man.md
@@ -30,6 +30,11 @@ Basic options:
       Add a blank page after every page of the PDF document. This can be 
       useful for taking notes on papers.
 
+-C, --config=FILENAME
+      Read options from a configuration file. A YAML file is supported, see 
+      [CONFIGURATION FILE](#configuration-file) for further details. By default the 
+      file at ``~/.p2r.yml`` is used if it exists.
+
 -e, --experimental
       Enable the experimental features of paper2remarkable. See below under 
       [EXPERIMENTAL FEATURES](#experimental-features) for an overview.
@@ -147,6 +152,29 @@ Finally, paper2remarkable supports extracting articles from websites. In this
 case an effort is done to detect the main content of the article and clean up 
 the HTML before sending the file to the reMarkable.
 
+## CONFIGURATION FILE
+
+To avoid having to provide frequently-used command line flags, a configuration 
+file can be created for paper2remarkable. By default it is a YAML file located 
+at ``~/.p2r.yml``, but an alternative location can be provided with the 
+``--config`` option to the script.
+
+The configuration file consists of three sections: ``core``, ``system``, and 
+``html``. In the ``core`` section options for cropping, verbosity, and blank 
+pages can be added, among others. The ``system`` section allows setting paths 
+to executables such as ``rmapi``, ``pdftk``, etc.  Finally, the ``html`` 
+section allows you to provide custom CSS and font urls for formatting the 
+output of web articles.
+
+Options provided on the command line overwrite those in the configuration 
+file. So, for instance, if the configuration file has the setting ``crop: 
+'left'`` in the ``core`` section and the command line flag ``-c`` is provided, 
+the PDF will be centered.
+
+An example file is provided in the repository on 
+[GitHub](https://www.github.com/GjjvdBurg/paper2remarkable), which also 
+contains more information on the available options and their values.
+
 ## EXPERIMENTAL FEATURES
 
 Occassionally, experimental (beta) features will be included in 
diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index 56ffa31..0453c7a 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -39,18 +39,16 @@ class Provider(metaclass=abc.ABCMeta):
         upload=True,
         debug=False,
         experimental=False,
-        center=False,
-        right=False,
+        crop="left",
         blank=False,
-        no_crop=False,
         remarkable_dir="/",
         rmapi_path="rmapi",
         pdftoppm_path="pdftoppm",
         pdftk_path="pdftk",
         qpdf_path="qpdf",
         gs_path="gs",
-        css_path=None,
-        font_urls_path=None,
+        css=None,
+        font_urls=None,
         cookiejar=None,
     ):
         self.upload = upload
@@ -62,8 +60,8 @@ class Provider(metaclass=abc.ABCMeta):
         self.pdftk_path = pdftk_path
         self.qpdf_path = qpdf_path
         self.gs_path = gs_path
-        self.css_path = css_path
-        self.font_urls_path = font_urls_path
+        self.css = css
+        self.font_urls = font_urls
         self.cookiejar = cookiejar
 
         self.informer = Informer()
@@ -79,11 +77,11 @@ class Provider(metaclass=abc.ABCMeta):
 
         # Define the operations to run on the pdf. Providers can add others.
         self.operations = [("rewrite", self.rewrite_pdf)]
-        if center:
+        if crop == "center":
             self.operations.append(("center", self.center_pdf))
-        elif right:
+        elif crop == "right":
             self.operations.append(("right", self.right_pdf))
-        elif not no_crop:
+        elif crop == "left":
             self.operations.append(("crop", self.crop_pdf))
 
         if blank:
diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py
index 48ede10..c22cac4 100644
--- a/paper2remarkable/providers/html.py
+++ b/paper2remarkable/providers/html.py
@@ -13,7 +13,6 @@ Copyright: 2020, G.J.J. van den Burg
 
 import html2text
 import markdown
-import os
 import re
 import readability
 import titlecase
@@ -146,7 +145,8 @@ class HTML(Provider):
 
         # This attempts to fix sites where the image src element points to a
         # placeholder and the data-src attribute contains the url to the actual
-        # image.
+        # image. Note that results may differ between readability and
+        # Readability.JS
         regex = '<img src="(?P<src>.*?)" (?P<rest1>.*) data-src="(?P<datasrc>.*?)" (?P<rest2>.*?)>'
         sub = '<img src="\g<datasrc>" \g<rest1> \g<rest2>>'
 
@@ -174,30 +174,6 @@ class HTML(Provider):
         html_article = md.convert(article)
         return html_article
 
-    def get_css(self):
-        if self.css_path is None:
-            return CSS
-        if not os.path.exists(self.css_path):
-            logger.warning(
-                f"CSS file {self.css_path} doesn't exist, using default style."
-            )
-            return CSS
-        with open(self.css_path, "r") as fp:
-            css = fp.read()
-        return css
-
-    def get_font_urls(self):
-        if self.font_urls_path is None:
-            return FONT_URLS
-        if not os.path.exists(self.font_urls_path):
-            logger.warning(
-                f"Font urls file {self.font_urls_path} doesn't exist, using default."
-            )
-            return FONT_URLS
-        with open(self.font_urls_path, "r") as fp:
-            font_urls = [l.strip() for l in fp.read().split("\n")]
-        return font_urls
-
     def retrieve_pdf(self, pdf_url, filename):
         """Turn the HTML article in a clean pdf file
 
@@ -224,8 +200,8 @@ class HTML(Provider):
                 fp.write(html_article)
 
         html = weasyprint.HTML(string=html_article, url_fetcher=url_fetcher)
-        css = self.get_css()
-        font_urls = self.get_font_urls()
+        css = CSS if self.css is None else self.css
+        font_urls = FONT_URLS if self.font_urls is None else self.font_urls
         style = weasyprint.CSS(string=css)
         html.write_pdf(filename, stylesheets=[style] + font_urls)
 
diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py
index 095b69a..1b95dca 100644
--- a/paper2remarkable/ui.py
+++ b/paper2remarkable/ui.py
@@ -9,7 +9,10 @@ Copyright: 2019, G.J.J. van den Burg
 """
 
 import argparse
+import copy
+import os
 import sys
+import yaml
 
 from . import __version__, GITHUB_URL
 
@@ -49,7 +52,7 @@ def parse_args():
     parser.add_argument(
         "-n",
         "--no-upload",
-        help="don't upload to the reMarkable, save the output in current working dir",
+        help="don't upload to reMarkable, save the output in current directory",
         action="store_true",
     )
     parser.add_argument(
@@ -85,27 +88,27 @@ def parse_args():
         action="append",
     )
     parser.add_argument(
-        "--gs", help="path to gs executable (default: gs)", default="gs"
+        "--gs", help="path to gs executable (default: gs)", default=None
     )
     parser.add_argument(
         "--pdftoppm",
         help="path to pdftoppm executable (default: pdftoppm)",
-        default="pdftoppm",
+        default=None,
     )
     parser.add_argument(
         "--pdftk",
         help="path to pdftk executable (default: pdftk)",
-        default="pdftk",
+        default=None,
     )
     parser.add_argument(
         "--qpdf",
         help="path to qpdf executable (default: qpdf)",
-        default="qpdf",
+        default=None,
     )
     parser.add_argument(
         "--rmapi",
         help="path to rmapi executable (default: rmapi)",
-        default="rmapi",
+        default=None,
     )
     parser.add_argument(
         "--css", help="path to custom CSS file for HTML output", default=None
@@ -116,6 +119,12 @@ def parse_args():
         default=None,
     )
     parser.add_argument(
+        "-C",
+        "--config",
+        help="path to config file (default: ~/.p2r.yml)",
+        default=None,
+    )
+    parser.add_argument(
         "input",
         help="One or more URLs to a paper or paths to local PDF files",
         nargs="+",
@@ -186,6 +195,69 @@ def choose_provider(cli_input):
     return provider, new_input, cookiejar
 
 
+def load_config(path=None):
+    """Load the paper2remarkable configuration file
+
+    Parameters
+    ----------
+    path : str, optional
+        Location of the YAML configuration file. When ``None``, the file
+        ``.p2r.yml`` in the user's home directory is used.
+
+    Returns
+    -------
+    config : dict
+        Dictionary that always has the ``core``, ``system``, and ``html``
+        keys. Sections that are absent from the file, or left empty in it,
+        are empty dictionaries. All sections are empty when the file does not
+        exist or contains no document.
+
+    Raises
+    ------
+    ValueError
+        If the top level of the file is not a mapping.
+
+    """
+    config = {"core": {}, "system": {}, "html": {}}
+    if path is None:
+        path = os.path.join(os.path.expanduser("~"), ".p2r.yml")
+    if not os.path.exists(path):
+        return config
+    with open(path, "r") as fp:
+        loaded = yaml.safe_load(fp)
+    # An empty file is parsed as None by yaml.safe_load
+    if loaded is None:
+        return config
+    if not isinstance(loaded, dict):
+        raise ValueError(f"Configuration file {path} must contain a mapping.")
+    # A section header without content (e.g. a bare "system:") is parsed as
+    # None; keep the empty default for it so later key lookups don't fail.
+    config.update({k: v for k, v in loaded.items() if v is not None})
+    return config
+
+
+def merge_options(config, args):
+    """Merge the command line arguments into the configuration
+
+    Command line arguments take precedence over the configuration file. A
+    value from the configuration is used only when the corresponding flag is
+    not given on the command line, and a built-in default is used when
+    neither of the two provides a value.
+
+    Parameters
+    ----------
+    config : dict or None
+        Configuration dictionary with (optionally) the sections ``core``,
+        ``system``, and ``html``. Sections that are missing or ``None`` are
+        treated as empty. This dictionary is not modified.
+
+    args : argparse.Namespace
+        Parsed command line arguments.
+
+    Returns
+    -------
+    opts : dict
+        New dictionary with the sections ``core`` (keys ``blank``,
+        ``verbose``, ``upload``, ``experimental``, ``crop``), ``system`` (keys
+        ``gs``, ``pdftoppm``, ``pdftk``, ``qpdf``, ``rmapi``), and ``html``
+        (keys ``css`` and ``font_urls``, which are ``None`` when neither the
+        command line nor the configuration supplies them).
+
+    """
+    # Work on a copy so the caller's configuration is left untouched
+    opts = copy.deepcopy(config) if config else {}
+    for section in ("core", "system", "html"):
+        if opts.get(section) is None:
+            opts[section] = {}
+    core, system, html = opts["core"], opts["system"], opts["html"]
+
+    def set_bool(d, key, flag, invert=False):
+        # ``flag`` comes from a store_true argument, so False means "not
+        # given" and must not overwrite a value from the configuration. With
+        # ``invert`` the flag switches the option off (e.g. --no-upload).
+        if flag:
+            d[key] = not invert
+        elif key not in d:
+            d[key] = invert
+
+    def set_path(d, key, value):
+        if value is not None:
+            d[key] = value
+        elif key not in d:
+            # Default to the bare executable name
+            d[key] = key
+
+    set_bool(core, "blank", args.blank)
+    set_bool(core, "verbose", args.verbose)
+    set_bool(core, "upload", args.no_upload, invert=True)
+    set_bool(core, "experimental", args.experimental)
+
+    if args.center:
+        core["crop"] = "center"
+    elif args.right:
+        core["crop"] = "right"
+    elif args.no_crop:
+        core["crop"] = "none"
+    elif "crop" not in core:
+        core["crop"] = "left"
+
+    for name in ("gs", "pdftoppm", "pdftk", "qpdf", "rmapi"):
+        set_path(system, name, getattr(args, name))
+
+    # For the HTML options the command line supplies a path to a file, the
+    # configuration supplies the content itself. The configuration value is
+    # kept when no (existing) file is given on the command line.
+    if args.css and os.path.exists(args.css):
+        with open(args.css, "r") as fp:
+            html["css"] = fp.read()
+    elif "css" not in html:
+        html["css"] = None
+
+    if args.font_urls and os.path.exists(args.font_urls):
+        with open(args.font_urls, "r") as fp:
+            urls = [line.strip() for line in fp]
+        # Skip blank lines (such as a trailing newline) in the urls file
+        html["font_urls"] = [url for url in urls if url]
+    elif "font_urls" not in html:
+        html["font_urls"] = None
+
+    return opts
+
+
 def set_excepthook(debug):
     sys_hook = sys.excepthook
 
@@ -216,6 +288,9 @@ def main():
             "When providing --filename and multiple inputs, their number must match."
         )
 
+    config = load_config(path=args.config)
+    options = merge_options(config, args)
+
     filenames = (
         [None] * len(args.input) if not args.filename else args.filename
     )
@@ -223,22 +298,20 @@ def main():
     for cli_input, filename in zip(args.input, filenames):
         provider, new_input, cookiejar = choose_provider(cli_input)
         prov = provider(
-            verbose=args.verbose,
-            upload=not args.no_upload,
+            verbose=options["core"]["verbose"],
+            upload=options["core"]["upload"],
             debug=args.debug,
-            experimental=args.experimental,
-            center=args.center,
-            right=args.right,
-            blank=args.blank,
-            no_crop=args.no_crop,
+            experimental=options["core"]["experimental"],
+            crop=options["core"]["crop"],
+            blank=options["core"]["blank"],
             remarkable_dir=args.remarkable_dir,
-            rmapi_path=args.rmapi,
-            pdftoppm_path=args.pdftoppm,
-            pdftk_path=args.pdftk,
-            qpdf_path=args.qpdf,
-            gs_path=args.gs,
-            css_path=args.css,
-            font_urls_path=args.font_urls,
+            rmapi_path=options["system"]["rmapi"],
+            pdftoppm_path=options["system"]["pdftoppm"],
+            pdftk_path=options["system"]["pdftk"],
+            qpdf_path=options["system"]["qpdf"],
+            gs_path=options["system"]["gs"],
+            css=options["html"]["css"],
+            font_urls=options["html"]["font_urls"],
             cookiejar=cookiejar,
         )
         prov.run(new_input, filename=filename)
diff --git a/setup.py b/setup.py
index e79448c..e529cc2 100644
--- a/setup.py
+++ b/setup.py
@@ -19,17 +19,18 @@ VERSION = None
 
 # What packages are required for this module to be executed?
 REQUIRED = [
+    "PyPDF2>=1.26",
     "beautifulsoup4>=4.8",
-    "requests>=2.21",
+    "html2text>=2020.1.16",
+    "markdown>=3.1.1",
     "pdfplumber>=0.5",
-    "unidecode>=1.1",
-    "titlecase>=0.12",
-    "PyPDF2>=1.26",
-    "regex>=2018.11",
+    "pyyaml>=5.1",
     "readability-lxml>=0.7.1",
-    "html2text>=2020.1.16",
+    "regex>=2018.11",
+    "requests>=2.21",
+    "titlecase>=0.12",
+    "unidecode>=1.1",
     "weasyprint>=51",
-    "markdown>=3.1.1",
 ]
 
 full_require = ["readabilipy"]
diff --git a/tests/test_html.py b/tests/test_html.py
index 7d5c92b..41f6b83 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -9,7 +9,6 @@ This file is part of paper2remarkable.
 
 import os
 import pdfplumber
-import tempfile
 import unittest
 
 from paper2remarkable.providers.html import HTML
@@ -39,24 +38,12 @@ class TestHTML(unittest.TestCase):
             "https://fonts.googleapis.com/css2?family=Montserrat&display=swap"
         ]
 
-        tmpfd, tempfname_css = tempfile.mkstemp(prefix="p2r_", suffix=".css")
-        with os.fdopen(tmpfd, "w") as fp:
-            fp.write(test_css)
-
-        tmpfd, tempfname_urls = tempfile.mkstemp(prefix="p2r_", suffix=".txt")
-        with os.fdopen(tmpfd, "w") as fp:
-            fp.write("\n".join(test_font_urls))
-
         url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines"
-        prov = HTML(
-            upload=False, css_path=tempfname_css, font_urls_path=tempfname_urls
-        )
+        prov = HTML(upload=False, css=test_css, font_urls=test_font_urls)
         filename = prov.run(url)
         with pdfplumber.open(filename) as pdf:
             self.assertEqual(8, len(pdf.pages))
 
-        os.unlink(tempfname_css)
-        os.unlink(tempfname_urls)
         os.unlink(filename)
author	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2020-12-28 00:25:31 +0000
committer	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2020-12-28 00:25:31 +0000
commit	421d8de29d17d9390cae1f56bfc98667158a8096 (patch)
tree	fc1ddf31c5a0b36c43d0fef83402e309c20d409a
parent	Bump version and update changelog and readme (diff)
download	paper2remarkable-421d8de29d17d9390cae1f56bfc98667158a8096.tar.gz paper2remarkable-421d8de29d17d9390cae1f56bfc98667158a8096.zip