diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-12-28 00:25:31 +0000 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-12-28 00:25:31 +0000 |
| commit | 421d8de29d17d9390cae1f56bfc98667158a8096 (patch) | |
| tree | fc1ddf31c5a0b36c43d0fef83402e309c20d409a | |
| parent | Bump version and update changelog and readme (diff) | |
| download | paper2remarkable-421d8de29d17d9390cae1f56bfc98667158a8096.tar.gz paper2remarkable-421d8de29d17d9390cae1f56bfc98667158a8096.zip | |
Add support for a configuration file
| -rw-r--r-- | config.example.yml | 23 | ||||
| -rw-r--r-- | docs/man.md | 28 | ||||
| -rw-r--r-- | paper2remarkable/providers/_base.py | 18 | ||||
| -rw-r--r-- | paper2remarkable/providers/html.py | 32 | ||||
| -rw-r--r-- | paper2remarkable/ui.py | 113 | ||||
| -rw-r--r-- | setup.py | 15 | ||||
| -rw-r--r-- | tests/test_html.py | 15 |
7 files changed, 165 insertions, 79 deletions
diff --git a/config.example.yml b/config.example.yml new file mode 100644 index 0000000..b50b88e --- /dev/null +++ b/config.example.yml @@ -0,0 +1,23 @@ +--- +core: + crop: 'left' # or: 'none', 'center', 'right' + blank: false # or: true + upload: true # or: false + verbose: true # or: false + experimental: true # or: false + +# System settings are all optional, but can be used if executables are not on +# the PATH. +system: + gs: /usr/bin/gs + +# Settings for styling HTML sources +html: + css: | + @page { size: 702px 936px; margin: 1in; } + img { display: block; margin: 0 auto; text-align: center; max-width: 70%; } + h1,h2,h3 { font-family: 'Montserrat'; } + p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; } + + font_urls: + - https://fonts.googleapis.com/css2?family=Montserrat&display=swap diff --git a/docs/man.md b/docs/man.md index db7d600..a6115a9 100644 --- a/docs/man.md +++ b/docs/man.md @@ -30,6 +30,11 @@ Basic options: Add a blank page after every page of the PDF document. This can be useful for taking notes on papers. +-C, --config=FILENAME + Read options from a configuration file. A YAML file is supported, see + [CONFIGURATION FILE](#configuration-file) for further details. By default the + file at ``~/.p2r.yml`` is used if it exists. + -e, --experimental Enable the experimental features of paper2remarkable. See below under [EXPERIMENTAL FEATURES](#experimental-features) for an overview. @@ -147,6 +152,29 @@ Finally, paper2remarkable supports extracting articles from websites. In this case an effort is done to detect the main content of the article and clean up the HTML before sending the file to the reMarkable. +## CONFIGURATION FILE + +To avoid having to provide frequently-used command line flags, a configuration +file can be created for paper2remarkable. By default it is a YAML file located +at ``~/.p2r.yml``, but an alternative location can be provided with the +``--config`` option to the script. 
+ +The configuration file consists of three sections: ``core``, ``system``, and +``html``. In the ``core`` section options for cropping, verbosity, and blank +pages can be added, among others. The ``system`` section allows setting paths +to executables such as ``rmapi``, ``pdftk``, etc. Finally, the ``html`` +section allows you to provide custom CSS and font urls for formatting the +output of web articles. + +Options provided on the command line overwrite those in the configuration +file. So, for instance, if the configuration file has the setting ``crop: +'left'`` in the ``core`` section and the command line flag ``-c`` is provided, +the PDF will be centered. + +An example file is provided in the repository on +[GitHub](https://www.github.com/GjjvdBurg/paper2remarkable), which also +contains more information on the available options and their values. + ## EXPERIMENTAL FEATURES Occasionally, experimental (beta) features will be included in diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index 56ffa31..0453c7a 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -39,18 +39,16 @@ class Provider(metaclass=abc.ABCMeta): upload=True, debug=False, experimental=False, - center=False, - right=False, + crop="left", blank=False, - no_crop=False, remarkable_dir="/", rmapi_path="rmapi", pdftoppm_path="pdftoppm", pdftk_path="pdftk", qpdf_path="qpdf", gs_path="gs", - css_path=None, - font_urls_path=None, + css=None, + font_urls=None, cookiejar=None, ): self.upload = upload @@ -62,8 +60,8 @@ class Provider(metaclass=abc.ABCMeta): self.pdftk_path = pdftk_path self.qpdf_path = qpdf_path self.gs_path = gs_path - self.css_path = css_path - self.font_urls_path = font_urls_path + self.css = css + self.font_urls = font_urls self.cookiejar = cookiejar self.informer = Informer() @@ -79,11 +77,11 @@ class Provider(metaclass=abc.ABCMeta): # Define the operations to run on the pdf. Providers can add others. 
self.operations = [("rewrite", self.rewrite_pdf)] - if center: + if crop == "center": self.operations.append(("center", self.center_pdf)) - elif right: + elif crop == "right": self.operations.append(("right", self.right_pdf)) - elif not no_crop: + elif crop == "left": self.operations.append(("crop", self.crop_pdf)) if blank: diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py index 48ede10..c22cac4 100644 --- a/paper2remarkable/providers/html.py +++ b/paper2remarkable/providers/html.py @@ -13,7 +13,6 @@ Copyright: 2020, G.J.J. van den Burg import html2text import markdown -import os import re import readability import titlecase @@ -146,7 +145,8 @@ class HTML(Provider): # This attempts to fix sites where the image src element points to a # placeholder and the data-src attribute contains the url to the actual - # image. + # image. Note that results may differ between readability and + # Readability.JS regex = '<img src="(?P<src>.*?)" (?P<rest1>.*) data-src="(?P<datasrc>.*?)" (?P<rest2>.*?)>' sub = '<img src="\g<datasrc>" \g<rest1> \g<rest2>>' @@ -174,30 +174,6 @@ class HTML(Provider): html_article = md.convert(article) return html_article - def get_css(self): - if self.css_path is None: - return CSS - if not os.path.exists(self.css_path): - logger.warning( - f"CSS file {self.css_path} doesn't exist, using default style." - ) - return CSS - with open(self.css_path, "r") as fp: - css = fp.read() - return css - - def get_font_urls(self): - if self.font_urls_path is None: - return FONT_URLS - if not os.path.exists(self.font_urls_path): - logger.warning( - f"Font urls file {self.font_urls_path} doesn't exist, using default." 
- ) - return FONT_URLS - with open(self.font_urls_path, "r") as fp: - font_urls = [l.strip() for l in fp.read().split("\n")] - return font_urls - def retrieve_pdf(self, pdf_url, filename): """Turn the HTML article in a clean pdf file @@ -224,8 +200,8 @@ class HTML(Provider): fp.write(html_article) html = weasyprint.HTML(string=html_article, url_fetcher=url_fetcher) - css = self.get_css() - font_urls = self.get_font_urls() + css = CSS if self.css is None else self.css + font_urls = FONT_URLS if self.font_urls is None else self.font_urls style = weasyprint.CSS(string=css) html.write_pdf(filename, stylesheets=[style] + font_urls) diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py index 095b69a..1b95dca 100644 --- a/paper2remarkable/ui.py +++ b/paper2remarkable/ui.py @@ -9,7 +9,10 @@ Copyright: 2019, G.J.J. van den Burg """ import argparse +import copy +import os import sys +import yaml from . import __version__, GITHUB_URL @@ -49,7 +52,7 @@ def parse_args(): parser.add_argument( "-n", "--no-upload", - help="don't upload to the reMarkable, save the output in current working dir", + help="don't upload to reMarkable, save the output in current directory", action="store_true", ) parser.add_argument( @@ -85,27 +88,27 @@ def parse_args(): action="append", ) parser.add_argument( - "--gs", help="path to gs executable (default: gs)", default="gs" + "--gs", help="path to gs executable (default: gs)", default=None ) parser.add_argument( "--pdftoppm", help="path to pdftoppm executable (default: pdftoppm)", - default="pdftoppm", + default=None, ) parser.add_argument( "--pdftk", help="path to pdftk executable (default: pdftk)", - default="pdftk", + default=None, ) parser.add_argument( "--qpdf", help="path to qpdf executable (default: qpdf)", - default="qpdf", + default=None, ) parser.add_argument( "--rmapi", help="path to rmapi executable (default: rmapi)", - default="rmapi", + default=None, ) parser.add_argument( "--css", help="path to custom CSS file for HTML output", 
default=None @@ -116,6 +119,12 @@ def parse_args(): default=None, ) parser.add_argument( + "-C", + "--config", + help="path to config file (default: ~/.p2r.yml)", + default=None, + ) + parser.add_argument( "input", help="One or more URLs to a paper or paths to local PDF files", nargs="+", @@ -186,6 +195,69 @@ def choose_provider(cli_input): return provider, new_input, cookiejar +def load_config(path=None): + if path is None: + path = os.path.join(os.path.expanduser("~"), ".p2r.yml") + if not os.path.exists(path): + return {"core": {}, "system": {}, "html": {}} + with open(path, "r") as fp: + config = yaml.safe_load(fp) + return config + + +def merge_options(config, args): + # command line arguments always overwrite config + opts = copy.deepcopy(config) + + def set_bool(d, key, value): + if value: + d[key] = True + elif not key in d: + d[key] = False + + def set_path(d, key, value): + if not value is None: + d[key] = value + elif not key in d: + d[key] = key + + set_bool(opts["core"], "blank", args.blank) + set_bool(opts["core"], "verbose", args.verbose) + set_bool(opts["core"], "upload", not args.no_upload) + set_bool(opts["core"], "experimental", args.experimental) + + if args.center: + opts["core"]["crop"] = "center" + elif args.right: + opts["core"]["crop"] = "right" + elif args.no_crop: + opts["core"]["crop"] = "none" + elif not "crop" in opts["core"]: + opts["core"]["crop"] = "left" + + set_path(opts["system"], "gs", args.gs) + set_path(opts["system"], "pdftoppm", args.pdftoppm) + set_path(opts["system"], "pdftk", args.pdftk) + set_path(opts["system"], "qpdf", args.qpdf) + set_path(opts["system"], "rmapi", args.rmapi) + + if args.css and os.path.exists(args.css): + with open(args.css, "r") as fp: + contents = fp.read() + opts["html"]["css"] = contents + else: + opts["html"]["css"] = None + + if args.font_urls and os.path.exists(args.font_urls): + with open(args.font_urls, "r") as fp: + urls = [l.strip() for l in fp.readlines()] + opts["html"]["font_urls"] = 
urls + else: + opts["html"]["font_urls"] = None + + return opts + + def set_excepthook(debug): sys_hook = sys.excepthook @@ -216,6 +288,9 @@ def main(): "When providing --filename and multiple inputs, their number must match." ) + config = load_config(path=args.config) + options = merge_options(config, args) + filenames = ( [None] * len(args.input) if not args.filename else args.filename ) @@ -223,22 +298,20 @@ def main(): for cli_input, filename in zip(args.input, filenames): provider, new_input, cookiejar = choose_provider(cli_input) prov = provider( - verbose=args.verbose, - upload=not args.no_upload, + verbose=options["core"]["verbose"], + upload=options["core"]["upload"], debug=args.debug, - experimental=args.experimental, - center=args.center, - right=args.right, - blank=args.blank, - no_crop=args.no_crop, + experimental=options["core"]["experimental"], + crop=options["core"]["crop"], + blank=options["core"]["blank"], remarkable_dir=args.remarkable_dir, - rmapi_path=args.rmapi, - pdftoppm_path=args.pdftoppm, - pdftk_path=args.pdftk, - qpdf_path=args.qpdf, - gs_path=args.gs, - css_path=args.css, - font_urls_path=args.font_urls, + rmapi_path=options["system"]["rmapi"], + pdftoppm_path=options["system"]["pdftoppm"], + pdftk_path=options["system"]["pdftk"], + qpdf_path=options["system"]["qpdf"], + gs_path=options["system"]["gs"], + css=options["html"]["css"], + font_urls=options["html"]["font_urls"], cookiejar=cookiejar, ) prov.run(new_input, filename=filename) @@ -19,17 +19,18 @@ VERSION = None # What packages are required for this module to be executed? 
REQUIRED = [ + "PyPDF2>=1.26", "beautifulsoup4>=4.8", - "requests>=2.21", + "html2text>=2020.1.16", + "markdown>=3.1.1", "pdfplumber>=0.5", - "unidecode>=1.1", - "titlecase>=0.12", - "PyPDF2>=1.26", - "regex>=2018.11", + "pyyaml>=5.1", "readability-lxml>=0.7.1", - "html2text>=2020.1.16", + "regex>=2018.11", + "requests>=2.21", + "titlecase>=0.12", + "unidecode>=1.1", "weasyprint>=51", - "markdown>=3.1.1", ] full_require = ["readabilipy"] diff --git a/tests/test_html.py b/tests/test_html.py index 7d5c92b..41f6b83 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -9,7 +9,6 @@ This file is part of paper2remarkable. import os import pdfplumber -import tempfile import unittest from paper2remarkable.providers.html import HTML @@ -39,24 +38,12 @@ class TestHTML(unittest.TestCase): "https://fonts.googleapis.com/css2?family=Montserrat&display=swap" ] - tmpfd, tempfname_css = tempfile.mkstemp(prefix="p2r_", suffix=".css") - with os.fdopen(tmpfd, "w") as fp: - fp.write(test_css) - - tmpfd, tempfname_urls = tempfile.mkstemp(prefix="p2r_", suffix=".txt") - with os.fdopen(tmpfd, "w") as fp: - fp.write("\n".join(test_font_urls)) - url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines" - prov = HTML( - upload=False, css_path=tempfname_css, font_urls_path=tempfname_urls - ) + prov = HTML(upload=False, css=test_css, font_urls=test_font_urls) filename = prov.run(url) with pdfplumber.open(filename) as pdf: self.assertEqual(8, len(pdf.pages)) - os.unlink(tempfname_css) - os.unlink(tempfname_urls) os.unlink(filename) |
