about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-12-27 13:47:40 +0000
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-12-27 13:47:40 +0000
commit: 948d314b47be221f7694a793c964d4728212c33c (patch)
tree: 6135afbcab009f1f6b22b4bcfec293ff07a25827
parent: Merge branch 'savagej-patch-1' (diff)
download: paper2remarkable-948d314b47be221f7694a793c964d4728212c33c.tar.gz
download: paper2remarkable-948d314b47be221f7694a793c964d4728212c33c.zip
Add support for custom styling of HTML output
-rw-r--r-- docs/man.md 12
-rw-r--r-- paper2remarkable/providers/_base.py 7
-rw-r--r-- paper2remarkable/providers/html.py 42
-rw-r--r-- paper2remarkable/ui.py 10
-rw-r--r-- tests/test_html.py 35
5 files changed, 99 insertions, 7 deletions
diff --git a/docs/man.md b/docs/man.md
index 132d896..db7d600 100644
--- a/docs/man.md
+++ b/docs/man.md
@@ -71,6 +71,18 @@ reMarkable options:
If the target directory does not exist it will be created. If not
specified, the root directory will be used.
+Output customization:
+
+--css=FILENAME
+ Path to a CSS file with custom styling for the HTML output. This option
+ is ignored for any of the other providers. The code for the HTML
+ provider contains the default CSS style, which can be used as a starting
+ point.
+
+--font-urls=FILENAME
+ Path to a file with font urls (one per line) for the HTML output. This
+ will generally be used in combination with the ``--css`` option.
+
System settings:
You'll only need to specify these options if the programs are not available on
diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index a664f23..56ffa31 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -49,6 +49,8 @@ class Provider(metaclass=abc.ABCMeta):
pdftk_path="pdftk",
qpdf_path="qpdf",
gs_path="gs",
+ css_path=None,
+ font_urls_path=None,
cookiejar=None,
):
self.upload = upload
@@ -60,9 +62,12 @@ class Provider(metaclass=abc.ABCMeta):
self.pdftk_path = pdftk_path
self.qpdf_path = qpdf_path
self.gs_path = gs_path
- self.informer = Informer()
+ self.css_path = css_path
+ self.font_urls_path = font_urls_path
self.cookiejar = cookiejar
+ self.informer = Informer()
+
self.pdftool = check_pdftool(self.pdftk_path, self.qpdf_path)
# wait time to not hit the server too frequently
diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py
index 3e32539..48ede10 100644
--- a/paper2remarkable/providers/html.py
+++ b/paper2remarkable/providers/html.py
@@ -13,13 +13,13 @@ Copyright: 2020, G.J.J. van den Burg
import html2text
import markdown
+import os
import re
import readability
import titlecase
import unidecode
import urllib
import weasyprint
-import weasyprint.fonts
from ._base import Provider
from ._info import Informer
@@ -34,7 +34,6 @@ from ..log import Logger
logger = Logger()
CSS = """
-@import url('https://fonts.googleapis.com/css?family=EB+Garamond|Noto+Serif|Inconsolata&display=swap');
@page { size: 702px 936px; margin: 1in; }
a { color: black; }
img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; }
@@ -48,6 +47,13 @@ pre { font-family: 'Inconsolata'; padding-left: 2.5%; background: #efefef; }
code { font-family: 'Inconsolata'; font-size: .7rem; background: #efefef; }
"""
+# NOTE: For some reason, Weasyprint no longer accepts the @import statement in
+# the CSS to load the fonts. This may have to do with recent changes they've
+# introduced. Providing the font urls separately does seem to work.
+FONT_URLS = [
+ "https://fonts.googleapis.com/css2?family=EB+Garamond&family=Noto+Serif&family=Inconsolata"
+]
+
def url_fetcher(url):
if url.startswith("//"):
@@ -168,6 +174,30 @@ class HTML(Provider):
html_article = md.convert(article)
return html_article
+ def get_css(self):
+ if self.css_path is None:
+ return CSS
+ if not os.path.exists(self.css_path):
+ logger.warning(
+ f"CSS file {self.css_path} doesn't exist, using default style."
+ )
+ return CSS
+ with open(self.css_path, "r") as fp:
+ css = fp.read()
+ return css
+
+ def get_font_urls(self):
+ if self.font_urls_path is None:
+ return FONT_URLS
+ if not os.path.exists(self.font_urls_path):
+ logger.warning(
+ f"Font urls file {self.font_urls_path} doesn't exist, using default."
+ )
+ return FONT_URLS
+ with open(self.font_urls_path, "r") as fp:
+ font_urls = [l.strip() for l in fp.read().split("\n")]
+ return font_urls
+
def retrieve_pdf(self, pdf_url, filename):
"""Turn the HTML article in a clean pdf file
@@ -193,11 +223,11 @@ class HTML(Provider):
with open("./paper.html", "w") as fp:
fp.write(html_article)
- font_config = weasyprint.fonts.FontConfiguration()
html = weasyprint.HTML(string=html_article, url_fetcher=url_fetcher)
- css = weasyprint.CSS(string=CSS, font_config=font_config)
-
- html.write_pdf(filename, stylesheets=[css], font_config=font_config)
+ css = self.get_css()
+ font_urls = self.get_font_urls()
+ style = weasyprint.CSS(string=css)
+ html.write_pdf(filename, stylesheets=[style] + font_urls)
def validate(src):
# first check if it is a valid url
diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py
index f9af28f..095b69a 100644
--- a/paper2remarkable/ui.py
+++ b/paper2remarkable/ui.py
@@ -108,6 +108,14 @@ def parse_args():
default="rmapi",
)
parser.add_argument(
+ "--css", help="path to custom CSS file for HTML output", default=None
+ )
+ parser.add_argument(
+ "--font-urls",
+ help="path to custom font urls file for HTML output",
+ default=None,
+ )
+ parser.add_argument(
"input",
help="One or more URLs to a paper or paths to local PDF files",
nargs="+",
@@ -229,6 +237,8 @@ def main():
pdftk_path=args.pdftk,
qpdf_path=args.qpdf,
gs_path=args.gs,
+ css_path=args.css,
+ font_urls_path=args.font_urls,
cookiejar=cookiejar,
)
prov.run(new_input, filename=filename)
diff --git a/tests/test_html.py b/tests/test_html.py
index d271bb5..7d5c92b 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -7,6 +7,9 @@ This file is part of paper2remarkable.
"""
+import os
+import pdfplumber
+import tempfile
import unittest
from paper2remarkable.providers.html import HTML
@@ -24,6 +27,38 @@ class TestHTML(unittest.TestCase):
expected_image = "https://www.seriouseats.com/images/2015/01/20150118-tea-max-falkowitz-3.jpg"
self.assertIn(expected_image, html_article)
+ def test_custom_css(self):
+ test_css = """
+ @page { size: 702px 936px; margin: 1in; }
+ img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; }
+ h1,h2,h3 { font-family: 'Montserrat'; }
+ p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; text-align: left; }
+ """
+
+ test_font_urls = [
+ "https://fonts.googleapis.com/css2?family=Montserrat&display=swap"
+ ]
+
+ tmpfd, tempfname_css = tempfile.mkstemp(prefix="p2r_", suffix=".css")
+ with os.fdopen(tmpfd, "w") as fp:
+ fp.write(test_css)
+
+ tmpfd, tempfname_urls = tempfile.mkstemp(prefix="p2r_", suffix=".txt")
+ with os.fdopen(tmpfd, "w") as fp:
+ fp.write("\n".join(test_font_urls))
+
+ url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines"
+ prov = HTML(
+ upload=False, css_path=tempfname_css, font_urls_path=tempfname_urls
+ )
+ filename = prov.run(url)
+ with pdfplumber.open(filename) as pdf:
+ self.assertEqual(8, len(pdf.pages))
+
+ os.unlink(tempfname_css)
+ os.unlink(tempfname_urls)
+ os.unlink(filename)
+
if __name__ == "__main__":
unittest.main()