From 882805565241bf2765b632e7b89a1f733a935a45 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 11 Nov 2020 19:36:07 +0000 Subject: Add experimental fix for lazy loaded images in html --- tests/test_html.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/test_html.py (limited to 'tests/test_html.py') diff --git a/tests/test_html.py b/tests/test_html.py new file mode 100644 index 0000000..d271bb5 --- /dev/null +++ b/tests/test_html.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Additional tests for the HTML provider + +This file is part of paper2remarkable. + +""" + +import unittest + +from paper2remarkable.providers.html import HTML +from paper2remarkable.providers.html import make_readable +from paper2remarkable.utils import get_page_with_retry + + +class TestHTML(unittest.TestCase): + def test_experimental_fix_lazy_loading(self): + url = "https://www.seriouseats.com/2015/01/tea-for-everyone.html" + prov = HTML(upload=False, experimental=True) + page = get_page_with_retry(url, return_text=True) + title, article = make_readable(page) + html_article = prov.preprocess_html(url, title, article) + expected_image = "https://www.seriouseats.com/images/2015/01/20150118-tea-max-falkowitz-3.jpg" + self.assertIn(expected_image, html_article) + + +if __name__ == "__main__": + unittest.main() -- cgit v1.2.3 From 948d314b47be221f7694a793c964d4728212c33c Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 27 Dec 2020 13:47:40 +0000 Subject: Add support for custom styling of HTML output --- tests/test_html.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'tests/test_html.py') diff --git a/tests/test_html.py b/tests/test_html.py index d271bb5..7d5c92b 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -7,6 +7,9 @@ This file is part of paper2remarkable. """ +import os +import pdfplumber +import tempfile import unittest from paper2remarkable.providers.html import HTML @@ -24,6 +27,38 @@ class TestHTML(unittest.TestCase): expected_image = "https://www.seriouseats.com/images/2015/01/20150118-tea-max-falkowitz-3.jpg" self.assertIn(expected_image, html_article) + def test_custom_css(self): + test_css = """ + @page { size: 702px 936px; margin: 1in; } + img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; } + h1,h2,h3 { font-family: 'Montserrat'; } + p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; text-align: left; } + """ + + test_font_urls = [ + "https://fonts.googleapis.com/css2?family=Montserrat&display=swap" + ] + + tmpfd, tempfname_css = tempfile.mkstemp(prefix="p2r_", suffix=".css") + with os.fdopen(tmpfd, "w") as fp: + fp.write(test_css) + + tmpfd, tempfname_urls = tempfile.mkstemp(prefix="p2r_", suffix=".txt") + with os.fdopen(tmpfd, "w") as fp: + fp.write("\n".join(test_font_urls)) + + url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines" + prov = HTML( + upload=False, css_path=tempfname_css, font_urls_path=tempfname_urls + ) + filename = prov.run(url) + with pdfplumber.open(filename) as pdf: + self.assertEqual(8, len(pdf.pages)) + + os.unlink(tempfname_css) + os.unlink(tempfname_urls) + os.unlink(filename) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 421d8de29d17d9390cae1f56bfc98667158a8096 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 28 Dec 2020 00:25:31 +0000 Subject: Add support for a configuration file --- tests/test_html.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'tests/test_html.py') diff --git a/tests/test_html.py b/tests/test_html.py index 7d5c92b..41f6b83 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -9,7 +9,6 @@ This file is part of paper2remarkable. import os import pdfplumber -import tempfile import unittest from paper2remarkable.providers.html import HTML @@ -39,24 +38,12 @@ class TestHTML(unittest.TestCase): "https://fonts.googleapis.com/css2?family=Montserrat&display=swap" ] - tmpfd, tempfname_css = tempfile.mkstemp(prefix="p2r_", suffix=".css") - with os.fdopen(tmpfd, "w") as fp: - fp.write(test_css) - - tmpfd, tempfname_urls = tempfile.mkstemp(prefix="p2r_", suffix=".txt") - with os.fdopen(tmpfd, "w") as fp: - fp.write("\n".join(test_font_urls)) - url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines" - prov = HTML( - upload=False, css_path=tempfname_css, font_urls_path=tempfname_urls - ) + prov = HTML(upload=False, css=test_css, font_urls=test_font_urls) filename = prov.run(url) with pdfplumber.open(filename) as pdf: self.assertEqual(8, len(pdf.pages)) - os.unlink(tempfname_css) - os.unlink(tempfname_urls) os.unlink(filename) -- cgit v1.2.3