aboutsummaryrefslogtreecommitdiff
path: root/tests/test_html.py
blob: 41f6b831b504c8dcf6543bec8530681f60de739d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Additional tests for the HTML provider

This file is part of paper2remarkable.

"""

import os
import pdfplumber
import unittest

from paper2remarkable.providers.html import HTML
from paper2remarkable.providers.html import make_readable
from paper2remarkable.utils import get_page_with_retry


class TestHTML(unittest.TestCase):
    """Additional tests for the HTML provider.

    Both tests operate on live URLs, so they need network access and
    depend on the remote pages staying as they were when the expected
    values were recorded.
    """

    def test_experimental_fix_lazy_loading(self):
        """Check that a known image URL appears in the preprocessed HTML.

        The page is fetched, passed through ``make_readable`` and then
        through ``HTML.preprocess_html`` with ``experimental=True``.
        NOTE(review): presumably the experimental mode is what rewrites
        lazy-loaded image sources into real URLs -- confirm against the
        provider implementation.
        """
        url = "https://www.seriouseats.com/2015/01/tea-for-everyone.html"
        prov = HTML(upload=False, experimental=True)
        page = get_page_with_retry(url, return_text=True)
        title, article = make_readable(page)
        html_article = prov.preprocess_html(url, title, article)
        expected_image = "https://www.seriouseats.com/images/2015/01/20150118-tea-max-falkowitz-3.jpg"
        self.assertIn(expected_image, html_article)

    def test_custom_css(self):
        """Check the page count of a PDF produced with custom CSS and fonts.

        The provider is run with a user-supplied stylesheet and font URL
        list, and the resulting PDF is expected to have exactly 8 pages.
        NOTE(review): the page count presumably reflects the custom page
        size, margins and font sizes taking effect -- it will change if the
        article itself changes.
        """
        test_css = """
        @page { size: 702px 936px; margin: 1in; }
        img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; }
        h1,h2,h3 { font-family: 'Montserrat'; }
        p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; text-align: left; }
        """

        test_font_urls = [
            "https://fonts.googleapis.com/css2?family=Montserrat&display=swap"
        ]

        url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines"
        prov = HTML(upload=False, css=test_css, font_urls=test_font_urls)
        filename = prov.run(url)
        # Register the removal immediately so the generated file is deleted
        # even when opening the PDF or the page-count assertion fails.
        self.addCleanup(os.unlink, filename)
        with pdfplumber.open(filename) as pdf:
            self.assertEqual(8, len(pdf.pages))


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()