From ec000de563a32b4e757c9afde5a1b1b5ac80a511 Mon Sep 17 00:00:00 2001
From: Gertjan van den Burg
Date: Sat, 20 Jun 2020 22:42:10 +0100
Subject: Add support for using ReadabiliPy

---
 tests/test_providers.py | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tests/test_providers.py')

diff --git a/tests/test_providers.py b/tests/test_providers.py
index fb75fbd..ca6c1ae 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -268,6 +268,13 @@ class TestProviders(unittest.TestCase):
         # this is a proxy test to check that all images are included
         self.assertEqual(4, len(pdfplumber.open(filename).pages))
 
+    def test_html_5(self):
+        prov = HTML(upload=False, verbose=VERBOSE)
+        url = "https://www.spiegel.de/panorama/london-tausende-rechtsextreme-demonstranten-wollen-statuen-schuetzen-a-2a1ed9b9-708a-40dc-a5ff-f312e97a60ca#"
+        filename = prov.run(url)
+        # this is a proxy test to check that all images are included
+        self.assertEqual(4, len(pdfplumber.open(filename).pages))
+
 
 if __name__ == "__main__":
     unittest.main()
-- 
cgit v1.2.3

From 6338388cea254ba4c6090eb17a8942a13b7a2b1c Mon Sep 17 00:00:00 2001
From: Gertjan van den Burg
Date: Fri, 25 Sep 2020 22:25:41 +0200
Subject: Clean up readability providers

This reorganizes the code a bit to ensure we only pull the HTML page once,
and use the same readability provider for both the informer and the
converter.
---
 tests/test_providers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tests/test_providers.py')

diff --git a/tests/test_providers.py b/tests/test_providers.py
index ca6c1ae..479fb84 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -255,7 +255,10 @@ class TestProviders(unittest.TestCase):
     def test_html_3(self):
         prov = HTML(upload=False, verbose=VERBOSE)
         url = "https://conclave-team.github.io/conclave-site/"
-        exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf"
+        #exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf"
+        # NOTE: Title differs between Readability.JS and readability-lxml, we
+        # assume that testing is done with Readability.JS
+        exp = "Conclave.pdf"
         filename = prov.run(url)
         self.assertEqual(exp, os.path.basename(filename))
         # this is a proxy test to check that all images are included
-- 
cgit v1.2.3