From 3b5e7eb5f34f92496aa96ee088db2925eadafd65 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 25 Sep 2020 23:36:47 +0200 Subject: Improve docs --- paper2remarkable/providers/html.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py index b734bd1..e050ea3 100644 --- a/paper2remarkable/providers/html.py +++ b/paper2remarkable/providers/html.py @@ -134,14 +134,17 @@ class HTML(Provider): return url, url def retrieve_pdf(self, pdf_url, filename): - """Turn the HTML article in a clean pdf file""" - # Steps - # 1. Pull the HTML page using requests - # 2. Extract the article part of the page using readability - # 3. Convert the article HTML to markdown using html2text - # 4. Convert the markdown back to HTML (this is done to sanitize HTML) - # 4. Convert the HTML to PDF, pulling in images where needed - # 5. Save the PDF to the specified filename. + """Turn the HTML article into a clean PDF file + + This function takes the following steps: + + 1. Pull the HTML page using requests, if not done in Informer + 2. Extract the article part of the page using readability/readabiliPy + 3. Convert the article HTML to markdown using html2text + 4. Convert the markdown back to HTML (done to sanitize the HTML) + 5. Convert the HTML to PDF, pulling in images where needed + 6. Save the PDF to the specified filename. + """ if self.informer._cached_title and self.informer._cached_article: title = self.informer._cached_title article = self.informer._cached_article -- cgit v1.2.3