aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2020-09-25 23:36:47 +0200
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2020-09-25 23:36:47 +0200
commit3b5e7eb5f34f92496aa96ee088db2925eadafd65 (patch)
treea67f43a9178401fbdc0d6e6b7cb7047250ab841c
parentMerge branch 'master' into bugfix/html-figure (diff)
downloadpaper2remarkable-3b5e7eb5f34f92496aa96ee088db2925eadafd65.tar.gz
paper2remarkable-3b5e7eb5f34f92496aa96ee088db2925eadafd65.zip
Improve docs
-rw-r--r--paper2remarkable/providers/html.py19
1 files changed, 11 insertions, 8 deletions
diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py
index b734bd1..e050ea3 100644
--- a/paper2remarkable/providers/html.py
+++ b/paper2remarkable/providers/html.py
@@ -134,14 +134,17 @@ class HTML(Provider):
return url, url
def retrieve_pdf(self, pdf_url, filename):
- """Turn the HTML article in a clean pdf file"""
- # Steps
- # 1. Pull the HTML page using requests
- # 2. Extract the article part of the page using readability
- # 3. Convert the article HTML to markdown using html2text
- # 4. Convert the markdown back to HTML (this is done to sanitize HTML)
- # 4. Convert the HTML to PDF, pulling in images where needed
- # 5. Save the PDF to the specified filename.
+ """Turn the HTML article into a clean PDF file
+
+ This function takes the following steps:
+
+ 1. Pull the HTML page using requests, if not done in Informer
+ 2. Extract the article part of the page using readability/readabiliPy
+ 3. Convert the article HTML to markdown using html2text
+ 4. Convert the markdown back to HTML (done to sanitize the HTML)
+ 5. Convert the HTML to PDF, pulling in images where needed
+ 6. Save the PDF to the specified filename.
+ """
if self.informer._cached_title and self.informer._cached_article:
title = self.informer._cached_title
article = self.informer._cached_article