about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-02-14 18:56:30 +0000
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-02-14 18:56:30 +0000
commit: d4c682b869bb2bf391d5cf686baec026c5956875 (patch)
tree: 64b29e1faa0241d27aa3407c16c58c65f9837647
parent: Bump version and update changelog (diff)
download: paper2remarkable-d4c682b869bb2bf391d5cf686baec026c5956875.tar.gz
download: paper2remarkable-d4c682b869bb2bf391d5cf686baec026c5956875.zip
Be more robust against missing meta data
-rw-r--r-- paper2remarkable/providers/_info.py 27
1 file changed, 18 insertions, 9 deletions
diff --git a/paper2remarkable/providers/_info.py b/paper2remarkable/providers/_info.py
index 746c436..47c2e25 100644
--- a/paper2remarkable/providers/_info.py
+++ b/paper2remarkable/providers/_info.py
@@ -76,8 +76,13 @@ class Informer:
## Title
def get_title(self, soup):
- target = soup.find_all("meta", {"name": self.meta_title_key})
- return target[0]["content"]
+ meta = soup.find_all("meta", {"name": self.meta_title_key})
+ if not meta:
+ logger.warning(
+ "Couldn't determine title information, maybe provide the desired filename using '--filename'?"
+ )
+ return ""
+ return meta[0]["content"]
## Authors
@@ -87,10 +92,13 @@ class Informer:
return [x.strip().split(sep)[idx].strip() for x in op(soup_authors)]
def get_authors(self, soup):
- authors = [
- x["content"]
- for x in soup.find_all("meta", {"name": self.meta_author_key})
- ]
+ meta = soup.find_all("meta", {"name": self.meta_author_key})
+ if not meta:
+ logger.warning(
+ "Couldn't determine author information, maybe provide the desired filename using '--filename'?"
+ )
+ return ""
+ authors = [x["content"] for x in meta]
return self._format_authors(authors)
## Year
@@ -100,7 +108,8 @@ class Informer:
def get_year(self, soup):
""" Retrieve the contents of the meta_date_key field and format it """
- date = soup.find_all("meta", {"name": self.meta_date_key})[0][
- "content"
- ]
+ meta = soup.find_all("meta", {"name": self.meta_date_key})
+ if not meta:
+ return ""
+ date = meta[0]["content"]
return self._format_year(date)