From d4c682b869bb2bf391d5cf686baec026c5956875 Mon Sep 17 00:00:00 2001
From: Gertjan van den Burg
Date: Fri, 14 Feb 2020 18:56:30 +0000
Subject: Be more robust against missing meta data

---
 paper2remarkable/providers/_info.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/paper2remarkable/providers/_info.py b/paper2remarkable/providers/_info.py
index 746c436..47c2e25 100644
--- a/paper2remarkable/providers/_info.py
+++ b/paper2remarkable/providers/_info.py
@@ -76,8 +76,13 @@ class Informer:
     ## Title
 
     def get_title(self, soup):
-        target = soup.find_all("meta", {"name": self.meta_title_key})
-        return target[0]["content"]
+        meta = soup.find_all("meta", {"name": self.meta_title_key})
+        if not meta:
+            logger.warning(
+                "Couldn't determine title information, maybe provide the desired filename using '--filename'?"
+            )
+            return ""
+        return meta[0]["content"]
 
     ## Authors
 
@@ -87,10 +92,13 @@ class Informer:
         return [x.strip().split(sep)[idx].strip() for x in op(soup_authors)]
 
     def get_authors(self, soup):
-        authors = [
-            x["content"]
-            for x in soup.find_all("meta", {"name": self.meta_author_key})
-        ]
+        meta = soup.find_all("meta", {"name": self.meta_author_key})
+        if not meta:
+            logger.warning(
+                "Couldn't determine author information, maybe provide the desired filename using '--filename'?"
+            )
+            return ""
+        authors = [x["content"] for x in meta]
         return self._format_authors(authors)
 
     ## Year
@@ -100,7 +108,8 @@ class Informer:
 
     def get_year(self, soup):
         """ Retrieve the contents of the meta_date_key field and format it """
-        date = soup.find_all("meta", {"name": self.meta_date_key})[0][
-            "content"
-        ]
+        meta = soup.find_all("meta", {"name": self.meta_date_key})
+        if not meta:
+            return ""
+        date = meta[0]["content"]
         return self._format_year(date)
-- 
cgit v1.2.3