aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2020-08-27 13:17:03 +0100
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2020-08-27 13:17:03 +0100
commit7ae25e6f86dcd1da60cdb40d2d12ca45c4b68201 (patch)
tree8d234010d4a4e6dfff6702ca028545f52770333b
parentReadme formatting (diff)
downloadpaper2remarkable-bugfix/openreview.tar.gz
paper2remarkable-bugfix/openreview.zip
Rewrite author info function for OpenReview (bugfix/openreview)
-rw-r--r--paper2remarkable/providers/openreview.py32
1 files changed, 32 insertions, 0 deletions
diff --git a/paper2remarkable/providers/openreview.py b/paper2remarkable/providers/openreview.py
index 47c0555..8c44f45 100644
--- a/paper2remarkable/providers/openreview.py
+++ b/paper2remarkable/providers/openreview.py
@@ -8,17 +8,49 @@ Copyright: 2019, G.J.J. van den Burg
"""
+import json
import re
from ._base import Provider
from ._info import Informer
from ..exceptions import URLResolutionError
+from ..log import Logger
+
+logger = Logger()
class OpenReviewInformer(Informer):
meta_date_key = "citation_publication_date"
def get_authors(self, soup):
    """Extract the paper's authors from the OpenReview page.

    The author list is read from the JSON payload embedded in the
    ``<script id="__NEXT_DATA__">`` tag, under
    ``props -> pageProps -> forumNote -> content -> authors``, and is
    then passed to ``self._format_authors``.

    This may not be super robust long term, but works for now.

    Parameters
    ----------
    soup
        Parsed page; must provide ``find(name, attrs)`` returning the
        matching tag or a falsy value when there is none.

    Returns
    -------
    The result of ``self._format_authors(authors)``, or an empty string
    (after logging a warning) when the author list cannot be located.
    """
    warning = (
        "Couldn't determine author information, maybe provide "
        "the desired filename using '--filename'?"
    )

    script = soup.find("script", {"id": "__NEXT_DATA__"})
    if not script:
        logger.warning(warning)
        return ""

    try:
        paper_data = json.loads(script.contents[0])
        content = paper_data["props"]["pageProps"]["forumNote"]["content"]
        authors = content["authors"]
    except (json.JSONDecodeError, IndexError, KeyError, TypeError):
        # IndexError: the script tag has no contents.
        # JSONDecodeError: the payload is not valid JSON.
        # KeyError: an expected key is missing from the payload.
        # TypeError: the payload is not text, or an intermediate value
        # is null / not a mapping (e.g. ``"forumNote": null``).
        logger.warning(warning)
        return ""
    return self._format_authors(authors)
+
def _format_authors(self, soup_authors):
    """Format the authors by delegating to the parent implementation.

    Forwards ``soup_authors`` to the base class ``_format_authors`` with
    ``sep=" "`` and ``idx=-1`` and returns its result unchanged.
    """
    formatted = super()._format_authors(soup_authors, sep=" ", idx=-1)
    return formatted