diff options
| -rw-r--r-- | paper2remarkable/providers/openreview.py | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/paper2remarkable/providers/openreview.py b/paper2remarkable/providers/openreview.py index 47c0555..8c44f45 100644 --- a/paper2remarkable/providers/openreview.py +++ b/paper2remarkable/providers/openreview.py @@ -8,17 +8,49 @@ Copyright: 2019, G.J.J. van den Burg """ +import json import re from ._base import Provider from ._info import Informer from ..exceptions import URLResolutionError +from ..log import Logger + +logger = Logger() class OpenReviewInformer(Informer): meta_date_key = "citation_publication_date" + def get_authors(self, soup): + # Get the authors for OpenReview by parsing the JSON payload + # + # This may not be super robust long term, but works for now. + warning = ( + "Couldn't determine author information, maybe provide " + "the desired filename using '--filename'?" + ) + + script = soup.find("script", {"id": "__NEXT_DATA__"}) + if not script: + logger.warning(warning) + return "" + + try: + paper_data = json.loads(script.contents[0]) + except json.JSONDecodeError: + logger.warning(warning) + return "" + + try: + content = paper_data["props"]["pageProps"]["forumNote"]["content"] + authors = content["authors"] + except KeyError: + logger.warning(warning) + return "" + return self._format_authors(authors) + def _format_authors(self, soup_authors): return super()._format_authors(soup_authors, sep=" ", idx=-1) |
