diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-08-27 13:17:03 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-08-27 13:17:03 +0100 |
| commit | 7ae25e6f86dcd1da60cdb40d2d12ca45c4b68201 (patch) | |
| tree | 8d234010d4a4e6dfff6702ca028545f52770333b | |
| parent | Readme formatting (diff) | |
| download | paper2remarkable-bugfix/openreview.tar.gz paper2remarkable-bugfix/openreview.zip | |
Rewrite author info function for OpenReview (branch: bugfix/openreview)
| -rw-r--r-- | paper2remarkable/providers/openreview.py | 32 |
1 files changed, 32 insertions, 0 deletions
```diff
diff --git a/paper2remarkable/providers/openreview.py b/paper2remarkable/providers/openreview.py
index 47c0555..8c44f45 100644
--- a/paper2remarkable/providers/openreview.py
+++ b/paper2remarkable/providers/openreview.py
@@ -8,17 +8,49 @@ Copyright: 2019, G.J.J. van den Burg
 
 """
 
+import json
 import re
 
 from ._base import Provider
 from ._info import Informer
 from ..exceptions import URLResolutionError
+from ..log import Logger
+
+logger = Logger()
 
 
 class OpenReviewInformer(Informer):
 
     meta_date_key = "citation_publication_date"
 
+    def get_authors(self, soup):
+        # Get the authors for OpenReview by parsing the JSON payload
+        #
+        # This may not be super robust long term, but works for now.
+        warning = (
+            "Couldn't determine author information, maybe provide "
+            "the desired filename using '--filename'?"
+        )
+
+        script = soup.find("script", {"id": "__NEXT_DATA__"})
+        if not script:
+            logger.warning(warning)
+            return ""
+
+        try:
+            paper_data = json.loads(script.contents[0])
+        except json.JSONDecodeError:
+            logger.warning(warning)
+            return ""
+
+        try:
+            content = paper_data["props"]["pageProps"]["forumNote"]["content"]
+            authors = content["authors"]
+        except KeyError:
+            logger.warning(warning)
+            return ""
+        return self._format_authors(authors)
+
     def _format_authors(self, soup_authors):
         return super()._format_authors(soup_authors, sep=" ", idx=-1)
 
```
