diff options
Diffstat (limited to 'arxiv2remarkable.py')
| -rwxr-xr-x | arxiv2remarkable.py | 45 |
1 files changed, 45 insertions, 0 deletions
class OpenReviewProvider(Provider):
    """Provider for papers hosted on OpenReview (openreview.net).

    Recognises both the discussion page (``/forum?id=...``) and the direct
    PDF link (``/pdf?id=...``) and can convert one into the other.
    """

    # Single pattern shared by all methods.  It is a raw string so that
    # ``\?`` and ``\.`` are regex escapes rather than (invalid) string
    # escapes, and the dot in the host name is escaped so it only matches a
    # literal ".".  Group 1 captures which kind of page the URL points at.
    _URL_RE = re.compile(
        r"https?://openreview\.net/(forum|pdf)\?id=[A-Za-z0-9]+"
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_abs_pdf_urls(self, url):
        """Get the abstract (forum) url and the pdf url from an OpenReview url.

        Parameters
        ----------
        url : str
            Either a ``/forum?id=...`` or a ``/pdf?id=...`` OpenReview url.

        Returns
        -------
        tuple of (str, str)
            ``(abs_url, pdf_url)``.

        If the url matches neither form, ``exception`` is called with an
        error message.
        """
        match = self._URL_RE.match(url)
        if match and match.group(1) == "forum":
            abs_url = url
            # Only rewrite the path component: a bare replace("forum", "pdf")
            # would also mangle a paper id that happens to contain "forum".
            pdf_url = url.replace("/forum?", "/pdf?", 1)
        elif match:
            abs_url = url.replace("/pdf?", "/forum?", 1)
            pdf_url = url
        else:
            # NOTE(review): `exception` is defined elsewhere in this file and
            # presumably aborts; if it ever returns, the return statement
            # below raises UnboundLocalError -- confirm.
            exception("Couldn't figure out OpenReview urls.")
        return abs_url, pdf_url

    @staticmethod
    def validate(src):
        """Check if the url is a valid OpenReview url.

        Declared as a static method so it can be called both on the class
        and on an instance.

        Returns
        -------
        bool
            True if ``src`` starts with an OpenReview forum or pdf url.
        """
        return OpenReviewProvider._URL_RE.match(src) is not None

    def retrieve_pdf(self, src, filename):
        """Download the pdf that belongs to ``src`` and save it as ``filename``.

        The download itself is delegated to ``self.download_url``.
        """
        _, pdf_url = self.get_abs_pdf_urls(src)
        self.download_url(pdf_url, filename)

    def get_paper_info(self, src):
        """Extract the paper's authors, title, and publication date.

        The forum page is fetched and the ``citation_author``,
        ``citation_title`` and ``citation_publication_date`` meta tags are
        read from it.

        Returns
        -------
        dict
            With keys ``title`` and ``date`` (the raw ``content`` of the
            respective meta tag) and ``authors`` (a list holding the last
            space-separated word of each author entry).

        Raises
        ------
        IndexError
            If the page has no title or publication date meta tag.
        """
        abs_url, _ = self.get_abs_pdf_urls(src)
        self.log("Getting paper info from OpenReview")
        page = self.get_page_with_retry(abs_url)
        soup = bs4.BeautifulSoup(page, "html.parser")
        full_names = [
            tag["content"]
            for tag in soup.find_all("meta", {"name": "citation_author"})
        ]
        # Strip *before* splitting: otherwise a name with trailing whitespace
        # yields an empty string as its last word.
        authors = [name.strip().split(" ")[-1] for name in full_names]
        title = soup.find_all("meta", {"name": "citation_title"})[0]["content"]
        date = soup.find_all(
            "meta", {"name": "citation_publication_date"}
        )[0]["content"]
        return dict(title=title, date=date, authors=authors)
