aboutsummaryrefslogtreecommitdiff
path: root/arxiv2remarkable.py
diff options
context:
space:
mode:
Diffstat (limited to 'arxiv2remarkable.py')
-rwxr-xr-xarxiv2remarkable.py45
1 files changed, 45 insertions, 0 deletions
diff --git a/arxiv2remarkable.py b/arxiv2remarkable.py
index 6eb81ff..8ee377b 100755
--- a/arxiv2remarkable.py
+++ b/arxiv2remarkable.py
@@ -511,6 +511,51 @@ class ACMProvider(Provider):
return dict(title=title, date=date, authors=authors)
class OpenReviewProvider(Provider):
    """Provider for papers hosted on OpenReview (openreview.net).

    Both the ``forum`` (abstract page) and the ``pdf`` form of an
    OpenReview url are accepted; each is converted to the other when needed.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_abs_pdf_urls(self, url):
        """Get the pdf and abstract url from a OpenReview url.

        Parameters
        ----------
        url : str
            An OpenReview ``forum?id=...`` or ``pdf?id=...`` url.

        Returns
        -------
        tuple of (str, str)
            The abstract (forum) url and the pdf url, in that order.

        Notes
        -----
        For an unrecognised url the module-level ``exception`` helper is
        called. It is defined outside this class and is presumably expected
        to abort -- TODO confirm; if it returned, the final ``return`` would
        fail because neither url has been assigned.
        """
        if re.match(r"https?://openreview\.net/forum\?id=[A-Za-z0-9]+", url):
            abs_url = url
            # Rewrite only the path segment. A plain replace of "forum"
            # would also mangle a paper id that contains that substring.
            pdf_url = url.replace("/forum?", "/pdf?", 1)
        elif re.match(r"https?://openreview\.net/pdf\?id=[A-Za-z0-9]+", url):
            # Same reasoning as above, for ids containing "pdf".
            abs_url = url.replace("/pdf?", "/forum?", 1)
            pdf_url = url
        else:
            exception("Couldn't figure out OpenReview urls.")
        return abs_url, pdf_url

    @staticmethod
    def validate(src):
        """Check if the url is a valid OpenReview url.

        Declared as a static method because it takes no ``self``: it can be
        called on the class itself as well as on an instance.

        Parameters
        ----------
        src : str
            The url (or other source string) to check.

        Returns
        -------
        bool
            True if ``src`` starts with an OpenReview forum or pdf url.
        """
        return (
            re.match(
                r"https?://openreview\.net/(forum|pdf)\?id=[A-Za-z0-9]+", src
            )
            is not None
        )

    def retrieve_pdf(self, src, filename):
        """Download the pdf that belongs to ``src`` and save it as
        ``filename``."""
        _, pdf_url = self.get_abs_pdf_urls(src)
        self.download_url(pdf_url, filename)

    def get_paper_info(self, src):
        """Extract the paper's authors, title, and publication date.

        The values are read from the ``citation_author``,
        ``citation_title`` and ``citation_publication_date`` meta tags of
        the abstract page.

        Parameters
        ----------
        src : str
            An OpenReview forum or pdf url.

        Returns
        -------
        dict
            Keys ``title`` (str), ``date`` (str, as given in the meta tag)
            and ``authors`` (list with the last space-separated word of
            each author name).

        Raises
        ------
        IndexError
            If the page has no title or publication date meta tag.
        """
        abs_url, _ = self.get_abs_pdf_urls(src)
        self.log("Getting paper info from OpenReview")
        page = self.get_page_with_retry(abs_url)
        soup = bs4.BeautifulSoup(page, "html.parser")
        author_tags = soup.find_all("meta", {"name": "citation_author"})
        # Strip before splitting so that trailing whitespace in the meta
        # content does not produce an empty surname.
        authors = [tag["content"].strip().split(" ")[-1] for tag in author_tags]
        title = soup.find_all("meta", {"name": "citation_title"})[0]["content"]
        date = soup.find_all("meta", {"name": "citation_publication_date"})[0][
            "content"
        ]
        return dict(title=title, date=date, authors=authors)
+
class LocalFileProvider(Provider):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)