aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gertjan van den Burg <gertjanvandenburg@gmail.com> 2019-06-13 16:57:52 +0100
committer Gertjan van den Burg <gertjanvandenburg@gmail.com> 2019-06-13 16:57:52 +0100
commit 7e85930bf0e6dd74856d438dcb632b633af1cb83 (patch)
tree f50bbed5ea76b38add706e5cf7b47441d5b6f656
parent bump version and update readme (diff)
download paper2remarkable-7e85930bf0e6dd74856d438dcb632b633af1cb83.tar.gz
paper2remarkable-7e85930bf0e6dd74856d438dcb632b633af1cb83.zip
Add support for OpenReview papers
-rwxr-xr-x arxiv2remarkable.py 45
-rw-r--r-- test.py 10
2 files changed, 55 insertions, 0 deletions
diff --git a/arxiv2remarkable.py b/arxiv2remarkable.py
index 6eb81ff..8ee377b 100755
--- a/arxiv2remarkable.py
+++ b/arxiv2remarkable.py
@@ -511,6 +511,51 @@ class ACMProvider(Provider):
return dict(title=title, date=date, authors=authors)
+class OpenReviewProvider(Provider):
+    """Provider for papers hosted on OpenReview (openreview.net).
+
+    Accepts either the forum (abstract) url or the pdf url of a paper and
+    derives the other one from it.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def get_abs_pdf_urls(self, url):
+        """Get the abstract and pdf urls from an OpenReview url.
+
+        Returns a tuple ``(abs_url, pdf_url)``. When the url is neither a
+        forum nor a pdf url, ``exception`` is called with an error message.
+        """
+        # Raw strings avoid the invalid "\?" escape sequence, and the dot is
+        # escaped so that only the literal host name matches.
+        if re.match(r"https?://openreview\.net/forum\?id=[A-Za-z0-9]+", url):
+            abs_url = url
+            # count=1: swap only the path component. Paper ids are
+            # alphanumeric and may themselves contain "forum" or "pdf".
+            pdf_url = url.replace("forum", "pdf", 1)
+        elif re.match(r"https?://openreview\.net/pdf\?id=[A-Za-z0-9]+", url):
+            abs_url = url.replace("pdf", "forum", 1)
+            pdf_url = url
+        else:
+            # NOTE(review): relies on exception() (defined elsewhere in this
+            # file) not returning; otherwise the return below fails because
+            # abs_url/pdf_url are unbound -- confirm.
+            exception("Couldn't figure out OpenReview urls.")
+        return abs_url, pdf_url
+
+    @staticmethod
+    def validate(src):
+        """Check if the url is a valid OpenReview url.
+
+        Returns True when ``src`` starts with an OpenReview forum or pdf url.
+        Declared static because it takes no ``self``; it can be called on
+        the class as well as on an instance.
+        """
+        m = re.match(
+            r"https?://openreview\.net/(forum|pdf)\?id=[A-Za-z0-9]+", src
+        )
+        return m is not None
+
+    def retrieve_pdf(self, src, filename):
+        """Download the pdf belonging to ``src`` and save it as ``filename``."""
+        _, pdf_url = self.get_abs_pdf_urls(src)
+        self.download_url(pdf_url, filename)
+
+    def get_paper_info(self, src):
+        """Extract the paper's authors, title, and publication date.
+
+        Reads the ``citation_*`` meta tags of the forum page. Returns a dict
+        with the keys ``title``, ``date`` and ``authors``; ``authors`` holds
+        the last space-separated token of every author name.
+        """
+        abs_url, _ = self.get_abs_pdf_urls(src)
+        self.log("Getting paper info from OpenReview")
+        page = self.get_page_with_retry(abs_url)
+        soup = bs4.BeautifulSoup(page, "html.parser")
+        authors = [
+            x["content"]
+            for x in soup.find_all("meta", {"name": "citation_author"})
+        ]
+        # Strip before splitting so that trailing whitespace in a name does
+        # not yield an empty last token.
+        authors = [x.strip().split(" ")[-1] for x in authors]
+        title = soup.find_all("meta", {"name": "citation_title"})[0]["content"]
+        date_tag = soup.find_all("meta", {"name": "citation_publication_date"})[0]
+        date = date_tag["content"]
+        return dict(title=title, date=date, authors=authors)
+
class LocalFileProvider(Provider):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
diff --git a/test.py b/test.py
index 2ec59d8..201cc9f 100644
--- a/test.py
+++ b/test.py
@@ -15,6 +15,7 @@ from arxiv2remarkable import (
ArxivProvider,
PMCProvider,
ACMProvider,
+ OpenReviewProvider,
LocalFileProvider,
PdfUrlProvider,
)
@@ -73,6 +74,15 @@ class Tests(unittest.TestCase):
fsize = os.path.getsize(filename)
self.assertTrue(1691444 < fsize <= 1693444)
+    def test_openreview(self):
+        """Run the OpenReview provider on a known paper; check name and size."""
+        provider = OpenReviewProvider(upload=False)
+        result = provider.run("https://openreview.net/forum?id=S1x4ghC9tQ")
+        expected = "Gregor_et_al_-_Temporal_Difference_Variational_Auto-Encoder_2018.pdf"
+        self.assertEqual(expected, os.path.basename(result))
+        # The downloaded pdf must fall inside the accepted size window.
+        size = os.path.getsize(result)
+        self.assertTrue(1110316 < size <= 1112316)
+
def test_local(self):
local_filename = "test.pdf"
with open(local_filename, "w") as fp: