diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-06-24 10:31:37 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-06-24 10:31:37 +0100 |
| commit | 2440a225c7f0ce7608a5e26924831ae1afaf18e0 (patch) | |
| tree | a0f4c920ed557e8551084dc474554228b7c0f3bd /arxiv2remarkable.py | |
| parent | add provider in providers list (diff) | |
| download | paper2remarkable-2440a225c7f0ce7608a5e26924831ae1afaf18e0.tar.gz paper2remarkable-2440a225c7f0ce7608a5e26924831ae1afaf18e0.zip | |
Formatting
Diffstat (limited to 'arxiv2remarkable.py')
| -rwxr-xr-x | arxiv2remarkable.py | 23 |
1 files changed, 12 insertions, 11 deletions
```diff
diff --git a/arxiv2remarkable.py b/arxiv2remarkable.py
index 45d1176..f165287 100755
--- a/arxiv2remarkable.py
+++ b/arxiv2remarkable.py
@@ -517,13 +517,11 @@ class OpenReviewProvider(Provider):
 
     def get_abs_pdf_urls(self, url):
         """ Get the pdf and abstract url from a OpenReview url """
-        if re.match(
-                "https?://openreview.net/forum\?id=[A-Za-z0-9]+", url):
+        if re.match("https?://openreview.net/forum\?id=[A-Za-z0-9]+", url):
             abs_url = url
-            pdf_url = url.replace('forum', 'pdf')
-        elif re.match(
-                "https?://openreview.net/pdf\?id=[A-Za-z0-9]+", url):
-            abs_url = url.replace('pdf', 'forum')
+            pdf_url = url.replace("forum", "pdf")
+        elif re.match("https?://openreview.net/pdf\?id=[A-Za-z0-9]+", url):
+            abs_url = url.replace("pdf", "forum")
             pdf_url = url
         else:
             exception("Couldn't figure out OpenReview urls.")
@@ -548,14 +546,17 @@ class OpenReviewProvider(Provider):
         page = self.get_page_with_retry(abs_url)
         soup = bs4.BeautifulSoup(page, "html.parser")
         authors = [
-            x["content"] for x in soup.find_all("meta", {"name":
-                "citation_author"})]
-        authors = [x.split(' ')[-1].strip() for x in authors]
+            x["content"]
+            for x in soup.find_all("meta", {"name": "citation_author"})
+        ]
+        authors = [x.split(" ")[-1].strip() for x in authors]
         title = soup.find_all("meta", {"name": "citation_title"})[0]["content"]
-        date = soup.find_all("meta", {"name":
-            "citation_publication_date"})[0]["content"]
+        date = soup.find_all("meta", {"name": "citation_publication_date"})[0][
+            "content"
+        ]
         return dict(title=title, date=date, authors=authors)
 
+
 class LocalFileProvider(Provider):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
```
