aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2019-08-19 17:42:47 +0100
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2019-08-19 17:42:47 +0100
commit5965e2d32538defe6c79edabe177747ca2c114bc (patch)
treea4cdcf650e835b851fa550f36b644e828afa9ad6
parentSimplify provider names (diff)
downloadpaper2remarkable-5965e2d32538defe6c79edabe177747ca2c114bc.tar.gz
paper2remarkable-5965e2d32538defe6c79edabe177747ca2c114bc.zip
Add provider for Springer papers
-rwxr-xr-xarxiv2remarkable.py53
1 file changed, 45 insertions, 8 deletions
diff --git a/arxiv2remarkable.py b/arxiv2remarkable.py
index 8f605ab..bf999fb 100755
--- a/arxiv2remarkable.py
+++ b/arxiv2remarkable.py
@@ -548,6 +548,50 @@ class OpenReview(Provider):
return dict(title=title, date=date, authors=authors)
class Springer(Provider):
    """Provider for papers hosted on link.springer.com.

    Recognizes both the abstract page form (``/article/<doi>``) and the
    direct pdf form (``/content/pdf/<doi>.pdf``), where the slash inside
    the DOI may be url-encoded as ``%2F``.
    """

    # Raw strings so "\d" and "\." are genuine regex escapes (non-raw
    # "\d" only works by accident and warns on modern Python), and the
    # hostname dots are escaped — a bare "." would match any character.
    re_abs = r"https?://link\.springer\.com/article/10\.\d{4}/[a-z0-9\-]+"
    re_pdf = r"https?://link\.springer\.com/content/pdf/10\.\d{4}(%2F|/)[a-z0-9\-]+\.pdf"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_abs_pdf_urls(self, url):
        """Return ``(abs_url, pdf_url)`` for a Springer url.

        Calls :func:`exception` (which aborts) when ``url`` matches
        neither the abstract nor the pdf pattern.
        """
        if re.match(self.re_abs, url):
            abs_url = url
            pdf_url = url.replace("article", "content/pdf")
        elif re.match(self.re_pdf, url):
            abs_url = url.replace("content/pdf", "article")
            pdf_url = url
        else:
            exception("Couldn't figure out Springer urls.")
        return abs_url, pdf_url

    @staticmethod
    def validate(src):
        """Check whether ``src`` is a Springer abstract or pdf url."""
        # Explicit staticmethod: callers invoke this on the class itself
        # (see the provider dispatch in main()), never on an instance.
        return re.match(Springer.re_abs, src) or re.match(Springer.re_pdf, src)

    def retrieve_pdf(self, src, filename):
        """Download the pdf for the paper at ``src`` to ``filename``."""
        _, pdf_url = self.get_abs_pdf_urls(src)
        self.download_url(pdf_url, filename)

    def get_paper_info(self, src):
        """Scrape title, date, and author surnames from the abstract page."""
        abs_url, _ = self.get_abs_pdf_urls(src)
        self.log("Getting paper info from Springer")
        page = self.get_page_with_retry(abs_url)
        soup = bs4.BeautifulSoup(page, "html.parser")
        authors = [
            x["content"]
            for x in soup.find_all("meta", {"name": "citation_author"})
        ]
        # Keep only each author's last name.
        authors = [x.split(" ")[-1].strip() for x in authors]
        title = soup.find_all("meta", {"name": "citation_title"})[0]["content"]
        date = soup.find_all("meta", {"name": "citation_online_date"})[0][
            "content"
        ]
        return dict(title=title, date=date, authors=authors)
+
+
class LocalFile(Provider):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -804,14 +848,7 @@ def parse_args():
def main():
args = parse_args()
- providers = [
- Arxiv,
- Pubmed,
- ACM,
- OpenReview,
- LocalFile,
- PdfUrl,
- ]
+ providers = [Arxiv, Pubmed, ACM, OpenReview, Springer, LocalFile, PdfUrl]
provider = next((p for p in providers if p.validate(args.input)), None)
if provider is None: