about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-10-27 22:50:35 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-10-27 22:50:35 +0100
commit: 244d0f51eb01086ff008c355cff8dba32eb58843 (patch)
tree: ec087aee1cb0edf8a67733482c27085fbc29cb48
parent: Use a cookiejar instead of empty dict (diff)
download: paper2remarkable-244d0f51eb01086ff008c355cff8dba32eb58843.tar.gz
download: paper2remarkable-244d0f51eb01086ff008c355cff8dba32eb58843.zip
Add provider for Nature
-rw-r--r-- paper2remarkable/providers/__init__.py 2
-rw-r--r-- paper2remarkable/providers/nature.py 47
-rw-r--r-- tests/test_providers.py 15
-rw-r--r-- tests/test_ui.py 6
4 files changed, 70 insertions, 0 deletions
diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index 78fa370..3eeda5c 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -7,6 +7,7 @@ from .cvf import CVF
from .html import HTML
from .jmlr import JMLR
from .local import LocalFile
+from .nature import Nature
from .nber import NBER
from .neurips import NeurIPS
from .openreview import OpenReview
@@ -24,6 +25,7 @@ providers = [
CiteSeerX,
CVF,
JMLR,
+ Nature,
NBER,
NeurIPS,
OpenReview,
diff --git a/paper2remarkable/providers/nature.py b/paper2remarkable/providers/nature.py
new file mode 100644
index 0000000..108f209
--- /dev/null
+++ b/paper2remarkable/providers/nature.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+"""Provider for Nature
+
+Author: G.J.J. van den Burg
+License: See LICENSE file
+Copyright: 2020, G.J.J. van den Burg
+
+"""
+
+import re
+
+from ._base import Provider
+from ._info import Informer
+from ..exceptions import URLResolutionError
+
+
+class NatureInformer(Informer):
+
+ meta_date_key = "citation_online_date"
+
+ def _format_authors(self, soup_authors):
+ return super()._format_authors(soup_authors, sep=" ", idx=-1)
+
+
+class Nature(Provider):
+
+ re_abs = "^https://www.nature.com/articles/s[a-z0-9\-]+$"
+ re_pdf = "^https://www.nature.com/articles/s[a-z0-9\-]+\.pdf$"
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.informer = NatureInformer()
+
+ def get_abs_pdf_urls(self, url):
+ if re.match(self.re_abs, url):
+ abs_url = url
+ pdf_url = url + ".pdf"
+ elif re.match(self.re_pdf, url):
+ pdf_url = url
+ abs_url = url.replace(".pdf", "")
+ else:
+ raise URLResolutionError("Nature", url)
+ return abs_url, pdf_url
+
+ def validate(src):
+ return re.match(Nature.re_abs, src) or re.match(Nature.re_pdf, src)
diff --git a/tests/test_providers.py b/tests/test_providers.py
index e701234..def77d0 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -20,6 +20,7 @@ from paper2remarkable.providers import (
HTML,
JMLR,
LocalFile,
+ Nature,
NBER,
NeurIPS,
OpenReview,
@@ -355,6 +356,20 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual(exp, os.path.basename(filename))
+ def test_nature_1(self):
+ prov = Nature(upload=False, verbose=VERBOSE)
+ url = "https://www.nature.com/articles/s41598-020-75456-0"
+ exp = "Golozar_et_al_-_Direct_Observation_of_Lithium_Metal_Dendrites_With_Ceramic_Solid_Electrolyte_2020.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
+ def test_nature_2(self):
+ prov = Nature(upload=False, verbose=VERBOSE)
+ url = "https://www.nature.com/articles/s41599-019-0371-1.pdf"
+ exp = "Leroi_et_al_-_On_Revolutions_2020.pdf"
+ filename = prov.run(url)
+ self.assertEqual(exp, os.path.basename(filename))
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/test_ui.py b/tests/test_ui.py
index 835f594..a1eb372 100644
--- a/tests/test_ui.py
+++ b/tests/test_ui.py
@@ -24,6 +24,7 @@ from paper2remarkable.providers import (
HTML,
JMLR,
LocalFile,
+ Nature,
NBER,
NeurIPS,
OpenReview,
@@ -180,6 +181,11 @@ class TestUI(unittest.TestCase):
"https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html",
"https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html",
),
+ (
+ Nature,
+ "https://www.nature.com/articles/s41599-019-0349-z",
+ "https://www.nature.com/articles/s41599-019-0349-z",
+ ),
]
for exp_prov, url, exp_url in tests:
prov, new_url, jar = choose_provider(url)