diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-10-27 22:50:35 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-10-27 22:50:35 +0100 |
| commit | 244d0f51eb01086ff008c355cff8dba32eb58843 (patch) | |
| tree | ec087aee1cb0edf8a67733482c27085fbc29cb48 | |
| parent | Use a cookiejar instead of empty dict (diff) | |
| download | paper2remarkable-244d0f51eb01086ff008c355cff8dba32eb58843.tar.gz paper2remarkable-244d0f51eb01086ff008c355cff8dba32eb58843.zip | |
Add provider for Nature
| -rw-r--r-- | paper2remarkable/providers/__init__.py | 2 | ||||
| -rw-r--r-- | paper2remarkable/providers/nature.py | 47 | ||||
| -rw-r--r-- | tests/test_providers.py | 15 | ||||
| -rw-r--r-- | tests/test_ui.py | 6 |
4 files changed, 70 insertions, 0 deletions
```diff
diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index 78fa370..3eeda5c 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -7,6 +7,7 @@ from .cvf import CVF
 from .html import HTML
 from .jmlr import JMLR
 from .local import LocalFile
+from .nature import Nature
 from .nber import NBER
 from .neurips import NeurIPS
 from .openreview import OpenReview
@@ -24,6 +25,7 @@ providers = [
     CiteSeerX,
     CVF,
     JMLR,
+    Nature,
     NBER,
     NeurIPS,
     OpenReview,
diff --git a/paper2remarkable/providers/nature.py b/paper2remarkable/providers/nature.py
new file mode 100644
index 0000000..108f209
--- /dev/null
+++ b/paper2remarkable/providers/nature.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+"""Provider for Nature
+
+Author: G.J.J. van den Burg
+License: See LICENSE file
+Copyright: 2020, G.J.J. van den Burg
+
+"""
+
+import re
+
+from ._base import Provider
+from ._info import Informer
+from ..exceptions import URLResolutionError
+
+
+class NatureInformer(Informer):
+
+    meta_date_key = "citation_online_date"
+
+    def _format_authors(self, soup_authors):
+        return super()._format_authors(soup_authors, sep=" ", idx=-1)
+
+
+class Nature(Provider):
+
+    re_abs = "^https://www.nature.com/articles/s[a-z0-9\-]+$"
+    re_pdf = "^https://www.nature.com/articles/s[a-z0-9\-]+\.pdf$"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.informer = NatureInformer()
+
+    def get_abs_pdf_urls(self, url):
+        if re.match(self.re_abs, url):
+            abs_url = url
+            pdf_url = url + ".pdf"
+        elif re.match(self.re_pdf, url):
+            pdf_url = url
+            abs_url = url.replace(".pdf", "")
+        else:
+            raise URLResolutionError("Nature", url)
+        return abs_url, pdf_url
+
+    def validate(src):
+        return re.match(Nature.re_abs, src) or re.match(Nature.re_pdf, src)
diff --git a/tests/test_providers.py b/tests/test_providers.py
index e701234..def77d0 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -20,6 +20,7 @@ from paper2remarkable.providers import (
     HTML,
     JMLR,
     LocalFile,
+    Nature,
     NBER,
     NeurIPS,
     OpenReview,
@@ -355,6 +356,20 @@ class TestProviders(unittest.TestCase):
         filename = prov.run(url)
         self.assertEqual(exp, os.path.basename(filename))
 
+    def test_nature_1(self):
+        prov = Nature(upload=False, verbose=VERBOSE)
+        url = "https://www.nature.com/articles/s41598-020-75456-0"
+        exp = "Golozar_et_al_-_Direct_Observation_of_Lithium_Metal_Dendrites_With_Ceramic_Solid_Electrolyte_2020.pdf"
+        filename = prov.run(url)
+        self.assertEqual(exp, os.path.basename(filename))
+
+    def test_nature_2(self):
+        prov = Nature(upload=False, verbose=VERBOSE)
+        url = "https://www.nature.com/articles/s41599-019-0371-1.pdf"
+        exp = "Leroi_et_al_-_On_Revolutions_2020.pdf"
+        filename = prov.run(url)
+        self.assertEqual(exp, os.path.basename(filename))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_ui.py b/tests/test_ui.py
index 835f594..a1eb372 100644
--- a/tests/test_ui.py
+++ b/tests/test_ui.py
@@ -24,6 +24,7 @@ from paper2remarkable.providers import (
     HTML,
     JMLR,
     LocalFile,
+    Nature,
     NBER,
     NeurIPS,
     OpenReview,
@@ -180,6 +181,11 @@ class TestUI(unittest.TestCase):
                 "https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html",
                 "https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html",
             ),
+            (
+                Nature,
+                "https://www.nature.com/articles/s41599-019-0349-z",
+                "https://www.nature.com/articles/s41599-019-0349-z",
+            ),
         ]
         for exp_prov, url, exp_url in tests:
             prov, new_url, jar = choose_provider(url)
```
