diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-02-19 12:14:44 +0000 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-02-19 12:14:44 +0000 |
| commit | 0128dce1c10be8db965584aa387bf00040a3f018 (patch) | |
| tree | 76a45c3d06eaa4c647a50a7830c5005f0b5c0643 | |
| parent | Replace spaces in author names (diff) | |
| download | paper2remarkable-0128dce1c10be8db965584aa387bf00040a3f018.tar.gz paper2remarkable-0128dce1c10be8db965584aa387bf00040a3f018.zip | |
Add NBER provider
| -rw-r--r-- | paper2remarkable/providers/__init__.py | 4 | ||||
| -rw-r--r-- | paper2remarkable/providers/nber.py | 46 | ||||
| -rw-r--r-- | tests/test_providers.py | 15 |
3 files changed, 64 insertions, 1 deletions
```diff
diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index f87a044..c868bc4 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -5,6 +5,7 @@ from .arxiv import Arxiv
 from .citeseerx import CiteSeerX
 from .html import HTML
 from .local import LocalFile
+from .nber import NBER
 from .neurips import NeurIPS
 from .openreview import OpenReview
 from .pdf_url import PdfUrl
@@ -12,11 +13,12 @@ from .pmlr import PMLR
 from .pubmed import PubMed
 from .springer import Springer
 
-# NOTE: Order matters here, PdfUrl should be last
+# NOTE: Order matters here, PdfUrl and HTML should be last
 providers = [
     ACM,
     Arxiv,
     CiteSeerX,
+    NBER,
     NeurIPS,
     OpenReview,
     PMLR,
diff --git a/paper2remarkable/providers/nber.py b/paper2remarkable/providers/nber.py
new file mode 100644
index 0000000..76bc85f
--- /dev/null
+++ b/paper2remarkable/providers/nber.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+
+"""Provider for NBER
+
+(US) National Bureau of Economic Research
+
+Author: G.J.J. van den Burg
+License: See LICENSE file
+Copyright: 2020, G.J.J. van den Burg
+
+"""
+
+import re
+
+from ._base import Provider
+from ._info import Informer
+from ..exceptions import URLResolutionError
+
+
+class NBERInformer(Informer):
+    def _format_year(self, soup_date):
+        return soup_date.split("-")[0]
+
+
+class NBER(Provider):
+
+    re_abs = "https?://www\.nber\.org/papers/(?P<ref>[a-z0-9]+)$"
+    re_pdf = "https?://www\.nber\.org/papers/(?P<ref>[a-z0-9]+)\.pdf$"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.informer = NBERInformer()
+
+    def get_abs_pdf_urls(self, url):
+        if re.match(self.re_abs, url):
+            abs_url = url
+            pdf_url = url + ".pdf"
+        elif re.match(self.re_pdf, url):
+            pdf_url = url
+            abs_url = url[: -len(".pdf")]
+        else:
+            raise URLResolutionError("NBER", url)
+        return abs_url, pdf_url
+
+    def validate(src):
+        return re.match(NBER.re_abs, src) or re.match(NBER.re_pdf, src)
diff --git a/tests/test_providers.py b/tests/test_providers.py
index d0e3d40..38f88b7 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -18,6 +18,7 @@ from paper2remarkable.providers import (
     CiteSeerX,
     HTML,
     LocalFile,
+    NBER,
     NeurIPS,
     OpenReview,
     PMLR,
@@ -179,6 +180,20 @@ class TestProviders(unittest.TestCase):
         filename = prov.run(url)
         self.assertEqual(exp, os.path.basename(filename))
 
+    def test_nber_1(self):
+        prov = NBER(upload=False, verbose=VERBOSE)
+        url = "https://www.nber.org/papers/w26752"
+        exp = "Bhattacharya_Packalen_-_Stagnation_and_Scientific_Incentives_2020.pdf"
+        filename = prov.run(url)
+        self.assertEqual(exp, os.path.basename(filename))
+
+    def test_nber_2(self):
+        prov = NBER(upload=False, verbose=VERBOSE)
+        url = "https://www.nber.org/papers/w19152.pdf"
+        exp = "Herbst_Schorfheide_-_Sequential_Monte_Carlo_Sampling_for_DSGE_Models_2013.pdf"
+        filename = prov.run(url)
+        self.assertEqual(exp, os.path.basename(filename))
+
     def test_neurips_1(self):
         prov = NeurIPS(upload=False, verbose=VERBOSE)
         url = "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf"
```
