diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-02-22 14:15:08 +0000 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-02-22 14:15:08 +0000 |
| commit | 922f203aeeff2d28001f12c2dd923977b4e91cfa (patch) | |
| tree | e510ca170bfd7fdd5ac6d9f1f8bf1d0d17451e03 | |
| parent | Remove texlive from travis (diff) | |
| parent | Merge branch 'feature/provider_jmlr' (diff) | |
| download | paper2remarkable-922f203aeeff2d28001f12c2dd923977b4e91cfa.tar.gz paper2remarkable-922f203aeeff2d28001f12c2dd923977b4e91cfa.zip | |
Merge branch 'master' into feature/speedup
| -rw-r--r-- | README.md | 1 | ||||
| -rw-r--r-- | paper2remarkable/providers/__init__.py | 2 | ||||
| -rw-r--r-- | paper2remarkable/providers/jmlr.py | 70 | ||||
| -rw-r--r-- | tests/test_providers.py | 16 |
4 files changed, 88 insertions, 1 deletions
@@ -25,6 +25,7 @@ reMarkable from any of the following sources: * [arXiv](https://arxiv.org/) * [ACM Digital Library](https://dl.acm.org/dl.cfm) * [CiteSeerX](http://citeseerx.ist.psu.edu/index) +* [JMLR](http://jmlr.org) * [NBER](https://www.nber.org) * [NeurIPS](https://papers.nips.cc/) * [OpenReview](https://openreview.net/) diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py index c868bc4..e4fa1bd 100644 --- a/paper2remarkable/providers/__init__.py +++ b/paper2remarkable/providers/__init__.py @@ -4,6 +4,7 @@ from .acm import ACM from .arxiv import Arxiv from .citeseerx import CiteSeerX from .html import HTML +from .jmlr import JMLR from .local import LocalFile from .nber import NBER from .neurips import NeurIPS @@ -18,6 +19,7 @@ providers = [ ACM, Arxiv, CiteSeerX, + JMLR, NBER, NeurIPS, OpenReview, diff --git a/paper2remarkable/providers/jmlr.py b/paper2remarkable/providers/jmlr.py new file mode 100644 index 0000000..3634b4f --- /dev/null +++ b/paper2remarkable/providers/jmlr.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- + +"""Provider for JMLR + +Journal of Machine Learning Research + +Author: G.J.J. van den Burg +License: See LICENSE file +Copyright: 2020, G.J.J. van den Burg + +""" + +import re + +from ._base import Provider +from ._info import Informer +from ..exceptions import URLResolutionError + + +class JMLRInformer(Informer): + + meta_date_key = "citation_publication_date" + + def _format_authors(self, soup_authors): + have_comma = any(("," in auth for auth in soup_authors)) + if have_comma: + return super()._format_authors(soup_authors, sep=",", idx=0) + return super()._format_authors(soup_authors, sep=" ", idx=-1) + + +class JMLR(Provider): + + re_abs_1 = "https?://(www\.)?jmlr\.org/papers/v(?P<vol>\d+)/(?P<pid>\d{2}\-\d{3}).html$" + re_pdf_1 = "https?://(www\.)?jmlr\.org/papers/volume(?P<vol>\d+)/(?P<pid>\d{2}\-\d{3})/(?P=pid).pdf$" + + re_abs_2 = "https?://(www\.)?jmlr\.org/papers/v(?P<vol>\d+)/(?P<pid>\w+\d{2}\w).html$" + re_pdf_2 = "https?://(www\.)?jmlr\.org/papers/volume(?P<vol>\d+)/(?P<pid>\w+\d{2}\w)/(?P=pid).pdf$" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.informer = JMLRInformer() + + def get_abs_pdf_urls(self, url): + abs_url = pdf_url = None + abs_fmt = "http://jmlr.org/papers/v{vol}/{pid}.html" + pdf_fmt = "http://jmlr.org/papers/volume{vol}/{pid}/{pid}.pdf" + formats = [ + (self.re_abs_1, self.re_pdf_1), + (self.re_abs_2, self.re_pdf_2), + ] + + for re_abs, re_pdf in formats: + ma = re.match(re_abs, url) + mp = re.match(re_pdf, url) + if ma: + abs_url = url + pdf_url = pdf_fmt.format( + vol=ma.group("vol"), pid=ma.group("pid") + ) + elif mp: + abs_url = abs_fmt.format( + vol=mp.group("vol"), pid=mp.group("pid") + ) + pdf_url = url + if abs_url is None or pdf_url is None: + raise URLResolutionError("JMLR", url) + return abs_url, pdf_url + + def validate(src): + return re.match(JMLR.re_abs, src) or re.match(JMLR.re_pdf, src) diff --git a/tests/test_providers.py b/tests/test_providers.py index 493a209..2bf7507 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -17,6 +17,7 @@ from paper2remarkable.providers import ( Arxiv, CiteSeerX, HTML, + JMLR, LocalFile, NBER, NeurIPS, @@ -152,6 +153,20 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual("14-526.pdf", os.path.basename(filename)) + def test_jmlr_1(self): + prov = JMLR(upload=False, verbose=VERBOSE) + url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf" + exp = "Burg_Groenen_-_GenSVM_a_Generalized_Multiclass_Support_Vector_Machine_2016.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_jmlr_2(self): + prov = JMLR(upload=False, verbose=VERBOSE) + url = "http://www.jmlr.org/papers/v10/xu09a.html" + exp = "Xu_Zhang_-_Refinement_of_Reproducing_Kernels_2009.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + def test_pmlr_1(self): prov = PMLR(upload=False, verbose=VERBOSE) url = "http://proceedings.mlr.press/v97/behrmann19a.html" @@ -237,6 +252,5 @@ class TestProviders(unittest.TestCase): self.assertEqual(exp, os.path.basename(filename)) - if __name__ == "__main__": unittest.main() |
