aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2020-02-22 14:15:08 +0000
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2020-02-22 14:15:08 +0000
commit922f203aeeff2d28001f12c2dd923977b4e91cfa (patch)
treee510ca170bfd7fdd5ac6d9f1f8bf1d0d17451e03
parentRemove texlive from travis (diff)
parentMerge branch 'feature/provider_jmlr' (diff)
downloadpaper2remarkable-922f203aeeff2d28001f12c2dd923977b4e91cfa.tar.gz
paper2remarkable-922f203aeeff2d28001f12c2dd923977b4e91cfa.zip
Merge branch 'master' into feature/speedup
-rw-r--r--README.md1
-rw-r--r--paper2remarkable/providers/__init__.py2
-rw-r--r--paper2remarkable/providers/jmlr.py70
-rw-r--r--tests/test_providers.py16
4 files changed, 88 insertions, 1 deletions
diff --git a/README.md b/README.md
index dd11653..dfaae7d 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ reMarkable from any of the following sources:
* [arXiv](https://arxiv.org/)
* [ACM Digital Library](https://dl.acm.org/dl.cfm)
* [CiteSeerX](http://citeseerx.ist.psu.edu/index)
+* [JMLR](http://jmlr.org)
* [NBER](https://www.nber.org)
* [NeurIPS](https://papers.nips.cc/)
* [OpenReview](https://openreview.net/)
diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index c868bc4..e4fa1bd 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -4,6 +4,7 @@ from .acm import ACM
from .arxiv import Arxiv
from .citeseerx import CiteSeerX
from .html import HTML
+from .jmlr import JMLR
from .local import LocalFile
from .nber import NBER
from .neurips import NeurIPS
@@ -18,6 +19,7 @@ providers = [
ACM,
Arxiv,
CiteSeerX,
+ JMLR,
NBER,
NeurIPS,
OpenReview,
diff --git a/paper2remarkable/providers/jmlr.py b/paper2remarkable/providers/jmlr.py
new file mode 100644
index 0000000..3634b4f
--- /dev/null
+++ b/paper2remarkable/providers/jmlr.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+"""Provider for JMLR
+
+Journal of Machine Learning Research
+
+Author: G.J.J. van den Burg
+License: See LICENSE file
+Copyright: 2020, G.J.J. van den Burg
+
+"""
+
+import re
+
+from ._base import Provider
+from ._info import Informer
+from ..exceptions import URLResolutionError
+
+
+class JMLRInformer(Informer):
+    """Informer variant used by the JMLR provider.
+
+    Overrides the meta-tag key for the publication date and adapts author
+    formatting to the two name layouts handled below.
+    """
+
+    # Key handed to the base Informer for the publication date
+    # (presumably the name of an HTML meta tag -- confirm in Informer).
+    meta_date_key = "citation_publication_date"
+
+    def _format_authors(self, soup_authors):
+        """Format the author list, choosing separator and index by layout.
+
+        If any author string contains a comma, names are split on "," and
+        index 0 is used; otherwise they are split on " " and index -1 is
+        used. The actual formatting is delegated to the base class.
+        """
+        if any("," in name for name in soup_authors):
+            separator, position = ",", 0
+        else:
+            separator, position = " ", -1
+        return super()._format_authors(
+            soup_authors, sep=separator, idx=position
+        )
+
+
+class JMLR(Provider):
+    """Provider that resolves JMLR abstract-page and PDF URLs.
+
+    Two URL layouts are recognised, each with an abstract-page pattern and
+    a PDF pattern. They differ only in the shape of the paper id: either
+    ``NN-NNN`` (e.g. ``14-526``) or word characters followed by two digits
+    and one more word character (e.g. ``xu09a``).
+    """
+
+    # Raw strings keep the regex escapes intact; the dot in front of the
+    # file extension is escaped so it only matches a literal ".".
+    re_abs_1 = r"https?://(www\.)?jmlr\.org/papers/v(?P<vol>\d+)/(?P<pid>\d{2}\-\d{3})\.html$"
+    re_pdf_1 = r"https?://(www\.)?jmlr\.org/papers/volume(?P<vol>\d+)/(?P<pid>\d{2}\-\d{3})/(?P=pid)\.pdf$"
+
+    re_abs_2 = r"https?://(www\.)?jmlr\.org/papers/v(?P<vol>\d+)/(?P<pid>\w+\d{2}\w)\.html$"
+    re_pdf_2 = r"https?://(www\.)?jmlr\.org/papers/volume(?P<vol>\d+)/(?P<pid>\w+\d{2}\w)/(?P=pid)\.pdf$"
+
+    def __init__(self, *args, **kwargs):
+        """Initialise the base provider and install the JMLR informer."""
+        super().__init__(*args, **kwargs)
+        self.informer = JMLRInformer()
+
+    def get_abs_pdf_urls(self, url):
+        """Return the ``(abs_url, pdf_url)`` pair for a JMLR URL.
+
+        When ``url`` is an abstract page it is returned unchanged and the
+        PDF URL is built from its volume and paper id; when ``url`` is a
+        PDF link the abstract URL is built instead.
+
+        Raises:
+            URLResolutionError: if ``url`` matches none of the patterns.
+        """
+        abs_fmt = "http://jmlr.org/papers/v{vol}/{pid}.html"
+        pdf_fmt = "http://jmlr.org/papers/volume{vol}/{pid}/{pid}.pdf"
+        formats = [
+            (self.re_abs_1, self.re_pdf_1),
+            (self.re_abs_2, self.re_pdf_2),
+        ]
+
+        for re_abs, re_pdf in formats:
+            ma = re.match(re_abs, url)
+            if ma:
+                pdf_url = pdf_fmt.format(
+                    vol=ma.group("vol"), pid=ma.group("pid")
+                )
+                return url, pdf_url
+            mp = re.match(re_pdf, url)
+            if mp:
+                abs_url = abs_fmt.format(
+                    vol=mp.group("vol"), pid=mp.group("pid")
+                )
+                return abs_url, url
+        raise URLResolutionError("JMLR", url)
+
+    @staticmethod
+    def validate(src):
+        """Return a match object if ``src`` is a JMLR URL, else ``None``.
+
+        All four patterns are tried; the previous code referenced the
+        non-existent attributes ``re_abs`` and ``re_pdf`` and therefore
+        raised ``AttributeError`` on every call.
+        """
+        patterns = (
+            JMLR.re_abs_1,
+            JMLR.re_pdf_1,
+            JMLR.re_abs_2,
+            JMLR.re_pdf_2,
+        )
+        for pattern in patterns:
+            match = re.match(pattern, src)
+            if match:
+                return match
+        return None
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 493a209..2bf7507 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -17,6 +17,7 @@ from paper2remarkable.providers import (
Arxiv,
CiteSeerX,
HTML,
+ JMLR,
LocalFile,
NBER,
NeurIPS,
@@ -152,6 +153,20 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual("14-526.pdf", os.path.basename(filename))
+    def test_jmlr_1(self):
+        """Run the JMLR provider on a PDF URL and check the file basename."""
+        provider = JMLR(upload=False, verbose=VERBOSE)
+        expected = "Burg_Groenen_-_GenSVM_a_Generalized_Multiclass_Support_Vector_Machine_2016.pdf"
+        result = provider.run(
+            "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf"
+        )
+        self.assertEqual(expected, os.path.basename(result))
+
+
+    def test_jmlr_2(self):
+        """Run the JMLR provider on an abstract URL and check the basename."""
+        provider = JMLR(upload=False, verbose=VERBOSE)
+        expected = "Xu_Zhang_-_Refinement_of_Reproducing_Kernels_2009.pdf"
+        result = provider.run("http://www.jmlr.org/papers/v10/xu09a.html")
+        self.assertEqual(expected, os.path.basename(result))
+
+
def test_pmlr_1(self):
prov = PMLR(upload=False, verbose=VERBOSE)
url = "http://proceedings.mlr.press/v97/behrmann19a.html"
@@ -237,6 +252,5 @@ class TestProviders(unittest.TestCase):
self.assertEqual(exp, os.path.basename(filename))
-
if __name__ == "__main__":
unittest.main()