about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- paper2remarkable/providers/__init__.py 4
-rw-r--r-- paper2remarkable/providers/pmlr.py 68
-rw-r--r-- tests/test_providers.py 29
3 files changed, 100 insertions, 1 deletions
diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index f6f93f9..fcb2d22 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -7,5 +7,7 @@ from .openreview import OpenReview
from .springer import Springer
from .local import LocalFile
from .pdf_url import PdfUrl
+from .pmlr import PMLR
-providers = [Arxiv, PubMed, ACM, OpenReview, Springer, LocalFile, PdfUrl]
+# NOTE: Order matters here; PdfUrl should be last.
+providers = [Arxiv, PubMed, ACM, OpenReview, Springer, PMLR, LocalFile, PdfUrl]
diff --git a/paper2remarkable/providers/pmlr.py b/paper2remarkable/providers/pmlr.py
new file mode 100644
index 0000000..82b8b4d
--- /dev/null
+++ b/paper2remarkable/providers/pmlr.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+
+"""Provider for PMLR
+
+Author: G.J.J. van den Burg
+License: See LICENSE file
+Copyright: 2019, G.J.J. van den Burg
+
+"""
+
+import re
+
+from ._base import Provider
+from ._info import Informer
+from ..utils import exception
+
+
+class PMLRInformer(Informer):
+    """Informer specialisation used by the PMLR provider."""
+
+    # Name of the meta key this informer uses for the publication date.
+    meta_date_key = "citation_publication_date"
+
+    def _format_authors(self, soup_authors):
+        """Format the authors through the base ``Informer`` implementation.
+
+        Delegates to the parent ``_format_authors`` with ``sep=" "`` and
+        ``idx=-1``.
+        """
+        # NOTE(review): presumably splits each name on spaces and keeps the
+        # last token (the surname) -- confirm against Informer.
+        formatted = super()._format_authors(soup_authors, sep=" ", idx=-1)
+        return formatted
+
+
+class PMLR(Provider):
+    """Provider for papers hosted on proceedings.mlr.press (PMLR).
+
+    Two URL layouts are recognised:
+
+    1. identifiers ending in digits, where the abstract page and the PDF
+       sit side by side (``.../v48/melnyk16.html`` and
+       ``.../v48/melnyk16.pdf``);
+    2. identifiers with an optional trailing word character, where the PDF
+       lives in a sub-directory named after the identifier
+       (``.../v15/maaten11b.html`` and ``.../v15/maaten11b/maaten11b.pdf``).
+    """
+
+    # Raw strings keep ``\d`` / ``\w`` from being treated as (invalid) string
+    # escapes, and the dots are escaped so they only match a literal ".".
+    re_abs_1 = r"https?://proceedings\.mlr\.press/v\d+/[\w\-]+\d+\.html"
+    re_pdf_1 = r"https?://proceedings\.mlr\.press/v\d+/[\w\-]+\d+\.pdf"
+
+    re_abs_2 = r"https?://proceedings\.mlr\.press/v\d+/[\w\-]+\d+\w?\.html"
+    # The named group forces the directory and the file stem to be identical.
+    re_pdf_2 = (
+        r"https?://proceedings\.mlr\.press/v\d+/"
+        r"(?P<ref>[\w\-]+\d+\w?)/(?P=ref)\.pdf"
+    )
+
+    def __init__(self, *args, **kwargs):
+        """Initialise the base provider and attach a ``PMLRInformer``."""
+        super().__init__(*args, **kwargs)
+        self.informer = PMLRInformer()
+
+    def get_abs_pdf_urls(self, url):
+        """Get the abstract and pdf url from a PMLR url.
+
+        Returns a tuple ``(abs_url, pdf_url)``. When ``url`` matches none of
+        the known PMLR patterns, ``exception`` is called with an error
+        message.
+        """
+        # The layout-1 patterns are tried first; identifiers with a trailing
+        # letter do not match them and fall through to the layout-2 patterns.
+        if re.match(self.re_abs_1, url):
+            abs_url = url
+            pdf_url = url.replace(".html", ".pdf")
+        elif re.match(self.re_pdf_1, url):
+            abs_url = url.replace(".pdf", ".html")
+            pdf_url = url
+        elif re.match(self.re_abs_2, url):
+            abs_url = url
+            parts = url.split("/")
+            # File stem of the abstract page, e.g. "maaten11b".
+            authoridx = parts[-1].split(".")[0]
+            pdf_url = "/".join(parts[:-1]) + "/%s/%s.pdf" % (
+                authoridx,
+                authoridx,
+            )
+        elif re.match(self.re_pdf_2, url):
+            parts = url.split("/")
+            # Dropping the file name leaves ".../<ref>"; the abstract page is
+            # that path with ".html" appended.
+            abs_url = "/".join(parts[:-1]) + ".html"
+            pdf_url = url
+        else:
+            exception("Couldn't figure out PMLR urls.")
+        return abs_url, pdf_url
+
+    @staticmethod
+    def validate(src):
+        """Check whether ``src`` is, in its entirety, a known PMLR url.
+
+        Returns the match object of the first pattern that fully matches
+        ``src`` (truthy), or ``None`` when no pattern does.
+        """
+        return (
+            re.fullmatch(PMLR.re_abs_1, src)
+            or re.fullmatch(PMLR.re_pdf_1, src)
+            or re.fullmatch(PMLR.re_abs_2, src)
+            or re.fullmatch(PMLR.re_pdf_2, src)
+        )
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 1479967..ba5e598 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -17,6 +17,7 @@ from paper2remarkable.providers import (
Arxiv,
LocalFile,
OpenReview,
+ PMLR,
PdfUrl,
PubMed,
Springer,
@@ -122,6 +123,34 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url, filename="test.pdf")
self.assertEqual("test.pdf", os.path.basename(filename))
+    def test_pmlr_1(self):
+        # Abstract-page url whose identifier carries a trailing letter.
+        provider = PMLR(upload=False, verbose=VERBOSE)
+        result = provider.run(
+            "http://proceedings.mlr.press/v97/behrmann19a.html"
+        )
+        expected = "Behrmann_et_al_-_Invertible_Residual_Networks_2019.pdf"
+        # Only the base name of the returned path is compared.
+        self.assertEqual(expected, os.path.basename(result))
+
+    def test_pmlr_2(self):
+        # Pdf url where the file sits in a directory named after the paper.
+        provider = PMLR(upload=False, verbose=VERBOSE)
+        result = provider.run(
+            "http://proceedings.mlr.press/v15/maaten11b/maaten11b.pdf"
+        )
+        expected = (
+            "Maaten_Welling_Saul_-_Hidden-Unit_Conditional_Random_Fields_2011.pdf"
+        )
+        # Only the base name of the returned path is compared.
+        self.assertEqual(expected, os.path.basename(result))
+
+    def test_pmlr_3(self):
+        # Pdf url that sits directly next to the abstract page.
+        provider = PMLR(upload=False, verbose=VERBOSE)
+        result = provider.run("http://proceedings.mlr.press/v48/melnyk16.pdf")
+        expected = (
+            "Melnyk_Banerjee_-_Estimating_Structured_Vector_Autoregressive_Models_2016.pdf"
+        )
+        # Only the base name of the returned path is compared.
+        self.assertEqual(expected, os.path.basename(result))
+
+    def test_pmlr_4(self):
+        # Abstract-page url whose identifier ends in digits.
+        provider = PMLR(upload=False, verbose=VERBOSE)
+        result = provider.run("http://proceedings.mlr.press/v48/zhangf16.html")
+        expected = "Zhang_Paisley_-_Markov_Latent_Feature_Models_2016.pdf"
+        # Only the base name of the returned path is compared.
+        self.assertEqual(expected, os.path.basename(result))
+
if __name__ == "__main__":
unittest.main()