about summary refs log tree commit diff
diff options
context:
space:
mode:
author Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-07-15 22:44:33 +0100
committer Gertjan van den Burg <gertjanvandenburg@gmail.com> 2020-07-15 22:44:33 +0100
commit 071b5a0f2958c34f1a189259346a8732a1110de2 (patch)
tree 08207950ca965f02a4e6a18e5d529cceb9202c61
parent Bump version and update changelog (diff)
download paper2remarkable-071b5a0f2958c34f1a189259346a8732a1110de2.tar.gz
paper2remarkable-071b5a0f2958c34f1a189259346a8732a1110de2.zip
Add provider for SagePub
-rw-r--r-- README.md 1
-rw-r--r-- paper2remarkable/providers/__init__.py 2
-rw-r--r-- paper2remarkable/providers/sagepub.py 52
-rw-r--r-- paper2remarkable/utils.py 1
-rw-r--r-- tests/test_providers.py 17
5 files changed, 72 insertions, 1 deletions
diff --git a/README.md b/README.md
index 7108c3e..62c2b0b 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ reMarkable from any of the following sources:
* [OpenReview](https://openreview.net/)
* [PMLR](http://proceedings.mlr.press/)
* [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/)
+* [SagePub](https://journals.sagepub.com/)
* [SemanticScholar](https://www.semanticscholar.org/)
* [SpringerLink](https://link.springer.com/)
* A generic URL to a PDF file
diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index c4e3eb5..e3075f0 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -12,6 +12,7 @@ from .openreview import OpenReview
from .pdf_url import PdfUrl
from .pmlr import PMLR
from .pubmed import PubMed
+from .sagepub import SagePub
from .springer import Springer
from .semantic_scholar import SemanticScholar
@@ -26,6 +27,7 @@ providers = [
OpenReview,
PMLR,
PubMed,
+ SagePub,
Springer,
SemanticScholar,
LocalFile,
diff --git a/paper2remarkable/providers/sagepub.py b/paper2remarkable/providers/sagepub.py
new file mode 100644
index 0000000..7e76df8
--- /dev/null
+++ b/paper2remarkable/providers/sagepub.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+"""Provider for SagePub
+
+Author: G.J.J. van den Burg
+License: See LICENSE file
+Copyright: 2020, G.J.J. van den Burg
+
+"""
+
+import re
+
+from ._base import Provider
+from ._info import Informer
+from ..exceptions import URLResolutionError
+
+
+class SagePubInformer(Informer):
+    """Informer subclass that sets the dc.* metadata keys used for SagePub."""
+    meta_title_key = "dc.Title"
+    meta_author_key = "dc.Creator"
+    meta_date_key = "dc.Date"
+
+    def _format_year(self, soup_date):  # keep the text before the first "-"
+        return soup_date.partition("-")[0]
+
+    def _format_authors(self, soup_authors):  # parent formatter, sep=" ", idx=-1
+        return super()._format_authors(soup_authors, sep=" ", idx=-1)
+
+
+class SagePub(Provider):
+    """Provider for journals.sagepub.com "full" (abstract) and "pdf" URLs."""
+    # Raw literals: "\/", "\." and "\d" are regex escapes, not Python escapes.
+    re_abs = r"https?:\/\/journals\.sagepub\.com\/doi\/full\/\d{2}\.\d{4}\/\d+"
+    re_pdf = r"https?:\/\/journals\.sagepub\.com\/doi\/pdf\/\d{2}\.\d{4}\/\d+"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.informer = SagePubInformer()
+
+    def get_abs_pdf_urls(self, url):
+        """Return (abs_url, pdf_url) for ``url`` or raise URLResolutionError."""
+        # Swap only the /doi/<kind>/ segment, not every "full"/"pdf" substring.
+        if re.match(self.re_abs, url):
+            return url, url.replace("/doi/full/", "/doi/pdf/", 1)
+        if re.match(self.re_pdf, url):
+            return url.replace("/doi/pdf/", "/doi/full/", 1), url
+        raise URLResolutionError("SagePub", url)
+
+    @staticmethod  # the function takes no ``self``; also allows instance calls
+    def validate(src):  # match object if ``src`` is a SagePub URL, else None
+        return re.match(SagePub.re_abs, src) or re.match(SagePub.re_pdf, src)
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index c2917d5..07b1524 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -38,6 +38,7 @@ def clean_string(s):
cleaned = "".join(c if c in allowed else "_" for c in normalized)
while "__" in cleaned:
cleaned = cleaned.replace("__", "_")
+ cleaned = cleaned.strip('_')
return cleaned
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 5c8a8e4..ba1cc3a 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -25,8 +25,9 @@ from paper2remarkable.providers import (
PMLR,
PdfUrl,
PubMed,
+ SagePub,
Springer,
- SemanticScholar
+ SemanticScholar,
)
VERBOSE = False
@@ -290,6 +291,20 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual(exp, os.path.basename(filename))
+    def test_sagepub_1(self):
+        # A "full" (abstract) URL should produce the expected PDF filename.
+        source = "https://journals.sagepub.com/doi/full/10.1177/0306312714535679"
+        expected = "Rekdal_-_Academic_Urban_Legends_2014.pdf"
+        result = SagePub(upload=False, verbose=VERBOSE).run(source)
+        self.assertEqual(expected, os.path.basename(result))
+
+    def test_sagepub_2(self):
+        # A direct "pdf" URL should produce the expected PDF filename.
+        source = "https://journals.sagepub.com/doi/pdf/10.1177/1352458517694432"
+        expected = "Kobelt_et_al_-_New_Insights_Into_the_Burden_and_Costs_of_Multiple_Sclerosis_in_Europe_2017.pdf"
+        result = SagePub(upload=False, verbose=VERBOSE).run(source)
+        self.assertEqual(expected, os.path.basename(result))
+
if __name__ == "__main__":
unittest.main()