diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-07-15 22:44:33 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-07-15 22:44:33 +0100 |
| commit | 071b5a0f2958c34f1a189259346a8732a1110de2 (patch) | |
| tree | 08207950ca965f02a4e6a18e5d529cceb9202c61 | |
| parent | Bump version and update changelog (diff) | |
| download | paper2remarkable-071b5a0f2958c34f1a189259346a8732a1110de2.tar.gz paper2remarkable-071b5a0f2958c34f1a189259346a8732a1110de2.zip | |
Add provider for SagePub
| -rw-r--r-- | README.md | 1 | ||||
| -rw-r--r-- | paper2remarkable/providers/__init__.py | 2 | ||||
| -rw-r--r-- | paper2remarkable/providers/sagepub.py | 52 | ||||
| -rw-r--r-- | paper2remarkable/utils.py | 1 | ||||
| -rw-r--r-- | tests/test_providers.py | 17 |
5 files changed, 72 insertions, 1 deletion
@@ -36,6 +36,7 @@ reMarkable from any of the following sources: * [OpenReview](https://openreview.net/) * [PMLR](http://proceedings.mlr.press/) * [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/) +* [SagePub](https://journals.sagepub.com/) * [SemanticScholar](https://www.semanticscholar.org/) * [SpringerLink](https://link.springer.com/) * A generic URL to a PDF file diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py index c4e3eb5..e3075f0 100644 --- a/paper2remarkable/providers/__init__.py +++ b/paper2remarkable/providers/__init__.py @@ -12,6 +12,7 @@ from .openreview import OpenReview from .pdf_url import PdfUrl from .pmlr import PMLR from .pubmed import PubMed +from .sagepub import SagePub from .springer import Springer from .semantic_scholar import SemanticScholar @@ -26,6 +27,7 @@ providers = [ OpenReview, PMLR, PubMed, + SagePub, Springer, SemanticScholar, LocalFile, diff --git a/paper2remarkable/providers/sagepub.py b/paper2remarkable/providers/sagepub.py new file mode 100644 index 0000000..7e76df8 --- /dev/null +++ b/paper2remarkable/providers/sagepub.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +"""Provider for SagePub + +Author: G.J.J. van den Burg +License: See LICENSE file +Copyright: 2020, G.J.J. 
van den Burg + +""" + +import re + +from ._base import Provider +from ._info import Informer +from ..exceptions import URLResolutionError + + +class SagePubInformer(Informer): + + meta_author_key = "dc.Creator" + meta_title_key = "dc.Title" + meta_date_key = "dc.Date" + + def _format_authors(self, soup_authors): + return super()._format_authors(soup_authors, sep=" ", idx=-1) + + def _format_year(self, soup_date): + return soup_date.split("-")[0] + + +class SagePub(Provider): + + re_abs = "https?:\/\/journals\.sagepub\.com\/doi\/full\/\d{2}\.\d{4}\/\d+" + re_pdf = "https?:\/\/journals\.sagepub\.com\/doi\/pdf\/\d{2}\.\d{4}\/\d+" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.informer = SagePubInformer() + + def get_abs_pdf_urls(self, url): + if re.match(self.re_abs, url): + abs_url = url + pdf_url = url.replace("full", "pdf") + elif re.match(self.re_pdf, url): + pdf_url = url + abs_url = url.replace("pdf", "full") + else: + raise URLResolutionError("SagePub", url) + return abs_url, pdf_url + + def validate(src): + return re.match(SagePub.re_abs, src) or re.match(SagePub.re_pdf, src) diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py index c2917d5..07b1524 100644 --- a/paper2remarkable/utils.py +++ b/paper2remarkable/utils.py @@ -38,6 +38,7 @@ def clean_string(s): cleaned = "".join(c if c in allowed else "_" for c in normalized) while "__" in cleaned: cleaned = cleaned.replace("__", "_") + cleaned = cleaned.strip('_') return cleaned diff --git a/tests/test_providers.py b/tests/test_providers.py index 5c8a8e4..ba1cc3a 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -25,8 +25,9 @@ from paper2remarkable.providers import ( PMLR, PdfUrl, PubMed, + SagePub, Springer, - SemanticScholar + SemanticScholar, ) VERBOSE = False @@ -290,6 +291,20 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_sagepub_1(self): + prov = 
SagePub(upload=False, verbose=VERBOSE) + url = "https://journals.sagepub.com/doi/full/10.1177/0306312714535679" + exp = "Rekdal_-_Academic_Urban_Legends_2014.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_sagepub_2(self): + prov = SagePub(upload=False, verbose=VERBOSE) + url = "https://journals.sagepub.com/doi/pdf/10.1177/1352458517694432" + exp = "Kobelt_et_al_-_New_Insights_Into_the_Burden_and_Costs_of_Multiple_Sclerosis_in_Europe_2017.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + if __name__ == "__main__": unittest.main() |
