diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-10-23 17:36:20 +0200 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-10-23 17:36:20 +0200 |
| commit | 14cacacf3fd7b78b287ec7e6b127bd24f0ea4f56 (patch) | |
| tree | 25133d126af595dd4901c825a3a28b38375a683d | |
| parent | Merge branch 'feature/pre-commit' (diff) | |
| download | paper2remarkable-14cacacf3fd7b78b287ec7e6b127bd24f0ea4f56.tar.gz paper2remarkable-14cacacf3fd7b78b287ec7e6b127bd24f0ea4f56.zip | |
Add CVF provider
| -rw-r--r-- | README.md | 1 | ||||
| -rw-r--r-- | paper2remarkable/providers/__init__.py | 2 | ||||
| -rw-r--r-- | paper2remarkable/providers/cvf.py | 51 | ||||
| -rw-r--r-- | tests/test_providers.py | 19 | ||||
| -rw-r--r-- | tests/test_ui.py | 6 |
5 files changed, 79 insertions, 0 deletions
@@ -32,6 +32,7 @@ reMarkable from any of the following sources: * [arXiv](https://arxiv.org/) * [ACM Digital Library](https://dl.acm.org/dl.cfm) * [CiteSeerX](http://citeseerx.ist.psu.edu/index) +* [CVF](https://openaccess.thecvf.com/menu) * [JMLR](http://jmlr.org) * [NBER](https://www.nber.org) * [NeurIPS](https://papers.nips.cc/) diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py index e3075f0..78fa370 100644 --- a/paper2remarkable/providers/__init__.py +++ b/paper2remarkable/providers/__init__.py @@ -3,6 +3,7 @@ from .acm import ACM from .arxiv import Arxiv from .citeseerx import CiteSeerX +from .cvf import CVF from .html import HTML from .jmlr import JMLR from .local import LocalFile @@ -21,6 +22,7 @@ providers = [ ACM, Arxiv, CiteSeerX, + CVF, JMLR, NBER, NeurIPS, diff --git a/paper2remarkable/providers/cvf.py b/paper2remarkable/providers/cvf.py new file mode 100644 index 0000000..76ca9c0 --- /dev/null +++ b/paper2remarkable/providers/cvf.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +"""Provider for CVF + +Author: G.J.J. van den Burg +License: See LICENSE file +Copyright: 2020, G.J.J. van den Burg + +""" + +import re + +from ._base import Provider +from ._info import Informer + +from ..exceptions import URLResolutionError +from ..log import Logger + +logger = Logger() + + +class CVFInformer(Informer): + + meta_date_key = "citation_publication_date" + + +class CVF(Provider): + + re_abs = "^https?://openaccess.thecvf.com/content_([\w\d]+)/html/([\w\d\_\-]+).html$" + re_pdf = "^https?://openaccess.thecvf.com/content_([\w\d]+)/papers/([\w\d\_\-]+).pdf$" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.informer = CVFInformer() + + def get_abs_pdf_urls(self, url): + if re.match(self.re_abs, url): + abs_url = url + pdf_url = url[: -len(".html")] + pdf_url += ".pdf" + pdf_url = pdf_url.replace("html", "papers") + elif re.match(self.re_pdf, url): + pdf_url = url + abs_url = url.replace("papers", "html").replace(".pdf", ".html") + else: + raise URLResolutionError("CVF", url) + return abs_url, pdf_url + + def validate(src): + m = re.match(CVF.re_abs, src) or re.match(CVF.re_pdf, src) + return not m is None diff --git a/tests/test_providers.py b/tests/test_providers.py index 546794c..e701234 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -16,6 +16,7 @@ from paper2remarkable.providers import ( ACM, Arxiv, CiteSeerX, + CVF, HTML, JMLR, LocalFile, @@ -336,6 +337,24 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_cvf_1(self): + prov = CVF(upload=False, verbose=VERBOSE) + url = "https://openaccess.thecvf.com/content_ICCV_2019/html/Muhammad_Goal-Driven_Sequential_Data_Abstraction_ICCV_2019_paper.html" + exp = ( + "Muhammad_et_al_-_Goal-Driven_Sequential_Data_Abstraction_2019.pdf" + ) + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_cvf_2(self): + prov = CVF(upload=False, verbose=VERBOSE) + url = "https://openaccess.thecvf.com/content_CVPR_2020/papers/Park_Seeing_the_World_in_a_Bag_of_Chips_CVPR_2020_paper.pdf" + exp = ( + "Park_Holynski_Seitz_-_Seeing_the_World_in_a_Bag_of_Chips_2020.pdf" + ) + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_ui.py b/tests/test_ui.py index 97ec44d..835f594 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -20,6 +20,7 @@ from paper2remarkable.providers import ( ACM, Arxiv, CiteSeerX, + CVF, HTML, JMLR, LocalFile, @@ -174,6 +175,11 @@ class TestUI(unittest.TestCase): "https://www.nature.com/articles/d41586-020-00176-4", "https://www.nature.com/articles/d41586-020-00176-4", ), + ( + CVF, + "https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html", + "https://openaccess.thecvf.com/content_cvpr_2018/html/Cheng_Dual_Skipping_Networks_CVPR_2018_paper.html", + ), ] for exp_prov, url, exp_url in tests: prov, new_url, jar = choose_provider(url) |
