diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-10-25 16:26:23 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-10-25 16:26:23 +0100 |
| commit | a284f4035416590f875ba9996ec5673affab5da4 (patch) | |
| tree | 1f18660cea9bb6bc441a6600a1a77d509784cc8c /paper2remarkable/providers/arxiv.py | |
| parent | Fix for alternative arXiv urls (#7) (diff) | |
| download | paper2remarkable-a284f4035416590f875ba9996ec5673affab5da4.tar.gz paper2remarkable-a284f4035416590f875ba9996ec5673affab5da4.zip | |
Fix arxiv stamp removal regex and add tests
Diffstat (limited to 'paper2remarkable/providers/arxiv.py')
| -rw-r--r-- | paper2remarkable/providers/arxiv.py | 10 |
1 file changed, 5 insertions, 5 deletions
```diff
diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index e022658..1fd1795 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -19,6 +19,10 @@ from ..log import Logger
 
 logger = Logger()
 
+DEARXIV_TEXT_REGEX = (
+    b"arXiv:\d{4}\.\d{4,5}v\d+\s+\[[\w\-]+\.\w+\]\s+\d{1,2}\s\w{3}\s\d{4}"
+)
+
 
 class ArxivInformer(Informer):
     pass
@@ -73,11 +77,7 @@ class Arxiv(Provider):
         with open(uncompress_file, "rb") as fid:
             data = fid.read()
             # Remove the text element
-            data = re.sub(
-                b"\(arXiv:\d{4}\.\d{4,5}v\d+\s+\[\w+\.\w+\]\s+\d{1,2}\s\w{3}\s\d{4}\)Tj",
-                b"()Tj",
-                data,
-            )
+            data = re.sub(b"\(" + DEARXIV_TEXT_REGEX + b"\)Tj", b"()Tj", data)
             # Remove the URL element
             data = re.sub(
                 b"<<\\n\/URI \(http://arxiv\.org/abs/\d{4}\.\d{4,5}v\d+\)\\n\/S /URI\\n>>\\n",
```
