aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2020-12-28 15:22:49 +0000
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2020-12-28 15:22:49 +0000
commitab0611a9d597bf0e3c78bb3d01e857f4727bb2d3 (patch)
treeec804b5bf5f15308c5a85928e4ce009bd9c8eb06
parentUpdate badge in readme (diff)
parentBump version and update changelog and readme (diff)
downloadpaper2remarkable-ab0611a9d597bf0e3c78bb3d01e857f4727bb2d3.tar.gz
paper2remarkable-ab0611a9d597bf0e3c78bb3d01e857f4727bb2d3.zip
Merge branch 'master' into feature/gh_actions
-rw-r--r--CHANGELOG.md28
-rw-r--r--MANIFEST.in3
-rw-r--r--Makefile17
-rw-r--r--README.md81
-rw-r--r--config.example.yml26
-rw-r--r--docs/Makefile72
-rw-r--r--docs/conf.py62
-rw-r--r--docs/index.rst0
-rw-r--r--docs/man.md195
-rw-r--r--paper2remarkable/__version__.py2
-rw-r--r--paper2remarkable/log.py4
-rw-r--r--paper2remarkable/providers/__init__.py2
-rw-r--r--paper2remarkable/providers/_base.py19
-rw-r--r--paper2remarkable/providers/html.py65
-rw-r--r--paper2remarkable/providers/science_direct.py106
-rw-r--r--paper2remarkable/ui.py128
-rw-r--r--setup.py16
-rw-r--r--tests/test_html.py51
-rw-r--r--tests/test_providers.py22
19 files changed, 818 insertions, 81 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f4867fb..d7030fa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,33 @@
# Changelog
+## Version 0.8.6
+
+* Rename default configuration file
+
+## Version 0.8.5
+
+* Handle the case where the configuration file doesn't contain all sections.
+
+## Version 0.8.4
+
+* Add support for using a configuration file to avoid having to use command
+ line flags.
+
+## Version 0.8.3
+
+* Add support for providing custom styling for HTML output (closes
+ [#82](https://github.com/GjjvdBurg/paper2remarkable/issues/82)).
+
+## Version 0.8.2
+
+* Add provider for ScienceDirect
+* Add man page to package
+* Add short flag, -f, for --filename
+
+## Version 0.8.1
+
+* Add experimental fix for lazy loaded images in HTML
+
## Version 0.8.0
* Add provider for Nature
diff --git a/MANIFEST.in b/MANIFEST.in
index 1de226b..038a56f 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,10 @@
include README.md
+include p2r.1
recursive-include tests *.py
exclude Makefile
exclude .gitignore
exclude Dockerfile
exclude make_release.py
+exclude pyproject.toml
prune old
+prune tests
diff --git a/Makefile b/Makefile
index bcbc420..6a4a460 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ PACKAGE=paper2remarkable
DOC_DIR='./docs/'
VENV_DIR=/tmp/p2r_venv/
-.PHONY: help dist venv
+.PHONY: help dist venv docs
.DEFAULT_GOAL := help
@@ -35,22 +35,15 @@ clean: ## Clean build dist and egg directories left after install
rm -rf ./$(PACKAGE).egg-info
rm -rf $(VENV_DIR)
rm -f MANIFEST
+ rm -f ./p2r.1
find . -type f -iname '*.pyc' -delete
find . -type d -name '__pycache__' -empty -delete
-dist: ## Make Python source distribution
+dist: docs ## Make Python source distribution
python setup.py sdist bdist_wheel
-docs: doc
-doc: install ## Build documentation with Sphinx
- source $(VENV_DIR)/bin/activate && m2r README.md && mv README.rst $(DOC_DIR)
- source $(VENV_DIR)/bin/activate && m2r CHANGELOG.md && mv CHANGELOG.rst $(DOC_DIR)
- cd $(DOC_DIR) && \
- rm source/* && \
- source $(VENV_DIR)/bin/activate && \
- sphinx-apidoc -H 'Paper2Remarkable API Documentation' -o source ../$(PACKAGE) && \
- touch source/AUTOGENERATED
- $(MAKE) -C $(DOC_DIR) html
+docs:
+ $(MAKE) -C $(DOC_DIR) clean && $(MAKE) -C $(DOC_DIR) man
venv: $(VENV_DIR)/bin/activate
diff --git a/README.md b/README.md
index 70efbf2..d212549 100644
--- a/README.md
+++ b/README.md
@@ -41,6 +41,7 @@ reMarkable from any of the following sources:
* [PMLR](http://proceedings.mlr.press/)
* [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/)
* [SagePub](https://journals.sagepub.com/)
+* [ScienceDirect](https://www.sciencedirect.com/)
* [SemanticScholar](https://www.semanticscholar.org/)
* [SpringerLink](https://link.springer.com/)
* [Taylor & Francis](https://www.tandfonline.com/)
@@ -148,40 +149,71 @@ Github][github-url].
## Usage
The full help of the script is as follows. Hopefully the various command line
-flags are self-explanatory, but if you'd like more information, please open an
-issue [on GitHub][github-url].
+flags are self-explanatory, but if you'd like more information see the [man
+page](docs/man.md) (``man p2r``) or open an issue [on GitHub][github-url].
```
-usage: p2r [-h] [-b] [-c] [-d] [-n] [-p REMARKABLE_DIR] [-r] [-k] [-v] [-V]
- [--filename FILENAME] [--gs GS] [--pdftoppm PDFTOPPM] [--pdftk PDFTK]
- [--qpdf QPDF] [--rmapi RMAPI]
+usage: p2r [-h] [-b] [-c] [-d] [-e] [-n] [-p REMARKABLE_DIR] [-r] [-k] [-v]
+ [-V] [-f FILENAME] [--gs GS] [--pdftoppm PDFTOPPM] [--pdftk PDFTK]
+ [--qpdf QPDF] [--rmapi RMAPI] [--css CSS] [--font-urls FONT_URLS]
+ [-C CONFIG]
input [input ...]
-Paper2reMarkable version 0.7.3
+Paper2reMarkable version 0.8.6
positional arguments:
- input One or more URLs to a paper or paths to local PDF files
+ input One or more URLs to a paper or paths to local PDF
+ files
optional arguments:
-h, --help show this help message and exit
-b, --blank Add a blank page after every page of the PDF
-c, --center Center the PDF on the page, instead of left align
-d, --debug debug mode, doesn't upload to reMarkable
- -n, --no-upload don't upload to the reMarkable, save the output in current working dir
+ -e, --experimental enable experimental features
+ -n, --no-upload don't upload to reMarkable, save the output in current
+ directory
-p REMARKABLE_DIR, --remarkable-path REMARKABLE_DIR
- directory on reMarkable to put the file (created if missing, default: /)
+ directory on reMarkable to put the file (created if
+ missing, default: /)
-r, --right Right align so the menu doesn't cover it
-k, --no-crop Don't crop the pdf file
-v, --verbose be verbose
-V, --version Show version and exit
- --filename FILENAME Filename to use for the file on reMarkable
+ -f FILENAME, --filename FILENAME
+ Filename to use for the file on reMarkable
--gs GS path to gs executable (default: gs)
--pdftoppm PDFTOPPM path to pdftoppm executable (default: pdftoppm)
--pdftk PDFTK path to pdftk executable (default: pdftk)
--qpdf QPDF path to qpdf executable (default: qpdf)
--rmapi RMAPI path to rmapi executable (default: rmapi)
+ --css CSS path to custom CSS file for HTML output
+ --font-urls FONT_URLS
+ path to custom font urls file for HTML output
+ -C CONFIG, --config CONFIG
+ path to config file (default: ~/.paper2remarkable.yml)
```
+By default ``paper2remarkable`` makes a PDF fit better on the reMarkable by
+changing the page size and removing unnecessary whitespace. Some tools for
+exporting a PDF with annotations do not handle different page sizes properly,
+causing annotations to be misplaced (see
+[discussion](https://github.com/GjjvdBurg/paper2remarkable/issues/77)). If
+this is an issue for you, you can disable cropping using the
+``-k``/``--no-crop`` option to ``p2r``.
+
+For HTML sources (i.e. web articles) you can specify custom styling using the
+``--css`` and ``--font-urls`` options. The default style in the [HTML
+provider](https://github.com/GjjvdBurg/paper2remarkable/blob/a6e50d07748c842f1f0a09e4b173c87850c6ddee/paper2remarkable/providers/html.py#L36)
+can serve as a starting point.
+
+A configuration file can be used to provide commonly-used command line
+options. By default the configuration file at ``~/.paper2remarkable.yml`` is
+used if it exists, but an alternative location can be provided with the
+``-C/--config`` flag. Command line flags override the settings in the
+configuration file. See the [config.example.yml](./config.example.yml) file
+for an example configuration file and an overview of supported options.
+
## Alfred Workflow
On MacOS, you can optionally install [this Alfred workflow][workflow]. Alfred
@@ -229,19 +261,31 @@ docker build -t p2r .
### Authorization
-If you already have a `~/.rmapi` file, you can skip this section. Otherwise
-we'll use `rmapi` to create it.
+``paper2remarkable`` uses [rMapi](https://github.com/juruen/rmapi) to sync
+documents to the reMarkable. The first time you run ``paper2remarkable`` you
+will have to authenticate rMapi using a one-time code provided by reMarkable.
+By default, rMapi uses the ``${HOME}/.rmapi`` file as a configuration file to
+store the credentials, and so this is the location we will use in the commands
+below. If you'd like to use a different location for the configuration (for
+instance, ``${HOME}/.config/rmapi/rmapi.conf``), make sure to change the
+commands below accordingly.
+
+If you already have a `~/.rmapi` file with the authentication details, you can
+skip this section. Otherwise we'll create it and run ``rmapi`` in the docker
+container for authentication:
```bash
-touch ${HOME}/.rmapi
-docker run --rm -i -t -v "${HOME}/.rmapi:/home/user/.rmapi:rw" --entrypoint=rmapi p2r version
+$ touch ${HOME}/.rmapi
+$ docker run --rm -i -t -v "${HOME}/.rmapi:/home/user/.rmapi:rw" --entrypoint=rmapi p2r version
```
-which should end with output like
+This command will print a link where you can obtain a one-time code to
+authenticate rMapi and afterwards print the rMapi version (the version number
+may be different):
```bash
ReMarkable Cloud API Shell
-rmapi version: 0.0.5
+rmapi version: 0.0.12
```
### Usage
@@ -257,7 +301,7 @@ docker run --rm -v "${HOME}/.rmapi:/home/user/.rmapi:rw" p2r --help
docker run --rm -v "${HOME}/.rmapi:/home/user/.rmapi:rw" p2r -v https://arxiv.org/abs/1811.11242
# to transfer a local file in the current directory
-docker run --rm -v "${HOME}/.rmapi:/home/user/.rmapi:rw" -v "$(pwd):/home/user:r" p2r -v localfile.pdf
+docker run --rm -v "${HOME}/.rmapi:/home/user/.rmapi:rw" -v "$(pwd):/home/user:ro" p2r -v localfile.pdf
```
For transferring local files using the Docker image, you may find [this helper
@@ -273,7 +317,8 @@ your ``~/.bashrc`` file to abstract away the Docker commands:
alias p2r="docker run --rm -v \"${HOME}/.rmapi:/home/user/.rmapi:rw\" p2r"
```
-Then you can use ``paper2remarkable`` from the command line as ``p2r``!
+After running ``source ~/.bashrc`` to activate the alias, you can then use
+``paper2remarkable`` through Docker by calling ``p2r`` from the command line.
# Notes
diff --git a/config.example.yml b/config.example.yml
new file mode 100644
index 0000000..52d3ac4
--- /dev/null
+++ b/config.example.yml
@@ -0,0 +1,26 @@
+---
+core:
+ crop: 'left' # options: 'none', 'left', 'center', 'right'
+ blank: false # options: true, false
+ upload: true # options: true, false
+ verbose: true # options: true, false
+ experimental: true # options: true, false
+
+# System settings are all optional, but can be used if executables are not on
+# the PATH. Options in this section include: gs, pdftk, pdftoppm, qpdf, and
+# rmapi.
+system:
+ gs: /usr/bin/gs
+
+# Settings for styling HTML sources. This section has support for a css field
+# and a font_urls field. The former is expected to be a multiline string and
+# the latter a list of urls.
+html:
+ css: |
+ @page { size: 702px 936px; margin: 1in; }
+ img { display: block; margin: 0 auto; text-align: center; max-width: 70%; }
+ h1,h2,h3 { font-family: 'Montserrat'; }
+ p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; }
+
+ font_urls:
+ - https://fonts.googleapis.com/css2?family=Montserrat&display=swap
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..ae82c87
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,72 @@
+# Makefile for paper2remarkable documentation
+#
+# Author: G.J.J. van den Burg
+# Copyright (c) 2020 G.J.J. van den Burg
+# License: See LICENSE file
+#
+
+SHELL := bash
+.SHELLFLAGS := -eu -o pipefail -c
+MAKEFLAGS += --no-builtin-rules
+
+VENV_DIR=/tmp/p2r_docs/
+OUTPUT_DIR=./output
+
+PACKAGE = p2r
+SPHINXBUILD = sphinx-build
+ALLSPHINXOPTS = -d .build/doctrees .
+
+.PHONY: all
+
+all: man
+
+############
+# Man page #
+############
+
+.PHONY: man output-dir clean_output
+
+man: $(OUTPUT_DIR)/$(PACKAGE).1
+ cp $< ../$(PACKAGE).1
+ @echo "Generated man page copied to ../$(PACKAGE).1"
+
+$(OUTPUT_DIR)/$(PACKAGE).1: man.rst | output-dir
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(OUTPUT_DIR)
+
+man.rst: man.md venv
+ source $(VENV_DIR)/bin/activate && m2r man.md
+
+output-dir:
+ mkdir -p $(OUTPUT_DIR)
+ mkdir -p _static
+
+clean_output:
+ rm -f man.rst
+ rm -rf $(OUTPUT_DIR)
+ rm -rf _static
+ rm -rf .build
+
+#######################
+# Virtual environment #
+#######################
+
+.PHONY: venv clean_venv
+
+venv: $(VENV_DIR)/bin/activate
+
+$(VENV_DIR)/bin/activate:
+ test -d $(VENV_DIR) || python -m venv $(VENV_DIR)
+ source $(VENV_DIR)/bin/activate && pip install m2r
+ touch $(VENV_DIR)/bin/activate
+
+clean_venv:
+ rm -rf $(VENV_DIR)
+
+############
+# Clean up #
+############
+
+.PHONY: venv
+
+clean: clean_venv clean_output
+ rm -f ../$(PACKAGE).1
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..92961c1
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,62 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- Project information -----------------------------------------------------
+
+project = "paper2remarkable"
+copyright = "2020, G.J.J. van den Burg"
+author = "Gertjan van den Burg"
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = []
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = "alabaster"
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+# Man page
+man_pages = [
+ (
+ "man",
+ "p2r",
+ "Fetch an academic paper or web article and send it to the reMarkable tablet",
+ author + " & other helpful contributors.",
+ 1,
+ )
+]
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/docs/index.rst
diff --git a/docs/man.md b/docs/man.md
new file mode 100644
index 0000000..9c331f1
--- /dev/null
+++ b/docs/man.md
@@ -0,0 +1,195 @@
+# paper2remarkable
+
+## SYNOPSIS
+
+```
+p2r [OPTION]... [INPUT]...
+```
+
+## DESCRIPTION
+
+Fetch an academic paper, local pdf file, or any web article and send it to the
+reMarkable tablet. The input to the script can be a URL to a PDF file or
+article on a website, or a local file. For supported scientific outlets, the
+program will collect the metadata for the paper and create a nice filename
+(unless ``--filename`` is specified). See [SUPPORTED
+SOURCES](#supported-sources) for an overview of supported scientific paper
+sources.
+
+By default, paper2remarkable crops the unnecessary whitespace from a PDF file
+to make the paper fit better on the reMarkable. The default setting yields a
+left-aligned document on the reMarkable which can be useful for taking margin
+notes. Alternatively, the program supports the ``--center``, ``--right``, and
+``--no-crop`` options to change this crop setting.
+
+## OPTIONS
+
+Basic options:
+
+-b, --blank
+ Add a blank page after every page of the PDF document. This can be
+ useful for taking notes on papers.
+
+-C, --config=FILENAME
+ Read options from a configuration file. A YAML file is supported, see
+ [CONFIGURATION FILE](#configuration-file) for further details. By default the
+ file at ``~/.paper2remarkable.yml`` is used if it exists.
+
+-e, --experimental
+ Enable the experimental features of paper2remarkable. See below under
+ [EXPERIMENTAL FEATURES](#experimental-features) for an overview.
+
+-f, --filename=FILENAME
+ Filename to use for the file on reMarkable. If you specify multiple
+ ``INPUT`` files and want to use a specific filename for each, you can
+ specify ``--filename`` for each ``INPUT`` source by repeating it.
+
+-h, --help
+ Show help message and exit.
+
+-v, --verbose
+ Enable verbose mode of paper2remarkable. By default the program prints
+ no output.
+
+-V, --version
+ Show the version and exit.
+
+Crop options:
+
+-c, --center
+ Center the PDF on the page.
+
+-k, --no-crop
+ Don't crop the document at all.
+
+-r, --right
+ Right-align the document on the reMarkable so the menu doesn't cover it.
+
+reMarkable options:
+
+-n, --no-upload
+ Don't upload the document to the reMarkable, save the output in the
+ current working directory.
+
+-p, --remarkable-path=DIR
+ The directory on the reMarkable where the document will be uploaded to.
+ If the target directory does not exist it will be created. If not
+ specified, the root directory will be used.
+
+Output customization:
+
+--css=FILENAME
+ Path to a CSS file with custom styling for the HTML output. This option
+ is ignored for any of the other providers. The code for the HTML
+ provider contains the default CSS style, which can be used as a starting
+ point.
+
+--font-urls=FILENAME
+ Path to a file with font urls (one per line) for the HTML output. This
+ will generally be used in combination with the ``--css`` option.
+
+System settings:
+
+You'll only need to specify these options if the programs are not available on
+the PATH variable.
+
+--gs=GS
+ Path to the GhostScript executable.
+
+--pdftoppm=PDFTOPPM
+ Path to pdftoppm executable (default: pdftoppm). Note that pdftoppm is
+ optional.
+
+--pdftk=PDFTK
+ Path to PDFtk executable (default: pdftk). Either pdftk or qpdf is
+ needed.
+
+--qpdf=QPDF
+ Path to qpdf executable (default: qpdf). Either pdftk or qpdf is needed.
+
+--rmapi=RMAPI
+ Path to rmapi executable (default: rmapi).
+
+Developer options:
+
+-d, --debug
+ Debug mode. When used, the program doesn't upload the document to the
+ reMarkable by default and leaves the temporary directory with
+ intermediate files.
+
+## SUPPORTED SOURCES
+
+The following scientific sources are currently supported and paper2remarkable
+will create a filename based on the authors, title, and publication year of
+the work. For the sources below the program is generally flexible with regard
+to whether a URL to the PDF or to the abstract page is provided.
+
+- arXiv
+- ACM Digital Library
+- CiteSeerX
+- CVF
+- JMLR
+- Nature
+- NBER
+- NeurIPS
+- OpenReview
+- PMLR
+- PubMed Central
+- SagePub
+- ScienceDirect
+- SemanticScholar
+- SpringerLink
+- Taylor & Francis
+
+paper2remarkable also supports a generic URL to a PDF file or a local file, in
+which case no "nice" filename will be generated.
+
+- A generic URL to a PDF file. This can be considered a fallback option for
+ when a PDF source is not supported (yet).
+- A local PDF file.
+
+Finally, paper2remarkable supports extracting articles from websites. In this
+case an effort is made to detect the main content of the article and clean up
+the HTML before sending the file to the reMarkable.
+
+## CONFIGURATION FILE
+
+To avoid having to provide frequently-used command line flags, a configuration
+file can be created for paper2remarkable. By default it is a YAML file located
+at ``~/.paper2remarkable.yml``, but an alternative location can be provided
+with the ``--config`` option to the script.
+
+The configuration file consists of three sections: ``core``, ``system``, and
+``html``. In the ``core`` section options for cropping, verbosity, and blank
+pages can be added, among others. The ``system`` section allows setting paths
+to executables such as ``rmapi``, ``pdftk``, etc. Finally, the ``html``
+section allows you to provide custom CSS and font urls for formatting the
+output of web articles.
+
+Options provided on the command line override those in the configuration
+file. So, for instance, if the configuration file has the setting ``crop:
+'left'`` in the ``core`` section and the command line flag ``-c`` is provided,
+the PDF will be centered.
+
+An example file is provided in the repository on
+[GitHub](https://www.github.com/GjjvdBurg/paper2remarkable), which also
+contains more information on the available options and their values.
+
+## EXPERIMENTAL FEATURES
+
+Occasionally, experimental (beta) features will be included in
+paper2remarkable and they will be listed here. You can enable the experimental
+features by using the ``-e`` flag to paper2remarkable.
+
+- The HTML provider currently has an experimental feature to handle lazy
+ loading of images. Certain websites use a small placeholder image and load
+ the main image using Javascript, with the actual image source stored in a
+ ``data-src`` attribute in the ``img`` tag. The experimental feature uses the
+ ``data-src`` attribute as the image source instead of that in the ``src``
+ attribute.
+
+## BUGS
+
+Please report bugs to:
+
+https://www.github.com/GjjvdBurg/paper2remarkable
diff --git a/paper2remarkable/__version__.py b/paper2remarkable/__version__.py
index 2b9f826..5f406ca 100644
--- a/paper2remarkable/__version__.py
+++ b/paper2remarkable/__version__.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-VERSION = (0, 8, 0)
+VERSION = (0, 8, 6)
__version__ = ".".join(map(str, VERSION))
diff --git a/paper2remarkable/log.py b/paper2remarkable/log.py
index fb9d8a3..8240a62 100644
--- a/paper2remarkable/log.py
+++ b/paper2remarkable/log.py
@@ -9,8 +9,8 @@ Copyright: 2019, G.J.J. van den Burg
"""
# NOTE: I know about the logging module, but this was easier because one of the
-# dependencies was using that and it became complicated. This one is obviously
-# not thread-safe and is very simple.
+# dependencies was using that and it interfered with our logging. The logger
+# class below is obviously not thread-safe and is very simple.
import datetime
import sys
diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index 371ab82..2be218f 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -15,6 +15,7 @@ from .pdf_url import PdfUrl
from .pmlr import PMLR
from .pubmed import PubMed
from .sagepub import SagePub
+from .science_direct import ScienceDirect
from .semantic_scholar import SemanticScholar
from .springer import Springer
from .tandfonline import TandFOnline
@@ -33,6 +34,7 @@ providers = [
PMLR,
PubMed,
SagePub,
+ ScienceDirect,
Springer,
SemanticScholar,
TandFOnline,
diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index 57774d6..0453c7a 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -38,29 +38,34 @@ class Provider(metaclass=abc.ABCMeta):
verbose=False,
upload=True,
debug=False,
- center=False,
- right=False,
+ experimental=False,
+ crop="left",
blank=False,
- no_crop=False,
remarkable_dir="/",
rmapi_path="rmapi",
pdftoppm_path="pdftoppm",
pdftk_path="pdftk",
qpdf_path="qpdf",
gs_path="gs",
+ css=None,
+ font_urls=None,
cookiejar=None,
):
self.upload = upload
self.debug = debug
+ self.experimental = experimental
self.remarkable_dir = remarkable_dir
self.rmapi_path = rmapi_path
self.pdftoppm_path = pdftoppm_path
self.pdftk_path = pdftk_path
self.qpdf_path = qpdf_path
self.gs_path = gs_path
- self.informer = Informer()
+ self.css = css
+ self.font_urls = font_urls
self.cookiejar = cookiejar
+ self.informer = Informer()
+
self.pdftool = check_pdftool(self.pdftk_path, self.qpdf_path)
# wait time to not hit the server too frequently
@@ -72,11 +77,11 @@ class Provider(metaclass=abc.ABCMeta):
# Define the operations to run on the pdf. Providers can add others.
self.operations = [("rewrite", self.rewrite_pdf)]
- if center:
+ if crop == "center":
self.operations.append(("center", self.center_pdf))
- elif right:
+ elif crop == "right":
self.operations.append(("right", self.right_pdf))
- elif not no_crop:
+ elif crop == "left":
self.operations.append(("crop", self.crop_pdf))
if blank:
diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py
index e050ea3..c22cac4 100644
--- a/paper2remarkable/providers/html.py
+++ b/paper2remarkable/providers/html.py
@@ -13,12 +13,12 @@ Copyright: 2020, G.J.J. van den Burg
import html2text
import markdown
+import re
import readability
import titlecase
import unidecode
import urllib
import weasyprint
-import weasyprint.fonts
from ._base import Provider
from ._info import Informer
@@ -33,7 +33,6 @@ from ..log import Logger
logger = Logger()
CSS = """
-@import url('https://fonts.googleapis.com/css?family=EB+Garamond|Noto+Serif|Inconsolata&display=swap');
@page { size: 702px 936px; margin: 1in; }
a { color: black; }
img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; }
@@ -47,6 +46,13 @@ pre { font-family: 'Inconsolata'; padding-left: 2.5%; background: #efefef; }
code { font-family: 'Inconsolata'; font-size: .7rem; background: #efefef; }
"""
+# NOTE: For some reason, Weasyprint no longer accepts the @import statement in
+# the CSS to load the fonts. This may have to do with recent changes they've
+# introduced. Providing the font urls separately does seem to work.
+FONT_URLS = [
+ "https://fonts.googleapis.com/css2?family=EB+Garamond&family=Noto+Serif&family=Inconsolata"
+]
+
def url_fetcher(url):
if url.startswith("//"):
@@ -133,6 +139,41 @@ class HTML(Provider):
def get_abs_pdf_urls(self, url):
return url, url
+ def fix_lazy_loading(self, article):
+ if not self.experimental:
+ return article
+
+ # This attempts to fix sites where the image src element points to a
+ # placeholder and the data-src attribute contains the url to the actual
+ # image. Note that results may differ between readability and
+ # Readability.JS
+ regex = '<img src="(?P<src>.*?)" (?P<rest1>.*) data-src="(?P<datasrc>.*?)" (?P<rest2>.*?)>'
+ sub = '<img src="\g<datasrc>" \g<rest1> \g<rest2>>'
+
+ article, nsub = re.subn(regex, sub, article, flags=re.MULTILINE)
+ if nsub:
+ logger.info(
+ f"[experimental] Attempted to fix lazy image loading ({nsub} times). "
+ "Please report bad results."
+ )
+ return article
+
+ def preprocess_html(self, pdf_url, title, article):
+ article = self.fix_lazy_loading(article)
+
+ h2t = html2text.HTML2Text()
+ h2t.wrap_links = False
+ text = h2t.handle(article)
+
+ # Add the title back to the document
+ article = "# {title}\n\n{text}".format(title=title, text=text)
+
+ # Convert to html, fixing relative image urls.
+ md = markdown.Markdown()
+ md.treeprocessors.register(ImgProcessor(pdf_url), "img", 10)
+ html_article = md.convert(article)
+ return html_article
+
def retrieve_pdf(self, pdf_url, filename):
"""Turn the HTML article in a clean pdf file
@@ -152,27 +193,17 @@ class HTML(Provider):
request_html = get_page_with_retry(pdf_url, return_text=True)
title, article = make_readable(request_html)
- h2t = html2text.HTML2Text()
- h2t.wrap_links = False
- text = h2t.handle(article)
-
- # Add the title back to the document
- article = "# {title}\n\n{text}".format(title=title, text=text)
-
- # Convert to html, fixing relative image urls.
- md = markdown.Markdown()
- md.treeprocessors.register(ImgProcessor(pdf_url), "img", 10)
- html_article = md.convert(article)
+ html_article = self.preprocess_html(pdf_url, title, article)
if self.debug:
with open("./paper.html", "w") as fp:
fp.write(html_article)
- font_config = weasyprint.fonts.FontConfiguration()
html = weasyprint.HTML(string=html_article, url_fetcher=url_fetcher)
- css = weasyprint.CSS(string=CSS, font_config=font_config)
-
- html.write_pdf(filename, stylesheets=[css], font_config=font_config)
+ css = CSS if self.css is None else self.css
+ font_urls = FONT_URLS if self.font_urls is None else self.font_urls
+ style = weasyprint.CSS(string=css)
+ html.write_pdf(filename, stylesheets=[style] + font_urls)
def validate(src):
# first check if it is a valid url
diff --git a/paper2remarkable/providers/science_direct.py b/paper2remarkable/providers/science_direct.py
new file mode 100644
index 0000000..704a3b0
--- /dev/null
+++ b/paper2remarkable/providers/science_direct.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+"""Provider for ScienceDirect
+
+Author: G.J.J. van den Burg
+License: See LICENSE file
+Copyright: 2020, G.J.J. van den Burg
+
+"""
+
+import re
+import bs4
+import urllib
+import json
+
+from ._base import Provider
+from ._info import Informer
+from ..exceptions import URLResolutionError
+from ..log import Logger
+from ..utils import get_page_with_retry, follow_redirects
+
+logger = Logger()
+
+
+class ScienceDirectInformer(Informer):
+
+ meta_date_key = "citation_publication_date"
+
+ def get_authors(self, soup):
+ surname_tags = soup.find_all("span", attrs={"class": "text surname"})
+ if not surname_tags:
+ logger.warning(
+ "Couldn't determine author information, maybe provide the desired filename using '--filename'?"
+ )
+ return ""
+ authors = [x.text for x in surname_tags]
+ return authors
+
+
+class ScienceDirect(Provider):
+
+ re_abs = (
+ "https?:\/\/www.sciencedirect.com/science/article/pii/[A-Za-z0-9]+"
+ )
+ re_pdf = "https://pdf.sciencedirectassets.com/\d+/([0-9a-zA-Z\-\.]+)/(?P<data>[0-9a-zA-Z\-\.]+)/main.pdf\?.*"
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.informer = ScienceDirectInformer()
+
+ def get_abs_pdf_urls(self, url):
+ m1 = re.match(self.re_abs, url)
+ m2 = re.match(self.re_pdf, url)
+ if m1:
+ abs_url = url
+ pdf_url = self._get_pdf_url(abs_url)
+ elif m2:
+ pdf_url = url
+ data = m2.group("data")
+ paper_id = data.split("-")[-1]
+ abs_url = (
+ f"https://www.sciencedirect.com/science/article/pii/{paper_id}"
+ )
+ else:
+ raise URLResolutionError("ScienceDirect", url)
+ return abs_url, pdf_url
+
+ def _get_pdf_url(self, url):
+ page = get_page_with_retry(url)
+ soup = bs4.BeautifulSoup(page, "html.parser")
+
+ # For open access (and maybe behind institution?) the full text pdf url
+ # is currently in the json payload of a script tag.
+ scripts = soup.find_all("script", attrs={"data-iso-key": "_0"})
+ if not scripts:
+ raise URLResolutionError("ScienceDirect", url)
+ json_data = scripts[0].string
+ data = json.loads(json_data)
+ if not "article" in data:
+ raise URLResolutionError("ScienceDirect", url)
+ data = data["article"]
+ if not "pdfDownload" in data:
+ raise URLResolutionError("ScienceDirect", url)
+ data = data["pdfDownload"]
+ if not "linkToPdf" in data:
+ raise URLResolutionError("ScienceDirect", url)
+ link = data["linkToPdf"]
+ tmp_url = urllib.parse.urljoin("https://sciencedirect.com/", link)
+
+ # tmp_url gives a page with a ten second wait or a direct url, we need
+ # the direct url
+ page = get_page_with_retry(tmp_url)
+ soup = bs4.BeautifulSoup(page, "html.parser")
+ noscript = soup.find_all("noscript")
+ if not noscript:
+ raise URLResolutionError("ScienceDirect", url)
+ a = noscript[0].find_all("a")
+ if not a:
+ raise URLResolutionError("ScienceDirect", url)
+ pdf_url = a[0].get("href")
+ return pdf_url
+
+ def validate(src):
+ return re.match(ScienceDirect.re_abs, src) or re.match(
+ ScienceDirect.re_pdf, src
+ )
diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py
index ea24403..6f95e30 100644
--- a/paper2remarkable/ui.py
+++ b/paper2remarkable/ui.py
@@ -9,7 +9,10 @@ Copyright: 2019, G.J.J. van den Burg
"""
import argparse
+import copy
+import os
import sys
+import yaml
from . import __version__, GITHUB_URL
@@ -41,9 +44,15 @@ def parse_args():
action="store_true",
)
parser.add_argument(
+ "-e",
+ "--experimental",
+ help="enable experimental features",
+ action="store_true",
+ )
+ parser.add_argument(
"-n",
"--no-upload",
- help="don't upload to the reMarkable, save the output in current working dir",
+ help="don't upload to reMarkable, save the output in current directory",
action="store_true",
)
parser.add_argument(
@@ -73,32 +82,47 @@ def parse_args():
version=__version__,
)
parser.add_argument(
+ "-f",
"--filename",
help="Filename to use for the file on reMarkable",
action="append",
)
parser.add_argument(
- "--gs", help="path to gs executable (default: gs)", default="gs"
+ "--gs", help="path to gs executable (default: gs)", default=None
)
parser.add_argument(
"--pdftoppm",
help="path to pdftoppm executable (default: pdftoppm)",
- default="pdftoppm",
+ default=None,
)
parser.add_argument(
"--pdftk",
help="path to pdftk executable (default: pdftk)",
- default="pdftk",
+ default=None,
)
parser.add_argument(
"--qpdf",
help="path to qpdf executable (default: qpdf)",
- default="qpdf",
+ default=None,
)
parser.add_argument(
"--rmapi",
help="path to rmapi executable (default: rmapi)",
- default="rmapi",
+ default=None,
+ )
+ parser.add_argument(
+ "--css", help="path to custom CSS file for HTML output", default=None
+ )
+ parser.add_argument(
+ "--font-urls",
+ help="path to custom font urls file for HTML output",
+ default=None,
+ )
+ parser.add_argument(
+ "-C",
+ "--config",
+ help="path to config file (default: ~/.paper2remarkable.yml)",
+ default=None,
)
parser.add_argument(
"input",
@@ -171,6 +195,72 @@ def choose_provider(cli_input):
return provider, new_input, cookiejar
def load_config(path=None):
    """Load the YAML configuration file.

    Parameters
    ----------
    path : str, optional
        Location of the configuration file. Defaults to
        ``~/.paper2remarkable.yml``.

    Returns
    -------
    dict
        The parsed configuration. When the file does not exist, or is
        empty, a dict with empty ``core``, ``system`` and ``html`` sections
        is returned instead.
    """
    if path is None:
        path = os.path.join(os.path.expanduser("~"), ".paper2remarkable.yml")
    if not os.path.exists(path):
        return {"core": {}, "system": {}, "html": {}}
    with open(path, "r") as fp:
        config = yaml.safe_load(fp)
    if config is None:
        # An empty or comment-only YAML document parses to None; hand back
        # the same defaults as for a missing file so callers get a dict.
        return {"core": {}, "system": {}, "html": {}}
    return config
+
+
def merge_options(config, args):
    """Merge configuration file values with command line arguments.

    Command line arguments win over the configuration; a configuration
    value is used when the matching flag was not given; a built-in default
    is used when neither is present. ``config`` itself is not modified.

    Parameters
    ----------
    config : dict or None
        Parsed configuration with optional ``core``, ``system`` and
        ``html`` sections. ``None`` or a missing/empty section is treated
        as an empty section.
    args : argparse.Namespace
        Parsed command line arguments.

    Returns
    -------
    dict
        Options with ``core`` (blank, verbose, upload, experimental, crop),
        ``system`` (gs, pdftoppm, pdftk, qpdf, rmapi) and ``html`` (css,
        font_urls) sections fully populated.
    """
    # command line arguments always overwrite config
    opts = copy.deepcopy(config) if config else {}
    for section in ("core", "system", "html"):
        # A section that is absent, or present but left empty in the YAML
        # file (parsed as None), becomes an empty dict.
        if opts.get(section) is None:
            opts[section] = {}

    def set_bool(d, key, value):
        if value:
            d[key] = True
        elif key not in d:
            d[key] = False

    def set_path(d, key, value):
        if value is not None:
            d[key] = value
        elif key not in d:
            # Fall back to the bare executable name, which equals the key.
            d[key] = key

    set_bool(opts["core"], "blank", args.blank)
    set_bool(opts["core"], "verbose", args.verbose)
    set_bool(opts["core"], "experimental", args.experimental)

    # --no-upload is a negative flag: only its presence may override the
    # config. Passing "not args.no_upload" through set_bool would force
    # upload=True whenever the flag is absent, ignoring "upload: false".
    if args.no_upload:
        opts["core"]["upload"] = False
    elif "upload" not in opts["core"]:
        opts["core"]["upload"] = True

    if args.center:
        opts["core"]["crop"] = "center"
    elif args.right:
        opts["core"]["crop"] = "right"
    elif args.no_crop:
        opts["core"]["crop"] = "none"
    elif "crop" not in opts["core"]:
        opts["core"]["crop"] = "left"

    set_path(opts["system"], "gs", args.gs)
    set_path(opts["system"], "pdftoppm", args.pdftoppm)
    set_path(opts["system"], "pdftk", args.pdftk)
    set_path(opts["system"], "qpdf", args.qpdf)
    set_path(opts["system"], "rmapi", args.rmapi)

    # For css and font_urls, keep a value supplied by the config file when
    # no (existing) file is given on the command line.
    if args.css and os.path.exists(args.css):
        with open(args.css, "r") as fp:
            opts["html"]["css"] = fp.read()
    elif "css" not in opts["html"]:
        opts["html"]["css"] = None

    if args.font_urls and os.path.exists(args.font_urls):
        with open(args.font_urls, "r") as fp:
            opts["html"]["font_urls"] = [line.strip() for line in fp]
    elif "font_urls" not in opts["html"]:
        opts["html"]["font_urls"] = None

    return opts
+
+
def set_excepthook(debug):
sys_hook = sys.excepthook
@@ -201,6 +291,9 @@ def main():
"When providing --filename and multiple inputs, their number must match."
)
+ config = load_config(path=args.config)
+ options = merge_options(config, args)
+
filenames = (
[None] * len(args.input) if not args.filename else args.filename
)
@@ -208,19 +301,20 @@ def main():
for cli_input, filename in zip(args.input, filenames):
provider, new_input, cookiejar = choose_provider(cli_input)
prov = provider(
- verbose=args.verbose,
- upload=not args.no_upload,
+ verbose=options["core"]["verbose"],
+ upload=options["core"]["upload"],
debug=args.debug,
- center=args.center,
- right=args.right,
- blank=args.blank,
- no_crop=args.no_crop,
+ experimental=options["core"]["experimental"],
+ crop=options["core"]["crop"],
+ blank=options["core"]["blank"],
remarkable_dir=args.remarkable_dir,
- rmapi_path=args.rmapi,
- pdftoppm_path=args.pdftoppm,
- pdftk_path=args.pdftk,
- qpdf_path=args.qpdf,
- gs_path=args.gs,
+ rmapi_path=options["system"]["rmapi"],
+ pdftoppm_path=options["system"]["pdftoppm"],
+ pdftk_path=options["system"]["pdftk"],
+ qpdf_path=options["system"]["qpdf"],
+ gs_path=options["system"]["gs"],
+ css=options["html"]["css"],
+ font_urls=options["html"]["font_urls"],
cookiejar=cookiejar,
)
prov.run(new_input, filename=filename)
diff --git a/setup.py b/setup.py
index 54a8cb1..e529cc2 100644
--- a/setup.py
+++ b/setup.py
@@ -19,17 +19,18 @@ VERSION = None
# What packages are required for this module to be executed?
REQUIRED = [
+ "PyPDF2>=1.26",
"beautifulsoup4>=4.8",
- "requests>=2.21",
+ "html2text>=2020.1.16",
+ "markdown>=3.1.1",
"pdfplumber>=0.5",
- "unidecode>=1.1",
- "titlecase>=0.12",
- "PyPDF2>=1.26",
- "regex>=2018.11",
+ "pyyaml>=5.1",
"readability-lxml>=0.7.1",
- "html2text>=2020.1.16",
+ "regex>=2018.11",
+ "requests>=2.21",
+ "titlecase>=0.12",
+ "unidecode>=1.1",
"weasyprint>=51",
- "markdown>=3.1.1",
]
full_require = ["readabilipy"]
@@ -86,6 +87,7 @@ setup(
install_requires=REQUIRED,
extras_require=EXTRAS,
include_package_data=True,
+ data_files=[("man/man1", ["p2r.1"])],
license=LICENSE,
ext_modules=[],
entry_points={"console_scripts": ["p2r = paper2remarkable.__main__:main"]},
diff --git a/tests/test_html.py b/tests/test_html.py
new file mode 100644
index 0000000..41f6b83
--- /dev/null
+++ b/tests/test_html.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Additional tests for the HTML provider
+
+This file is part of paper2remarkable.
+
+"""
+
+import os
+import pdfplumber
+import unittest
+
+from paper2remarkable.providers.html import HTML
+from paper2remarkable.providers.html import make_readable
+from paper2remarkable.utils import get_page_with_retry
+
+
class TestHTML(unittest.TestCase):
    """Tests for the HTML provider that fetch live web pages."""

    def test_experimental_fix_lazy_loading(self):
        # With experimental=True the preprocessed article is expected to
        # contain the full url of an image on the page.
        url = "https://www.seriouseats.com/2015/01/tea-for-everyone.html"
        prov = HTML(upload=False, experimental=True)
        page = get_page_with_retry(url, return_text=True)
        title, article = make_readable(page)
        html_article = prov.preprocess_html(url, title, article)
        expected_image = "https://www.seriouseats.com/images/2015/01/20150118-tea-max-falkowitz-3.jpg"
        self.assertIn(expected_image, html_article)

    def test_custom_css(self):
        # Render a page with custom CSS and font urls and check the page
        # count of the resulting PDF.
        test_css = """
        @page { size: 702px 936px; margin: 1in; }
        img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; }
        h1,h2,h3 { font-family: 'Montserrat'; }
        p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; text-align: left; }
        """

        test_font_urls = [
            "https://fonts.googleapis.com/css2?family=Montserrat&display=swap"
        ]

        url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines"
        prov = HTML(upload=False, css=test_css, font_urls=test_font_urls)
        filename = prov.run(url)
        # Register the removal immediately so the generated PDF is deleted
        # even when the assertion below fails.
        self.addCleanup(os.unlink, filename)
        with pdfplumber.open(filename) as pdf:
            self.assertEqual(8, len(pdf.pages))
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 9b2f24d..db616e9 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -28,6 +28,7 @@ from paper2remarkable.providers import (
PdfUrl,
PubMed,
SagePub,
+ ScienceDirect,
SemanticScholar,
Springer,
TandFOnline,
@@ -392,6 +393,27 @@ class TestProviders(unittest.TestCase):
filename = prov.run(url)
self.assertEqual(exp, os.path.basename(filename))
def test_sciencedirect_1(self):
    # Article page url: the generated file name must match the expected one.
    article_url = "https://www.sciencedirect.com/science/article/pii/S0166354220302011"
    expected_name = "Caly_et_al_-_The_FDA-approved_Drug_Ivermectin_Inhibits_the_Replication_of_SARS-CoV-2_in_Vitro_2020.pdf"
    provider = ScienceDirect(upload=False, verbose=VERBOSE)
    result_path = provider.run(article_url)
    self.assertEqual(expected_name, os.path.basename(result_path))
+
def test_sciencedirect_2(self):
    # Second article page url, checked the same way via the output file name.
    article_url = "https://www.sciencedirect.com/science/article/pii/S0047235220302543"
    expected_name = "Bolger_Lytle_Bolger_-_What_Matters_in_Citizen_Satisfaction_With_Police_a_Meta-Analysis_2021.pdf"
    provider = ScienceDirect(upload=False, verbose=VERBOSE)
    result_path = provider.run(article_url)
    self.assertEqual(expected_name, os.path.basename(result_path))
+
def test_sciencedirect_3(self):
    # Direct PDF asset url (with its signed query string) instead of an
    # article page url.
    pdf_asset_url = r"https://pdf.sciencedirectassets.com/272398/1-s2.0-S0022039616X00095/1-s2.0-S0022039616001029/main.pdf?X-Amz-Security-Token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJIMEYCIQCRRFGFc7b02V86pkMeqytyBK%2BR8I%2BfdsIpYbjfXSpIBwIhAORxDxLYdr4EoSyn1P7wlhG%2F1RnX8tIG0IRGOidKKm69KrQDCDAQAxoMMDU5MDAzNTQ2ODY1IgwzsYwSRMjSfdr4cbUqkQOPUxG702LEv3POe5ESC9FBVVHGeUF%2BB46FTtWqkhHgjkRIpuoFiavu1cuBWHQ9FwCZjcocan56LfXiySYBfl259MC8ieSYor9FKZLBaAhDCEblkiTdW2%2Fk4nfogp6fwWVdckC8gGVbu3wQ9Mdh%2FE91ZEix%2FIftmJ6IpAZkm0l0AFFt%2BngI7geWoZDeku5iImEUw6JJPgFz5Yw9cKa%2FuGM3hi29JsuI30qzBqZC9nGRCIx%2FLYeiDfF1v0QjFLmT%2FE5xpaNxMt%2FoWLiazRcconSQCCax6%2Bw9SR4NvWg2illOrLMEPuRYacIFRNhV9zj7Y06Bf%2BfG%2FTQxXdnDLH0VMkUWx%2BgjwRAqSvIb0JRg9q5gErPB1cZLCuCd3ybFSmtj7aQmfl7uhMAjQwnCcN6fhtlVK6Xb3Us7YglDaHekzf8RDv9stbxBWFGMPVmDUXHWOsUo89LY%2F9IbtQTs5Uu3ieMGePUVMY4ox3FPYAb5jWjaOFqs54LqfQ5nqjkLMiAY%2F11zCVyOAoPiDnDs6Wjuj52iszCtuc%2F9BTrqATkmIC%2Bu2w6MEow0zbPVAaqNF%2BjUh8Tv%2BWTInq9G3Q4PXIqL3CNNiISPDvuUggRwWGJDgXtr0C%2B4Gtv1bfs3BGHHgWOD261c6O0LHQuP11BLN8GCr7bFO1hjVAqHhC06vyhGQRmRzN32CPwo8pUM2gWw9xXGUioUiSJ%2FgRpDaszsW4Yr8Wm7L9Q7jAOYxEf7WLxPwAWO69o8JbJoouxwL4qeTEGMJ5IpUk3x3xPQIlawOlqY%2FHi0s4E1DE4ZMjH21hc3PrQ%2FiwI%2BTqY9Rg5sjLCBJ4vRCiqb3dpOWLsR5LFOTySXWoqIdO7b9Q%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20201117T155020Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAQ3PHCVTY7OS7PK7A%2F20201117%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=03abad117208b684a1a4ca2ffdcbe5b9a40a19e6c841c609e299315a2f2234ce&hash=24f71da9f05f6835c9797841d1462d11eea85c49e9655dde043ed9f748edf17e&host=68042c943591013ac2b2430a89b270f6af2c76d8dfd086a07176afe7c76c2c61&pii=S0022039616001029&tid=spdf-6b78a4fa-826e-4267-8ce6-43c814fa51b2&sid=776192553463724f1a4b56613fcf5e514b72gxrqb&type=client"
    expected_name = "Kristiansen_Wulff_-_Exponential_Estimates_of_Symplectic_Slow_Manifolds_2016.pdf"
    provider = ScienceDirect(upload=False, verbose=VERBOSE)
    result_path = provider.run(pdf_asset_url)
    self.assertEqual(expected_name, os.path.basename(result_path))
+
if __name__ == "__main__":
unittest.main()