From 284495561acb8b1bfd79cd41c88dd478e7e871a5 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 8 Nov 2020 15:17:14 +0000 Subject: Expand docker authentication instructions --- README.md | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6ee330d..6892eaa 100644 --- a/README.md +++ b/README.md @@ -229,19 +229,31 @@ docker build -t p2r . ### Authorization -If you already have a `~/.rmapi` file, you can skip this section. Otherwise -we'll use `rmapi` to create it. +``paper2remarkable`` uses [rMapi](https://github.com/juruen/rmapi) to sync +documents to the reMarkable. The first time you run ``paper2remarkable`` you +will have to authenticate rMapi using a one-time code provided by reMarkable. +By default, rMapi uses the ``${HOME}/.rmapi`` file as a configuration file to +store the credentials, and so this is the location we will use in the commands +below. If you'd like to use a different location for the configuration (for +instance, ``${HOME}/.config/rmapi/rmapi.conf``), make sure to change the +commands below accordingly. + +If you already have a `~/.rmapi` file with the authentication details, you can +skip this section. 
Otherwise we'll create it and run ``rmapi`` in the docker +container for authentication: ```bash -touch ${HOME}/.rmapi -docker run --rm -i -t -v "${HOME}/.rmapi:/home/user/.rmapi:rw" --entrypoint=rmapi p2r version +$ touch ${HOME}/.rmapi +$ docker run --rm -i -t -v "${HOME}/.rmapi:/home/user/.rmapi:rw" --entrypoint=rmapi p2r version ``` -which should end with output like +This command will print a link where you can obtain a one-time code to +authenticate rMapi and afterwards print the rMapi version (the version number +may be different): ```bash ReMarkable Cloud API Shell -rmapi version: 0.0.5 +rmapi version: 0.0.12 ``` ### Usage -- cgit v1.2.3 From fc6702323f35b76ca85f984312de1ab98e884f6e Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 9 Nov 2020 14:11:09 +0000 Subject: Add note on activating alias --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6892eaa..ea602a5 100644 --- a/README.md +++ b/README.md @@ -285,7 +285,8 @@ your ``~/.bashrc`` file to abstract away the Docker commands: alias p2r="docker run --rm -v \"${HOME}/.rmapi:/home/user/.rmapi:rw\" p2r" ``` -Then you can use ``paper2remarkable`` from the command line as ``p2r``! +After running ``source ~/.bashrc`` to activate the alias, you can then use +``paper2remarkable`` through Docker by calling ``p2r`` from the command line. 
# Notes -- cgit v1.2.3 From 882805565241bf2765b632e7b89a1f733a935a45 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 11 Nov 2020 19:36:07 +0000 Subject: Add experimental fix for lazy loaded images in html --- paper2remarkable/providers/_base.py | 2 ++ paper2remarkable/providers/html.py | 47 ++++++++++++++++++++++++++++--------- paper2remarkable/ui.py | 7 ++++++ tests/test_html.py | 29 +++++++++++++++++++++++ 4 files changed, 74 insertions(+), 11 deletions(-) create mode 100644 tests/test_html.py diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index 57774d6..a664f23 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -38,6 +38,7 @@ class Provider(metaclass=abc.ABCMeta): verbose=False, upload=True, debug=False, + experimental=False, center=False, right=False, blank=False, @@ -52,6 +53,7 @@ class Provider(metaclass=abc.ABCMeta): ): self.upload = upload self.debug = debug + self.experimental = experimental self.remarkable_dir = remarkable_dir self.rmapi_path = rmapi_path self.pdftoppm_path = pdftoppm_path diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py index e050ea3..3e32539 100644 --- a/paper2remarkable/providers/html.py +++ b/paper2remarkable/providers/html.py @@ -13,6 +13,7 @@ Copyright: 2020, G.J.J. van den Burg import html2text import markdown +import re import readability import titlecase import unidecode @@ -133,6 +134,40 @@ class HTML(Provider): def get_abs_pdf_urls(self, url): return url, url + def fix_lazy_loading(self, article): + if not self.experimental: + return article + + # This attempts to fix sites where the image src element points to a + # placeholder and the data-src attribute contains the url to the actual + # image. 
+ regex = '<img src="(?P<src>.*?)" (?P<rest1>.*) data-src="(?P<datasrc>.*?)" (?P<rest2>.*?)>' + sub = '<img src="\g<datasrc>" \g<rest1> \g<rest2>>' + + article, nsub = re.subn(regex, sub, article, flags=re.MULTILINE) + if nsub: + logger.info( + f"[experimental] Attempted to fix lazy image loading ({nsub} times). " + "Please report bad results." + ) + return article + + def preprocess_html(self, pdf_url, title, article): + article = self.fix_lazy_loading(article) + + h2t = html2text.HTML2Text() + h2t.wrap_links = False + text = h2t.handle(article) + + # Add the title back to the document + article = "# {title}\n\n{text}".format(title=title, text=text) + + # Convert to html, fixing relative image urls. + md = markdown.Markdown() + md.treeprocessors.register(ImgProcessor(pdf_url), "img", 10) + html_article = md.convert(article) + return html_article + def retrieve_pdf(self, pdf_url, filename): """Turn the HTML article in a clean pdf file @@ -152,17 +187,7 @@ class HTML(Provider): request_html = get_page_with_retry(pdf_url, return_text=True) title, article = make_readable(request_html) - h2t = html2text.HTML2Text() - h2t.wrap_links = False - text = h2t.handle(article) - - # Add the title back to the document - article = "# {title}\n\n{text}".format(title=title, text=text) - - # Convert to html, fixing relative image urls. 
- md = markdown.Markdown() - md.treeprocessors.register(ImgProcessor(pdf_url), "img", 10) - html_article = md.convert(article) + html_article = self.preprocess_html(pdf_url, title, article) if self.debug: with open("./paper.html", "w") as fp: diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py index ea24403..a2b71cb 100644 --- a/paper2remarkable/ui.py +++ b/paper2remarkable/ui.py @@ -40,6 +40,12 @@ def parse_args(): help="debug mode, doesn't upload to reMarkable", action="store_true", ) + parser.add_argument( + "-e", + "--experimental", + help="enable experimental features", + action="store_true", + ) parser.add_argument( "-n", "--no-upload", @@ -211,6 +217,7 @@ def main(): verbose=args.verbose, upload=not args.no_upload, debug=args.debug, + experimental=args.experimental, center=args.center, right=args.right, blank=args.blank, diff --git a/tests/test_html.py b/tests/test_html.py new file mode 100644 index 0000000..d271bb5 --- /dev/null +++ b/tests/test_html.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Additional tests for the HTML provider + +This file is part of paper2remarkable. 
+ +""" + +import unittest + +from paper2remarkable.providers.html import HTML +from paper2remarkable.providers.html import make_readable +from paper2remarkable.utils import get_page_with_retry + + +class TestHTML(unittest.TestCase): + def test_experimental_fix_lazy_loading(self): + url = "https://www.seriouseats.com/2015/01/tea-for-everyone.html" + prov = HTML(upload=False, experimental=True) + page = get_page_with_retry(url, return_text=True) + title, article = make_readable(page) + html_article = prov.preprocess_html(url, title, article) + expected_image = "https://www.seriouseats.com/images/2015/01/20150118-tea-max-falkowitz-3.jpg" + self.assertIn(expected_image, html_article) + + +if __name__ == "__main__": + unittest.main() -- cgit v1.2.3 From f2981af016370e04ec1e8cc886a64b9017970210 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 11 Nov 2020 20:09:42 +0000 Subject: Bump version and update changelog --- CHANGELOG.md | 4 ++++ README.md | 18 +++++++++++------- paper2remarkable/__version__.py | 2 +- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4867fb..5df04eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Version 0.8.1 + +* Add experimental fix for lazy loaded images in HTML + ## Version 0.8.0 * Add provider for Nature diff --git a/README.md b/README.md index ea602a5..0c7694c 100644 --- a/README.md +++ b/README.md @@ -152,24 +152,28 @@ flags are self-explanatory, but if you'd like more information, please open an issue [on GitHub][github-url]. ``` -usage: p2r [-h] [-b] [-c] [-d] [-n] [-p REMARKABLE_DIR] [-r] [-k] [-v] [-V] - [--filename FILENAME] [--gs GS] [--pdftoppm PDFTOPPM] [--pdftk PDFTK] - [--qpdf QPDF] [--rmapi RMAPI] +usage: p2r [-h] [-b] [-c] [-d] [-e] [-n] [-p REMARKABLE_DIR] [-r] [-k] [-v] + [-V] [--filename FILENAME] [--gs GS] [--pdftoppm PDFTOPPM] + [--pdftk PDFTK] [--qpdf QPDF] [--rmapi RMAPI] input [input ...] 
-Paper2reMarkable version 0.7.3 +Paper2reMarkable version 0.8.1 positional arguments: - input One or more URLs to a paper or paths to local PDF files + input One or more URLs to a paper or paths to local PDF + files optional arguments: -h, --help show this help message and exit -b, --blank Add a blank page after every page of the PDF -c, --center Center the PDF on the page, instead of left align -d, --debug debug mode, doesn't upload to reMarkable - -n, --no-upload don't upload to the reMarkable, save the output in current working dir + -e, --experimental enable experimental features + -n, --no-upload don't upload to the reMarkable, save the output in + current working dir -p REMARKABLE_DIR, --remarkable-path REMARKABLE_DIR - directory on reMarkable to put the file (created if missing, default: /) + directory on reMarkable to put the file (created if + missing, default: /) -r, --right Right align so the menu doesn't cover it -k, --no-crop Don't crop the pdf file -v, --verbose be verbose diff --git a/paper2remarkable/__version__.py b/paper2remarkable/__version__.py index 2b9f826..090df0c 100644 --- a/paper2remarkable/__version__.py +++ b/paper2remarkable/__version__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -VERSION = (0, 8, 0) +VERSION = (0, 8, 1) __version__ = ".".join(map(str, VERSION)) -- cgit v1.2.3 From 9846b39c2ddc57b233dca8e4dbc818d0e29f46f2 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Thu, 12 Nov 2020 00:01:55 +0000 Subject: Add short flag for --filename --- paper2remarkable/ui.py | 1 + 1 file changed, 1 insertion(+) diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py index a2b71cb..f9af28f 100644 --- a/paper2remarkable/ui.py +++ b/paper2remarkable/ui.py @@ -79,6 +79,7 @@ def parse_args(): version=__version__, ) parser.add_argument( + "-f", "--filename", help="Filename to use for the file on reMarkable", action="append", -- cgit v1.2.3 From 8aa49bdfa5298c913caf0e7025f5cc1199fa5612 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg 
Date: Thu, 12 Nov 2020 00:05:14 +0000 Subject: Add man page to package --- MANIFEST.in | 1 + Makefile | 17 ++----- docs/Makefile | 71 +++++++++++++++++++++++++++ docs/conf.py | 62 ++++++++++++++++++++++++ docs/index.rst | 0 docs/man.md | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 7 files changed, 290 insertions(+), 12 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/man.md diff --git a/MANIFEST.in b/MANIFEST.in index 1de226b..7533e12 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,3 +5,4 @@ exclude .gitignore exclude Dockerfile exclude make_release.py prune old +include p2r.1 diff --git a/Makefile b/Makefile index bcbc420..6a4a460 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ PACKAGE=paper2remarkable DOC_DIR='./docs/' VENV_DIR=/tmp/p2r_venv/ -.PHONY: help dist venv +.PHONY: help dist venv docs .DEFAULT_GOAL := help @@ -35,22 +35,15 @@ clean: ## Clean build dist and egg directories left after install rm -rf ./$(PACKAGE).egg-info rm -rf $(VENV_DIR) rm -f MANIFEST + rm -f ./p2r.1 find . -type f -iname '*.pyc' -delete find . 
-type d -name '__pycache__' -empty -delete -dist: ## Make Python source distribution +dist: docs ## Make Python source distribution python setup.py sdist bdist_wheel -docs: doc -doc: install ## Build documentation with Sphinx - source $(VENV_DIR)/bin/activate && m2r README.md && mv README.rst $(DOC_DIR) - source $(VENV_DIR)/bin/activate && m2r CHANGELOG.md && mv CHANGELOG.rst $(DOC_DIR) - cd $(DOC_DIR) && \ - rm source/* && \ - source $(VENV_DIR)/bin/activate && \ - sphinx-apidoc -H 'Paper2Remarkable API Documentation' -o source ../$(PACKAGE) && \ - touch source/AUTOGENERATED - $(MAKE) -C $(DOC_DIR) html +docs: + $(MAKE) -C $(DOC_DIR) clean && $(MAKE) -C $(DOC_DIR) man venv: $(VENV_DIR)/bin/activate diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..4c617df --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,71 @@ +# Makefile for paper2remarkable documentation +# +# Author: G.J.J. van den Burg +# Copyright (c) 2020 G.J.J. van den Burg +# License: See LICENSE file +# + +SHELL := bash +.SHELLFLAGS := -eu -o pipefail -c +MAKEFLAGS += --no-builtin-rules + +VENV_DIR=/tmp/p2r_docs/ +OUTPUT_DIR=./output + +PACKAGE = p2r +SPHINXBUILD = sphinx-build +ALLSPHINXOPTS = -d .build/doctrees . 
+ +.PHONY: all + +all: man + +############ +# Man page # +############ + +.PHONY: man output-dir clean_output + +man: $(OUTPUT_DIR)/$(PACKAGE).1 + cp $< ../$(PACKAGE).1 + @echo "Generated man page copied to ../$(PACKAGE).1" + +$(OUTPUT_DIR)/$(PACKAGE).1: man.rst | output-dir + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(OUTPUT_DIR) + +man.rst: man.md venv + source $(VENV_DIR)/bin/activate && m2r man.md + +output-dir: + mkdir -p $(OUTPUT_DIR) + mkdir -p _static + +clean_output: + rm -f man.rst + rm -rf $(OUTPUT_DIR) + rm -rf _static + +####################### +# Virtual environment # +####################### + +.PHONY: venv clean_venv + +venv: $(VENV_DIR)/bin/activate + +$(VENV_DIR)/bin/activate: + test -d $(VENV_DIR) || python -m venv $(VENV_DIR) + source $(VENV_DIR)/bin/activate && pip install m2r + touch $(VENV_DIR)/bin/activate + +clean_venv: + rm -rf $(VENV_DIR) + +############ +# Clean up # +############ + +.PHONY: venv + +clean: clean_venv clean_output + rm -f ../$(PACKAGE).1 diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..92961c1 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,62 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = "paper2remarkable" +copyright = "2020, G.J.J. 
van den Burg" +author = "Gertjan van den Burg" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "alabaster" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# Man page +man_pages = [ + ( + "man", + "p2r", + "Fetch an academic paper or web article and send it to the reMarkable tablet", + author + " & other helpful contributors.", + 1, + ) +] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..e69de29 diff --git a/docs/man.md b/docs/man.md new file mode 100644 index 0000000..dcaf594 --- /dev/null +++ b/docs/man.md @@ -0,0 +1,150 @@ +# paper2remarkable + +## SYNOPSIS + +``` +p2r [OPTION]... [INPUT]... +``` + +## DESCRIPTION + +Fetch an academic paper, local pdf file, or any web article and send it to the +reMarkable tablet. The input to the script can be a URL to a PDF file or +article on a website, or a local file. 
For supported scientific outlets, the +program will collect the metadata for the paper and create a nice filename +(unless ``--filename`` is specified). See [SUPPORTED +SOURCES](#supported-sources) for an overview of supported scientific outlets. + +By default, paper2remarkable crops the unnecessary whitespace from a PDF file +to make the paper fit better on the reMarkable. The default setting yields a +left-aligned document on the reMarkable which can be useful for taking margin +notes. Alternatively, the program supports the ``--center``, ``--right``, and +``--no-crop`` options to change this crop setting. + +## OPTIONS + +Basic options: + +-b, --blank + Add a blank page after every page of the PDF document. This can be + useful for taking notes on papers. + +-e, --experimental + Enable the experimental features of paper2remarkable. See below under + [EXPERIMENTAL FEATURES](#experimental-features) for an overview. + +-f, --filename=FILENAME + Filename to use for the file on reMarkable. If you specify multiple + ``INPUT`` files and want to use a specific filename for each, you can + specify ``--filename`` for each ``INPUT`` source by repeating it. + +-h, --help + Show help message and exit. + +-v, --verbose + Enable verbose mode of paper2remarkable. By default the program prints + no output. + +-V, --version + Show the version and exit. + +Crop options: + +-c, --center + Center the PDF on the page. + +-k, --no-crop + Don't crop the document at all. + +-r, --right + Right-align the document on the reMarkable so the menu doesn't cover it. + +reMarkable options: + +-n, --no-upload + Don't upload the document to the reMarkable, save the output in the + current working dir. + +-p, --remarkable-path=DIR + The directory on the reMarkable where the document will be uploaded to. + If the target directory does not exist it will be created. If not + specified, the root directory will be used. 
+ +System settings: + +You'll only need to specify these options if the programs are not available on +the PATH variable. + +--gs=GS + Path to the GhostScript executable. + +--pdftoppm=PDFTOPPM + Path to pdftoppm executable (default: pdftoppm) + +--pdftk=PDFTK + Path to PDFtk executable (default: pdftk) + +--qpdf=QPDF + Path to qpdf executable (default: qpdf) + +--rmapi=RMAPI + Path to rmapi executable (default: rmapi) + +Developer options: + +-d, --debug + Debug mode, when used the program doesn't upload the document to the + reMarkable by default and leaves the temporary directory with + intermediate files. + +## SUPPORTED SOURCES + +The following scientific sources are currently supported and paper2remarkable +will create a filename based on the authors, title, and publication year of +the work. For the sources below the program is generally flexible with regards +to whether a URL to the PDF or to the abstract page is provided. + +- arXiv +- ACM Digital Library +- CiteSeerX +- CVF +- JMLR +- Nature +- NBER +- NeurIPS +- OpenReview +- PMLR +- PubMed Central +- SagePub +- SemanticScholar +- SpringerLink +- Taylor & Francis + +paper2remarkable also supports a generic URL to a PDF file or a local file, in +which case no "nice" filename will be generated. + +- A generic URL to a PDF file. This can be considered a fallback option for + when a PDF source is not supported (yet). +- A local PDF file + +Finally, paper2remarkable supports extracting articles from websites. In this +case an effort is done to detect the main content of the article and clean up +the HTML before sending the file to the reMarkable. + +## EXPERIMENTAL FEATURES + +Occassionally, experimental (beta) features will be included in +paper2remarkable and they will be listed here. You can enable the experimental +features using the ``-e`` flag to paper2remarkable. + +- The HTML provider currently has an experimental feature to handle lazy + loading of images. 
Certain websites use a small placeholder image and + subsequently load the main image, which is often stored in a ``data-src`` + attribute in the ``img`` tag. The experimental feature uses the ``data-src`` + attribute as the image source instead of the ``src``. + +## BUGS + +Please report bugs to: + +https://www.github.com/GjjvdBurg/paper2remarkable diff --git a/setup.py b/setup.py index 54a8cb1..e79448c 100644 --- a/setup.py +++ b/setup.py @@ -86,6 +86,7 @@ setup( install_requires=REQUIRED, extras_require=EXTRAS, include_package_data=True, + data_files=[("man/man1", ["p2r.1"])], license=LICENSE, ext_modules=[], entry_points={"console_scripts": ["p2r = paper2remarkable.__main__:main"]}, -- cgit v1.2.3 From 13b9866be80d911342519935934c251e490a5277 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 13 Nov 2020 18:49:14 +0000 Subject: remove tests and pyproject from distribution --- MANIFEST.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 7533e12..038a56f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,8 +1,10 @@ include README.md +include p2r.1 recursive-include tests *.py exclude Makefile exclude .gitignore exclude Dockerfile exclude make_release.py +exclude pyproject.toml prune old -include p2r.1 +prune tests -- cgit v1.2.3 From fadbad68166a8d3577e5cb3dfe8dba724dacb76d Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 13 Nov 2020 18:49:32 +0000 Subject: minor fixes to docs --- docs/Makefile | 1 + docs/man.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/Makefile b/docs/Makefile index 4c617df..ae82c87 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -44,6 +44,7 @@ clean_output: rm -f man.rst rm -rf $(OUTPUT_DIR) rm -rf _static + rm -rf .build ####################### # Virtual environment # diff --git a/docs/man.md b/docs/man.md index dcaf594..958b825 100644 --- a/docs/man.md +++ b/docs/man.md @@ -13,7 +13,8 @@ reMarkable tablet. 
The input to the script can be a URL to a PDF file or article on a website, or a local file. For supported scientific outlets, the program will collect the metadata for the paper and create a nice filename (unless ``--filename`` is specified). See [SUPPORTED -SOURCES](#supported-sources) for an overview of supported scientific outlets. +SOURCES](#supported-sources) for an overview of supported scientific paper +sources. By default, paper2remarkable crops the unnecessary whitespace from a PDF file to make the paper fit better on the reMarkable. The default setting yields a -- cgit v1.2.3 From 7675b7ea4f07e0f5f0d9dcfb9a3846eaace07432 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 17 Nov 2020 16:50:40 +0000 Subject: Add provider for ScienceDirect --- paper2remarkable/providers/__init__.py | 2 + paper2remarkable/providers/science_direct.py | 106 +++++++++++++++++++++++++++ tests/test_providers.py | 22 ++++++ 3 files changed, 130 insertions(+) create mode 100644 paper2remarkable/providers/science_direct.py diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py index 371ab82..2be218f 100644 --- a/paper2remarkable/providers/__init__.py +++ b/paper2remarkable/providers/__init__.py @@ -15,6 +15,7 @@ from .pdf_url import PdfUrl from .pmlr import PMLR from .pubmed import PubMed from .sagepub import SagePub +from .science_direct import ScienceDirect from .semantic_scholar import SemanticScholar from .springer import Springer from .tandfonline import TandFOnline @@ -33,6 +34,7 @@ providers = [ PMLR, PubMed, SagePub, + ScienceDirect, Springer, SemanticScholar, TandFOnline, diff --git a/paper2remarkable/providers/science_direct.py b/paper2remarkable/providers/science_direct.py new file mode 100644 index 0000000..704a3b0 --- /dev/null +++ b/paper2remarkable/providers/science_direct.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- + +"""Provider for ScienceDirect + +Author: G.J.J. 
van den Burg +License: See LICENSE file +Copyright: 2020, G.J.J. van den Burg + +""" + +import re +import bs4 +import urllib +import json + +from ._base import Provider +from ._info import Informer +from ..exceptions import URLResolutionError +from ..log import Logger +from ..utils import get_page_with_retry, follow_redirects + +logger = Logger() + + +class ScienceDirectInformer(Informer): + + meta_date_key = "citation_publication_date" + + def get_authors(self, soup): + surname_tags = soup.find_all("span", attrs={"class": "text surname"}) + if not surname_tags: + logger.warning( + "Couldn't determine author information, maybe provide the desired filename using '--filename'?" + ) + return "" + authors = [x.text for x in surname_tags] + return authors + + +class ScienceDirect(Provider): + + re_abs = ( + "https?:\/\/www.sciencedirect.com/science/article/pii/[A-Za-z0-9]+" + ) + re_pdf = "https://pdf.sciencedirectassets.com/\d+/([0-9a-zA-Z\-\.]+)/(?P<data>[0-9a-zA-Z\-\.]+)/main.pdf\?.*" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.informer = ScienceDirectInformer() + + def get_abs_pdf_urls(self, url): + m1 = re.match(self.re_abs, url) + m2 = re.match(self.re_pdf, url) + if m1: + abs_url = url + pdf_url = self._get_pdf_url(abs_url) + elif m2: + pdf_url = url + data = m2.group("data") + paper_id = data.split("-")[-1] + abs_url = ( + f"https://www.sciencedirect.com/science/article/pii/{paper_id}" + ) + else: + raise URLResolutionError("ScienceDirect", url) + return abs_url, pdf_url + + def _get_pdf_url(self, url): + page = get_page_with_retry(url) + soup = bs4.BeautifulSoup(page, "html.parser") + + # For open access (and maybe behind institution?) the full text pdf url + # is currently in the json payload of a script tag. 
+ scripts = soup.find_all("script", attrs={"data-iso-key": "_0"}) + if not scripts: + raise URLResolutionError("ScienceDirect", url) + json_data = scripts[0].string + data = json.loads(json_data) + if not "article" in data: + raise URLResolutionError("ScienceDirect", url) + data = data["article"] + if not "pdfDownload" in data: + raise URLResolutionError("ScienceDirect", url) + data = data["pdfDownload"] + if not "linkToPdf" in data: + raise URLResolutionError("ScienceDirect", url) + link = data["linkToPdf"] + tmp_url = urllib.parse.urljoin("https://sciencedirect.com/", link) + + # tmp_url gives a page with a ten second wait or a direct url, we need + # the direct url + page = get_page_with_retry(tmp_url) + soup = bs4.BeautifulSoup(page, "html.parser") + noscript = soup.find_all("noscript") + if not noscript: + raise URLResolutionError("ScienceDirect", url) + a = noscript[0].find_all("a") + if not a: + raise URLResolutionError("ScienceDirect", url) + pdf_url = a[0].get("href") + return pdf_url + + def validate(src): + return re.match(ScienceDirect.re_abs, src) or re.match( + ScienceDirect.re_pdf, src + ) diff --git a/tests/test_providers.py b/tests/test_providers.py index 9b2f24d..db616e9 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -28,6 +28,7 @@ from paper2remarkable.providers import ( PdfUrl, PubMed, SagePub, + ScienceDirect, SemanticScholar, Springer, TandFOnline, @@ -392,6 +393,27 @@ class TestProviders(unittest.TestCase): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + def test_sciencedirect_1(self): + prov = ScienceDirect(upload=False, verbose=VERBOSE) + url = "https://www.sciencedirect.com/science/article/pii/S0166354220302011" + exp = "Caly_et_al_-_The_FDA-approved_Drug_Ivermectin_Inhibits_the_Replication_of_SARS-CoV-2_in_Vitro_2020.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_sciencedirect_2(self): + prov = ScienceDirect(upload=False, verbose=VERBOSE) 
+ url = "https://www.sciencedirect.com/science/article/pii/S0047235220302543" + exp = "Bolger_Lytle_Bolger_-_What_Matters_in_Citizen_Satisfaction_With_Police_a_Meta-Analysis_2021.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + + def test_sciencedirect_3(self): + prov = ScienceDirect(upload=False, verbose=VERBOSE) + url = r"https://pdf.sciencedirectassets.com/272398/1-s2.0-S0022039616X00095/1-s2.0-S0022039616001029/main.pdf?X-Amz-Security-Token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJIMEYCIQCRRFGFc7b02V86pkMeqytyBK%2BR8I%2BfdsIpYbjfXSpIBwIhAORxDxLYdr4EoSyn1P7wlhG%2F1RnX8tIG0IRGOidKKm69KrQDCDAQAxoMMDU5MDAzNTQ2ODY1IgwzsYwSRMjSfdr4cbUqkQOPUxG702LEv3POe5ESC9FBVVHGeUF%2BB46FTtWqkhHgjkRIpuoFiavu1cuBWHQ9FwCZjcocan56LfXiySYBfl259MC8ieSYor9FKZLBaAhDCEblkiTdW2%2Fk4nfogp6fwWVdckC8gGVbu3wQ9Mdh%2FE91ZEix%2FIftmJ6IpAZkm0l0AFFt%2BngI7geWoZDeku5iImEUw6JJPgFz5Yw9cKa%2FuGM3hi29JsuI30qzBqZC9nGRCIx%2FLYeiDfF1v0QjFLmT%2FE5xpaNxMt%2FoWLiazRcconSQCCax6%2Bw9SR4NvWg2illOrLMEPuRYacIFRNhV9zj7Y06Bf%2BfG%2FTQxXdnDLH0VMkUWx%2BgjwRAqSvIb0JRg9q5gErPB1cZLCuCd3ybFSmtj7aQmfl7uhMAjQwnCcN6fhtlVK6Xb3Us7YglDaHekzf8RDv9stbxBWFGMPVmDUXHWOsUo89LY%2F9IbtQTs5Uu3ieMGePUVMY4ox3FPYAb5jWjaOFqs54LqfQ5nqjkLMiAY%2F11zCVyOAoPiDnDs6Wjuj52iszCtuc%2F9BTrqATkmIC%2Bu2w6MEow0zbPVAaqNF%2BjUh8Tv%2BWTInq9G3Q4PXIqL3CNNiISPDvuUggRwWGJDgXtr0C%2B4Gtv1bfs3BGHHgWOD261c6O0LHQuP11BLN8GCr7bFO1hjVAqHhC06vyhGQRmRzN32CPwo8pUM2gWw9xXGUioUiSJ%2FgRpDaszsW4Yr8Wm7L9Q7jAOYxEf7WLxPwAWO69o8JbJoouxwL4qeTEGMJ5IpUk3x3xPQIlawOlqY%2FHi0s4E1DE4ZMjH21hc3PrQ%2FiwI%2BTqY9Rg5sjLCBJ4vRCiqb3dpOWLsR5LFOTySXWoqIdO7b9Q%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20201117T155020Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAQ3PHCVTY7OS7PK7A%2F20201117%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=03abad117208b684a1a4ca2ffdcbe5b9a40a19e6c841c609e299315a2f2234ce&hash=24f71da9f05f6835c9797841d1462d11eea85c49e9655dde043ed9f748edf17e&host=68042c943591013ac2b2430a89b270f6af2c76
d8dfd086a07176afe7c76c2c61&pii=S0022039616001029&tid=spdf-6b78a4fa-826e-4267-8ce6-43c814fa51b2&sid=776192553463724f1a4b56613fcf5e514b72gxrqb&type=client" + exp = "Kristiansen_Wulff_-_Exponential_Estimates_of_Symplectic_Slow_Manifolds_2016.pdf" + filename = prov.run(url) + self.assertEqual(exp, os.path.basename(filename)) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 81d25f406060ac53ab2e6e66d08793204b3db918 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 17 Nov 2020 16:51:34 +0000 Subject: Man page updates --- docs/man.md | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/docs/man.md b/docs/man.md index 958b825..7949145 100644 --- a/docs/man.md +++ b/docs/man.md @@ -64,7 +64,7 @@ reMarkable options: -n, --no-upload Don't upload the document to the reMarkable, save the output in the - current working dir. + current working directory. -p, --remarkable-path=DIR The directory on the reMarkable where the document will be uploaded to. @@ -80,16 +80,18 @@ the PATH variable. Path to the GhostScript executable. --pdftoppm=PDFTOPPM - Path to pdftoppm executable (default: pdftoppm) + Path to pdftoppm executable (default: pdftoppm). Note that pdftoppm is + optional. --pdftk=PDFTK - Path to PDFtk executable (default: pdftk) + Path to PDFtk executable (default: pdftk). Either pdftk or qpdf is + needed. --qpdf=QPDF - Path to qpdf executable (default: qpdf) + Path to qpdf executable (default: qpdf). Either pdftk or qpdf is needed. --rmapi=RMAPI - Path to rmapi executable (default: rmapi) + Path to rmapi executable (default: rmapi). Developer options: @@ -126,7 +128,7 @@ which case no "nice" filename will be generated. - A generic URL to a PDF file. This can be considered a fallback option for when a PDF source is not supported (yet). -- A local PDF file +- A local PDF file. Finally, paper2remarkable supports extracting articles from websites. 
In this case an effort is done to detect the main content of the article and clean up @@ -136,13 +138,14 @@ the HTML before sending the file to the reMarkable. Occassionally, experimental (beta) features will be included in paper2remarkable and they will be listed here. You can enable the experimental -features using the ``-e`` flag to paper2remarkable. +features by using the ``-e`` flag to paper2remarkable. - The HTML provider currently has an experimental feature to handle lazy - loading of images. Certain websites use a small placeholder image and - subsequently load the main image, which is often stored in a ``data-src`` - attribute in the ``img`` tag. The experimental feature uses the ``data-src`` - attribute as the image source instead of the ``src``. + loading of images. Certain websites use a small placeholder image and load + the main image using Javascript, with the actual image source stored in a + ``data-src`` attribute in the ``img`` tag. The experimental feature uses the + ``data-src`` attribute as the image source instead of that in the ``src`` + attribute. ## BUGS -- cgit v1.2.3 From 37ff3633857d797de916c856ac6da11b1d328ee0 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 17 Nov 2020 22:11:56 +0000 Subject: Add ScienceDirect to man page --- docs/man.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/man.md b/docs/man.md index 958b825..8202d1f 100644 --- a/docs/man.md +++ b/docs/man.md @@ -117,6 +117,7 @@ to whether a URL to the PDF or to the abstract page is provided. 
- PMLR - PubMed Central - SagePub +- ScienceDirect - SemanticScholar - SpringerLink - Taylor & Francis -- cgit v1.2.3 From 2c3fea83a36483c0cad1be3a113b6dd256856f31 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 17 Nov 2020 22:18:34 +0000 Subject: Bump version and update changelog and readme --- CHANGELOG.md | 6 ++++++ README.md | 5 +++-- paper2remarkable/__version__.py | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5df04eb..e41c4c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Version 0.8.2 + +* Add provider for ScienceDirect +* Add man page to package +* Add short flag, -f, for --filename + ## Version 0.8.1 * Add experimental fix for lazy loaded images in HTML diff --git a/README.md b/README.md index 0c7694c..be1879a 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ reMarkable from any of the following sources: * [PMLR](http://proceedings.mlr.press/) * [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/) * [SagePub](https://journals.sagepub.com/) +* [ScienceDirect](https://www.sciencedirect.com/) * [SemanticScholar](https://www.semanticscholar.org/) * [SpringerLink](https://link.springer.com/) * [Taylor & Francis](https://www.tandfonline.com/) @@ -148,8 +149,8 @@ Github][github-url]. ## Usage The full help of the script is as follows. Hopefully the various command line -flags are self-explanatory, but if you'd like more information, please open an -issue [on GitHub][github-url]. +flags are self-explanatory, but if you'd like more information see the [man +page](docs/man.md) (``man p2r``) or open an issue [on GitHub][github-url]. 
``` usage: p2r [-h] [-b] [-c] [-d] [-e] [-n] [-p REMARKABLE_DIR] [-r] [-k] [-v] diff --git a/paper2remarkable/__version__.py b/paper2remarkable/__version__.py index 090df0c..6955111 100644 --- a/paper2remarkable/__version__.py +++ b/paper2remarkable/__version__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -VERSION = (0, 8, 1) +VERSION = (0, 8, 2) __version__ = ".".join(map(str, VERSION)) -- cgit v1.2.3 From bdd9eed73d874522753d58841d6ec22c42eeb1b8 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 20 Nov 2020 14:11:04 +0000 Subject: Add note on annotations and cropping to README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index be1879a..acf392e 100644 --- a/README.md +++ b/README.md @@ -187,6 +187,12 @@ optional arguments: --rmapi RMAPI path to rmapi executable (default: rmapi) ``` +By default ``paper2remarkable`` makes a PDF fit better on the reMarkable by +changing the page size and removing unnecessary whitespace. Some tools for +exporting a PDF with annotations do not handle different page sizes properly, +causing annotations to be misplaced. If this is an issue for you, you can +disable cropping using the ``-k``/``--no-crop`` option to ``p2r``. + ## Alfred Workflow On MacOS, you can optionally install [this Alfred workflow][workflow]. 
Alfred -- cgit v1.2.3 From c1381594e70c5a3f678c83bc602d48dbac42df9f Mon Sep 17 00:00:00 2001 From: John Savage Date: Tue, 8 Dec 2020 11:12:37 +0000 Subject: Update README.md Need to use ro for readonly, otherwise get error ```docker: Error response from daemon: invalid mode: r``` --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index acf392e..ce7ecd6 100644 --- a/README.md +++ b/README.md @@ -280,7 +280,7 @@ docker run --rm -v "${HOME}/.rmapi:/home/user/.rmapi:rw" p2r --help docker run --rm -v "${HOME}/.rmapi:/home/user/.rmapi:rw" p2r -v https://arxiv.org/abs/1811.11242 # to transfer a local file in the current directory -docker run --rm -v "${HOME}/.rmapi:/home/user/.rmapi:rw" -v "$(pwd):/home/user:r" p2r -v localfile.pdf +docker run --rm -v "${HOME}/.rmapi:/home/user/.rmapi:rw" -v "$(pwd):/home/user:ro" p2r -v localfile.pdf ``` For transferring local files using the Docker image, you may find [this helper -- cgit v1.2.3 From 948d314b47be221f7694a793c964d4728212c33c Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 27 Dec 2020 13:47:40 +0000 Subject: Add support for custom styling of HTML output --- docs/man.md | 12 +++++++++++ paper2remarkable/providers/_base.py | 7 ++++++- paper2remarkable/providers/html.py | 42 +++++++++++++++++++++++++++++++------ paper2remarkable/ui.py | 10 +++++++++ tests/test_html.py | 35 +++++++++++++++++++++++++++++++ 5 files changed, 99 insertions(+), 7 deletions(-) diff --git a/docs/man.md b/docs/man.md index 132d896..db7d600 100644 --- a/docs/man.md +++ b/docs/man.md @@ -71,6 +71,18 @@ reMarkable options: If the target directory does not exist it will be created. If not specified, the root directory will be used. +Output customization: + +--css=FILENAME + Path to a CSS file with custom styling for the HTML output. This option + is ignored for any of the other providers. 
The code for the HTML + provider contains the default CSS style, which can be used as a starting + point. + +--font-urls=FILENAME + Path to a file with font urls (one per line) for the HTML output. This + will generally be used in combination with the ``--css`` option. + System settings: You'll only need to specify these options if the programs are not available on diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index a664f23..56ffa31 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -49,6 +49,8 @@ class Provider(metaclass=abc.ABCMeta): pdftk_path="pdftk", qpdf_path="qpdf", gs_path="gs", + css_path=None, + font_urls_path=None, cookiejar=None, ): self.upload = upload @@ -60,9 +62,12 @@ class Provider(metaclass=abc.ABCMeta): self.pdftk_path = pdftk_path self.qpdf_path = qpdf_path self.gs_path = gs_path - self.informer = Informer() + self.css_path = css_path + self.font_urls_path = font_urls_path self.cookiejar = cookiejar + self.informer = Informer() + self.pdftool = check_pdftool(self.pdftk_path, self.qpdf_path) # wait time to not hit the server too frequently diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py index 3e32539..48ede10 100644 --- a/paper2remarkable/providers/html.py +++ b/paper2remarkable/providers/html.py @@ -13,13 +13,13 @@ Copyright: 2020, G.J.J. 
van den Burg import html2text import markdown +import os import re import readability import titlecase import unidecode import urllib import weasyprint -import weasyprint.fonts from ._base import Provider from ._info import Informer @@ -34,7 +34,6 @@ from ..log import Logger logger = Logger() CSS = """ -@import url('https://fonts.googleapis.com/css?family=EB+Garamond|Noto+Serif|Inconsolata&display=swap'); @page { size: 702px 936px; margin: 1in; } a { color: black; } img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; } @@ -48,6 +47,13 @@ pre { font-family: 'Inconsolata'; padding-left: 2.5%; background: #efefef; } code { font-family: 'Inconsolata'; font-size: .7rem; background: #efefef; } """ +# NOTE: For some reason, Weasyprint no longer accepts the @import statement in +# the CSS to load the fonts. This may have to do with recent changes they've +# introduced. Providing the font urls separately does seem to work. +FONT_URLS = [ + "https://fonts.googleapis.com/css2?family=EB+Garamond&family=Noto+Serif&family=Inconsolata" +] + def url_fetcher(url): if url.startswith("//"): @@ -168,6 +174,30 @@ class HTML(Provider): html_article = md.convert(article) return html_article + def get_css(self): + if self.css_path is None: + return CSS + if not os.path.exists(self.css_path): + logger.warning( + f"CSS file {self.css_path} doesn't exist, using default style." + ) + return CSS + with open(self.css_path, "r") as fp: + css = fp.read() + return css + + def get_font_urls(self): + if self.font_urls_path is None: + return FONT_URLS + if not os.path.exists(self.font_urls_path): + logger.warning( + f"Font urls file {self.font_urls_path} doesn't exist, using default." 
+ ) + return FONT_URLS + with open(self.font_urls_path, "r") as fp: + font_urls = [l.strip() for l in fp.read().split("\n")] + return font_urls + def retrieve_pdf(self, pdf_url, filename): """Turn the HTML article in a clean pdf file @@ -193,11 +223,11 @@ class HTML(Provider): with open("./paper.html", "w") as fp: fp.write(html_article) - font_config = weasyprint.fonts.FontConfiguration() html = weasyprint.HTML(string=html_article, url_fetcher=url_fetcher) - css = weasyprint.CSS(string=CSS, font_config=font_config) - - html.write_pdf(filename, stylesheets=[css], font_config=font_config) + css = self.get_css() + font_urls = self.get_font_urls() + style = weasyprint.CSS(string=css) + html.write_pdf(filename, stylesheets=[style] + font_urls) def validate(src): # first check if it is a valid url diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py index f9af28f..095b69a 100644 --- a/paper2remarkable/ui.py +++ b/paper2remarkable/ui.py @@ -107,6 +107,14 @@ def parse_args(): help="path to rmapi executable (default: rmapi)", default="rmapi", ) + parser.add_argument( + "--css", help="path to custom CSS file for HTML output", default=None + ) + parser.add_argument( + "--font-urls", + help="path to custom font urls file for HTML output", + default=None, + ) parser.add_argument( "input", help="One or more URLs to a paper or paths to local PDF files", @@ -229,6 +237,8 @@ def main(): pdftk_path=args.pdftk, qpdf_path=args.qpdf, gs_path=args.gs, + css_path=args.css, + font_urls_path=args.font_urls, cookiejar=cookiejar, ) prov.run(new_input, filename=filename) diff --git a/tests/test_html.py b/tests/test_html.py index d271bb5..7d5c92b 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -7,6 +7,9 @@ This file is part of paper2remarkable. 
""" +import os +import pdfplumber +import tempfile import unittest from paper2remarkable.providers.html import HTML @@ -24,6 +27,38 @@ class TestHTML(unittest.TestCase): expected_image = "https://www.seriouseats.com/images/2015/01/20150118-tea-max-falkowitz-3.jpg" self.assertIn(expected_image, html_article) + def test_custom_css(self): + test_css = """ + @page { size: 702px 936px; margin: 1in; } + img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; } + h1,h2,h3 { font-family: 'Montserrat'; } + p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; text-align: left; } + """ + + test_font_urls = [ + "https://fonts.googleapis.com/css2?family=Montserrat&display=swap" + ] + + tmpfd, tempfname_css = tempfile.mkstemp(prefix="p2r_", suffix=".css") + with os.fdopen(tmpfd, "w") as fp: + fp.write(test_css) + + tmpfd, tempfname_urls = tempfile.mkstemp(prefix="p2r_", suffix=".txt") + with os.fdopen(tmpfd, "w") as fp: + fp.write("\n".join(test_font_urls)) + + url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines" + prov = HTML( + upload=False, css_path=tempfname_css, font_urls_path=tempfname_urls + ) + filename = prov.run(url) + with pdfplumber.open(filename) as pdf: + self.assertEqual(8, len(pdf.pages)) + + os.unlink(tempfname_css) + os.unlink(tempfname_urls) + os.unlink(filename) + if __name__ == "__main__": unittest.main() -- cgit v1.2.3 From 8f4bceee3dd40a0a835e7b1f8daeb1dbecdd8cd2 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 27 Dec 2020 13:48:05 +0000 Subject: fix comment --- paper2remarkable/log.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper2remarkable/log.py b/paper2remarkable/log.py index fb9d8a3..8240a62 100644 --- a/paper2remarkable/log.py +++ b/paper2remarkable/log.py @@ -9,8 +9,8 @@ Copyright: 2019, G.J.J. 
van den Burg """ # NOTE: I know about the logging module, but this was easier because one of the -# dependencies was using that and it became complicated. This one is obviously -# not thread-safe and is very simple. +# dependencies was using that and it interfered with our logging. The logger +# class below is obviously not thread-safe and is very simple. import datetime import sys -- cgit v1.2.3 From c016d13c065f47d99bda996474afb44461f1a719 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 27 Dec 2020 20:49:45 +0000 Subject: Bump version and update changelog and readme --- CHANGELOG.md | 5 +++++ README.md | 23 +++++++++++++++++------ paper2remarkable/__version__.py | 2 +- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e41c4c7..a637d58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Version 0.8.3 + +* Add support for providing custom styling for HTML output (closes + [#82](https://github.com/GjjvdBurg/paper2remarkable/issues/82)). + ## Version 0.8.2 * Add provider for ScienceDirect diff --git a/README.md b/README.md index ce7ecd6..2a2a8b3 100644 --- a/README.md +++ b/README.md @@ -154,11 +154,11 @@ page](docs/man.md) (``man p2r``) or open an issue [on GitHub][github-url]. ``` usage: p2r [-h] [-b] [-c] [-d] [-e] [-n] [-p REMARKABLE_DIR] [-r] [-k] [-v] - [-V] [--filename FILENAME] [--gs GS] [--pdftoppm PDFTOPPM] - [--pdftk PDFTK] [--qpdf QPDF] [--rmapi RMAPI] + [-V] [-f FILENAME] [--gs GS] [--pdftoppm PDFTOPPM] [--pdftk PDFTK] + [--qpdf QPDF] [--rmapi RMAPI] [--css CSS] [--font-urls FONT_URLS] input [input ...] 
-Paper2reMarkable version 0.8.1 +Paper2reMarkable version 0.8.3 positional arguments: input One or more URLs to a paper or paths to local PDF @@ -179,19 +179,30 @@ optional arguments: -k, --no-crop Don't crop the pdf file -v, --verbose be verbose -V, --version Show version and exit - --filename FILENAME Filename to use for the file on reMarkable + -f FILENAME, --filename FILENAME + Filename to use for the file on reMarkable --gs GS path to gs executable (default: gs) --pdftoppm PDFTOPPM path to pdftoppm executable (default: pdftoppm) --pdftk PDFTK path to pdftk executable (default: pdftk) --qpdf QPDF path to qpdf executable (default: qpdf) --rmapi RMAPI path to rmapi executable (default: rmapi) + --css CSS path to custom CSS file for HTML output + --font-urls FONT_URLS + path to custom font urls file for HTML output ``` By default ``paper2remarkable`` makes a PDF fit better on the reMarkable by changing the page size and removing unnecessary whitespace. Some tools for exporting a PDF with annotations do not handle different page sizes properly, -causing annotations to be misplaced. If this is an issue for you, you can -disable cropping using the ``-k``/``--no-crop`` option to ``p2r``. +causing annotations to be misplaced (see +[discussion](https://github.com/GjjvdBurg/paper2remarkable/issues/77)). If +this is an issue for you, you can disable cropping using the +``-k``/``--no-crop`` option to ``p2r``. + +For HTML sources (i.e. web articles) you can specify custom styling using the +``--css`` and ``--font-urls`` options. The default style in the [HTML +provider](https://github.com/GjjvdBurg/paper2remarkable/blob/a6e50d07748c842f1f0a09e4b173c87850c6ddee/paper2remarkable/providers/html.py#L36) +can serve as a starting point. 
## Alfred Workflow diff --git a/paper2remarkable/__version__.py b/paper2remarkable/__version__.py index 6955111..b248410 100644 --- a/paper2remarkable/__version__.py +++ b/paper2remarkable/__version__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -VERSION = (0, 8, 2) +VERSION = (0, 8, 3) __version__ = ".".join(map(str, VERSION)) -- cgit v1.2.3 From 421d8de29d17d9390cae1f56bfc98667158a8096 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 28 Dec 2020 00:25:31 +0000 Subject: Add support for a configuration file --- config.example.yml | 23 ++++++++ docs/man.md | 28 +++++++++ paper2remarkable/providers/_base.py | 18 +++--- paper2remarkable/providers/html.py | 32 ++-------- paper2remarkable/ui.py | 113 +++++++++++++++++++++++++++++------- setup.py | 15 ++--- tests/test_html.py | 15 +---- 7 files changed, 165 insertions(+), 79 deletions(-) create mode 100644 config.example.yml diff --git a/config.example.yml b/config.example.yml new file mode 100644 index 0000000..b50b88e --- /dev/null +++ b/config.example.yml @@ -0,0 +1,23 @@ +--- +core: + crop: 'left' # or: 'none', 'left', 'right' + blank: false # or: false + upload: true # or: false + verbose: true # or: false + experimental: true # or: false + +# System settings are all optional, but can be used if executables are not on +# the PATH. +system: + gs: /usr/bin/gs + +# Settings for styling HTML sources +html: + css: | + @page { size: 702px 936px; margin: 1in; } + img { display: block; margin: 0 auto; text-align: center; max-width: 70%; } + h1,h2,h3 { font-family: 'Montserrat'; } + p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; } + + font_urls: + - https://fonts.googleapis.com/css2?family=Montserrat&display=swap diff --git a/docs/man.md b/docs/man.md index db7d600..a6115a9 100644 --- a/docs/man.md +++ b/docs/man.md @@ -30,6 +30,11 @@ Basic options: Add a blank page after every page of the PDF document. This can be useful for taking notes on papers. 
+-C, --config=FILENAME + Read options from a configuration file. A YAML file is supported, see + [CONFIGURATION FILE](#configuration-file) for further details. By default the + file at ``~/.p2r.yml`` is used if it exists. + -e, --experimental Enable the experimental features of paper2remarkable. See below under [EXPERIMENTAL FEATURES](#experimental-features) for an overview. @@ -147,6 +152,29 @@ Finally, paper2remarkable supports extracting articles from websites. In this case an effort is done to detect the main content of the article and clean up the HTML before sending the file to the reMarkable. +## CONFIGURATION FILE + +To avoid having to provide frequently-used command line flags, a configuration +file can be created for paper2remarkable. By default it is a YAML file located +at ``~/.p2r.yml``, but an alternative location can be provided with the +``--config`` option to the script. + +The configuration file consists of three sections: ``core``, ``system``, and +``html``. In the ``core`` section options for cropping, verbosity, and blank +pages can be added, among others. The ``system`` section allows setting paths +to executables such as ``rmapi``, ``pdftk``, etc. Finally, the ``html`` +section allows you to provide custom CSS and font urls for formatting the +output of web articles. + +Options provided on the command line overwrite those in the configuration +file. So, for instance, if the configuration file has the setting ``crop: +'left'`` in the ``core`` section and the command line flag ``-c`` is provided, +the PDF will be centered. + +An example file is provided in the repository on +[GitHub](https://www.github.com/GjjvdBurg/paper2remarkable), which also +contains more information on the available options and their values. 
+ ## EXPERIMENTAL FEATURES Occassionally, experimental (beta) features will be included in diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index 56ffa31..0453c7a 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -39,18 +39,16 @@ class Provider(metaclass=abc.ABCMeta): upload=True, debug=False, experimental=False, - center=False, - right=False, + crop="left", blank=False, - no_crop=False, remarkable_dir="/", rmapi_path="rmapi", pdftoppm_path="pdftoppm", pdftk_path="pdftk", qpdf_path="qpdf", gs_path="gs", - css_path=None, - font_urls_path=None, + css=None, + font_urls=None, cookiejar=None, ): self.upload = upload @@ -62,8 +60,8 @@ class Provider(metaclass=abc.ABCMeta): self.pdftk_path = pdftk_path self.qpdf_path = qpdf_path self.gs_path = gs_path - self.css_path = css_path - self.font_urls_path = font_urls_path + self.css = css + self.font_urls = font_urls self.cookiejar = cookiejar self.informer = Informer() @@ -79,11 +77,11 @@ class Provider(metaclass=abc.ABCMeta): # Define the operations to run on the pdf. Providers can add others. self.operations = [("rewrite", self.rewrite_pdf)] - if center: + if crop == "center": self.operations.append(("center", self.center_pdf)) - elif right: + elif crop == "right": self.operations.append(("right", self.right_pdf)) - elif not no_crop: + elif crop == "left": self.operations.append(("crop", self.crop_pdf)) if blank: diff --git a/paper2remarkable/providers/html.py b/paper2remarkable/providers/html.py index 48ede10..c22cac4 100644 --- a/paper2remarkable/providers/html.py +++ b/paper2remarkable/providers/html.py @@ -13,7 +13,6 @@ Copyright: 2020, G.J.J. van den Burg import html2text import markdown -import os import re import readability import titlecase @@ -146,7 +145,8 @@ class HTML(Provider): # This attempts to fix sites where the image src element points to a # placeholder and the data-src attribute contains the url to the actual - # image. 
+ # image. Note that results may differ between readability and + # Readability.JS regex = '.*) data-src="(?P.*?)" (?P.*?)>' sub = ' \g>' @@ -174,30 +174,6 @@ class HTML(Provider): html_article = md.convert(article) return html_article - def get_css(self): - if self.css_path is None: - return CSS - if not os.path.exists(self.css_path): - logger.warning( - f"CSS file {self.css_path} doesn't exist, using default style." - ) - return CSS - with open(self.css_path, "r") as fp: - css = fp.read() - return css - - def get_font_urls(self): - if self.font_urls_path is None: - return FONT_URLS - if not os.path.exists(self.font_urls_path): - logger.warning( - f"Font urls file {self.font_urls_path} doesn't exist, using default." - ) - return FONT_URLS - with open(self.font_urls_path, "r") as fp: - font_urls = [l.strip() for l in fp.read().split("\n")] - return font_urls - def retrieve_pdf(self, pdf_url, filename): """Turn the HTML article in a clean pdf file @@ -224,8 +200,8 @@ class HTML(Provider): fp.write(html_article) html = weasyprint.HTML(string=html_article, url_fetcher=url_fetcher) - css = self.get_css() - font_urls = self.get_font_urls() + css = CSS if self.css is None else self.css + font_urls = FONT_URLS if self.font_urls is None else self.font_urls style = weasyprint.CSS(string=css) html.write_pdf(filename, stylesheets=[style] + font_urls) diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py index 095b69a..1b95dca 100644 --- a/paper2remarkable/ui.py +++ b/paper2remarkable/ui.py @@ -9,7 +9,10 @@ Copyright: 2019, G.J.J. van den Burg """ import argparse +import copy +import os import sys +import yaml from . 
import __version__, GITHUB_URL @@ -49,7 +52,7 @@ def parse_args(): parser.add_argument( "-n", "--no-upload", - help="don't upload to the reMarkable, save the output in current working dir", + help="don't upload to reMarkable, save the output in current directory", action="store_true", ) parser.add_argument( @@ -85,27 +88,27 @@ def parse_args(): action="append", ) parser.add_argument( - "--gs", help="path to gs executable (default: gs)", default="gs" + "--gs", help="path to gs executable (default: gs)", default=None ) parser.add_argument( "--pdftoppm", help="path to pdftoppm executable (default: pdftoppm)", - default="pdftoppm", + default=None, ) parser.add_argument( "--pdftk", help="path to pdftk executable (default: pdftk)", - default="pdftk", + default=None, ) parser.add_argument( "--qpdf", help="path to qpdf executable (default: qpdf)", - default="qpdf", + default=None, ) parser.add_argument( "--rmapi", help="path to rmapi executable (default: rmapi)", - default="rmapi", + default=None, ) parser.add_argument( "--css", help="path to custom CSS file for HTML output", default=None @@ -115,6 +118,12 @@ def parse_args(): help="path to custom font urls file for HTML output", default=None, ) + parser.add_argument( + "-C", + "--config", + help="path to config file (default: ~/.p2r.yml)", + default=None, + ) parser.add_argument( "input", help="One or more URLs to a paper or paths to local PDF files", @@ -186,6 +195,69 @@ def choose_provider(cli_input): return provider, new_input, cookiejar +def load_config(path=None): + if path is None: + path = os.path.join(os.path.expanduser("~"), ".p2r.yml") + if not os.path.exists(path): + return {"core": {}, "system": {}, "html": {}} + with open(path, "r") as fp: + config = yaml.safe_load(fp) + return config + + +def merge_options(config, args): + # command line arguments always overwrite config + opts = copy.deepcopy(config) + + def set_bool(d, key, value): + if value: + d[key] = True + elif not key in d: + d[key] = False + + def 
set_path(d, key, value): + if not value is None: + d[key] = value + elif not key in d: + d[key] = key + + set_bool(opts["core"], "blank", args.blank) + set_bool(opts["core"], "verbose", args.verbose) + set_bool(opts["core"], "upload", not args.no_upload) + set_bool(opts["core"], "experimental", args.experimental) + + if args.center: + opts["core"]["crop"] = "center" + elif args.right: + opts["core"]["crop"] = "right" + elif args.no_crop: + opts["core"]["crop"] = "none" + elif not "crop" in opts["core"]: + opts["core"]["crop"] = "left" + + set_path(opts["system"], "gs", args.gs) + set_path(opts["system"], "pdftoppm", args.pdftoppm) + set_path(opts["system"], "pdftk", args.pdftk) + set_path(opts["system"], "qpdf", args.qpdf) + set_path(opts["system"], "rmapi", args.rmapi) + + if args.css and os.path.exists(args.css): + with open(args.css, "r") as fp: + contents = fp.read() + opts["html"]["css"] = contents + else: + opts["html"]["css"] = None + + if args.font_urls and os.path.exists(args.font_urls): + with open(args.font_urls, "r") as fp: + urls = [l.strip() for l in fp.readlines()] + opts["html"]["font_urls"] = urls + else: + opts["html"]["font_urls"] = None + + return opts + + def set_excepthook(debug): sys_hook = sys.excepthook @@ -216,6 +288,9 @@ def main(): "When providing --filename and multiple inputs, their number must match." 
) + config = load_config(path=args.config) + options = merge_options(config, args) + filenames = ( [None] * len(args.input) if not args.filename else args.filename ) @@ -223,22 +298,20 @@ def main(): for cli_input, filename in zip(args.input, filenames): provider, new_input, cookiejar = choose_provider(cli_input) prov = provider( - verbose=args.verbose, - upload=not args.no_upload, + verbose=options["core"]["verbose"], + upload=options["core"]["upload"], debug=args.debug, - experimental=args.experimental, - center=args.center, - right=args.right, - blank=args.blank, - no_crop=args.no_crop, + experimental=options["core"]["experimental"], + crop=options["core"]["crop"], + blank=options["core"]["blank"], remarkable_dir=args.remarkable_dir, - rmapi_path=args.rmapi, - pdftoppm_path=args.pdftoppm, - pdftk_path=args.pdftk, - qpdf_path=args.qpdf, - gs_path=args.gs, - css_path=args.css, - font_urls_path=args.font_urls, + rmapi_path=options["system"]["rmapi"], + pdftoppm_path=options["system"]["pdftoppm"], + pdftk_path=options["system"]["pdftk"], + qpdf_path=options["system"]["qpdf"], + gs_path=options["system"]["gs"], + css=options["html"]["css"], + font_urls=options["html"]["font_urls"], cookiejar=cookiejar, ) prov.run(new_input, filename=filename) diff --git a/setup.py b/setup.py index e79448c..e529cc2 100644 --- a/setup.py +++ b/setup.py @@ -19,17 +19,18 @@ VERSION = None # What packages are required for this module to be executed? 
REQUIRED = [ + "PyPDF2>=1.26", "beautifulsoup4>=4.8", - "requests>=2.21", + "html2text>=2020.1.16", + "markdown>=3.1.1", "pdfplumber>=0.5", - "unidecode>=1.1", - "titlecase>=0.12", - "PyPDF2>=1.26", - "regex>=2018.11", + "pyyaml>=5.1", "readability-lxml>=0.7.1", - "html2text>=2020.1.16", + "regex>=2018.11", + "requests>=2.21", + "titlecase>=0.12", + "unidecode>=1.1", "weasyprint>=51", - "markdown>=3.1.1", ] full_require = ["readabilipy"] diff --git a/tests/test_html.py b/tests/test_html.py index 7d5c92b..41f6b83 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -9,7 +9,6 @@ This file is part of paper2remarkable. import os import pdfplumber -import tempfile import unittest from paper2remarkable.providers.html import HTML @@ -39,24 +38,12 @@ class TestHTML(unittest.TestCase): "https://fonts.googleapis.com/css2?family=Montserrat&display=swap" ] - tmpfd, tempfname_css = tempfile.mkstemp(prefix="p2r_", suffix=".css") - with os.fdopen(tmpfd, "w") as fp: - fp.write(test_css) - - tmpfd, tempfname_urls = tempfile.mkstemp(prefix="p2r_", suffix=".txt") - with os.fdopen(tmpfd, "w") as fp: - fp.write("\n".join(test_font_urls)) - url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines" - prov = HTML( - upload=False, css_path=tempfname_css, font_urls_path=tempfname_urls - ) + prov = HTML(upload=False, css=test_css, font_urls=test_font_urls) filename = prov.run(url) with pdfplumber.open(filename) as pdf: self.assertEqual(8, len(pdf.pages)) - os.unlink(tempfname_css) - os.unlink(tempfname_urls) os.unlink(filename) -- cgit v1.2.3 From 124abeccedb3c89408c26acc4a40b2706e66a654 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 28 Dec 2020 00:59:54 +0000 Subject: Bump version and update changelog and readme --- CHANGELOG.md | 5 +++++ README.md | 16 +++++++++++++--- paper2remarkable/__version__.py | 2 +- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a637d58..abd85a6 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Version 0.8.4 + +* Add support for using a configuration file to avoid having to use command + line flags. + ## Version 0.8.3 * Add support for providing custom styling for HTML output (closes diff --git a/README.md b/README.md index 2a2a8b3..ffae1b9 100644 --- a/README.md +++ b/README.md @@ -156,9 +156,10 @@ page](docs/man.md) (``man p2r``) or open an issue [on GitHub][github-url]. usage: p2r [-h] [-b] [-c] [-d] [-e] [-n] [-p REMARKABLE_DIR] [-r] [-k] [-v] [-V] [-f FILENAME] [--gs GS] [--pdftoppm PDFTOPPM] [--pdftk PDFTK] [--qpdf QPDF] [--rmapi RMAPI] [--css CSS] [--font-urls FONT_URLS] + [-C CONFIG] input [input ...] -Paper2reMarkable version 0.8.3 +Paper2reMarkable version 0.8.4 positional arguments: input One or more URLs to a paper or paths to local PDF @@ -170,8 +171,8 @@ optional arguments: -c, --center Center the PDF on the page, instead of left align -d, --debug debug mode, doesn't upload to reMarkable -e, --experimental enable experimental features - -n, --no-upload don't upload to the reMarkable, save the output in - current working dir + -n, --no-upload don't upload to reMarkable, save the output in current + directory -p REMARKABLE_DIR, --remarkable-path REMARKABLE_DIR directory on reMarkable to put the file (created if missing, default: /) @@ -189,6 +190,8 @@ optional arguments: --css CSS path to custom CSS file for HTML output --font-urls FONT_URLS path to custom font urls file for HTML output + -C CONFIG, --config CONFIG + path to config file (default: ~/.p2r.yml) ``` By default ``paper2remarkable`` makes a PDF fit better on the reMarkable by @@ -204,6 +207,13 @@ For HTML sources (i.e. web articles) you can specify custom styling using the provider](https://github.com/GjjvdBurg/paper2remarkable/blob/a6e50d07748c842f1f0a09e4b173c87850c6ddee/paper2remarkable/providers/html.py#L36) can serve as a starting point. +A configuration file can be used to provide commonly-used command line +options. 
By default the configuration file at ``~/.p2r.yml`` is used if it +exists, but an alternative location can be provided with the ``-C/--config`` +flag. Command line flags override the settings in the configuration file. See +the [config.example.yml](./config.example.yml) file for an example +configuration file and an overview of supported options. + ## Alfred Workflow On MacOS, you can optionally install [this Alfred workflow][workflow]. Alfred diff --git a/paper2remarkable/__version__.py b/paper2remarkable/__version__.py index b248410..eb45dce 100644 --- a/paper2remarkable/__version__.py +++ b/paper2remarkable/__version__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -VERSION = (0, 8, 3) +VERSION = (0, 8, 4) __version__ = ".".join(map(str, VERSION)) -- cgit v1.2.3 From 63cb0e845e3fc4d62f04217028fb2bb635e3f8a2 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 28 Dec 2020 01:00:11 +0000 Subject: expand comments in config example --- config.example.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/config.example.yml b/config.example.yml index b50b88e..52d3ac4 100644 --- a/config.example.yml +++ b/config.example.yml @@ -1,17 +1,20 @@ --- core: - crop: 'left' # or: 'none', 'left', 'right' - blank: false # or: false - upload: true # or: false - verbose: true # or: false - experimental: true # or: false + crop: 'left' # options: 'none', 'left', 'center', 'right' + blank: false # options: true, false + upload: true # options: true, false + verbose: true # options: true, false + experimental: true # options: true, false # System settings are all optional, but can be used if executables are not on -# the PATH. +# the PATH. Options in this section include: gs, pdftk, pdftoppm, qpdf, and +# rmapi. system: gs: /usr/bin/gs -# Settings for styling HTML sources +# Settings for styling HTML sources. This section has support for a css field +# and a font_urls field. 
The former is expected to be a multiline string and +# the latter a list of urls. html: css: | @page { size: 702px 936px; margin: 1in; } -- cgit v1.2.3 From aba4dd42769f43b08abed6c469db294f7ddbcb0b Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 28 Dec 2020 01:06:53 +0000 Subject: bugfix for missing sections in config --- paper2remarkable/ui.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py index 1b95dca..334714f 100644 --- a/paper2remarkable/ui.py +++ b/paper2remarkable/ui.py @@ -208,6 +208,9 @@ def load_config(path=None): def merge_options(config, args): # command line arguments always overwrite config opts = copy.deepcopy(config) + opts.setdefault("core", {}) + opts.setdefault("system", {}) + opts.setdefault("html", {}) def set_bool(d, key, value): if value: -- cgit v1.2.3 From c1ae7127f1fc8e1053cee900d56adf68e2809f94 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 28 Dec 2020 01:08:58 +0000 Subject: Bump version and update changelog --- CHANGELOG.md | 4 ++++ paper2remarkable/__version__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index abd85a6..1379008 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Version 0.8.5 + +* Handle the case where the configuration file doesn't contain all sections. 
+ ## Version 0.8.4 * Add support for using a configuration file to avoid having to use command diff --git a/paper2remarkable/__version__.py b/paper2remarkable/__version__.py index eb45dce..f80fc76 100644 --- a/paper2remarkable/__version__.py +++ b/paper2remarkable/__version__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -VERSION = (0, 8, 4) +VERSION = (0, 8, 5) __version__ = ".".join(map(str, VERSION)) -- cgit v1.2.3 From 0a2e82fce24855eb854cd01a6a53f992319ab5b1 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 28 Dec 2020 01:13:22 +0000 Subject: Rename default configuration file to .paper2remarkable.yml --- README.md | 12 ++++++------ docs/man.md | 6 +++--- paper2remarkable/ui.py | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index ffae1b9..df3300e 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ optional arguments: --font-urls FONT_URLS path to custom font urls file for HTML output -C CONFIG, --config CONFIG - path to config file (default: ~/.p2r.yml) + path to config file (default: ~/.paper2remarkable.yml) ``` By default ``paper2remarkable`` makes a PDF fit better on the reMarkable by @@ -208,11 +208,11 @@ provider](https://github.com/GjjvdBurg/paper2remarkable/blob/a6e50d07748c842f1f0 can serve as a starting point. A configuration file can be used to provide commonly-used command line -options. By default the configuration file at ``~/.p2r.yml`` is used if it -exists, but an alternative location can be provided with the ``-C/--config`` -flag. Command line flags override the settings in the configuration file. See -the [config.example.yml](./config.example.yml) file for an example -configuration file and an overview of supported options. +options. By default the configuration file at ``~/.paper2remarkable.yml`` is +used if it exists, but an alternative location can be provided with the +``-C/--config`` flag. Command line flags override the settings in the +configuration file. 
See the [config.example.yml](./config.example.yml) file +for an example configuration file and an overview of supported options. ## Alfred Workflow diff --git a/docs/man.md b/docs/man.md index a6115a9..9c331f1 100644 --- a/docs/man.md +++ b/docs/man.md @@ -33,7 +33,7 @@ Basic options: -C, --config=FILENAME Read options from a configuration file. A YAML file is supported, see [CONFIGURATION FILE](#configuration) for further details. By default the - file at ``~/.p2r.yml`` is used if it exists. + file at ``~/.paper2remarkable.yml`` is used if it exists. -e, --experimental Enable the experimental features of paper2remarkable. See below under @@ -156,8 +156,8 @@ the HTML before sending the file to the reMarkable. To avoid having to provide frequently-used command line flags, a configuration file can be created for paper2remarkable. By default it is a YAML file located -at ``~/.p2r.yml``, but an alternative location can be provided with the -``--config`` option to the script. +at ``~/.paper2remarkable.yml``, but an alternative location can be provided +with the ``--config`` option to the script. The configuration file consists of three sections: ``core``, ``system``, and ``html``. 
In the ``core`` section options for cropping, verbosity, and blank diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py index 334714f..6f95e30 100644 --- a/paper2remarkable/ui.py +++ b/paper2remarkable/ui.py @@ -121,7 +121,7 @@ def parse_args(): parser.add_argument( "-C", "--config", - help="path to config file (default: ~/.p2r.yml)", + help="path to config file (default: ~/.paper2remarkable.yml)", default=None, ) parser.add_argument( @@ -197,7 +197,7 @@ def choose_provider(cli_input): def load_config(path=None): if path is None: - path = os.path.join(os.path.expanduser("~"), ".p2r.yml") + path = os.path.join(os.path.expanduser("~"), ".paper2remarkable.yml") if not os.path.exists(path): return {"core": {}, "system": {}, "html": {}} with open(path, "r") as fp: -- cgit v1.2.3 From c03ee58c647a5aba7b8d9a2026b68d5d446b940a Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 28 Dec 2020 01:16:28 +0000 Subject: Bump version and update changelog and readme --- CHANGELOG.md | 4 ++++ README.md | 2 +- paper2remarkable/__version__.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1379008..d7030fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Version 0.8.6 + +* Rename default configuration file + ## Version 0.8.5 * Handle the case where the configuration file doesn't contain all sections. diff --git a/README.md b/README.md index df3300e..05b73ad 100644 --- a/README.md +++ b/README.md @@ -159,7 +159,7 @@ usage: p2r [-h] [-b] [-c] [-d] [-e] [-n] [-p REMARKABLE_DIR] [-r] [-k] [-v] [-C CONFIG] input [input ...] 
-Paper2reMarkable version 0.8.4 +Paper2reMarkable version 0.8.6 positional arguments: input One or more URLs to a paper or paths to local PDF diff --git a/paper2remarkable/__version__.py b/paper2remarkable/__version__.py index f80fc76..5f406ca 100644 --- a/paper2remarkable/__version__.py +++ b/paper2remarkable/__version__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -VERSION = (0, 8, 5) +VERSION = (0, 8, 6) __version__ = ".".join(map(str, VERSION)) -- cgit v1.2.3