aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2020-10-23 12:24:50 +0200
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2020-10-23 12:38:05 +0200
commit1e83f9f6537fa108d7a157daaaeb3dc06e80fdce (patch)
treede84520ddc9281644f40488ae661c5da7e2461ac
parentBump version and update changelog (diff)
downloadpaper2remarkable-1e83f9f6537fa108d7a157daaaeb3dc06e80fdce.tar.gz
paper2remarkable-1e83f9f6537fa108d7a157daaaeb3dc06e80fdce.zip
Code formatting
-rw-r--r--paper2remarkable/crop.py19
-rw-r--r--paper2remarkable/log.py9
-rw-r--r--paper2remarkable/pdf_ops.py6
-rw-r--r--paper2remarkable/providers/_base.py24
-rw-r--r--paper2remarkable/providers/_info.py12
-rw-r--r--paper2remarkable/providers/arxiv.py12
-rw-r--r--paper2remarkable/providers/pdf_url.py4
-rw-r--r--paper2remarkable/utils.py17
-rw-r--r--tests/test_providers.py5
9 files changed, 63 insertions, 45 deletions
diff --git a/paper2remarkable/crop.py b/paper2remarkable/crop.py
index dc4b31c..573225b 100644
--- a/paper2remarkable/crop.py
+++ b/paper2remarkable/crop.py
@@ -27,8 +27,8 @@ logger = Logger()
def find_offset_byte_line(line):
"""Find index of first nonzero bit in a line of bytes
- The given line is a string of bytes, each representing 8 pixels. This code
- finds the index of the first bit that is not zero. Used when find the
+ The given line is a string of bytes, each representing 8 pixels. This code
+ finds the index of the first bit that is not zero. Used when find the
cropbox with pdftoppm.
"""
off = 0
@@ -46,8 +46,7 @@ def find_offset_byte_line(line):
def check_pdftoppm(pth):
- """Check that we can run the provided pdftoppm executable
- """
+ """Check that we can run the provided pdftoppm executable"""
try:
subprocess.check_output([pth, "-v"], stderr=subprocess.DEVNULL)
except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
@@ -58,7 +57,10 @@ def check_pdftoppm(pth):
class Cropper(object):
def __init__(
- self, input_file=None, output_file=None, pdftoppm_path="pdftoppm",
+ self,
+ input_file=None,
+ output_file=None,
+ pdftoppm_path="pdftoppm",
):
if not input_file is None:
self.input_file = os.path.abspath(input_file)
@@ -220,7 +222,7 @@ class Cropper(object):
if margins is integer, used for all margins, else
margins = [left, top, right, bottom]
- We get the bounding box by finding the smallest rectangle that is
+ We get the bounding box by finding the smallest rectangle that is
completely surrounded by white pixels.
"""
if isinstance(margins, int):
@@ -254,7 +256,7 @@ class Cropper(object):
return [a0, b0, a1, b1]
def get_center_bbox(self, filename, padding=15):
- """Compute a bounding box that will center the page file on the
+ """Compute a bounding box that will center the page file on the
reMarkable
"""
bbox = self.get_bbox(filename, margins=0)
@@ -279,8 +281,7 @@ class Cropper(object):
return self.get_bbox(filename, margins=margins)
def get_right_bbox(self, filename, padding=15):
- """Get the bounding box that ensures the menu doesn't hide the text
- """
+ """Get the bounding box that ensures the menu doesn't hide the text"""
bbox = self.get_bbox(filename, margins=0)
diff --git a/paper2remarkable/log.py b/paper2remarkable/log.py
index 3a2fcc5..fb9d8a3 100644
--- a/paper2remarkable/log.py
+++ b/paper2remarkable/log.py
@@ -38,7 +38,7 @@ class Logger(metaclass=Singleton):
def disable(self):
self.enabled = False
- def _log(self, msg, mode, end='\n', add_prefix=True):
+ def _log(self, msg, mode, end="\n", add_prefix=True):
if not self.enabled:
return
if not mode in ("info", "warn"):
@@ -53,12 +53,11 @@ class Logger(metaclass=Singleton):
print("%s%s" % (prefix, msg), end=end, file=file)
file.flush()
- def info(self, msg, end='\n'):
+ def info(self, msg, end="\n"):
self._log(msg, "info", end=end)
- def warning(self, msg, end='\n'):
+ def warning(self, msg, end="\n"):
self._log(msg, "warn", end=end)
- def append(self, msg, mode, end='\n'):
+ def append(self, msg, mode, end="\n"):
self._log(msg, mode, end=end, add_prefix=False)
-
diff --git a/paper2remarkable/pdf_ops.py b/paper2remarkable/pdf_ops.py
index 41cb85f..c365920 100644
--- a/paper2remarkable/pdf_ops.py
+++ b/paper2remarkable/pdf_ops.py
@@ -40,8 +40,7 @@ def prepare_pdf(filepath, operation, pdftoppm_path="pdftoppm"):
def blank_pdf(filepath):
- """Add blank pages to PDF
- """
+ """Add blank pages to PDF"""
logger.info("Adding blank pages")
input_pdf = PyPDF2.PdfFileReader(filepath)
output_pdf = PyPDF2.PdfFileWriter()
@@ -56,8 +55,7 @@ def blank_pdf(filepath):
def shrink_pdf(filepath, gs_path="gs"):
- """Shrink the PDF file size using Ghostscript
- """
+ """Shrink the PDF file size using Ghostscript"""
logger.info("Shrinking pdf file ...")
size_before = os.path.getsize(filepath)
output_file = os.path.splitext(filepath)[0] + "-shrink.pdf"
diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index 107f006..8f82f1d 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -122,7 +122,12 @@ class Provider(metaclass=abc.ABCMeta):
)
elif self.pdftool == "qpdf":
status = subprocess.call(
- [self.qpdf_path, "--stream-data=compress", in_pdf, out_pdf,],
+ [
+ self.qpdf_path,
+ "--stream-data=compress",
+ in_pdf,
+ out_pdf,
+ ],
stderr=subprocess.DEVNULL,
)
if not status == 0:
@@ -131,7 +136,7 @@ class Provider(metaclass=abc.ABCMeta):
)
def rewrite_pdf(self, in_pdf, out_pdf=None):
- """ Re-write the pdf using Ghostscript
+ """Re-write the pdf using Ghostscript
This helps avoid issues in dearxiv due to nested pdfs.
"""
@@ -159,11 +164,22 @@ class Provider(metaclass=abc.ABCMeta):
if self.pdftool == "pdftk":
status = subprocess.call(
- [self.pdftk_path, in_pdf, "output", out_pdf, "uncompress",]
+ [
+ self.pdftk_path,
+ in_pdf,
+ "output",
+ out_pdf,
+ "uncompress",
+ ]
)
elif self.pdftool == "qpdf":
status = subprocess.call(
- [self.qpdf_path, "--stream-data=uncompress", in_pdf, out_pdf,]
+ [
+ self.qpdf_path,
+ "--stream-data=uncompress",
+ in_pdf,
+ out_pdf,
+ ]
)
if not status == 0:
raise _CalledProcessError(
diff --git a/paper2remarkable/providers/_info.py b/paper2remarkable/providers/_info.py
index 10b6959..8cffc60 100644
--- a/paper2remarkable/providers/_info.py
+++ b/paper2remarkable/providers/_info.py
@@ -16,12 +16,12 @@ logger = Logger()
class Informer:
"""Base class for the informers.
- The "informer" class is used to retrieve the title, authors, and year of
+ The "informer" class is used to retrieve the title, authors, and year of
publication of the provided paper.
- This base class provides the main functionality, but because various
- outlets use different conventions to embed author, title, and publication
- year information, we expect that individual providers will subclass this
+ This base class provides the main functionality, but because various
+ outlets use different conventions to embed author, title, and publication
+ year information, we expect that individual providers will subclass this
class and overwrite some of the methods.
"""
@@ -35,9 +35,9 @@ class Informer:
self.year = year
def get_filename(self, abs_url):
- """ Generate nice filename using the paper information
+ """Generate nice filename using the paper information
- The provided url must be to a HTMl page where this information can be
+ The provided url must be to a HTMl page where this information can be
found, not to the PDF file itself.
"""
logger.info("Generating output filename")
diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index 317452e..6ec1796 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -45,8 +45,8 @@ class Arxiv(Provider):
def get_abs_pdf_urls(self, url):
"""Get the pdf and abs url from any given arXiv url """
- if '?' in url:
- url = url[:url.index('?')]
+ if "?" in url:
+ url = url[: url.index("?")]
if re.match(self.re_abs_1, url) or re.match(self.re_abs_2, url):
abs_url = url
pdf_url = url.replace("abs", "pdf") + ".pdf"
@@ -107,7 +107,9 @@ class Arxiv(Provider):
block = b"".join(current_obj)
# remove the text
block, n_subs1 = re.subn(
- b"\(" + DEARXIV_TEXT_REGEX + b"\)Tj", b"()Tj", block,
+ b"\(" + DEARXIV_TEXT_REGEX + b"\)Tj",
+ b"()Tj",
+ block,
)
# remove the url (type 1)
block, n_subs2 = re.subn(
@@ -119,8 +121,8 @@ class Arxiv(Provider):
)
# remove the url (type 2, i.e. Jackson arXiv 0309285v2)
block, n_subs3 = re.subn(
- b"<<\n\/S \/URI\n" +
- b"/URI \("
+ b"<<\n\/S \/URI\n"
+ + b"/URI \("
+ DEARXIV_URI_REGEX
+ b"\)\n>>\n",
b"",
diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py
index b86c7c3..d20d4a5 100644
--- a/paper2remarkable/providers/pdf_url.py
+++ b/paper2remarkable/providers/pdf_url.py
@@ -28,7 +28,9 @@ class PdfUrlInformer(Informer):
path_parts = parsed.path.split("/")
if not path_parts:
raise FilenameMissingError(
- provider="PdfUrl", url=abs_url, reason="No URL parts",
+ provider="PdfUrl",
+ url=abs_url,
+ reason="No URL parts",
)
filename = path_parts[-1]
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index 07b1524..0b4be07 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -30,15 +30,15 @@ logger = Logger()
def clean_string(s):
- """ Clean a string by replacing accented characters with equivalents and
- keeping only the allowed characters (ascii letters, digits, underscore,
+ """Clean a string by replacing accented characters with equivalents and
+ keeping only the allowed characters (ascii letters, digits, underscore,
space, dash, and period)"""
normalized = unidecode.unidecode(s)
allowed = string.ascii_letters + string.digits + "_ .-"
cleaned = "".join(c if c in allowed else "_" for c in normalized)
while "__" in cleaned:
cleaned = cleaned.replace("__", "_")
- cleaned = cleaned.strip('_')
+ cleaned = cleaned.strip("_")
return cleaned
@@ -142,7 +142,8 @@ def upload_to_remarkable(filepath, remarkable_dir="/", rmapi_path="rmapi"):
while parts:
rmdir += "/" + parts.pop(0)
status = subprocess.call(
- [rmapi_path, "mkdir", rmdir], stdout=subprocess.DEVNULL,
+ [rmapi_path, "mkdir", rmdir],
+ stdout=subprocess.DEVNULL,
)
if not status == 0:
raise RemarkableError(
@@ -186,10 +187,10 @@ def check_pdftool(pdftk_path, qpdf_path):
return "pdftk"
try:
status = subprocess.call(
- [qpdf_path, "--help"],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.DEVNULL,
- )
+ [qpdf_path, "--help"],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL,
+ )
except FileNotFoundError:
status = 1
if status == 0:
diff --git a/tests/test_providers.py b/tests/test_providers.py
index b6cce59..546794c 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -285,8 +285,8 @@ class TestProviders(unittest.TestCase):
def test_html_3(self):
prov = HTML(upload=False, verbose=VERBOSE)
url = "https://conclave-team.github.io/conclave-site/"
- #exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf"
- # NOTE: Title differs between Readability.JS and readability-lxml, we
+ # exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf"
+ # NOTE: Title differs between Readability.JS and readability-lxml, we
# assume that testing is done with Readability.JS
exp = "Conclave.pdf"
filename = prov.run(url)
@@ -337,6 +337,5 @@ class TestProviders(unittest.TestCase):
self.assertEqual(exp, os.path.basename(filename))
-
if __name__ == "__main__":
unittest.main()