From 1e83f9f6537fa108d7a157daaaeb3dc06e80fdce Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Fri, 23 Oct 2020 12:24:50 +0200 Subject: Code formatting --- paper2remarkable/crop.py | 19 ++++++++++--------- paper2remarkable/log.py | 9 ++++----- paper2remarkable/pdf_ops.py | 6 ++---- paper2remarkable/providers/_base.py | 24 ++++++++++++++++++++---- paper2remarkable/providers/_info.py | 12 ++++++------ paper2remarkable/providers/arxiv.py | 12 +++++++----- paper2remarkable/providers/pdf_url.py | 4 +++- paper2remarkable/utils.py | 17 +++++++++-------- tests/test_providers.py | 5 ++--- 9 files changed, 63 insertions(+), 45 deletions(-) diff --git a/paper2remarkable/crop.py b/paper2remarkable/crop.py index dc4b31c..573225b 100644 --- a/paper2remarkable/crop.py +++ b/paper2remarkable/crop.py @@ -27,8 +27,8 @@ logger = Logger() def find_offset_byte_line(line): """Find index of first nonzero bit in a line of bytes - The given line is a string of bytes, each representing 8 pixels. This code - finds the index of the first bit that is not zero. Used when find the + The given line is a string of bytes, each representing 8 pixels. This code + finds the index of the first bit that is not zero. Used when find the cropbox with pdftoppm. """ off = 0 @@ -46,8 +46,7 @@ def find_offset_byte_line(line): def check_pdftoppm(pth): - """Check that we can run the provided pdftoppm executable - """ + """Check that we can run the provided pdftoppm executable""" try: subprocess.check_output([pth, "-v"], stderr=subprocess.DEVNULL) except (subprocess.CalledProcessError, FileNotFoundError, PermissionError): @@ -58,7 +57,10 @@ def check_pdftoppm(pth): class Cropper(object): def __init__( - self, input_file=None, output_file=None, pdftoppm_path="pdftoppm", + self, + input_file=None, + output_file=None, + pdftoppm_path="pdftoppm", ): if not input_file is None: self.input_file = os.path.abspath(input_file) @@ -220,7 +222,7 @@ class Cropper(object): if margins is integer, used for all margins, else margins = [left, top, right, bottom] - We get the bounding box by finding the smallest rectangle that is + We get the bounding box by finding the smallest rectangle that is completely surrounded by white pixels. """ if isinstance(margins, int): @@ -254,7 +256,7 @@ class Cropper(object): return [a0, b0, a1, b1] def get_center_bbox(self, filename, padding=15): - """Compute a bounding box that will center the page file on the + """Compute a bounding box that will center the page file on the reMarkable """ bbox = self.get_bbox(filename, margins=0) @@ -279,8 +281,7 @@ class Cropper(object): return self.get_bbox(filename, margins=margins) def get_right_bbox(self, filename, padding=15): - """Get the bounding box that ensures the menu doesn't hide the text - """ + """Get the bounding box that ensures the menu doesn't hide the text""" bbox = self.get_bbox(filename, margins=0) diff --git a/paper2remarkable/log.py b/paper2remarkable/log.py index 3a2fcc5..fb9d8a3 100644 --- a/paper2remarkable/log.py +++ b/paper2remarkable/log.py @@ -38,7 +38,7 @@ class Logger(metaclass=Singleton): def disable(self): self.enabled = False - def _log(self, msg, mode, end='\n', add_prefix=True): + def _log(self, msg, mode, end="\n", add_prefix=True): if not self.enabled: return if not mode in ("info", "warn"): @@ -53,12 +53,11 @@ class Logger(metaclass=Singleton): print("%s%s" % (prefix, msg), end=end, file=file) file.flush() - def info(self, msg, end='\n'): + def info(self, msg, end="\n"): self._log(msg, "info", end=end) - def warning(self, msg, end='\n'): + def warning(self, msg, end="\n"): self._log(msg, "warn", end=end) - def append(self, msg, mode, end='\n'): + def append(self, msg, mode, end="\n"): self._log(msg, mode, end=end, add_prefix=False) - diff --git a/paper2remarkable/pdf_ops.py b/paper2remarkable/pdf_ops.py index 41cb85f..c365920 100644 --- a/paper2remarkable/pdf_ops.py +++ b/paper2remarkable/pdf_ops.py @@ -40,8 +40,7 @@ def prepare_pdf(filepath, operation, pdftoppm_path="pdftoppm"): def blank_pdf(filepath): - """Add blank pages to PDF - """ + """Add blank pages to PDF""" logger.info("Adding blank pages") input_pdf = PyPDF2.PdfFileReader(filepath) output_pdf = PyPDF2.PdfFileWriter() @@ -56,8 +55,7 @@ def blank_pdf(filepath): def shrink_pdf(filepath, gs_path="gs"): - """Shrink the PDF file size using Ghostscript - """ + """Shrink the PDF file size using Ghostscript""" logger.info("Shrinking pdf file ...") size_before = os.path.getsize(filepath) output_file = os.path.splitext(filepath)[0] + "-shrink.pdf" diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index 107f006..8f82f1d 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -122,7 +122,12 @@ class Provider(metaclass=abc.ABCMeta): ) elif self.pdftool == "qpdf": status = subprocess.call( - [self.qpdf_path, "--stream-data=compress", in_pdf, out_pdf,], + [ + self.qpdf_path, + "--stream-data=compress", + in_pdf, + out_pdf, + ], stderr=subprocess.DEVNULL, ) if not status == 0: @@ -131,7 +136,7 @@ class Provider(metaclass=abc.ABCMeta): ) def rewrite_pdf(self, in_pdf, out_pdf=None): - """ Re-write the pdf using Ghostscript + """Re-write the pdf using Ghostscript This helps avoid issues in dearxiv due to nested pdfs. """ @@ -159,11 +164,22 @@ class Provider(metaclass=abc.ABCMeta): if self.pdftool == "pdftk": status = subprocess.call( - [self.pdftk_path, in_pdf, "output", out_pdf, "uncompress",] + [ + self.pdftk_path, + in_pdf, + "output", + out_pdf, + "uncompress", + ] ) elif self.pdftool == "qpdf": status = subprocess.call( - [self.qpdf_path, "--stream-data=uncompress", in_pdf, out_pdf,] + [ + self.qpdf_path, + "--stream-data=uncompress", + in_pdf, + out_pdf, + ] ) if not status == 0: raise _CalledProcessError( diff --git a/paper2remarkable/providers/_info.py b/paper2remarkable/providers/_info.py index 10b6959..8cffc60 100644 --- a/paper2remarkable/providers/_info.py +++ b/paper2remarkable/providers/_info.py @@ -16,12 +16,12 @@ logger = Logger() class Informer: """Base class for the informers. - The "informer" class is used to retrieve the title, authors, and year of + The "informer" class is used to retrieve the title, authors, and year of publication of the provided paper. - This base class provides the main functionality, but because various - outlets use different conventions to embed author, title, and publication - year information, we expect that individual providers will subclass this + This base class provides the main functionality, but because various + outlets use different conventions to embed author, title, and publication + year information, we expect that individual providers will subclass this class and overwrite some of the methods. """ @@ -35,9 +35,9 @@ class Informer: self.year = year def get_filename(self, abs_url): - """ Generate nice filename using the paper information + """Generate nice filename using the paper information - The provided url must be to a HTMl page where this information can be + The provided url must be to a HTMl page where this information can be found, not to the PDF file itself. """ logger.info("Generating output filename") diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py index 317452e..6ec1796 100644 --- a/paper2remarkable/providers/arxiv.py +++ b/paper2remarkable/providers/arxiv.py @@ -45,8 +45,8 @@ class Arxiv(Provider): def get_abs_pdf_urls(self, url): """Get the pdf and abs url from any given arXiv url """ - if '?' in url: - url = url[:url.index('?')] + if "?" in url: + url = url[: url.index("?")] if re.match(self.re_abs_1, url) or re.match(self.re_abs_2, url): abs_url = url pdf_url = url.replace("abs", "pdf") + ".pdf" @@ -107,7 +107,9 @@ class Arxiv(Provider): block = b"".join(current_obj) # remove the text block, n_subs1 = re.subn( - b"\(" + DEARXIV_TEXT_REGEX + b"\)Tj", b"()Tj", block, + b"\(" + DEARXIV_TEXT_REGEX + b"\)Tj", + b"()Tj", + block, ) # remove the url (type 1) block, n_subs2 = re.subn( @@ -119,8 +121,8 @@ class Arxiv(Provider): ) # remove the url (type 2, i.e. Jackson arXiv 0309285v2) block, n_subs3 = re.subn( - b"<<\n\/S \/URI\n" + - b"/URI \(" + b"<<\n\/S \/URI\n" + + b"/URI \(" + DEARXIV_URI_REGEX + b"\)\n>>\n", b"", diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py index b86c7c3..d20d4a5 100644 --- a/paper2remarkable/providers/pdf_url.py +++ b/paper2remarkable/providers/pdf_url.py @@ -28,7 +28,9 @@ class PdfUrlInformer(Informer): path_parts = parsed.path.split("/") if not path_parts: raise FilenameMissingError( - provider="PdfUrl", url=abs_url, reason="No URL parts", + provider="PdfUrl", + url=abs_url, + reason="No URL parts", ) filename = path_parts[-1] diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py index 07b1524..0b4be07 100644 --- a/paper2remarkable/utils.py +++ b/paper2remarkable/utils.py @@ -30,15 +30,15 @@ logger = Logger() def clean_string(s): - """ Clean a string by replacing accented characters with equivalents and - keeping only the allowed characters (ascii letters, digits, underscore, + """Clean a string by replacing accented characters with equivalents and + keeping only the allowed characters (ascii letters, digits, underscore, space, dash, and period)""" normalized = unidecode.unidecode(s) allowed = string.ascii_letters + string.digits + "_ .-" cleaned = "".join(c if c in allowed else "_" for c in normalized) while "__" in cleaned: cleaned = cleaned.replace("__", "_") - cleaned = cleaned.strip('_') + cleaned = cleaned.strip("_") return cleaned @@ -142,7 +142,8 @@ def upload_to_remarkable(filepath, remarkable_dir="/", rmapi_path="rmapi"): while parts: rmdir += "/" + parts.pop(0) status = subprocess.call( - [rmapi_path, "mkdir", rmdir], stdout=subprocess.DEVNULL, + [rmapi_path, "mkdir", rmdir], + stdout=subprocess.DEVNULL, ) if not status == 0: raise RemarkableError( @@ -186,10 +187,10 @@ def check_pdftool(pdftk_path, qpdf_path): return "pdftk" try: status = subprocess.call( - [qpdf_path, "--help"], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) + [qpdf_path, "--help"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) except FileNotFoundError: status = 1 if status == 0: diff --git a/tests/test_providers.py b/tests/test_providers.py index b6cce59..546794c 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -285,8 +285,8 @@ class TestProviders(unittest.TestCase): def test_html_3(self): prov = HTML(upload=False, verbose=VERBOSE) url = "https://conclave-team.github.io/conclave-site/" - #exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf" - # NOTE: Title differs between Readability.JS and readability-lxml, we + # exp = "Conclave_Case_Study_-_A_Private_and_Secure_Real-Time_Collaborative_Text_Editor.pdf" + # NOTE: Title differs between Readability.JS and readability-lxml, we # assume that testing is done with Readability.JS exp = "Conclave.pdf" filename = prov.run(url) @@ -337,6 +337,5 @@ class TestProviders(unittest.TestCase): self.assertEqual(exp, os.path.basename(filename)) - if __name__ == "__main__": unittest.main() -- cgit v1.2.3