diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-10-23 17:34:52 +0200 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-10-23 17:34:52 +0200 |
| commit | 2860a9e5dcfd5726d59b3fce622b170e1f318209 (patch) | |
| tree | 178b544c08f50ce4114e942990d778b8b0d645e7 | |
| parent | Add pre-commit config for code formatting (diff) | |
| parent | Code formatting (diff) | |
| download | paper2remarkable-2860a9e5dcfd5726d59b3fce622b170e1f318209.tar.gz paper2remarkable-2860a9e5dcfd5726d59b3fce622b170e1f318209.zip | |
Merge branch 'master' into feature/pre-commit
| -rw-r--r-- | paper2remarkable/crop.py | 19 | ||||
| -rw-r--r-- | paper2remarkable/pdf_ops.py | 6 | ||||
| -rw-r--r-- | paper2remarkable/providers/_base.py | 24 | ||||
| -rw-r--r-- | paper2remarkable/providers/_info.py | 12 | ||||
| -rw-r--r-- | paper2remarkable/providers/arxiv.py | 4 | ||||
| -rw-r--r-- | paper2remarkable/providers/pdf_url.py | 4 | ||||
| -rw-r--r-- | paper2remarkable/utils.py | 7 |
7 files changed, 48 insertions, 28 deletions
diff --git a/paper2remarkable/crop.py b/paper2remarkable/crop.py index dc4b31c..573225b 100644 --- a/paper2remarkable/crop.py +++ b/paper2remarkable/crop.py @@ -27,8 +27,8 @@ logger = Logger() def find_offset_byte_line(line): """Find index of first nonzero bit in a line of bytes - The given line is a string of bytes, each representing 8 pixels. This code - finds the index of the first bit that is not zero. Used when find the + The given line is a string of bytes, each representing 8 pixels. This code + finds the index of the first bit that is not zero. Used when find the cropbox with pdftoppm. """ off = 0 @@ -46,8 +46,7 @@ def find_offset_byte_line(line): def check_pdftoppm(pth): - """Check that we can run the provided pdftoppm executable - """ + """Check that we can run the provided pdftoppm executable""" try: subprocess.check_output([pth, "-v"], stderr=subprocess.DEVNULL) except (subprocess.CalledProcessError, FileNotFoundError, PermissionError): @@ -58,7 +57,10 @@ def check_pdftoppm(pth): class Cropper(object): def __init__( - self, input_file=None, output_file=None, pdftoppm_path="pdftoppm", + self, + input_file=None, + output_file=None, + pdftoppm_path="pdftoppm", ): if not input_file is None: self.input_file = os.path.abspath(input_file) @@ -220,7 +222,7 @@ class Cropper(object): if margins is integer, used for all margins, else margins = [left, top, right, bottom] - We get the bounding box by finding the smallest rectangle that is + We get the bounding box by finding the smallest rectangle that is completely surrounded by white pixels. """ if isinstance(margins, int): @@ -254,7 +256,7 @@ class Cropper(object): return [a0, b0, a1, b1] def get_center_bbox(self, filename, padding=15): - """Compute a bounding box that will center the page file on the + """Compute a bounding box that will center the page file on the reMarkable """ bbox = self.get_bbox(filename, margins=0) @@ -279,8 +281,7 @@ class Cropper(object): return self.get_bbox(filename, margins=margins) def get_right_bbox(self, filename, padding=15): - """Get the bounding box that ensures the menu doesn't hide the text - """ + """Get the bounding box that ensures the menu doesn't hide the text""" bbox = self.get_bbox(filename, margins=0) diff --git a/paper2remarkable/pdf_ops.py b/paper2remarkable/pdf_ops.py index 41cb85f..c365920 100644 --- a/paper2remarkable/pdf_ops.py +++ b/paper2remarkable/pdf_ops.py @@ -40,8 +40,7 @@ def prepare_pdf(filepath, operation, pdftoppm_path="pdftoppm"): def blank_pdf(filepath): - """Add blank pages to PDF - """ + """Add blank pages to PDF""" logger.info("Adding blank pages") input_pdf = PyPDF2.PdfFileReader(filepath) output_pdf = PyPDF2.PdfFileWriter() @@ -56,8 +55,7 @@ def blank_pdf(filepath): def shrink_pdf(filepath, gs_path="gs"): - """Shrink the PDF file size using Ghostscript - """ + """Shrink the PDF file size using Ghostscript""" logger.info("Shrinking pdf file ...") size_before = os.path.getsize(filepath) output_file = os.path.splitext(filepath)[0] + "-shrink.pdf" diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py index 107f006..8f82f1d 100644 --- a/paper2remarkable/providers/_base.py +++ b/paper2remarkable/providers/_base.py @@ -122,7 +122,12 @@ class Provider(metaclass=abc.ABCMeta): ) elif self.pdftool == "qpdf": status = subprocess.call( - [self.qpdf_path, "--stream-data=compress", in_pdf, out_pdf,], + [ + self.qpdf_path, + "--stream-data=compress", + in_pdf, + out_pdf, + ], stderr=subprocess.DEVNULL, ) if not status == 0: @@ -131,7 +136,7 @@ class Provider(metaclass=abc.ABCMeta): ) def rewrite_pdf(self, in_pdf, out_pdf=None): - """ Re-write the pdf using Ghostscript + """Re-write the pdf using Ghostscript This helps avoid issues in dearxiv due to nested pdfs. """ @@ -159,11 +164,22 @@ class Provider(metaclass=abc.ABCMeta): if self.pdftool == "pdftk": status = subprocess.call( - [self.pdftk_path, in_pdf, "output", out_pdf, "uncompress",] + [ + self.pdftk_path, + in_pdf, + "output", + out_pdf, + "uncompress", + ] ) elif self.pdftool == "qpdf": status = subprocess.call( - [self.qpdf_path, "--stream-data=uncompress", in_pdf, out_pdf,] + [ + self.qpdf_path, + "--stream-data=uncompress", + in_pdf, + out_pdf, + ] ) if not status == 0: raise _CalledProcessError( diff --git a/paper2remarkable/providers/_info.py b/paper2remarkable/providers/_info.py index 10b6959..8cffc60 100644 --- a/paper2remarkable/providers/_info.py +++ b/paper2remarkable/providers/_info.py @@ -16,12 +16,12 @@ logger = Logger() class Informer: """Base class for the informers. - The "informer" class is used to retrieve the title, authors, and year of + The "informer" class is used to retrieve the title, authors, and year of publication of the provided paper. - This base class provides the main functionality, but because various - outlets use different conventions to embed author, title, and publication - year information, we expect that individual providers will subclass this + This base class provides the main functionality, but because various + outlets use different conventions to embed author, title, and publication + year information, we expect that individual providers will subclass this class and overwrite some of the methods. """ @@ -35,9 +35,9 @@ class Informer: self.year = year def get_filename(self, abs_url): - """ Generate nice filename using the paper information + """Generate nice filename using the paper information - The provided url must be to a HTMl page where this information can be + The provided url must be to a HTMl page where this information can be found, not to the PDF file itself. """ logger.info("Generating output filename") diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py index 865a3f8..6ec1796 100644 --- a/paper2remarkable/providers/arxiv.py +++ b/paper2remarkable/providers/arxiv.py @@ -107,7 +107,9 @@ class Arxiv(Provider): block = b"".join(current_obj) # remove the text block, n_subs1 = re.subn( - b"\(" + DEARXIV_TEXT_REGEX + b"\)Tj", b"()Tj", block, + b"\(" + DEARXIV_TEXT_REGEX + b"\)Tj", + b"()Tj", + block, ) # remove the url (type 1) block, n_subs2 = re.subn( diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py index b86c7c3..d20d4a5 100644 --- a/paper2remarkable/providers/pdf_url.py +++ b/paper2remarkable/providers/pdf_url.py @@ -28,7 +28,9 @@ class PdfUrlInformer(Informer): path_parts = parsed.path.split("/") if not path_parts: raise FilenameMissingError( - provider="PdfUrl", url=abs_url, reason="No URL parts", + provider="PdfUrl", + url=abs_url, + reason="No URL parts", ) filename = path_parts[-1] diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py index a29be3c..0b4be07 100644 --- a/paper2remarkable/utils.py +++ b/paper2remarkable/utils.py @@ -30,8 +30,8 @@ logger = Logger() def clean_string(s): - """ Clean a string by replacing accented characters with equivalents and - keeping only the allowed characters (ascii letters, digits, underscore, + """Clean a string by replacing accented characters with equivalents and + keeping only the allowed characters (ascii letters, digits, underscore, space, dash, and period)""" normalized = unidecode.unidecode(s) allowed = string.ascii_letters + string.digits + "_ .-" @@ -142,7 +142,8 @@ def upload_to_remarkable(filepath, remarkable_dir="/", rmapi_path="rmapi"): while parts: rmdir += "/" + parts.pop(0) status = subprocess.call( - [rmapi_path, "mkdir", rmdir], stdout=subprocess.DEVNULL, + [rmapi_path, "mkdir", rmdir], + stdout=subprocess.DEVNULL, ) if not status == 0: raise RemarkableError( |
