From 575aeb3d18c9bca3541f34d35c7345a6521224e2 Mon Sep 17 00:00:00 2001
From: Gertjan van den Burg
Date: Thu, 23 Jan 2020 20:22:18 +0000
Subject: Reorder the input validation (fixes #21)

We used to assume that if a source is not a local file, then it must be
a URL. Now, we check if a source is a URL and if not, also check if it's
a local file. If neither, then we can raise an error.
---
Review notes (placed after the "---" separator, so they are not part of
the commit message):

* ui.py, main(): the input is first tested with is_url(); only if that
  fails is LocalFile.validate() tried. If neither matches, exception()
  is called with an explanatory message. The cookiejar returned by
  follow_redirects() is passed on to the provider; it stays None for
  local files.
* utils.py, is_url(): strips surrounding spaces and returns True only if
  the whole string matches the URL pattern (regex.fullmatch). The
  pattern uses \p{L} / \p{N} Unicode property classes, hence the new
  third-party "regex" requirement in setup.py.
* utils.py, is_url(): the pattern literal is written as a raw string.
  Every backslash sequence in it (\/ \- \p \. \d \: \#) is an invalid
  escape in a normal Python string literal and triggers a
  DeprecationWarning (a SyntaxWarning on newer interpreters); the raw
  prefix leaves the resulting pattern byte-for-byte the same.
* NOTE(review): the parameter name "string" shadows the "string" module
  imported at the top of utils.py inside is_url(); harmless here, left
  unchanged to keep the signature stable.

 paper2remarkable/ui.py    | 20 +++++++++++++-------
 paper2remarkable/utils.py | 12 ++++++++++--
 setup.py                  |  1 +
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py
index 6343077..032bf99 100644
--- a/paper2remarkable/ui.py
+++ b/paper2remarkable/ui.py
@@ -14,7 +14,7 @@ import sys
 
 from . import __version__, GITHUB_URL
 from .providers import providers, LocalFile
-from .utils import follow_redirects
+from .utils import follow_redirects, is_url
 
 
 def parse_args():
@@ -97,16 +97,21 @@ def exception(msg):
 
 def main():
     args = parse_args()
+    cookiejar = None
 
-    if LocalFile.validate(args.input):
+    if is_url(args.input):
+        # input is a url
+        url, cookiejar = follow_redirects(args.input)
+        provider = next((p for p in providers if p.validate(url)), None)
+    elif LocalFile.validate(args.input):
         # input is a local file
         provider = LocalFile
     else:
-        # input is a url
-        url = args.input
-        # follow all redirects of the url
-        url = follow_redirects(url)
-        provider = next((p for p in providers if p.validate(url)), None)
+        # not a proper URL or non-existent file
+        exception(
+            "Couldn't figure out what source you mean. If it's a "
+            "local file, make sure it exists."
+        )
 
     if provider is None:
         exception("Input not valid, no provider can handle this source.")
@@ -122,6 +127,7 @@ def main():
         pdfcrop_path=args.pdfcrop,
         pdftk_path=args.pdftk,
         gs_path=args.gs,
+        cookiejar=cookiejar,
     )
 
     prov.run(args.input, filename=args.filename)
diff --git a/paper2remarkable/utils.py b/paper2remarkable/utils.py
index 1bf261e..79421df 100644
--- a/paper2remarkable/utils.py
+++ b/paper2remarkable/utils.py
@@ -9,6 +9,7 @@ Copyright: 2019, G.J.J. van den Burg
 """
 
 import PyPDF2
+import regex
 import requests
 import string
 import subprocess
@@ -110,8 +111,7 @@ def upload_to_remarkable(filepath, remarkable_dir="/", rmapi_path="rmapi"):
     remarkable_dir = remarkable_dir.rstrip("/")
     if remarkable_dir:
         status = subprocess.call(
-            [rmapi_path, "mkdir", remarkable_dir],
-            stdout=subprocess.DEVNULL,
+            [rmapi_path, "mkdir", remarkable_dir], stdout=subprocess.DEVNULL,
         )
         if not status == 0:
             raise RemarkableError(
@@ -128,3 +128,11 @@ def upload_to_remarkable(filepath, remarkable_dir="/", rmapi_path="rmapi"):
             "Uploading file %s to reMarkable failed" % filepath
         )
     logger.info("Upload successful.")
+
+
+def is_url(string):
+    # pattern adapted from CleverCSV
+    pattern = r"((https?|ftp):\/\/(?!\-))?(((([\p{L}\p{N}]*\-?[\p{L}\p{N}]+)+\.)+([a-z]{2,}|local)(\.[a-z]{2,3})?)|localhost|(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(\:\d{1,5})?))(\/[\p{L}\p{N}_\/()~?=&%\-\#\.:]*)?(\.[a-z]+)?"
+    string = string.strip(" ")
+    match = regex.fullmatch(pattern, string)
+    return match is not None
diff --git a/setup.py b/setup.py
index f54170a..bddbd24 100644
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,7 @@ REQUIRED = [
     "unidecode>=1.1",
     "titlecase>=0.12",
     "PyPDF2>=1.26",
+    "regex>=2018.11"
 ]
 
 docs_require = []
-- 
cgit v1.2.3