diff options
| -rw-r--r-- | paper2remarkable/exceptions.py | 10 | ||||
| -rw-r--r-- | paper2remarkable/providers/pdf_url.py | 31 | ||||
| -rw-r--r-- | tests/test_providers.py | 4 |
3 files changed, 37 insertions, 8 deletions
diff --git a/paper2remarkable/exceptions.py b/paper2remarkable/exceptions.py index 86f39b4..a608bcc 100644 --- a/paper2remarkable/exceptions.py +++ b/paper2remarkable/exceptions.py @@ -48,13 +48,17 @@ class URLResolutionError(Error): class FilenameMissingError(Error): """Exception raised for providers that need a filename to be provided""" - def __init__(self, provider): + def __init__(self, provider, url, reason=None): self.provider = provider + self.url = url + self.reason = reason def __str__(self): - msg = "ERROR: Filename must be given with the {provider} provider (hint: use --filename)".format( - provider=self.provider + msg = "ERROR: Couldn't determine a filename from {url} for provider {provider}".format( + provider=self.provider, url=self.url ) + if self.reason: + msg += "\nReason: {reason}".format(reason=self.reason) msg += GH_MSG return msg diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py index 77accc9..b86c7c3 100644 --- a/paper2remarkable/providers/pdf_url.py +++ b/paper2remarkable/providers/pdf_url.py @@ -12,14 +12,39 @@ import urllib from ._base import Provider from ._info import Informer + +from .. import GITHUB_URL from ..exceptions import FilenameMissingError +from ..log import Logger from ..utils import get_content_type_with_retry +logger = Logger() + class PdfUrlInformer(Informer): def get_filename(self, abs_url): - # if this is called, filename must not have been provided - raise FilenameMissingError(provider="PDFUrl") + # try to get a nice filename by parsing the url + parsed = urllib.parse.urlparse(abs_url) + path_parts = parsed.path.split("/") + if not path_parts: + raise FilenameMissingError( + provider="PdfUrl", url=abs_url, reason="No URL parts", + ) + + filename = path_parts[-1] + if not filename.endswith(".pdf"): + raise FilenameMissingError( + provider="PdfUrl", + url=abs_url, + reason="URL path didn't end in .pdf", + ) + logger.warning( + "Using filename {filename} extracted from url. " + "You might want to provide a nicer one using --filename " + "or request this paper source to be added " + "(see: {github}).".format(filename=filename, github=GITHUB_URL) + ) + return filename class PdfUrl(Provider): @@ -28,7 +53,7 @@ class PdfUrl(Provider): self.informer = PdfUrlInformer() def get_abs_pdf_urls(self, url): - return (None, url) + return (url, url) def validate(src): # first check if it is a valid url diff --git a/tests/test_providers.py b/tests/test_providers.py index d0e3d40..82c8500 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -148,8 +148,8 @@ class TestProviders(unittest.TestCase): def test_pdfurl(self): prov = PdfUrl(upload=False, verbose=VERBOSE) url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf" - filename = prov.run(url, filename="test.pdf") - self.assertEqual("test.pdf", os.path.basename(filename)) + filename = prov.run(url) + self.assertEqual("14-526.pdf", os.path.basename(filename)) def test_pmlr_1(self): prov = PMLR(upload=False, verbose=VERBOSE) |
