about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- paper2remarkable/exceptions.py 10
-rw-r--r-- paper2remarkable/providers/pdf_url.py 31
-rw-r--r-- tests/test_providers.py 4
3 files changed, 37 insertions, 8 deletions
diff --git a/paper2remarkable/exceptions.py b/paper2remarkable/exceptions.py
index 86f39b4..a608bcc 100644
--- a/paper2remarkable/exceptions.py
+++ b/paper2remarkable/exceptions.py
@@ -48,13 +48,17 @@ class URLResolutionError(Error):
class FilenameMissingError(Error):
"""Exception raised for providers that need a filename to be provided"""
- def __init__(self, provider):
+ def __init__(self, provider, url, reason=None):
self.provider = provider
+ self.url = url
+ self.reason = reason
def __str__(self):
- msg = "ERROR: Filename must be given with the {provider} provider (hint: use --filename)".format(
- provider=self.provider
+ msg = "ERROR: Couldn't determine a filename from {url} for provider {provider}".format(
+ provider=self.provider, url=self.url
)
+ if self.reason:
+ msg += "\nReason: {reason}".format(reason=self.reason)
msg += GH_MSG
return msg
diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py
index 77accc9..b86c7c3 100644
--- a/paper2remarkable/providers/pdf_url.py
+++ b/paper2remarkable/providers/pdf_url.py
@@ -12,14 +12,39 @@ import urllib
from ._base import Provider
from ._info import Informer
+
+from .. import GITHUB_URL
from ..exceptions import FilenameMissingError
+from ..log import Logger
from ..utils import get_content_type_with_retry
+logger = Logger()
+
class PdfUrlInformer(Informer):
def get_filename(self, abs_url):
- # if this is called, filename must not have been provided
- raise FilenameMissingError(provider="PDFUrl")
+ # try to get a nice filename by parsing the url
+ parsed = urllib.parse.urlparse(abs_url)
+ path_parts = parsed.path.split("/")
+ if not path_parts:
+ raise FilenameMissingError(
+ provider="PdfUrl", url=abs_url, reason="No URL parts",
+ )
+
+ filename = path_parts[-1]
+ if not filename.endswith(".pdf"):
+ raise FilenameMissingError(
+ provider="PdfUrl",
+ url=abs_url,
+ reason="URL path didn't end in .pdf",
+ )
+ logger.warning(
+ "Using filename {filename} extracted from url. "
+ "You might want to provide a nicer one using --filename "
+ "or request this paper source to be added "
+ "(see: {github}).".format(filename=filename, github=GITHUB_URL)
+ )
+ return filename
class PdfUrl(Provider):
@@ -28,7 +53,7 @@ class PdfUrl(Provider):
self.informer = PdfUrlInformer()
def get_abs_pdf_urls(self, url):
- return (None, url)
+ return (url, url)
def validate(src):
# first check if it is a valid url
diff --git a/tests/test_providers.py b/tests/test_providers.py
index d0e3d40..82c8500 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -148,8 +148,8 @@ class TestProviders(unittest.TestCase):
def test_pdfurl(self):
prov = PdfUrl(upload=False, verbose=VERBOSE)
url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf"
- filename = prov.run(url, filename="test.pdf")
- self.assertEqual("test.pdf", os.path.basename(filename))
+ filename = prov.run(url)
+ self.assertEqual("14-526.pdf", os.path.basename(filename))
def test_pmlr_1(self):
prov = PMLR(upload=False, verbose=VERBOSE)