diff options
| -rw-r--r-- | paper2remarkable/exceptions.py | 10 | ||||
| -rw-r--r-- | paper2remarkable/providers/citeseerx.py | 4 | ||||
| -rw-r--r-- | paper2remarkable/providers/pdf_url.py | 31 | ||||
| -rw-r--r-- | tests/test_providers.py | 4 |
4 files changed, 37 insertions, 12 deletions
diff --git a/paper2remarkable/exceptions.py b/paper2remarkable/exceptions.py
index 86f39b4..a608bcc 100644
--- a/paper2remarkable/exceptions.py
+++ b/paper2remarkable/exceptions.py
@@ -48,13 +48,17 @@ class URLResolutionError(Error):
 class FilenameMissingError(Error):
     """Exception raised for providers that need a filename to be provided"""
 
-    def __init__(self, provider):
+    def __init__(self, provider, url, reason=None):
         self.provider = provider
+        self.url = url
+        self.reason = reason
 
     def __str__(self):
-        msg = "ERROR: Filename must be given with the {provider} provider (hint: use --filename)".format(
-            provider=self.provider
+        msg = "ERROR: Couldn't determine a filename from {url} for provider {provider}".format(
+            provider=self.provider, url=self.url
         )
+        if self.reason:
+            msg += "\nReason: {reason}".format(reason=self.reason)
         msg += GH_MSG
         return msg
 
diff --git a/paper2remarkable/providers/citeseerx.py b/paper2remarkable/providers/citeseerx.py
index e483f28..e819c30 100644
--- a/paper2remarkable/providers/citeseerx.py
+++ b/paper2remarkable/providers/citeseerx.py
@@ -49,10 +49,6 @@ class CiteSeerX(Provider):
             )
             time.sleep(30)
 
-        # NOTE: The delay should only be hit twice when p2r is used as a
-        # library (e.g. during testing). Otherwise the ``server_delay`` is
-        # never reached in run().
-
     def _get_doi(self, url):
         m = re.match(self.re_abs, url) or re.match(self.re_pdf, url)
         if m:
diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py
index 77accc9..b86c7c3 100644
--- a/paper2remarkable/providers/pdf_url.py
+++ b/paper2remarkable/providers/pdf_url.py
@@ -12,14 +12,39 @@ import urllib
 
 from ._base import Provider
 from ._info import Informer
+
+from .. import GITHUB_URL
 from ..exceptions import FilenameMissingError
+from ..log import Logger
 from ..utils import get_content_type_with_retry
 
+logger = Logger()
+
 
 class PdfUrlInformer(Informer):
     def get_filename(self, abs_url):
-        # if this is called, filename must not have been provided
-        raise FilenameMissingError(provider="PDFUrl")
+        # try to get a nice filename by parsing the url
+        parsed = urllib.parse.urlparse(abs_url)
+        path_parts = parsed.path.split("/")
+        if not path_parts:
+            raise FilenameMissingError(
+                provider="PdfUrl", url=abs_url, reason="No URL parts",
+            )
+
+        filename = path_parts[-1]
+        if not filename.endswith(".pdf"):
+            raise FilenameMissingError(
+                provider="PdfUrl",
+                url=abs_url,
+                reason="URL path didn't end in .pdf",
+            )
+        logger.warning(
+            "Using filename {filename} extracted from url. "
+            "You might want to provide a nicer one using --filename "
+            "or request this paper source to be added "
+            "(see: {github}).".format(filename=filename, github=GITHUB_URL)
+        )
+        return filename
 
 
 class PdfUrl(Provider):
@@ -28,7 +53,7 @@ class PdfUrl(Provider):
         self.informer = PdfUrlInformer()
 
     def get_abs_pdf_urls(self, url):
-        return (None, url)
+        return (url, url)
 
     def validate(src):
         # first check if it is a valid url
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 38f88b7..493a209 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -149,8 +149,8 @@ class TestProviders(unittest.TestCase):
     def test_pdfurl(self):
         prov = PdfUrl(upload=False, verbose=VERBOSE)
         url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf"
-        filename = prov.run(url, filename="test.pdf")
-        self.assertEqual("test.pdf", os.path.basename(filename))
+        filename = prov.run(url)
+        self.assertEqual("14-526.pdf", os.path.basename(filename))
 
     def test_pmlr_1(self):
         prov = PMLR(upload=False, verbose=VERBOSE)
