Verified Commit 751d7418 authored by Huste, Tobias (FWCC) - 111645's avatar Huste, Tobias (FWCC) - 111645
Browse files

api: check URL before starting background job

- check whether the given URL is resolvable
- if possible, also check the file size constraints via the
  Content-Length header
- install httpretty module in
parent ba0ad1b0
Pipeline #4190 passed with stages
in 35 minutes and 58 seconds
......@@ -28,6 +28,7 @@ from urllib.parse import urlencode, urlparse
import humanize
import paramiko
import requests
from celery.result import AsyncResult
from flask import Blueprint, abort, current_app, jsonify, request, url_for
from flask_login import current_user, login_required
......@@ -41,7 +42,7 @@ from werkzeug.local import LocalProxy
from .decorators import cached, need_authentication
from .errors import AuthenticationError, FileDoesNotExist, FileTooLargeError, \
MissingPathError, MissingURLError, NoAbsolutePathError, NoFileError, \
NoPathError, RemoteServerNotFoundError, SSHException, \
NoPathError, RemoteServerNotFoundError, RequestError, SSHException, \
SSHKeyNotFoundError, UnsupportedProtocolError
from .models import RemoteServer, SSHKey
from .tasks import download_files, download_via_sftp
......@@ -66,12 +67,21 @@ def make_key(*args, **kwargs):
def url_validator(value):
    """Validate that the given URL uses an allowed scheme and is reachable.

    The scheme of ``value`` must be one of the allowed protocols (currently
    only ``https``). Afterwards a ``HEAD`` request with a one second timeout
    is sent to confirm that the URL can be reached.

    :param value: URL string to validate.
    :raises UnsupportedProtocolError: if the URL scheme is not allowed.
    :raises RequestError: if the ``HEAD`` request fails.
    """
    parse_result = urlparse(value)
    allowed_protocols = ['https']
    # check if the used protocol scheme is allowed
    if parse_result.scheme not in allowed_protocols:
        raise UnsupportedProtocolError()
    # check if URL is valid and reachable; only the success of the request
    # matters here, so the response object is not kept
    try:
        requests.head(value, timeout=1)
    except requests.RequestException:
        raise RequestError()
def path_validator(value):
......@@ -130,6 +140,7 @@ class UploadByUrl(ContentNegotiatedMethodView):
def post(self, bucket, url=None, key=None):
"""Create new object version from the file in the given URL."""
if url:
self.check_filesize(url, bucket)
celery_id = download_files.delay(
......@@ -155,6 +166,19 @@ class UploadByUrl(ContentNegotiatedMethodView):
return response
raise MissingURLError()
@staticmethod
def check_filesize(url, bucket):
    """Check the size of the file behind ``url`` against the bucket limits.

    The size is taken from the ``Content-Length`` header of a ``HEAD``
    request. If the header is missing or cannot be parsed, the size counts
    as ``0`` and the check passes.

    Declared as a static method because it takes no ``self`` but is invoked
    through the instance (``self.check_filesize(url, bucket)``).

    :param url: URL of the file that is going to be downloaded.
    :param bucket: Object providing ``quota_left`` and ``size_limit``.
    :raises FileTooLargeError: if the reported size exceeds either limit.
    :raises RequestError: if the ``HEAD`` request fails.
    """
    try:
        r = requests.head(url, timeout=1)
    except requests.RequestException:
        # same error as the URL validator raises for unreachable URLs
        raise RequestError()

    try:
        size = int(r.headers.get('content-length', 0))
    except (TypeError, ValueError):
        # an unparsable header is treated like a missing one
        size = 0

    # check file size limits
    # NOTE(review): a limit of ``None`` is treated as "no limit" here --
    # confirm against the bucket model.
    for limit in (bucket.quota_left, bucket.size_limit):
        if limit is not None and limit < size:
            raise FileTooLargeError()
class SFTPBrowserAPI(ContentNegotiatedMethodView):
"""Browse files API via sftp."""
......@@ -119,3 +119,11 @@ class NoPathError(RESTException):
code = 400
description = 'Please provide a valid directory path.'
class RequestError(RESTException):
    """REST error for a failed HTTP request to the specified URL."""

    code = 400
    # NOTE(review): 'occured' is misspelled, but the test suite asserts on
    # this exact text, so the message and the test must be changed together.
    description = (
        'An error occured while connecting to the specified URL. '
        'Please check and try again. Contact us, if the problem persists.'
    )
......@@ -30,6 +30,7 @@ tests_require = [
......@@ -23,6 +23,7 @@ import json
import os
import uuid
import httpretty
import mock
import pytest
from flask import Flask, url_for
......@@ -73,7 +74,7 @@ def test_get_api(mock_asyncresult, client, bucket, user):
assert resp.status_code == 200
def test_post_api(client, bucket, user):
def test_post_api(client, bucket, user, db):
"""Test post request."""
login_user(client, user)
url = url_for('invenio_uploadbyurl.uploadbyurl_api',
......@@ -113,6 +114,49 @@ def test_post_api(client, bucket, user):
resp =
assert resp.status_code == 400
# Test with invalid URL
url = url_for('invenio_uploadbyurl.uploadbyurl_api',,
resp =
assert resp.status_code == 400
assert (b'An error occured while connecting to the specified URL.'
# set max file size of bucket
bucket.quota_size = 1500
'Content-Length': '1500',
'Content-Length': '1500',
# Test with too large file
with httpretty.enabled():
url = url_for('invenio_uploadbyurl.uploadbyurl_api',,
resp =
assert resp.status_code == 400
assert b'The given file is too large.' in
def test_sftp_post(client, bucket, remote, user, user2, db):
"""Test API endpoint for sftp download."""
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment