diff --git a/tests/test_arbitrary_package_attack.py b/tests/test_arbitrary_package_attack.py index 2dd329ced5..21096fb4bf 100755 --- a/tests/test_arbitrary_package_attack.py +++ b/tests/test_arbitrary_package_attack.py @@ -124,7 +124,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_download.py b/tests/test_download.py index c6933fcb1b..6ae0e4c4e4 100755 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -43,6 +43,7 @@ import tuf import tuf.download as download +import tuf.requests_fetcher import tuf.log import tuf.unittest_toolbox as unittest_toolbox import tuf.exceptions @@ -76,7 +77,7 @@ def setUp(self): self.server_process_handler = utils.TestServerProcess(log=logger) rel_target_filepath = os.path.basename(target_filepath) - self.url = 'http://localhost:' \ + self.url = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + '/' + rel_target_filepath # Computing hash of target file data. @@ -85,6 +86,10 @@ def setUp(self): digest = m.hexdigest() self.target_hash = {'md5':digest} + # Initialize the default fetcher for the download + self.fetcher = tuf.requests_fetcher.RequestsFetcher() + + # Stop server process and perform clean up. 
def tearDown(self): @@ -100,13 +105,36 @@ def tearDown(self): def test_download_url_to_tempfileobj(self): download_file = download.safe_download - with download_file(self.url, self.target_data_length) as temp_fileobj: + with download_file(self.url, self.target_data_length, self.fetcher) as temp_fileobj: temp_fileobj.seek(0) temp_file_data = temp_fileobj.read().decode('utf-8') self.assertEqual(self.target_data, temp_file_data) self.assertEqual(self.target_data_length, len(temp_file_data)) + # Test: Download url in more than one chunk. + def test_download_url_in_chunks(self): + + # Set smaller chunk size to ensure that the file will be downloaded + # in more than one chunk + default_chunk_size = tuf.settings.CHUNK_SIZE + tuf.settings.CHUNK_SIZE = 4 + # We don't have access to chunks from download_file() + # so we just confirm that the expectation of more than one chunk is + # correct and verify that no errors are raised during download + chunks_count = self.target_data_length/tuf.settings.CHUNK_SIZE + self.assertGreater(chunks_count, 1) + + download_file = download.safe_download + with download_file(self.url, self.target_data_length, self.fetcher) as temp_fileobj: + temp_fileobj.seek(0) + temp_file_data = temp_fileobj.read().decode('utf-8') + self.assertEqual(self.target_data, temp_file_data) + self.assertEqual(self.target_data_length, len(temp_file_data)) + + # Restore default settings + tuf.settings.CHUNK_SIZE = default_chunk_size + # Test: Incorrect lengths. def test_download_url_to_tempfileobj_and_lengths(self): @@ -118,18 +146,18 @@ def test_download_url_to_tempfileobj_and_lengths(self): # the server-reported length of the file does not match the # required_length. 'updater.py' *does* verify the hashes of downloaded # content. 
- download.safe_download(self.url, self.target_data_length - 4).close() - download.unsafe_download(self.url, self.target_data_length - 4).close() + download.safe_download(self.url, self.target_data_length - 4, self.fetcher).close() + download.unsafe_download(self.url, self.target_data_length - 4, self.fetcher).close() # We catch 'tuf.exceptions.DownloadLengthMismatchError' for safe_download() # because it will not download more bytes than requested (in this case, a # length greater than the size of the target file). self.assertRaises(tuf.exceptions.DownloadLengthMismatchError, - download.safe_download, self.url, self.target_data_length + 1) + download.safe_download, self.url, self.target_data_length + 1, self.fetcher) # Calling unsafe_download() with a mismatched length should not raise an # exception. - download.unsafe_download(self.url, self.target_data_length + 1).close() + download.unsafe_download(self.url, self.target_data_length + 1, self.fetcher).close() @@ -164,32 +192,26 @@ def test_download_url_to_tempfileobj_and_urls(self): download_file = download.safe_download unsafe_download_file = download.unsafe_download - self.assertRaises(securesystemslib.exceptions.FormatError, - download_file, None, self.target_data_length) - - self.assertRaises(tuf.exceptions.URLParsingError, - download_file, - self.random_string(), self.target_data_length) + with self.assertRaises(securesystemslib.exceptions.FormatError): + download_file(None, self.target_data_length, self.fetcher) - url = 'http://localhost:' \ + url = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + '/' + self.random_string() - self.assertRaises(requests.exceptions.HTTPError, - download_file, - url, - self.target_data_length) - url1 = 'http://localhost:' \ + with self.assertRaises(tuf.exceptions.FetcherHTTPError) as cm: + download_file(url, self.target_data_length, self.fetcher) + self.assertEqual(cm.exception.status_code, 404) + + url1 = 'http://' + 
utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port + 1) + '/' + self.random_string() - self.assertRaises(requests.exceptions.ConnectionError, - download_file, - url1, - self.target_data_length) + with self.assertRaises(requests.exceptions.ConnectionError): + download_file(url1, self.target_data_length, self.fetcher) # Specify an unsupported URI scheme. url_with_unsupported_uri = self.url.replace('http', 'file') self.assertRaises(requests.exceptions.InvalidSchema, download_file, url_with_unsupported_uri, - self.target_data_length) + self.target_data_length, self.fetcher) self.assertRaises(requests.exceptions.InvalidSchema, unsafe_download_file, - url_with_unsupported_uri, self.target_data_length) + url_with_unsupported_uri, self.target_data_length, self.fetcher) @@ -290,7 +312,7 @@ def test_https_connection(self): os.environ['REQUESTS_CA_BUNDLE'] = bad_cert_fname # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} # Try connecting to the server process with the bad cert while trusting # the bad cert. Expect failure because even though we trust it, the @@ -302,9 +324,9 @@ def test_https_connection(self): category=urllib3.exceptions.SubjectAltNameWarning) with self.assertRaises(requests.exceptions.SSLError): - download.safe_download(bad_https_url, target_data_length) + download.safe_download(bad_https_url, target_data_length, self.fetcher) with self.assertRaises(requests.exceptions.SSLError): - download.unsafe_download(bad_https_url, target_data_length) + download.unsafe_download(bad_https_url, target_data_length, self.fetcher) # Try connecting to the server processes with the good certs while not # trusting the good certs (trusting the bad cert instead). Expect failure @@ -312,31 +334,31 @@ def test_https_connection(self): # trust it. 
logger.info('Trying HTTPS download of target file: ' + good_https_url) with self.assertRaises(requests.exceptions.SSLError): - download.safe_download(good_https_url, target_data_length) + download.safe_download(good_https_url, target_data_length, self.fetcher) with self.assertRaises(requests.exceptions.SSLError): - download.unsafe_download(good_https_url, target_data_length) + download.unsafe_download(good_https_url, target_data_length, self.fetcher) logger.info('Trying HTTPS download of target file: ' + good2_https_url) with self.assertRaises(requests.exceptions.SSLError): - download.safe_download(good2_https_url, target_data_length) + download.safe_download(good2_https_url, target_data_length, self.fetcher) with self.assertRaises(requests.exceptions.SSLError): - download.unsafe_download(good2_https_url, target_data_length) + download.unsafe_download(good2_https_url, target_data_length, self.fetcher) # Configure environment to now trust the certfile that is expired. os.environ['REQUESTS_CA_BUNDLE'] = expired_cert_fname # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} # Try connecting to the server process with the expired cert while # trusting the expired cert. Expect failure because even though we trust # it, it is expired. 
logger.info('Trying HTTPS download of target file: ' + expired_https_url) with self.assertRaises(requests.exceptions.SSLError): - download.safe_download(expired_https_url, target_data_length) + download.safe_download(expired_https_url, target_data_length, self.fetcher) with self.assertRaises(requests.exceptions.SSLError): - download.unsafe_download(expired_https_url, target_data_length) + download.unsafe_download(expired_https_url, target_data_length, self.fetcher) # Try connecting to the server processes with the good certs while @@ -346,18 +368,18 @@ def test_https_connection(self): os.environ['REQUESTS_CA_BUNDLE'] = good_cert_fname # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTPS download of target file: ' + good_https_url) - download.safe_download(good_https_url, target_data_length).close() - download.unsafe_download(good_https_url, target_data_length).close() + download.safe_download(good_https_url, target_data_length, self.fetcher).close() + download.unsafe_download(good_https_url, target_data_length, self.fetcher).close() os.environ['REQUESTS_CA_BUNDLE'] = good2_cert_fname # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics.
- tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTPS download of target file: ' + good2_https_url) - download.safe_download(good2_https_url, target_data_length).close() - download.unsafe_download(good2_https_url, target_data_length).close() + download.safe_download(good2_https_url, target_data_length, self.fetcher).close() + download.unsafe_download(good2_https_url, target_data_length, self.fetcher).close() finally: for proc_handler in [ diff --git a/tests/test_endless_data_attack.py b/tests/test_endless_data_attack.py index 759119ae9d..01701d9830 100755 --- a/tests/test_endless_data_attack.py +++ b/tests/test_endless_data_attack.py @@ -126,7 +126,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_extraneous_dependencies_attack.py b/tests/test_extraneous_dependencies_attack.py index 8eece9971d..2e71ee800b 100755 --- a/tests/test_extraneous_dependencies_attack.py +++ b/tests/test_extraneous_dependencies_attack.py @@ -133,7 +133,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py new file mode 100644 index 0000000000..312ef80959 --- /dev/null +++ b/tests/test_fetcher.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python + +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Unit test for RequestsFetcher. +""" +# Help with Python 2 compatibility, where the '/' operator performs +# integer division. +from __future__ import division + +import logging +import os +import io +import sys +import unittest +import tempfile +import math + +import tuf +import tuf.exceptions +import tuf.requests_fetcher +import tuf.unittest_toolbox as unittest_toolbox + +from tests import utils + +logger = logging.getLogger(__name__) + + +class TestFetcher(unittest_toolbox.Modified_TestCase): + def setUp(self): + """ + Create a temporary file and launch a simple server in the + current working directory. + """ + + unittest_toolbox.Modified_TestCase.setUp(self) + + # Making a temporary file. + current_dir = os.getcwd() + target_filepath = self.make_temp_data_file(directory=current_dir) + self.target_fileobj = open(target_filepath, 'r') + self.file_contents = self.target_fileobj.read() + self.file_length = len(self.file_contents) + + # Launch a SimpleHTTPServer (serves files in the current dir). 
+ self.server_process_handler = utils.TestServerProcess(log=logger) + + rel_target_filepath = os.path.basename(target_filepath) + self.url = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + + str(self.server_process_handler.port) + '/' + rel_target_filepath + + # Create a temporary file where the target file chunks are written + # during fetching + self.temp_file = tempfile.TemporaryFile() + self.fetcher = tuf.requests_fetcher.RequestsFetcher() + + + # Stop server process and perform clean up. + def tearDown(self): + unittest_toolbox.Modified_TestCase.tearDown(self) + + # Cleans the resources and flush the logged lines (if any). + self.server_process_handler.clean() + + self.target_fileobj.close() + self.temp_file.close() + + + # Test: Normal case. + def test_fetch(self): + for chunk in self.fetcher.fetch(self.url, self.file_length): + self.temp_file.write(chunk) + + self.temp_file.seek(0) + temp_file_data = self.temp_file.read().decode('utf-8') + self.assertEqual(self.file_contents, temp_file_data) + + # Test if fetcher downloads file up to a required length + def test_fetch_restricted_length(self): + for chunk in self.fetcher.fetch(self.url, self.file_length-4): + self.temp_file.write(chunk) + + self.temp_file.seek(0, io.SEEK_END) + self.assertEqual(self.temp_file.tell(), self.file_length-4) + + + # Test that fetcher does not download more than actual file length + def test_fetch_upper_length(self): + for chunk in self.fetcher.fetch(self.url, self.file_length+4): + self.temp_file.write(chunk) + + self.temp_file.seek(0, io.SEEK_END) + self.assertEqual(self.temp_file.tell(), self.file_length) + + + # Test incorrect URL parsing + def test_url_parsing(self): + with self.assertRaises(tuf.exceptions.URLParsingError): + self.fetcher.fetch(self.random_string(), self.file_length) + + + # Test: Normal case with url data downloaded in more than one chunk + def test_fetch_in_chunks(self): + # Set smaller chunk size to ensure that the file will be downloaded + # in more than one 
chunk + default_chunk_size = tuf.settings.CHUNK_SIZE + tuf.settings.CHUNK_SIZE = 4 + + # expected_chunks_count: 3 + expected_chunks_count = math.ceil(self.file_length/tuf.settings.CHUNK_SIZE) + self.assertEqual(expected_chunks_count, 3) + + chunks_count = 0 + for chunk in self.fetcher.fetch(self.url, self.file_length): + self.temp_file.write(chunk) + chunks_count+=1 + + self.temp_file.seek(0) + temp_file_data = self.temp_file.read().decode('utf-8') + self.assertEqual(self.file_contents, temp_file_data) + # Check that we calculate chunks as expected + self.assertEqual(chunks_count, expected_chunks_count) + + # Restore default settings + tuf.settings.CHUNK_SIZE = default_chunk_size + + + +# Run unit test. +if __name__ == '__main__': + utils.configure_test_logging(sys.argv) + unittest.main() diff --git a/tests/test_indefinite_freeze_attack.py b/tests/test_indefinite_freeze_attack.py index e72299c456..6e10f3a075 100755 --- a/tests/test_indefinite_freeze_attack.py +++ b/tests/test_indefinite_freeze_attack.py @@ -146,7 +146,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_key_revocation_integration.py b/tests/test_key_revocation_integration.py index ed0909b14f..b235760e4a 100755 --- a/tests/test_key_revocation_integration.py +++ b/tests/test_key_revocation_integration.py @@ -132,7 +132,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_mix_and_match_attack.py b/tests/test_mix_and_match_attack.py index 472d88b940..123c447325 100755 --- a/tests/test_mix_and_match_attack.py +++ b/tests/test_mix_and_match_attack.py @@ -132,7 +132,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_multiple_repositories_integration.py b/tests/test_multiple_repositories_integration.py index 24bd48e5bd..de9920244d 100755 --- a/tests/test_multiple_repositories_integration.py +++ b/tests/test_multiple_repositories_integration.py @@ -136,8 +136,12 @@ def setUp(self): logger.debug('Server process 2 started.') - url_prefix = 'http://localhost:' + str(self.server_process_handler.port) - url_prefix2 = 'http://localhost:' + str(self.server_process_handler2.port) + url_prefix = \ + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.server_process_handler.port) + url_prefix2 = \ + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.server_process_handler2.port) self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix, 'metadata_path': 'metadata', diff --git a/tests/test_proxy_use.py b/tests/test_proxy_use.py index b3305d97e2..8731b407a6 100755 --- a/tests/test_proxy_use.py +++ b/tests/test_proxy_use.py @@ -97,7 +97,8 @@ def setUpClass(cls): # Note that the HTTP proxy server's 
address uses http://, regardless of the # type of connection used with the target server. - cls.http_proxy_addr = 'http://127.0.0.1:' + str(cls.http_proxy_handler.port) + cls.http_proxy_addr = 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(cls.http_proxy_handler.port) # Launch an HTTPS proxy server, also derived from inaz2/proxy2. @@ -121,6 +122,9 @@ def setUpClass(cls): # the type of connection used with the target server. cls.https_proxy_addr = 'https://localhost:' + str(cls.https_proxy_handler.port) + # Initialize the default fetcher for the download + cls.fetcher = tuf.requests_fetcher.RequestsFetcher() + @classmethod @@ -164,10 +168,12 @@ def setUp(self): suffix = '/' + os.path.basename(target_filepath) self.url = \ - 'http://localhost:' + str(self.http_server_handler.port) + suffix + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.http_server_handler.port) + suffix self.url_https = \ - 'https://localhost:' + str(self.https_server_handler.port) + suffix + 'https://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.https_server_handler.port) + suffix @@ -202,8 +208,8 @@ def test_baseline_no_proxy(self): """ logger.info('Trying HTTP download with no proxy: ' + self.url) - download.safe_download(self.url, self.target_data_length) - download.unsafe_download(self.url, self.target_data_length) + download.safe_download(self.url, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url, self.target_data_length, self.fetcher).close() @@ -218,8 +224,8 @@ def test_http_dl_via_smart_http_proxy(self): self.set_env_value('HTTP_PROXY', self.http_proxy_addr) logger.info('Trying HTTP download via HTTP proxy: ' + self.url) - download.safe_download(self.url, self.target_data_length) - download.unsafe_download(self.url, self.target_data_length) + download.safe_download(self.url, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url, self.target_data_length, self.fetcher).close() @@ -243,11 +249,11 @@ def 
test_https_dl_via_smart_http_proxy(self): os.path.join('ssl_certs', 'ssl_cert.crt')) # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTPS download via HTTP proxy: ' + self.url_https) - download.safe_download(self.url_https, self.target_data_length) - download.unsafe_download(self.url_https, self.target_data_length) + download.safe_download(self.url_https, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url_https, self.target_data_length, self.fetcher).close() @@ -267,11 +273,11 @@ def test_http_dl_via_https_proxy(self): os.path.join('ssl_certs', 'proxy_ca.crt')) # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTP download via HTTPS proxy: ' + self.url_https) - download.safe_download(self.url, self.target_data_length) - download.unsafe_download(self.url, self.target_data_length) + download.safe_download(self.url, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url, self.target_data_length, self.fetcher).close() @@ -293,11 +299,11 @@ def test_https_dl_via_https_proxy(self): os.path.join('ssl_certs', 'proxy_ca.crt')) # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. 
- tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTPS download via HTTPS proxy: ' + self.url_https) - download.safe_download(self.url_https, self.target_data_length) - download.unsafe_download(self.url_https, self.target_data_length) + download.safe_download(self.url_https, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url_https, self.target_data_length, self.fetcher).close() diff --git a/tests/test_replay_attack.py b/tests/test_replay_attack.py index 3c5fa389f8..1195d99b22 100755 --- a/tests/test_replay_attack.py +++ b/tests/test_replay_attack.py @@ -132,7 +132,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_slow_retrieval_attack.py b/tests/test_slow_retrieval_attack.py index 3c87e0d912..8a56e483ba 100755 --- a/tests/test_slow_retrieval_attack.py +++ b/tests/test_slow_retrieval_attack.py @@ -171,7 +171,7 @@ def setUp(self): logger.info('Slow Retrieval Server process started.') - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_updater.py b/tests/test_updater.py index 69c67044ea..4654cfab23 100755 --- a/tests/test_updater.py +++ b/tests/test_updater.py @@ -167,7 +167,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1110,7 +1110,7 @@ def test_6_get_one_valid_targetinfo(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix, @@ -1406,7 +1406,7 @@ def test_7_updated_targets(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1533,7 +1533,7 @@ def test_8_remove_obsolete_targets(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1861,8 +1861,12 @@ def setUp(self): logger.debug('Server process 2 started.') - url_prefix = 'http://localhost:' + str(self.server_process_handler.port) - url_prefix2 = 'http://localhost:' + str(self.server_process_handler2.port) + url_prefix = \ + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.server_process_handler.port) + url_prefix2 = \ + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.server_process_handler2.port) # We have all of the necessary information for two repository mirrors # in map.json, except for url prefixes. diff --git a/tests/test_updater_root_rotation_integration.py b/tests/test_updater_root_rotation_integration.py index a59e12de89..9182aa6c1f 100755 --- a/tests/test_updater_root_rotation_integration.py +++ b/tests/test_updater_root_rotation_integration.py @@ -140,7 +140,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/utils.py b/tests/utils.py index 8e8c07bde0..10a12436ac 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -39,6 +39,9 @@ logger = logging.getLogger(__name__) +# Used when forming URLs on the client side +TEST_HOST_ADDRESS = '127.0.0.1' + try: # is defined in Python 3 TimeoutError diff --git a/tuf/client/fetcher.py b/tuf/client/fetcher.py new file mode 100644 index 0000000000..8768bdd4b9 --- /dev/null +++ b/tuf/client/fetcher.py @@ -0,0 +1,38 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an interface for network IO abstraction. +""" + +# Imports +import abc + +# Classes +class FetcherInterface(): + """Defines an interface for abstract network download. + + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. + """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. 
+ + Returns: + A bytes iterator + """ + raise NotImplementedError # pragma: no cover diff --git a/tuf/client/updater.py b/tuf/client/updater.py index 9ada0974e2..3496889089 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -131,6 +131,7 @@ import tuf import tuf.download +import tuf.requests_fetcher import tuf.formats import tuf.settings import tuf.keydb @@ -145,7 +146,6 @@ import securesystemslib.keys import securesystemslib.util import six -import requests.exceptions # The Timestamp role does not have signed metadata about it; otherwise we # would need an infinite regress of metadata. Therefore, we use some @@ -619,7 +619,7 @@ class Updater(object): http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables """ - def __init__(self, repository_name, repository_mirrors): + def __init__(self, repository_name, repository_mirrors, fetcher=None): """ Constructor. Instantiating an updater object causes all the metadata @@ -659,6 +659,11 @@ def __init__(self, repository_name, repository_mirrors): 'targets_path': 'targets', 'confined_target_dirs': ['']}} + fetcher: + A concrete 'FetcherInterface' implementation. Performs the network + related download operations. If an external implementation is not + provided, tuf.requests_fetcher.RequestsFetcher is used. + securesystemslib.exceptions.FormatError: If the arguments are improperly formatted. @@ -688,6 +693,13 @@ def __init__(self, repository_name, repository_mirrors): self.repository_name = repository_name self.mirrors = repository_mirrors + # Initialize Updater with an externally provided 'fetcher' implementing + # the network download. By default tuf.requests_fetcher.RequestsFetcher is used. + if fetcher is None: + self.fetcher = tuf.requests_fetcher.RequestsFetcher() + else: + self.fetcher = fetcher + # Store the trusted metadata read from disk. 
self.metadata = {} @@ -1112,8 +1124,8 @@ def _update_root_metadata(self, current_root_metadata): """ def neither_403_nor_404(mirror_error): - if isinstance(mirror_error, requests.exceptions.HTTPError): - if mirror_error.response.status_code in {403, 404}: + if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): + if mirror_error.status_code in {403, 404}: return False return True @@ -1311,7 +1323,8 @@ def _get_target_file(self, target_filepath, file_length, file_hashes, for file_mirror in file_mirrors: try: - file_object = tuf.download.safe_download(file_mirror, file_length) + file_object = tuf.download.safe_download(file_mirror, + file_length, self.fetcher) # Verify 'file_object' against the expected length and hashes. self._check_file_length(file_object, file_length) @@ -1509,7 +1522,7 @@ def _get_metadata_file(self, metadata_role, remote_filename, for file_mirror in file_mirrors: try: file_object = tuf.download.unsafe_download(file_mirror, - upperbound_filelength) + upperbound_filelength, self.fetcher) file_object.seek(0) # Verify 'file_object' according to the callable function. diff --git a/tuf/download.py b/tuf/download.py index b45e842d7c..2d946ef891 100755 --- a/tuf/download.py +++ b/tuf/download.py @@ -32,42 +32,22 @@ from __future__ import unicode_literals import logging -import time import timeit import tempfile -import tuf -import requests - import securesystemslib import securesystemslib.util import six +import tuf import tuf.exceptions import tuf.formats -import urllib3.exceptions - # See 'log.py' to learn how logging is handled in TUF. logger = logging.getLogger(__name__) -# From http://docs.python-requests.org/en/master/user/advanced/#session-objects: -# -# "The Session object allows you to persist certain parameters across requests. -# It also persists cookies across all requests made from the Session instance, -# and will use urllib3's connection pooling. 
So if you're making several -# requests to the same host, the underlying TCP connection will be reused, -# which can result in a significant performance increase (see HTTP persistent -# connection)." -# -# NOTE: We use a separate requests.Session per scheme+hostname combination, in -# order to reuse connections to the same hostname to improve efficiency, but -# avoiding sharing state between different hosts-scheme combinations to -# minimize subtle security issues. Some cookies may not be HTTP-safe. -_sessions = {} - - -def safe_download(url, required_length): + +def safe_download(url, required_length, fetcher): """ Given the 'url' and 'required_length' of the desired file, open a connection @@ -84,6 +64,10 @@ def safe_download(url, required_length): An integer value representing the length of the file. This is an exact limit. + fetcher: + An object implementing FetcherInterface that performs the network IO + operations. + A file object is created on disk to store the contents of 'url'. @@ -105,13 +89,13 @@ def safe_download(url, required_length): securesystemslib.formats.URL_SCHEMA.check_match(url) tuf.formats.LENGTH_SCHEMA.check_match(required_length) - return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True) + return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True) -def unsafe_download(url, required_length): +def unsafe_download(url, required_length, fetcher): """ Given the 'url' and 'required_length' of the desired file, open a connection @@ -128,6 +112,10 @@ def unsafe_download(url, required_length): An integer value representing the length of the file. This is an upper limit. + fetcher: + An object implementing FetcherInterface that performs the network IO + operations. + A file object is created on disk to store the contents of 'url'. 
@@ -149,13 +137,13 @@ def unsafe_download(url, required_length): securesystemslib.formats.URL_SCHEMA.check_match(url) tuf.formats.LENGTH_SCHEMA.check_match(required_length) - return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=False) + return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=False) -def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True): +def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): """ Given the url and length of the desired file, this function opens a @@ -192,12 +180,6 @@ def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True): A file object that points to the contents of 'url'. """ - - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. - securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) - # 'url.replace('\\', '/')' is needed for compatibility with Windows-based # systems, because they might use back-slashes in place of forward-slashes. # This converts it to the common format. unquote() replaces %xx escapes in a @@ -210,174 +192,46 @@ def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True): # the downloaded file. temp_file = tempfile.TemporaryFile() - try: - # Use a different requests.Session per schema+hostname combination, to - # reuse connections while minimizing subtle security issues. 
- parsed_url = six.moves.urllib.parse.urlparse(url) - - if not parsed_url.scheme or not parsed_url.hostname: - raise tuf.exceptions.URLParsingError( - 'Could not get scheme and hostname from URL: ' + url) - - session_index = parsed_url.scheme + '+' + parsed_url.hostname - - logger.debug('url: ' + url) - logger.debug('session index: ' + session_index) - - session = _sessions.get(session_index) - - if not session: - session = requests.Session() - _sessions[session_index] = session - - # Attach some default headers to every Session. - requests_user_agent = session.headers['User-Agent'] - # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 - tuf_user_agent = 'tuf/' + tuf.__version__ + ' ' + requests_user_agent - session.headers.update({ - # Tell the server not to compress or modify anything. - # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives - 'Accept-Encoding': 'identity', - # The TUF user agent. - 'User-Agent': tuf_user_agent}) - - logger.debug('Made new session for ' + session_index) - - else: - logger.debug('Reusing session for ' + session_index) - - # Get the requests.Response object for this URL. - # - # Defer downloading the response body with stream=True. - # Always set the timeout. This timeout value is interpreted by requests as: - # - connect timeout (max delay before first byte is received) - # - read (gap) timeout (max delay between bytes received) - with session.get(url, stream=True, - timeout=tuf.settings.SOCKET_TIMEOUT) as response: - - # Check response status. - response.raise_for_status() - - # Download the contents of the URL, up to the required length, to a - # temporary file, and get the total number of downloaded bytes. - total_downloaded, average_download_speed = \ - _download_fixed_amount_of_data(response, temp_file, required_length) - - # Does the total number of downloaded bytes match the required length? 
- _check_downloaded_length(total_downloaded, required_length, - STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, - average_download_speed=average_download_speed) - - except Exception: - # Close 'temp_file'. Any written data is lost. - temp_file.close() - logger.debug('Could not download URL: ' + repr(url)) - raise - - else: - return temp_file - - - - - -def _download_fixed_amount_of_data(response, temp_file, required_length): - """ - - This is a helper function, where the download really happens. While-block - reads data from response a fixed chunk of data at a time, or less, until - 'required_length' is reached. - - - response: - The object for communicating with the server about the contents of a URL. - - temp_file: - A temporary file where the contents at the URL specified by the - 'response' object will be stored. - - required_length: - The number of bytes that we must download for the file. This is almost - always specified by the TUF metadata for the data file in question - (except in the case of timestamp metadata, in which case we would fix a - reasonable upper bound). - - - Data from the server will be written to 'temp_file'. - - - tuf.exceptions.SlowRetrievalError - will be raised if urllib3.exceptions.ReadTimeoutError is caught (if the - download times out). - - Otherwise, runtime or network exceptions will be raised without question. - - - A (total_downloaded, average_download_speed) tuple, where - 'total_downloaded' is the total number of bytes downloaded for the desired - file and the 'average_download_speed' calculated for the download - attempt. - """ - - # Keep track of total bytes downloaded. - number_of_bytes_received = 0 average_download_speed = 0 - - start_time = timeit.default_timer() + number_of_bytes_received = 0 try: - while True: - # We download a fixed chunk of data in every round. This is so that we - # can defend against slow retrieval attacks. Furthermore, we do not wish - # to download an extremely large file in one shot. 
- # Before beginning the round, sleep (if set) for a short amount of time - # so that the CPU is not hogged in the while loop. - if tuf.settings.SLEEP_BEFORE_ROUND: - time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) - - read_amount = min( - tuf.settings.CHUNK_SIZE, required_length - number_of_bytes_received) - - # NOTE: This may not handle some servers adding a Content-Encoding - # header, which may cause urllib3 to misbehave: - # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 - data = response.raw.read(read_amount) - - number_of_bytes_received = number_of_bytes_received + len(data) - - # Data successfully read from the response. Store it. - temp_file.write(data) - - if number_of_bytes_received == required_length: - break + chunks = fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: stop_time = timeit.default_timer() - seconds_spent_receiving = stop_time - start_time + temp_file.write(chunk) # Measure the average download speed. + number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time average_download_speed = number_of_bytes_received / seconds_spent_receiving if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: logger.debug('The average download speed dropped below the minimum' - ' average download speed set in tuf.settings.py.') + ' average download speed set in tuf.settings.py. Stopping the' + ' download!') break else: logger.debug('The average download speed has not dipped below the' ' minimum average download speed set in tuf.settings.py.') - # We might have no more data to read. Check number of bytes downloaded. - if not data: - logger.debug('Downloaded ' + repr(number_of_bytes_received) + '/' + - repr(required_length) + ' bytes.') + # Does the total number of downloaded bytes match the required length? 
+ _check_downloaded_length(number_of_bytes_received, required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, + average_download_speed=average_download_speed) - # Finally, we signal that the download is complete. - break + except Exception: + # Close 'temp_file'. Any written data is lost. + temp_file.close() + logger.debug('Could not download URL: ' + repr(url)) + raise - except urllib3.exceptions.ReadTimeoutError as e: - raise tuf.exceptions.SlowRetrievalError(str(e)) + else: + return temp_file - return number_of_bytes_received, average_download_speed diff --git a/tuf/exceptions.py b/tuf/exceptions.py index 177e7a2981..5b2a345c9c 100755 --- a/tuf/exceptions.py +++ b/tuf/exceptions.py @@ -335,3 +335,14 @@ class URLParsingError(Error): class InvalidConfigurationError(Error): """If a configuration object does not match the expected format.""" +class FetcherHTTPError(Exception): + """ + Raised by FetcherInterface implementations for HTTP errors. + + Args: + message (str): The HTTP error message + status_code (int): The HTTP status code + """ + def __init__(self, message, status_code): + super(FetcherHTTPError, self).__init__(message) + self.status_code = status_code diff --git a/tuf/requests_fetcher.py b/tuf/requests_fetcher.py new file mode 100644 index 0000000000..e7d0ef812d --- /dev/null +++ b/tuf/requests_fetcher.py @@ -0,0 +1,172 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an implementation of FetcherInterface using the Requests HTTP + library. +""" + +# Imports +import requests +import six +import logging +import time + +import urllib3.exceptions + +import tuf.exceptions +import tuf.settings + +from tuf.client.fetcher import FetcherInterface + +# Globals +logger = logging.getLogger(__name__) + +# Classes +class RequestsFetcher(FetcherInterface): + """A concrete implementation of FetcherInterface based on the Requests + library. 
+ + Attributes: + _sessions: A dictionary of Requests.Session objects storing a separate + session per scheme+hostname combination. + """ + + def __init__(self): + # From http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # + # "The Session object allows you to persist certain parameters across + # requests. It also persists cookies across all requests made from the + # Session instance, and will use urllib3's connection pooling. So if you're + # making several requests to the same host, the underlying TCP connection + # will be reused, which can result in a significant performance increase + # (see HTTP persistent connection)." + # + # NOTE: We use a separate requests.Session per scheme+hostname combination, + # in order to reuse connections to the same hostname to improve efficiency, + # but avoiding sharing state between different hosts-scheme combinations to + # minimize subtle security issues. Some cookies may not be HTTP-safe. + self._sessions = {} + + + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + # Get a customized session for each new schema+hostname combination. + session = self._get_session(url) + + # Get the requests.Response object for this URL. + # + # Defer downloading the response body with stream=True. + # Always set the timeout. 
This timeout value is interpreted by requests as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + response = session.get(url, stream=True, + timeout=tuf.settings.SOCKET_TIMEOUT) + # Check response status. + try: + response.raise_for_status() + except requests.HTTPError as e: + status = e.response.status_code + raise tuf.exceptions.FetcherHTTPError(str(e), status) + + + # Define a generator function to be returned by fetch. This way the caller + # of fetch can differentiate between connection and actual data download + # and measure download times accordingly. + def chunks(): + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is so that we + # can defend against slow retrieval attacks. Furthermore, we do not + # wish to download an extremely large file in one shot. + # Before beginning the round, sleep (if set) for a short amount of + # time so that the CPU is not hogged in the while loop. + if tuf.settings.SLEEP_BEFORE_ROUND: + time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) + + read_amount = min( + tuf.settings.CHUNK_SIZE, required_length - bytes_received) + + # NOTE: This may not handle some servers adding a Content-Encoding + # header, which may cause urllib3 to misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + # We might have no more data to read. Check number of bytes downloaded. + if not data: + logger.debug('Downloaded ' + repr(bytes_received) + '/' + + repr(required_length) + ' bytes.') + + # Finally, we signal that the download is complete. 
+ break + + yield data + + if bytes_received >= required_length: + break + + except urllib3.exceptions.ReadTimeoutError as e: + raise tuf.exceptions.SlowRetrievalError(str(e)) + + finally: + response.close() + + return chunks() + + + + def _get_session(self, url): + """Returns a different customized requests.Session per schema+hostname + combination. + """ + # Use a different requests.Session per schema+hostname combination, to + # reuse connections while minimizing subtle security issues. + parsed_url = six.moves.urllib.parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise tuf.exceptions.URLParsingError( + 'Could not get scheme and hostname from URL: ' + url) + + session_index = parsed_url.scheme + '+' + parsed_url.hostname + + logger.debug('url: ' + url) + logger.debug('session index: ' + session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers['User-Agent'] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = 'tuf/' + tuf.__version__ + ' ' + requests_user_agent + session.headers.update({ + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + 'Accept-Encoding': 'identity', + # The TUF user agent. + 'User-Agent': tuf_user_agent}) + + logger.debug('Made new session for ' + session_index) + + else: + logger.debug('Reusing session for ' + session_index) + + return session