From ccc169f98a1b1df1fc2f51653788d258351e77db Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 16:15:33 -0400 Subject: [PATCH 01/12] fix: for non-chunked downloads, automatically populate standard fields Content-Encoding Content-Type Content-Lanugage Cache-Control X-Goog-Storage-Class (self.storage_class) --- google/cloud/storage/blob.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index d4b0956fe..72e3904d0 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -831,8 +831,17 @@ def _do_download( download = klass( download_url, stream=file_obj, headers=headers, start=start, end=end ) - download.consume(transport) + response = download.consume(transport) + self.content_encoding = response.raw.headers.get('Content-Encoding', None) + self.content_type = response.raw.headers.get('Content-Type', None) + self.cache_control = response.raw.headers.get('Cache-Control', None) + self.storage_class = response.raw.headers.get('X-Goog-Storage-Class', None) + self.content_language = response.raw.headers.get('Content-Language', None) + + # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', + # self.crc32c = + # self.md5_hash = else: if raw_download: From 46e202814197d3ba9b98d184079ce796f80013a9 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 16:17:27 -0400 Subject: [PATCH 02/12] fix: don't assume GRMP has a stable interface --- google/cloud/storage/blob.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 72e3904d0..b3d350fa4 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -833,11 +833,14 @@ def _do_download( ) response = download.consume(transport) - self.content_encoding = response.raw.headers.get('Content-Encoding', None) - self.content_type = response.raw.headers.get('Content-Type', None) - self.cache_control = response.raw.headers.get('Cache-Control', None) - self.storage_class = response.raw.headers.get('X-Goog-Storage-Class', None) - self.content_language = response.raw.headers.get('Content-Language', None) + try: + self.content_encoding = response.raw.headers.get('Content-Encoding', None) + self.content_type = response.raw.headers.get('Content-Type', None) + self.cache_control = response.raw.headers.get('Cache-Control', None) + self.storage_class = response.raw.headers.get('X-Goog-Storage-Class', None) + self.content_language = response.raw.headers.get('Content-Language', None) + except AttributeError: + pass # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', # self.crc32c = From 4d37401c3129c43394a3b86f47cbd746b7bb3d35 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 16:18:28 -0400 Subject: [PATCH 03/12] refactor: move commented out code to a more sensible location --- google/cloud/storage/blob.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index b3d350fa4..53ea9d79c 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -839,12 +839,11 @@ def _do_download( self.cache_control = response.raw.headers.get('Cache-Control', None) self.storage_class = response.raw.headers.get('X-Goog-Storage-Class', None) self.content_language = response.raw.headers.get('Content-Language', None) + # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', + # self.crc32c = + # self.md5_hash = except AttributeError: pass - - # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', - # self.crc32c = - # self.md5_hash = else: if raw_download: From d6c9d2812e801e96ffb5cce1a569648482942072 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 16:27:27 -0400 Subject: [PATCH 04/12] feat: extract crc32c and md5 from X-Goog-Hash --- google/cloud/storage/blob.py | 39 +++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 53ea9d79c..5596f1d17 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -31,6 +31,7 @@ from io import BytesIO import mimetypes import os +import re import warnings import six @@ -783,6 +784,31 @@ def _get_download_url( ) return _add_query_parameters(base_url, name_value_pairs) + def _extract_headers_from_download(self, response): + try: + self.content_encoding = response.raw.headers.get('Content-Encoding', None) + self.content_type = response.raw.headers.get('Content-Type', None) + self.cache_control = response.raw.headers.get('Cache-Control', None) + self.storage_class = response.raw.headers.get('X-Goog-Storage-Class', None) + self.content_language = response.raw.headers.get('Content-Language', None) + # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', + hash_string = response.raw.headers.get('X-Goog-Hash', None) + except AttributeError: + pass + + if hash_string is None: + return + + digests = {} + for encoded_digest in hash_string.split(','): + match = re.match(r'(crc32c|md5)=([\w\d]+)==', encoded_digest) + if match: + method, digest = match.groups() + digests[method] = digest + + self.crc32c = digests.get('crc32c', None) + self.md5_hash = digests.get('md5', None) + def _do_download( self, transport, @@ -832,18 +858,7 @@ def _do_download( download_url, stream=file_obj, headers=headers, start=start, end=end ) response = download.consume(transport) - - try: - self.content_encoding = response.raw.headers.get('Content-Encoding', None) - self.content_type = response.raw.headers.get('Content-Type', None) - self.cache_control = response.raw.headers.get('Cache-Control', None) - self.storage_class = response.raw.headers.get('X-Goog-Storage-Class', None) - self.content_language = response.raw.headers.get('Content-Language', None) - # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', - # self.crc32c = - # self.md5_hash = - except AttributeError: - pass + self._extract_headers_from_download(response) else: if raw_download: From dc7a20784ec1b5c8c881b28094a60964fa8d4335 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 18:34:32 -0400 Subject: [PATCH 05/12] fix: crashing when method is not defined --- google/cloud/storage/blob.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 5596f1d17..51f2d8352 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -804,7 +804,7 @@ def _extract_headers_from_download(self, response): match = re.match(r'(crc32c|md5)=([\w\d]+)==', encoded_digest) if match: method, digest = match.groups() - digests[method] = digest + digests[method] = digest self.crc32c = digests.get('crc32c', None) self.md5_hash = digests.get('md5', None) From cd2763dc346bdf1b0d42fed4d47782812846c2e9 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 18:42:48 -0400 Subject: [PATCH 06/12] refactor(blob.py): change style to comport with black --- google/cloud/storage/blob.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 51f2d8352..e9dfcc746 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -786,13 +786,13 @@ def _get_download_url( def _extract_headers_from_download(self, response): try: - self.content_encoding = response.raw.headers.get('Content-Encoding', None) - self.content_type = response.raw.headers.get('Content-Type', None) - self.cache_control = response.raw.headers.get('Cache-Control', None) - self.storage_class = response.raw.headers.get('X-Goog-Storage-Class', None) - self.content_language = response.raw.headers.get('Content-Language', None) + self.content_encoding = response.raw.headers.get("Content-Encoding", None) + self.content_type = response.raw.headers.get("Content-Type", None) + self.cache_control = response.raw.headers.get("Cache-Control", None) + self.storage_class = response.raw.headers.get("X-Goog-Storage-Class", None) + self.content_language = response.raw.headers.get("Content-Language", None) # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', - hash_string = response.raw.headers.get('X-Goog-Hash', None) + hash_string = response.raw.headers.get("X-Goog-Hash", None) except AttributeError: pass @@ -800,14 +800,14 @@ def _extract_headers_from_download(self, response): return digests = {} - for encoded_digest in hash_string.split(','): - match = re.match(r'(crc32c|md5)=([\w\d]+)==', encoded_digest) + for encoded_digest in hash_string.split(","): + match = re.match(r"(crc32c|md5)=([\w\d]+)==", encoded_digest) if match: method, digest = match.groups() digests[method] = digest - self.crc32c = digests.get('crc32c', None) - self.md5_hash = digests.get('md5', None) + self.crc32c = digests.get("crc32c", None) + self.md5_hash = digests.get("md5", None) def _do_download( self, From 42b62c85c7ef9f913471094daafa17e669ccf236 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 19:31:21 -0400 Subject: [PATCH 07/12] refactor: remove unnecessary additional code path --- google/cloud/storage/blob.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index e9dfcc746..579bfd483 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -792,15 +792,12 @@ def _extract_headers_from_download(self, response): self.storage_class = response.raw.headers.get("X-Goog-Storage-Class", None) self.content_language = response.raw.headers.get("Content-Language", None) # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', - hash_string = response.raw.headers.get("X-Goog-Hash", None) + x_goog_hash = response.raw.headers.get("X-Goog-Hash", '') except AttributeError: pass - if hash_string is None: - return - digests = {} - for encoded_digest in hash_string.split(","): + for encoded_digest in x_goog_hash.split(","): match = re.match(r"(crc32c|md5)=([\w\d]+)==", encoded_digest) if match: method, digest = match.groups() From 55601cecaa599f440894e9450167ba89629c97a9 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 19:38:02 -0400 Subject: [PATCH 08/12] docs: added docstring to new method --- google/cloud/storage/blob.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 579bfd483..9ab6cdeb3 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -785,6 +785,15 @@ def _get_download_url( return _add_query_parameters(base_url, name_value_pairs) def _extract_headers_from_download(self, response): + """Extract headers from a non-chunked request's http object. + + This avoids the need to make a second request for commonly used + headers. + + :type response: + :class requests.models.Response + :param response: The server response from downloading a non-chunked file + """ try: self.content_encoding = response.raw.headers.get("Content-Encoding", None) self.content_type = response.raw.headers.get("Content-Type", None) From 175f4d8aed68b301f95eb3886273fdaf98e37bb4 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 14 Jul 2020 19:58:07 -0400 Subject: [PATCH 09/12] fix: ensure x_goog_hash is set if an error is encountered --- google/cloud/storage/blob.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 9ab6cdeb3..6fcdc0f03 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -803,7 +803,7 @@ def _extract_headers_from_download(self, response): # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', x_goog_hash = response.raw.headers.get("X-Goog-Hash", '') except AttributeError: - pass + x_goog_hash = '' digests = {} for encoded_digest in x_goog_hash.split(","): From 0deef45715eb528bfa05cf9ef8aabd4cea737373 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Tue, 21 Jul 2020 19:04:30 -0400 Subject: [PATCH 10/12] refactor: no need to use 'raw' with a requests.Response --- google/cloud/storage/blob.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index a26ef4a52..81b29b1c1 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -794,16 +794,13 @@ def _extract_headers_from_download(self, response): :class requests.models.Response :param response: The server response from downloading a non-chunked file """ - try: - self.content_encoding = response.raw.headers.get("Content-Encoding", None) - self.content_type = response.raw.headers.get("Content-Type", None) - self.cache_control = response.raw.headers.get("Cache-Control", None) - self.storage_class = response.raw.headers.get("X-Goog-Storage-Class", None) - self.content_language = response.raw.headers.get("Content-Language", None) - # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', - x_goog_hash = response.raw.headers.get("X-Goog-Hash", '') - except AttributeError: - x_goog_hash = '' + self.content_encoding = response.headers.get("Content-Encoding", None) + self.content_type = response.headers.get("Content-Type", None) + self.cache_control = response.headers.get("Cache-Control", None) + self.storage_class = response.headers.get("X-Goog-Storage-Class", None) + self.content_language = response.headers.get("Content-Language", None) + # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', + x_goog_hash = response.headers.get("X-Goog-Hash", '') digests = {} for encoded_digest in x_goog_hash.split(","): From 7b54722104df2ab53e88802338db0813802dbf76 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Wed, 22 Jul 2020 14:24:16 -0400 Subject: [PATCH 11/12] test: extract headers from response --- tests/unit/test_blob.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index 4635b050e..80951fed6 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -1451,6 +1451,37 @@ def _download_as_string_helper(self, raw_download, timeout=None): stream = blob._do_download.mock_calls[0].args[1] self.assertIsInstance(stream, io.BytesIO) + def test_download_as_string_w_response_headers(self): + blob_name = "blob-name" + client = mock.Mock(spec=["_http"]) + bucket = _Bucket(client) + media_link = "http://example.com/media/" + properties = {"mediaLink": media_link} + blob = self._make_one(blob_name, bucket=bucket, properties=properties) + + response = self._mock_requests_response( + http_client.OK, + headers={ + "Content-Type": "application/json", + "Content-Language": "ko-kr", + "Cache-Control": "max-age=1337;public", + "Content-Encoding": "gzip", + "X-Goog-Storage-Class": "STANDARD", + "X-Goog-Hash": "crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==", + }, + # { "x": 5 } gzipped + content=b"\x1f\x8b\x08\x00\xcfo\x17_\x02\xff\xabVP\xaaP\xb2R0U\xa8\x05\x00\xa1\xcaQ\x93\n\x00\x00\x00" + ) + blob._extract_headers_from_download(response) + + self.assertEqual(blob.content_type, "application/json") + self.assertEqual(blob.content_language, "ko-kr") + self.assertEqual(blob.content_encoding, "gzip") + self.assertEqual(blob.cache_control, "max-age=1337;public") + self.assertEqual(blob.storage_class, "STANDARD") + self.assertEqual(blob.md5_hash, "CS9tHYTtyFntzj7B9nkkJQ") + self.assertEqual(blob.crc32c, "4gcgLQ") + def test_download_as_string_w_generation_match(self): GENERATION_NUMBER = 6 MEDIA_LINK = "http://example.com/media/" From 6a733d63e85927ee0824018f30caeed58b4f45f8 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Wed, 22 Jul 2020 14:35:52 -0400 Subject: [PATCH 12/12] chore: reformat whitespace with black --- google/cloud/storage/blob.py | 2 +- tests/unit/test_blob.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 81b29b1c1..07a17867c 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -800,7 +800,7 @@ def _extract_headers_from_download(self, response): self.storage_class = response.headers.get("X-Goog-Storage-Class", None) self.content_language = response.headers.get("Content-Language", None) # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==', - x_goog_hash = response.headers.get("X-Goog-Hash", '') + x_goog_hash = response.headers.get("X-Goog-Hash", "") digests = {} for encoded_digest in x_goog_hash.split(","): diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index 80951fed6..54aeae671 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -1460,7 +1460,7 @@ def test_download_as_string_w_response_headers(self): blob = self._make_one(blob_name, bucket=bucket, properties=properties) response = self._mock_requests_response( - http_client.OK, + http_client.OK, headers={ "Content-Type": "application/json", "Content-Language": "ko-kr", @@ -1468,9 +1468,9 @@ def test_download_as_string_w_response_headers(self): "Content-Encoding": "gzip", "X-Goog-Storage-Class": "STANDARD", "X-Goog-Hash": "crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==", - }, + }, # { "x": 5 } gzipped - content=b"\x1f\x8b\x08\x00\xcfo\x17_\x02\xff\xabVP\xaaP\xb2R0U\xa8\x05\x00\xa1\xcaQ\x93\n\x00\x00\x00" + content=b"\x1f\x8b\x08\x00\xcfo\x17_\x02\xff\xabVP\xaaP\xb2R0U\xa8\x05\x00\xa1\xcaQ\x93\n\x00\x00\x00", ) blob._extract_headers_from_download(response)