From 991cb1fd30fac32ded404625697dee85119817c9 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Wed, 12 Aug 2020 13:20:20 -0400 Subject: [PATCH 1/2] fix(blob): extract padding chars from hashes I didn't previously understand that the "==" at the end of the hash isn't merely a delimiter, but a padding character for base64 encoding. It's not strictly necessary to have it, but you have to add it back in to perform a base64 decode. Base64 encoded strings are padded out to a multiple of four characters. --- google/cloud/storage/blob.py | 2 +- tests/unit/test_blob.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 1380f41bb..0942a7794 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -804,7 +804,7 @@ def _extract_headers_from_download(self, response): digests = {} for encoded_digest in x_goog_hash.split(","): - match = re.match(r"(crc32c|md5)=([\w\d]+)==", encoded_digest) + match = re.match(r"(crc32c|md5)=([\w\d]+=*)", encoded_digest) if match: method, digest = match.groups() digests[method] = digest diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index 68011e438..20e20abe4 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -1476,8 +1476,8 @@ def test_download_as_string_w_response_headers(self): self.assertEqual(blob.content_encoding, "gzip") self.assertEqual(blob.cache_control, "max-age=1337;public") self.assertEqual(blob.storage_class, "STANDARD") - self.assertEqual(blob.md5_hash, "CS9tHYTtyFntzj7B9nkkJQ") - self.assertEqual(blob.crc32c, "4gcgLQ") + self.assertEqual(blob.md5_hash, "CS9tHYTtyFntzj7B9nkkJQ==") + self.assertEqual(blob.crc32c, "4gcgLQ==") def test_download_as_string_w_hash_response_header_none(self): blob_name = "blob-name" From 33064aeb219512eda9598ed4e2cee658f0c2d979 Mon Sep 17 00:00:00 2001 From: William Silversmith Date: Wed, 12 Aug 2020 15:02:56 -0400 Subject: [PATCH 2/2] fix(blob.py): use a padding char number that is more realistic --- google/cloud/storage/blob.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 0942a7794..09122974a 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -804,7 +804,7 @@ def _extract_headers_from_download(self, response): digests = {} for encoded_digest in x_goog_hash.split(","): - match = re.match(r"(crc32c|md5)=([\w\d]+=*)", encoded_digest) + match = re.match(r"(crc32c|md5)=([\w\d]+={0,3})", encoded_digest) if match: method, digest = match.groups() digests[method] = digest