From e8c77359c7fe88465700e8038e7fb8644c1dcbf8 Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Wed, 21 Oct 2020 18:02:45 +0530 Subject: [PATCH 1/2] feat: add list prefix method --- google/cloud/storage/bucket.py | 118 +++++++++++++++++++++++++++++++++ google/cloud/storage/client.py | 75 +++++++++++++++++++++ tests/system/test_system.py | 5 ++ tests/unit/test_bucket.py | 56 ++++++++++++++++ tests/unit/test_client.py | 85 ++++++++++++++++++++++++ 5 files changed, 339 insertions(+) diff --git a/google/cloud/storage/bucket.py b/google/cloud/storage/bucket.py index 7ab9a13ef..aaca74bd9 100644 --- a/google/cloud/storage/bucket.py +++ b/google/cloud/storage/bucket.py @@ -124,6 +124,28 @@ def _item_to_blob(iterator, item): return blob +# pylint: disable=unused-argument +def _item_to_value(iterator, item): + """Convert a JSON to the string. + + .. note:: + + This assumes that the ``bucket`` attribute has been + added to the iterator after being created. + + :type iterator: :class:`~google.api_core.page_iterator.Iterator` + :param iterator: The iterator that has retrieved the item. + + :type item: str + :param item: An item to be returned. + + :rtype: str + :returns: The next value in the page. + """ + + return item + + def _item_to_notification(iterator, item): """Convert a JSON blob to the native object. @@ -1263,6 +1285,102 @@ def list_blobs( iterator.prefixes = set() return iterator + def list_prefixes( + self, + prefix, + delimiter="/", + max_results=None, + page_token=None, + projection="noAcl", + fields=None, + client=None, + timeout=_DEFAULT_TIMEOUT, + ): + """Return an iterator used to find directories(prefixes) in the bucket. + + :type prefix: str + :param prefix: Filter results to directories whose names begin + with this prefix. + + :type delimiter: str + :param delimiter: (Optional) Delimiter, used with ``prefix`` to + emulate hierarchy. Defaults to '/'. + + :type max_results: int + :param max_results: (Optional) The maximum number of directories to return. 
+ + :type page_token: str + :param page_token: + (Optional) If present, return the next batch of prefixes, using the + value, which must correspond to the ``nextPageToken`` value + returned in the previous response. Deprecated: use the ``pages`` + property of the returned iterator instead of manually passing the + token. + + :type projection: str + :param projection: + (Optional) Specifies the set of properties to return. If used, must + be 'full' or 'noAcl'. Defaults to 'noAcl'. + + :type fields: str + :param fields: + (Optional) Selector specifying which fields to include in a partial + response. Must be a list of fields. For example to get a partial + response with just the next page token and the language of each + bucket returned: 'items/id,nextPageToken'. + + :type projection: str + :param projection: (Optional) If used, must be 'full' or 'noAcl'. + Defaults to ``'noAcl'``. Specifies the set of properties to return. + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: (Optional) The client to use. If not passed, falls back + to the ``client`` stored on the current bucket. + + :type timeout: float or tuple + :param timeout: (Optional) The amount of time, in seconds, to wait + for the server response. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + :rtype: :class:`~google.api_core.page_iterator.Iterator` + :raises ValueError: if both ``project`` is ``None`` and the client's + project is also ``None``. + :returns: Iterator of all prefixes(unicode) in this bucket matching the arguments. 
+ """ + extra_params = { + "projection": projection, + "prefix": prefix, + "delimiter": delimiter, + } + + if fields is not None: + extra_params["fields"] = fields + + if self.user_project is not None: + extra_params["userProject"] = self.user_project + + client = self._require_client(client) + path = self.path + "/o" + api_request = functools.partial( + client._connection.api_request, timeout=timeout, retry=DEFAULT_RETRY + ) + iterator = page_iterator.HTTPIterator( + client=client, + api_request=api_request, + path=path, + items_key="prefixes", + item_to_value=_item_to_value, + page_token=page_token, + max_results=max_results, + extra_params=extra_params, + page_start=_blobs_page_start, + ) + iterator.bucket = self + iterator.prefixes = set() + return iterator + def list_notifications(self, client=None, timeout=_DEFAULT_TIMEOUT): """List Pub / Sub notifications for this bucket. diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index 27c163a29..a6e60241c 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -793,6 +793,81 @@ def list_buckets( extra_params=extra_params, ) + def list_prefixes( + self, + bucket_or_name, + prefix, + delimiter="/", + max_results=None, + page_token=None, + projection="noAcl", + fields=None, + timeout=_DEFAULT_TIMEOUT, + ): + """Return an iterator used to find directories(prefixes) in the bucket. + + :type bucket_or_name: (Union[:class:`~google.cloud.storage.bucket.Bucket`, str]): + :param bucket_or_name: The bucket resource to pass or name to create. + + :type prefix: str + :param prefix: Filter results to directories whose names begin + with this prefix. + + :type delimiter: str + :param delimiter: (Optional) Delimiter, used with ``prefix`` to + emulate hierarchy. Defaults to '/'. + + :type max_results: int + :param max_results: (Optional) The maximum number of directories to return. 
+ + :type page_token: str + :param page_token: + (Optional) If present, return the next batch of prefixes, using the + value, which must correspond to the ``nextPageToken`` value + returned in the previous response. Deprecated: use the ``pages`` + property of the returned iterator instead of manually passing the + token. + + :type projection: str + :param projection: + (Optional) Specifies the set of properties to return. If used, must + be 'full' or 'noAcl'. Defaults to 'noAcl'. + + :type fields: str + :param fields: + (Optional) Selector specifying which fields to include in a partial + response. Must be a list of fields. For example to get a partial + response with just the next page token and the language of each + bucket returned: 'items/id,nextPageToken'. + + :type projection: str + :param projection: (Optional) If used, must be 'full' or 'noAcl'. + Defaults to ``'noAcl'``. Specifies the set of properties to return. + + :type timeout: float or tuple + :param timeout: (Optional) The amount of time, in seconds, to wait + for the server response. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + :rtype: :class:`~google.api_core.page_iterator.Iterator` + :raises ValueError: if both ``project`` is ``None`` and the client's + project is also ``None``. + :returns: Iterator of all prefixes(unicode) in this bucket matching the arguments. 
+ """ + bucket = self._bucket_arg_to_bucket(bucket_or_name) + return bucket.list_prefixes( + prefix=prefix, + delimiter=delimiter, + max_results=max_results, + page_token=page_token, + projection=projection, + fields=fields, + client=self, + timeout=timeout, + ) + def create_hmac_key( self, service_account_email, diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 4898dc061..9ff0ba912 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -1243,6 +1243,11 @@ def test_third_level(self): self.assertIsNone(iterator.next_page_token) self.assertEqual(iterator.prefixes, set()) + @RetryErrors(unittest.TestCase.failureException) + def test_list_prefix(self): + prefixes = self.bucket.list_prefixes(prefix="parent/", delimiter="/") + self.assertEqual(list(prefixes), ["parent/child/"]) + @RetryErrors(unittest.TestCase.failureException) def test_include_trailing_delimiter(self): iterator = self.bucket.list_blobs( diff --git a/tests/unit/test_bucket.py b/tests/unit/test_bucket.py index 668db2d6d..31c8a3bfc 100644 --- a/tests/unit/test_bucket.py +++ b/tests/unit/test_bucket.py @@ -913,6 +913,62 @@ def test_list_blobs_w_all_arguments_and_user_project(self): self.assertEqual(kw["query_params"], EXPECTED) self.assertEqual(kw["timeout"], 42) + def test_list_prefixes(self): + NAME = "name" + connection = _Connection({"items": []}) + client = _Client(connection) + bucket = self._make_one(client=client, name=NAME) + iterator = bucket.list_prefixes("subfolder/") + prefix = list(iterator) + self.assertEqual(prefix, []) + (kw,) = connection._requested + self.assertEqual(kw["method"], "GET") + self.assertEqual(kw["path"], "/b/%s/o" % NAME) + self.assertEqual( + kw["query_params"], + {"projection": "noAcl", "delimiter": "/", "prefix": "subfolder/"}, + ) + self.assertEqual(kw["timeout"], self._get_default_timeout()) + + def test_list_prefixes_w_all_arguments(self): + NAME = "name" + MAX_RESULTS = 10 + PAGE_TOKEN = "ABCD" + PREFIX = "subfolder/" 
+ DELIMITER = "/" + PROJECTION = "noAcl" + FIELDS = "items/contentLanguage,nextPageToken" + USER_PROJECT = "user-project-123" + EXPECTED = { + "maxResults": 10, + "pageToken": PAGE_TOKEN, + "prefix": PREFIX, + "delimiter": DELIMITER, + "projection": PROJECTION, + "fields": FIELDS, + "userProject": USER_PROJECT, + } + connection = _Connection({"prefixes": ["subfolder/abc", "subfolder/def"]}) + client = _Client(connection) + bucket = self._make_one(name=NAME, user_project=USER_PROJECT) + iterator = bucket.list_prefixes( + max_results=MAX_RESULTS, + page_token=PAGE_TOKEN, + prefix=PREFIX, + delimiter=DELIMITER, + projection=PROJECTION, + fields=FIELDS, + client=client, + timeout=42, + ) + prefixes = list(iterator) + self.assertEqual(prefixes, ["subfolder/abc", "subfolder/def"]) + (kw,) = connection._requested + self.assertEqual(kw["method"], "GET") + self.assertEqual(kw["path"], "/b/%s/o" % NAME) + self.assertEqual(kw["query_params"], EXPECTED) + self.assertEqual(kw["timeout"], 42) + def test_list_notifications(self): from google.cloud.storage.notification import BucketNotification from google.cloud.storage.notification import _TOPIC_REF_FMT diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 4efc35e98..e9a76d83c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1303,6 +1303,91 @@ def dummy_response(): self.assertIsInstance(bucket, Bucket) self.assertEqual(bucket.name, blob_name) + def test_list_prefixes(self): + from google.cloud.storage.bucket import Bucket + + BUCKET_NAME = "bucket-name" + + credentials = _make_credentials() + client = self._make_one(project="PROJECT", credentials=credentials) + connection = _make_connection({"items": []}) + + with mock.patch( + "google.cloud.storage.client.Client._connection", + new_callable=mock.PropertyMock, + ) as client_mock: + client_mock.return_value = connection + + bucket_obj = Bucket(client, BUCKET_NAME) + iterator = client.list_prefixes(bucket_obj, "subfolder/") + prefix = 
list(iterator) + + self.assertEqual(prefix, []) + connection.api_request.assert_called_once_with( + method="GET", + path="/b/%s/o" % BUCKET_NAME, + query_params={ + "projection": "noAcl", + "delimiter": "/", + "prefix": "subfolder/", + }, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + ) + + def test_list_prefixes_w_all_arguments(self): + from google.cloud.storage.bucket import Bucket + + BUCKET_NAME = "name" + USER_PROJECT = "user-project-123" + MAX_RESULTS = 10 + PAGE_TOKEN = "ABCD" + PREFIX = "subfolder" + DELIMITER = "/" + PROJECTION = "full" + FIELDS = "items/contentLanguage,nextPageToken" + EXPECTED = { + "maxResults": 10, + "pageToken": PAGE_TOKEN, + "prefix": PREFIX, + "delimiter": DELIMITER, + "projection": PROJECTION, + "fields": FIELDS, + "userProject": USER_PROJECT, + } + + credentials = _make_credentials() + client = self._make_one(project=USER_PROJECT, credentials=credentials) + connection = _make_connection({"items": []}) + + with mock.patch( + "google.cloud.storage.client.Client._connection", + new_callable=mock.PropertyMock, + ) as client_mock: + client_mock.return_value = connection + + bucket = Bucket(client, BUCKET_NAME, user_project=USER_PROJECT) + iterator = client.list_prefixes( + bucket_or_name=bucket, + max_results=MAX_RESULTS, + page_token=PAGE_TOKEN, + prefix=PREFIX, + delimiter=DELIMITER, + projection=PROJECTION, + fields=FIELDS, + timeout=42, + ) + prefix = list(iterator) + + self.assertEqual(prefix, []) + connection.api_request.assert_called_once_with( + method="GET", + path="/b/%s/o" % BUCKET_NAME, + query_params=EXPECTED, + timeout=42, + retry=DEFAULT_RETRY, + ) + def _create_hmac_key_helper( self, explicit_project=None, user_project=None, timeout=None ): From 7b3bb3588f874dd6256a944f6682c1316a4f474c Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Thu, 22 Oct 2020 19:05:27 +0530 Subject: [PATCH 2/2] feat: improve docs --- google/cloud/storage/bucket.py | 13 +++++++++++-- google/cloud/storage/client.py | 14 
++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/bucket.py b/google/cloud/storage/bucket.py index aaca74bd9..965030a99 100644 --- a/google/cloud/storage/bucket.py +++ b/google/cloud/storage/bucket.py @@ -1296,7 +1296,7 @@ def list_prefixes( client=None, timeout=_DEFAULT_TIMEOUT, ): - """Return an iterator used to find directories(prefixes) in the bucket. + """Return an iterator used to find sub-directories(prefixes) of the blob in the bucket. :type prefix: str :param prefix: Filter results to directories whose names begin @@ -1347,7 +1347,16 @@ def list_prefixes( :rtype: :class:`~google.api_core.page_iterator.Iterator` :raises ValueError: if both ``project`` is ``None`` and the client's project is also ``None``. - :returns: Iterator of all prefixes(unicode) in this bucket matching the arguments. + :returns: Iterator of all prefixes of the blob in this bucket matching the arguments. + + Example: + List sub-directories of the blob in the bucket with user_project. + + >>> from google.cloud import storage + >>> client = storage.Client() + + >>> bucket = client.get_bucket("my-bucket-name") + >>> all_sub_dir = list(bucket.list_prefixes('abc/')) """ extra_params = { "projection": projection, diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index a6e60241c..91468ccb7 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -804,12 +804,13 @@ def list_prefixes( fields=None, timeout=_DEFAULT_TIMEOUT, ): - """Return an iterator used to find directories(prefixes) in the bucket. + """Return an iterator used to find sub-directories(prefixes) of the blob in the bucket. :type bucket_or_name: (Union[:class:`~google.cloud.storage.bucket.Bucket`, str]): :param bucket_or_name: The bucket resource to pass or name to create. - :type prefix: str + :type prefix: str + :param prefix: Filter results to directories whose names begin with this prefix. 
@@ -855,6 +856,15 @@ def list_prefixes( :raises ValueError: if both ``project`` is ``None`` and the client's project is also ``None``. :returns: Iterator of all prefixes(unicode) in this bucket matching the arguments. + + Example: + List sub-directories of the blob in the bucket with user_project. + + >>> from google.cloud import storage + >>> client = storage.Client() + + >>> bucket = storage.Bucket(client, "my-bucket-name", user_project='my-project') + >>> all_sub_dir = list(client.list_prefixes(bucket, 'abc/')) """ bucket = self._bucket_arg_to_bucket(bucket_or_name) return bucket.list_prefixes(