From 21e66a3557e87760bcd291cdcc5511cd3799457e Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 17 Sep 2025 09:59:21 +0000 Subject: [PATCH 01/44] add AsyncAbstractObjectStream this will be the parent class for AsyncReadObjectStream and AsyncWriteObjectStream --- .../asyncio/async_abstract_object_stream.py | 36 +++++++++++ .../test_async_abstract_object_stream.py | 64 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py create mode 100644 tests/unit/asyncio/test_async_abstract_object_stream.py diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py new file mode 100644 index 000000000..02e72ffae --- /dev/null +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -0,0 +1,36 @@ +import abc + + +class AsyncAbstractObjectStream(abc.ABC): + """ + Abstract class for both ReadObjectStream as well as WriteObjectStream. + + Attributes will include + 1. bucket_name + 2. object_name + 3. 
generation_number (if given) + + + """ + + def __init__(self, bucket_name, object_name, generation_number=None): + super().__init__() + self.bucket_name = bucket_name + self.object_name = object_name + self.generation_number = generation_number + + @abc.abstractmethod + async def open(self): + raise NotImplementedError("Subclasses should implement this method.") + + @abc.abstractmethod + async def close(self): + raise NotImplementedError("Subclasses should implement this method.") + + @abc.abstractmethod + async def send(self): + raise NotImplementedError("Subclasses should implement this method.") + + @abc.abstractmethod + async def recv(self): + raise NotImplementedError("Subclasses should implement this method.") diff --git a/tests/unit/asyncio/test_async_abstract_object_stream.py b/tests/unit/asyncio/test_async_abstract_object_stream.py new file mode 100644 index 000000000..9679d729e --- /dev/null +++ b/tests/unit/asyncio/test_async_abstract_object_stream.py @@ -0,0 +1,64 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( + AsyncAbstractObjectStream, +) + + +# A concrete implementation for testing purposes. 
+class _ConcreteStream(AsyncAbstractObjectStream): + async def open(self): + pass + + async def close(self): + pass + + async def send(self): + pass + + async def recv(self): + pass + + +def test_init(): + """Test the constructor of AsyncAbstractObjectStream.""" + bucket_name = "test-bucket" + object_name = "test-object" + generation = 12345 + + # Test with all parameters + stream = _ConcreteStream(bucket_name, object_name, generation_number=generation) + assert stream.bucket_name == bucket_name + assert stream.object_name == object_name + assert stream.generation_number == generation + + # Test with default generation_number + stream_no_gen = _ConcreteStream(bucket_name, object_name) + assert stream_no_gen.bucket_name == bucket_name + assert stream_no_gen.object_name == object_name + assert stream_no_gen.generation_number is None + + +def test_instantiation_fails_without_implementation(): + """Test that instantiating an incomplete subclass raises TypeError.""" + + class _IncompleteStream(AsyncAbstractObjectStream): + # Missing implementations for abstract methods like open(), close(), etc. 
+ pass + + with pytest.raises(TypeError, match="Can't instantiate abstract class"): + _IncompleteStream("bucket", "object") From 39503f49553daa611cfe0a3425fb487b502dca29 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 17 Sep 2025 10:22:06 +0000 Subject: [PATCH 02/44] keep _AsyncAbstractObjectStream private --- .../_experimental/asyncio/async_abstract_object_stream.py | 2 +- tests/unit/asyncio/test_async_abstract_object_stream.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 02e72ffae..86de7f715 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -1,7 +1,7 @@ import abc -class AsyncAbstractObjectStream(abc.ABC): +class _AsyncAbstractObjectStream(abc.ABC): """ Abstract class for both ReadObjectStream as well as WriteObjectStream. diff --git a/tests/unit/asyncio/test_async_abstract_object_stream.py b/tests/unit/asyncio/test_async_abstract_object_stream.py index 9679d729e..e0dc130ea 100644 --- a/tests/unit/asyncio/test_async_abstract_object_stream.py +++ b/tests/unit/asyncio/test_async_abstract_object_stream.py @@ -15,12 +15,12 @@ import pytest from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( - AsyncAbstractObjectStream, + _AsyncAbstractObjectStream, ) # A concrete implementation for testing purposes. 
-class _ConcreteStream(AsyncAbstractObjectStream): +class _ConcreteStream(_AsyncAbstractObjectStream): async def open(self): pass @@ -56,7 +56,7 @@ def test_init(): def test_instantiation_fails_without_implementation(): """Test that instantiating an incomplete subclass raises TypeError.""" - class _IncompleteStream(AsyncAbstractObjectStream): + class _IncompleteStream(_AsyncAbstractObjectStream): # Missing implementations for abstract methods like open(), close(), etc. pass From a161fd0449375866c71f7db168def60625c9d748 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 17 Sep 2025 10:46:07 +0000 Subject: [PATCH 03/44] Add _AsyncReadObjectStream and its stubs --- .../asyncio/async_read_object_stream.py | 80 ++++++ .../_experimental/asyncio/bidi_async.py | 230 ++++++++++++++++++ .../_experimental/asyncio/bidi_base.py | 80 ++++++ .../asyncio/test_async_read_object_stream.py | 77 ++++++ 4 files changed, 467 insertions(+) create mode 100644 google/cloud/storage/_experimental/asyncio/async_read_object_stream.py create mode 100644 google/cloud/storage/_experimental/asyncio/bidi_async.py create mode 100644 google/cloud/storage/_experimental/asyncio/bidi_base.py create mode 100644 tests/unit/asyncio/test_async_read_object_stream.py diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py new file mode 100644 index 000000000..078434073 --- /dev/null +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -0,0 +1,80 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +NOTE: +This is an _experimental module for upcoming support for Rapid Storage. +(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) + +APIs may not work as intended and are not stable yet. Feature is not +GA(Generally Available) yet, please contact your TAM(Technical Account Manager) +if you want to use these APIs. + +""" + +from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( + _AsyncAbstractObjectStream, +) + + +class _AsyncReadObjectStream(_AsyncAbstractObjectStream): + """Provides an asynchronous, streaming interface for reading from a GCS object. + + This class provides a unix socket-like interface to a GCS Object, with + methods like ``open``, ``close``, ``send``, and ``recv``. + + :type client: :class:`~google.cloud.storage.aio.Client` + :param client: The asynchronous client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the bucket containing the object. + + :type object_name: str + :param object_name: The name of the object to be read. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific revision of + this object. + + :type read_handle: object + :param read_handle: (Optional) An existing handle for reading the object. + If provided, opening the bidi-gRPC connection will be faster. 
+ """ + + def __init__( + self, + client, + bucket_name=None, + object_name=None, + generation_number=None, + read_handle=None, + ): + super().__init__( + bucket_name=bucket_name, + object_name=object_name, + generation_number=generation_number, + ) + self.client = client + self.read_handle = read_handle + + async def open(self) -> None: + pass + + async def close(self): + pass + + async def send(self, bidi_read_object_request): + pass + + async def recv(self): + pass diff --git a/google/cloud/storage/_experimental/asyncio/bidi_async.py b/google/cloud/storage/_experimental/asyncio/bidi_async.py new file mode 100644 index 000000000..8c5e58fd0 --- /dev/null +++ b/google/cloud/storage/_experimental/asyncio/bidi_async.py @@ -0,0 +1,230 @@ +# Copyright 2025, Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Asynchronous bi-directional streaming RPC helpers.""" + +import asyncio +import logging + +from google.api_core import exceptions +from google.cloud.storage._experimental.asyncio.bidi_base import BidiRpcBase + +_LOGGER = logging.getLogger(__name__) + + +class _AsyncRequestQueueGenerator: + """_AsyncRequestQueueGenerator is a helper class for sending asynchronous + requests to a gRPC stream from a Queue. + + This generator takes asynchronous requests off a given queue and yields them + to gRPC. 
+ + This helper is useful when you have an indeterminate, indefinite, or + otherwise open-ended set of requests to send through a request-streaming + (or bidirectional) RPC. + + The reason this is necessary + + is because it lets the user control when they want to + send request proto messages instead of sending all of them initially. + + This is achieved via an asynchronous queue (asyncio.Queue); + gRPC awaits until there's a message in the queue. + + Finally, it allows for retrying without swapping queues because if it does + pull an item off the queue when the RPC is inactive, it'll immediately put + it back and then exit. This is necessary because yielding the item in this + case will cause gRPC to discard it. In practice, this means that the order + of messages is not guaranteed. If such a thing is necessary it would be + easy to use a priority queue. + + Example:: + + requests = _AsyncRequestQueueGenerator(q) + call = await stub.StreamingRequest(requests) + requests.call = call + + async for response in call: + print(response) + await q.put(...) + + Args: + queue (asyncio.Queue): The request queue. + initial_request (Union[protobuf.Message, + Callable[[], protobuf.Message]]): The initial request to + yield. This is done independently of the request queue to allow for + easily restarting streams that require some initial configuration + request. + """ + + def __init__(self, queue: asyncio.Queue, initial_request=None): + self._queue = queue + self._initial_request = initial_request + self.call = None + + def _is_active(self): + """ + Returns true if the call is not set or not completed. 
+ """ + return self.call is None or not self.call.done() + + async def __aiter__(self): + if self._initial_request is not None: + if callable(self._initial_request): + yield self._initial_request() + else: + yield self._initial_request + + while True: + item = await self._queue.get() + + # The consumer explicitly sent "None", indicating that the request + # should end. + if item is None: + _LOGGER.debug("Cleanly exiting request generator.") + return + + if not self._is_active(): + # We have an item, but the call is closed. We should put the + # item back on the queue so that the next call can consume it. + await self._queue.put(item) + _LOGGER.debug( + "Inactive call, replacing item on queue and exiting " + "request generator." + ) + return + + yield item + + +class AsyncBidiRpc(BidiRpcBase): + """A helper for consuming a async bi-directional streaming RPC. + + This maps gRPC's built-in interface which uses a request iterator and a + response iterator into a socket-like :func:`send` and :func:`recv`. This + is a more useful pattern for long-running or asymmetric streams (streams + where there is not a direct correlation between the requests and + responses). + + Example:: + + initial_request = example_pb2.StreamingRpcRequest( + setting='example') + rpc = AsyncBidiRpc( + stub.StreamingRpc, + initial_request=initial_request, + metadata=[('name', 'value')] + ) + + await rpc.open() + + while rpc.is_active: + print(await rpc.recv()) + await rpc.send(example_pb2.StreamingRpcRequest( + data='example')) + + This does *not* retry the stream on errors. See :class:`AsyncResumableBidiRpc`. + + Args: + start_rpc (grpc.aio.StreamStreamMultiCallable): The gRPC method used to + start the RPC. + initial_request (Union[protobuf.Message, + Callable[[], protobuf.Message]]): The initial request to + yield. This is useful if an initial request is needed to start the + stream. + metadata (Sequence[Tuple(str, str)]): RPC metadata to include in + the request. 
+ """ + + def _create_queue(self): + """Create a queue for requests.""" + return asyncio.Queue() + + async def open(self): + """Opens the stream.""" + if self.is_active: + raise ValueError("Can not open an already open stream.") + + request_generator = _AsyncRequestQueueGenerator( + self._request_queue, initial_request=self._initial_request + ) + try: + call = await self._start_rpc(request_generator, metadata=self._rpc_metadata) + except exceptions.GoogleAPICallError as exc: + # The original `grpc.RpcError` (which is usually also a `grpc.Call`) is + # available from the ``response`` property on the mapped exception. + self._on_call_done(exc.response) + raise + + request_generator.call = call + + # TODO: api_core should expose the future interface for wrapped + # callables as well. + if hasattr(call, "_wrapped"): # pragma: NO COVER + call._wrapped.add_done_callback(self._on_call_done) + else: + call.add_done_callback(self._on_call_done) + + self._request_generator = request_generator + self.call = call + + async def close(self): + """Closes the stream.""" + if self.call is None: + return + + await self._request_queue.put(None) + self.call.cancel() + self._request_generator = None + self._initial_request = None + self._callbacks = [] + # Don't set self.call to None. Keep it around so that send/recv can + # raise the error. + + async def send(self, request): + """Queue a message to be sent on the stream. + + If the underlying RPC has been closed, this will raise. + + Args: + request (protobuf.Message): The request to send. + """ + if self.call is None: + raise ValueError("Can not send() on an RPC that has never been opened.") + + # Don't use self.is_active(), as ResumableBidiRpc will overload it + # to mean something semantically different. + if not self.call.done(): + await self._request_queue.put(request) + else: + # calling read should cause the call to raise. + await self.call.read() + + async def recv(self): + """Wait for a message to be returned from the stream. 
+ + If the underlying RPC has been closed, this will raise. + + Returns: + protobuf.Message: The received message. + """ + if self.call is None: + raise ValueError("Can not recv() on an RPC that has never been opened.") + + return await self.call.read() + + @property + def is_active(self): + """bool: True if this stream is currently open and active.""" + return self.call is not None and not self.call.done() diff --git a/google/cloud/storage/_experimental/asyncio/bidi_base.py b/google/cloud/storage/_experimental/asyncio/bidi_base.py new file mode 100644 index 000000000..195e35750 --- /dev/null +++ b/google/cloud/storage/_experimental/asyncio/bidi_base.py @@ -0,0 +1,80 @@ +# Copyright 2025, Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may obtain a copy of the License at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base class for bi-directional streaming RPC helpers.""" + + +class BidiRpcBase: + """A base class for consuming a bi-directional streaming RPC. + + This maps gRPC's built-in interface which uses a request iterator and a + response iterator into a socket-like :func:`send` and :func:`recv`. This + is a more useful pattern for long-running or asymmetric streams (streams + where there is not a direct correlation between the requests and + responses). + + This does *not* retry the stream on errors. + + Args: + start_rpc (Union[grpc.StreamStreamMultiCallable, + grpc.aio.StreamStreamMultiCallable]): The gRPC method used + to start the RPC. + initial_request (Union[protobuf.Message, + Callable[[], protobuf.Message]]): The initial request to + yield. 
This is useful if an initial request is needed to start the + stream. + metadata (Sequence[Tuple(str, str)]): RPC metadata to include in + the request. + """ + + def __init__(self, start_rpc, initial_request=None, metadata=None): + self._start_rpc = start_rpc + self._initial_request = initial_request + self._rpc_metadata = metadata + self._request_queue = self._create_queue() + self._request_generator = None + self._callbacks = [] + self.call = None + + def _create_queue(self): + """Create a queue for requests.""" + raise NotImplementedError("`_create_queue` is not implemented.") + + def add_done_callback(self, callback): + """Adds a callback that will be called when the RPC terminates. + + This occurs when the RPC errors or is successfully terminated. + + Args: + callback (Callable[[grpc.Future], None]): The callback to execute. + It will be provided with the same gRPC future as the underlying + stream which will also be a :class:`grpc.aio.Call`. + """ + self._callbacks.append(callback) + + def _on_call_done(self, future): + # This occurs when the RPC errors or is successfully terminated. + # Note that grpc's "future" here can also be a grpc.RpcError. + # See note in https://github.com/grpc/grpc/issues/10885#issuecomment-302651331 + # that `grpc.RpcError` is also `grpc.aio.Call`. 
+ for callback in self._callbacks: + callback(future) + + @property + def is_active(self): + """bool: True if this stream is currently open and active.""" + raise NotImplementedError("`is_active` is not implemented.") + + @property + def pending_requests(self): + """int: Returns an estimate of the number of queued requests.""" + return self._request_queue.qsize() diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py new file mode 100644 index 000000000..43b42f8d8 --- /dev/null +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -0,0 +1,77 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from unittest import mock + +from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( + _AsyncAbstractObjectStream, +) +from google.cloud.storage._experimental.asyncio.async_read_object_stream import ( + _AsyncReadObjectStream, +) + + +def test_inheritance(): + """Test that _AsyncReadObjectStream inherits from _AsyncAbstractObjectStream.""" + assert issubclass(_AsyncReadObjectStream, _AsyncAbstractObjectStream) + + +def test_init(): + """Test the constructor of _AsyncReadObjectStream.""" + mock_client = mock.Mock(name="client") + bucket_name = "test-bucket" + object_name = "test-object" + generation = 12345 + read_handle = "some-handle" + + # Test with all parameters + stream = _AsyncReadObjectStream( + mock_client, + bucket_name=bucket_name, + object_name=object_name, + generation_number=generation, + read_handle=read_handle, + ) + + assert stream.client is mock_client + assert stream.bucket_name == bucket_name + assert stream.object_name == object_name + assert stream.generation_number == generation + assert stream.read_handle == read_handle + + # Test with default parameters + stream_defaults = _AsyncReadObjectStream(mock_client) + assert stream_defaults.client is mock_client + assert stream_defaults.bucket_name is None + assert stream_defaults.object_name is None + assert stream_defaults.generation_number is None + assert stream_defaults.read_handle is None + + +@pytest.mark.asyncio +async def test_async_methods_are_awaitable(): + """Test that the async methods exist and are awaitable.""" + mock_client = mock.Mock(name="client") + stream = _AsyncReadObjectStream(mock_client) + + # These methods are currently empty, but we can test they are awaitable + # and don't raise exceptions. + try: + await stream.open() + await stream.close() + await stream.send(mock.Mock()) + await stream.recv() + except Exception as e: + pytest.fail(f"Async methods should be awaitable without errors. 
Raised: {e}") From dd862a2797a0f68c43c29970ae0c9a28230e995f Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 17 Sep 2025 12:36:59 +0000 Subject: [PATCH 04/44] complete __init__ for read_obj_str --- .../asyncio/async_read_object_stream.py | 19 +++++ .../asyncio/test_async_read_object_stream.py | 75 ++++++++++++++++++- 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 078434073..8a6251303 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -25,6 +25,8 @@ from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( _AsyncAbstractObjectStream, ) +from google.cloud import _storage_v2 +from google.cloud.storage._experimental.asyncio.bidi_async import AsyncBidiRpc class _AsyncReadObjectStream(_AsyncAbstractObjectStream): @@ -67,6 +69,23 @@ def __init__( self.client = client self.read_handle = read_handle + self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" + + # can this interface be changed tmrw ? 
(not accounting for that) + # self.rpc = self.client.get_bidi_rpc_str_str_mc() # expose this func in GAPIC + self.rpc = self.client._client._transport._wrapped_methods[ + self.client._client._transport.bidi_read_object + ] + first_bidi_read_req = _storage_v2.BidiReadObjectRequest( + read_object_spec=_storage_v2.BidiReadObjectSpec( + bucket=self._full_bucket_name, object=object_name + ), + ) + self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) + self.socket_like_rpc = AsyncBidiRpc( + self.rpc, initial_request=first_bidi_read_req, metadata=self.metadata + ) + async def open(self) -> None: pass diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index 43b42f8d8..3b7c5653e 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -28,9 +28,24 @@ def test_inheritance(): assert issubclass(_AsyncReadObjectStream, _AsyncAbstractObjectStream) -def test_init(): +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" +) +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_read_object_stream._storage_v2" +) +def test_init(mock_storage_v2, mock_async_bidi_rpc): """Test the constructor of _AsyncReadObjectStream.""" + # Setup mock client + mock_rpc = mock.Mock(name="rpc") + mock_transport = mock.Mock(name="transport") + mock_transport.bidi_read_object = "bidi_read_object_key" + mock_transport._wrapped_methods = {"bidi_read_object_key": mock_rpc} + mock_gapic_client = mock.Mock(name="gapic_client") + mock_gapic_client._transport = mock_transport mock_client = mock.Mock(name="client") + mock_client._client = mock_gapic_client + bucket_name = "test-bucket" object_name = "test-object" generation = 12345 @@ -51,6 +66,31 @@ def test_init(): assert stream.generation_number == generation assert stream.read_handle == read_handle + full_bucket_name = 
f"projects/_/buckets/{bucket_name}" + assert stream._full_bucket_name == full_bucket_name + assert stream.rpc is mock_rpc + + mock_storage_v2.BidiReadObjectSpec.assert_called_once_with( + bucket=full_bucket_name, object=object_name + ) + mock_read_object_spec = mock_storage_v2.BidiReadObjectSpec.return_value + mock_storage_v2.BidiReadObjectRequest.assert_called_once_with( + read_object_spec=mock_read_object_spec + ) + mock_initial_request = mock_storage_v2.BidiReadObjectRequest.return_value + + expected_metadata = (("x-goog-request-params", f"bucket={full_bucket_name}"),) + assert stream.metadata == expected_metadata + + mock_async_bidi_rpc.assert_called_once_with( + mock_rpc, initial_request=mock_initial_request, metadata=expected_metadata + ) + assert stream.socket_like_rpc is mock_async_bidi_rpc.return_value + + # Reset mocks for the next test case + mock_storage_v2.reset_mock() + mock_async_bidi_rpc.reset_mock() + # Test with default parameters stream_defaults = _AsyncReadObjectStream(mock_client) assert stream_defaults.client is mock_client @@ -59,11 +99,44 @@ def test_init(): assert stream_defaults.generation_number is None assert stream_defaults.read_handle is None + # The following asserts the behavior with None values. 
+ full_bucket_name_none = "projects/_/buckets/None" + assert stream_defaults._full_bucket_name == full_bucket_name_none + + mock_storage_v2.BidiReadObjectSpec.assert_called_once_with( + bucket=full_bucket_name_none, object=None + ) + mock_read_object_spec_none = mock_storage_v2.BidiReadObjectSpec.return_value + mock_storage_v2.BidiReadObjectRequest.assert_called_once_with( + read_object_spec=mock_read_object_spec_none + ) + mock_initial_request_none = mock_storage_v2.BidiReadObjectRequest.return_value + + expected_metadata_none = ( + ("x-goog-request-params", f"bucket={full_bucket_name_none}"), + ) + assert stream_defaults.metadata == expected_metadata_none + + mock_async_bidi_rpc.assert_called_once_with( + mock_rpc, + initial_request=mock_initial_request_none, + metadata=expected_metadata_none, + ) + @pytest.mark.asyncio async def test_async_methods_are_awaitable(): """Test that the async methods exist and are awaitable.""" + # Setup mock client to allow instantiation of the stream object. 
+ mock_rpc = mock.Mock(name="rpc") + mock_transport = mock.Mock(name="transport") + mock_transport.bidi_read_object = "bidi_read_object_key" + mock_transport._wrapped_methods = {"bidi_read_object_key": mock_rpc} + mock_gapic_client = mock.Mock(name="gapic_client") + mock_gapic_client._transport = mock_transport mock_client = mock.Mock(name="client") + mock_client._client = mock_gapic_client + stream = _AsyncReadObjectStream(mock_client) # These methods are currently empty, but we can test they are awaitable From aaabfd7309a8008dde4ac7bd5c12e5da453c30f7 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 17 Sep 2025 17:04:46 +0000 Subject: [PATCH 05/44] remove unuseful comments --- .../storage/_experimental/asyncio/async_read_object_stream.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 8a6251303..93b2bc293 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -71,8 +71,6 @@ def __init__( self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" - # can this interface be changed tmrw ? 
(not accounting for that) - # self.rpc = self.client.get_bidi_rpc_str_str_mc() # expose this func in GAPIC self.rpc = self.client._client._transport._wrapped_methods[ self.client._client._transport.bidi_read_object ] From 23eea966178b88baf684c6aad6808372b255ed25 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 17 Sep 2025 17:27:08 +0000 Subject: [PATCH 06/44] add methods open close send recv --- .../asyncio/async_read_object_stream.py | 72 ++++++++--- .../asyncio/test_async_read_object_stream.py | 117 +++++++++++++----- 2 files changed, 142 insertions(+), 47 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 93b2bc293..4f9383ced 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -22,12 +22,18 @@ """ +from typing import Optional + from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( _AsyncAbstractObjectStream, ) from google.cloud import _storage_v2 from google.cloud.storage._experimental.asyncio.bidi_async import AsyncBidiRpc +from google.cloud.storage._experimental.asyncio.async_grpc_client import ( + AsyncGrpcClient, +) + class _AsyncReadObjectStream(_AsyncAbstractObjectStream): """Provides an asynchronous, streaming interface for reading from a GCS object. 
@@ -55,12 +61,12 @@ class _AsyncReadObjectStream(_AsyncAbstractObjectStream): def __init__( self, - client, - bucket_name=None, - object_name=None, - generation_number=None, - read_handle=None, - ): + client: AsyncGrpcClient, + bucket_name: Optional[str] = None, + object_name: Optional[str] = None, + generation_number: Optional[int] = None, + read_handle: Optional[str] = None, + ) -> None: super().__init__( bucket_name=bucket_name, object_name=object_name, @@ -85,13 +91,47 @@ def __init__( ) async def open(self) -> None: - pass - - async def close(self): - pass - - async def send(self, bidi_read_object_request): - pass - - async def recv(self): - pass + """Opens the bidi-gRPC connection to read from the object. + + This method sends an initial request to start the stream and receives + the first response containing metadata and a read handle. + """ + await self.socket_like_rpc.open() # this is actually 1 send + response = await self.socket_like_rpc.recv() + if self.generation_number is None: + self.generation_number = response.metadata.generation + + self.read_handle = response.read_handle + + return + + async def close(self) -> None: + """Closes the bidi-gRPC connection.""" + await self.socket_like_rpc.close() + return + + async def send( + self, bidi_read_object_request: _storage_v2.BidiReadObjectRequest + ) -> None: + """Sends a request message on the stream. + + Args: + bidi_read_object_request (:class:`~google.cloud._storage_v2.types.BidiReadObjectRequest`): + The request message to send. This is typically used to specify + the read offset and limit. + """ + await self.socket_like_rpc.send(bidi_read_object_request) + return + + async def recv(self) -> _storage_v2.BidiReadObjectResponse: + """Receives a response from the stream. + + This method waits for the next message from the server, which could + contain object data or metadata. + + Returns: + :class:`~google.cloud._storage_v2.types.BidiReadObjectResponse`: + The response message from the server. 
+ """ + bidi_read_object_response = await self.socket_like_rpc.recv() + return bidi_read_object_response diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index 3b7c5653e..cbd408cdf 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -23,6 +23,20 @@ ) +@pytest.fixture +def mock_client(): + """A mock client for testing.""" + mock_rpc = mock.Mock(name="rpc") + mock_transport = mock.Mock(name="transport") + mock_transport.bidi_read_object = "bidi_read_object_key" + mock_transport._wrapped_methods = {"bidi_read_object_key": mock_rpc} + mock_gapic_client = mock.Mock(name="gapic_client") + mock_gapic_client._transport = mock_transport + mock_client = mock.Mock(name="client") + mock_client._client = mock_gapic_client + return mock_client + + def test_inheritance(): """Test that _AsyncReadObjectStream inherits from _AsyncAbstractObjectStream.""" assert issubclass(_AsyncReadObjectStream, _AsyncAbstractObjectStream) @@ -34,18 +48,9 @@ def test_inheritance(): @mock.patch( "google.cloud.storage._experimental.asyncio.async_read_object_stream._storage_v2" ) -def test_init(mock_storage_v2, mock_async_bidi_rpc): +def test_init(mock_storage_v2, mock_async_bidi_rpc, mock_client): """Test the constructor of _AsyncReadObjectStream.""" - # Setup mock client - mock_rpc = mock.Mock(name="rpc") - mock_transport = mock.Mock(name="transport") - mock_transport.bidi_read_object = "bidi_read_object_key" - mock_transport._wrapped_methods = {"bidi_read_object_key": mock_rpc} - mock_gapic_client = mock.Mock(name="gapic_client") - mock_gapic_client._transport = mock_transport - mock_client = mock.Mock(name="client") - mock_client._client = mock_gapic_client - + mock_rpc = mock_client._client._transport._wrapped_methods["bidi_read_object_key"] bucket_name = "test-bucket" object_name = "test-object" generation = 12345 @@ -125,26 +130,76 @@ def 
test_init(mock_storage_v2, mock_async_bidi_rpc): @pytest.mark.asyncio -async def test_async_methods_are_awaitable(): - """Test that the async methods exist and are awaitable.""" - # Setup mock client to allow instantiation of the stream object. - mock_rpc = mock.Mock(name="rpc") - mock_transport = mock.Mock(name="transport") - mock_transport.bidi_read_object = "bidi_read_object_key" - mock_transport._wrapped_methods = {"bidi_read_object_key": mock_rpc} - mock_gapic_client = mock.Mock(name="gapic_client") - mock_gapic_client._transport = mock_transport - mock_client = mock.Mock(name="client") - mock_client._client = mock_gapic_client +async def test_open(mock_client): + """Test open() when generation_number is initially None.""" + stream = _AsyncReadObjectStream(mock_client, bucket_name="b", object_name="o") + stream.socket_like_rpc = mock.AsyncMock() + stream.generation_number = None # Explicitly set for clarity + + mock_response = mock.Mock() + mock_response.metadata.generation = 98765 + mock_response.read_handle = "test-read-handle" + stream.socket_like_rpc.recv.return_value = mock_response + + await stream.open() + + stream.socket_like_rpc.open.assert_awaited_once() + stream.socket_like_rpc.recv.assert_awaited_once() + assert stream.generation_number == 98765 + assert stream.read_handle == "test-read-handle" + + +@pytest.mark.asyncio +async def test_open_with_generation_set(mock_client): + """Test open() when generation_number is already set.""" + initial_generation = 12345 + stream = _AsyncReadObjectStream( + mock_client, + bucket_name="b", + object_name="o", + generation_number=initial_generation, + ) + stream.socket_like_rpc = mock.AsyncMock() + mock_response = mock.Mock() + mock_response.metadata.generation = 98765 + mock_response.read_handle = "test-read-handle" + stream.socket_like_rpc.recv.return_value = mock_response + + await stream.open() + + stream.socket_like_rpc.open.assert_awaited_once() + stream.socket_like_rpc.recv.assert_awaited_once() + assert 
stream.generation_number == initial_generation # Should not change + assert stream.read_handle == "test-read-handle" + + +@pytest.mark.asyncio +async def test_close(mock_client): + """Test close().""" + stream = _AsyncReadObjectStream(mock_client) + stream.socket_like_rpc = mock.AsyncMock() + await stream.close() + stream.socket_like_rpc.close.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_send(mock_client): + """Test send().""" stream = _AsyncReadObjectStream(mock_client) + stream.socket_like_rpc = mock.AsyncMock() + mock_request = mock.Mock() + await stream.send(mock_request) + stream.socket_like_rpc.send.assert_awaited_once_with(mock_request) - # These methods are currently empty, but we can test they are awaitable - # and don't raise exceptions. - try: - await stream.open() - await stream.close() - await stream.send(mock.Mock()) - await stream.recv() - except Exception as e: - pytest.fail(f"Async methods should be awaitable without errors. Raised: {e}") + +@pytest.mark.asyncio +async def test_recv(mock_client): + """Test recv().""" + stream = _AsyncReadObjectStream(mock_client) + stream.socket_like_rpc = mock.AsyncMock() + mock_response = mock.Mock() + stream.socket_like_rpc.recv.return_value = mock_response + response = await stream.recv() + stream.socket_like_rpc.recv.assert_awaited_once() + assert response is mock_response From 71a7a796d7364cf90f6836240a1eada54dd5311d Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 04:14:01 +0000 Subject: [PATCH 07/44] change read_handle type from 'str' to 'bytes' --- .../_experimental/asyncio/async_read_object_stream.py | 4 ++-- tests/unit/asyncio/test_async_read_object_stream.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 4f9383ced..497c040b5 100644 --- 
a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -54,7 +54,7 @@ class _AsyncReadObjectStream(_AsyncAbstractObjectStream): :param generation_number: (Optional) If present, selects a specific revision of this object. - :type read_handle: object + :type read_handle: bytes :param read_handle: (Optional) An existing handle for reading the object. If provided, opening the bidi-gRPC connection will be faster. """ @@ -65,7 +65,7 @@ def __init__( bucket_name: Optional[str] = None, object_name: Optional[str] = None, generation_number: Optional[int] = None, - read_handle: Optional[str] = None, + read_handle: Optional[bytes] = None, ) -> None: super().__init__( bucket_name=bucket_name, diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index cbd408cdf..0792bb731 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -54,7 +54,7 @@ def test_init(mock_storage_v2, mock_async_bidi_rpc, mock_client): bucket_name = "test-bucket" object_name = "test-object" generation = 12345 - read_handle = "some-handle" + read_handle = b"some-handle" # Test with all parameters stream = _AsyncReadObjectStream( @@ -138,7 +138,7 @@ async def test_open(mock_client): mock_response = mock.Mock() mock_response.metadata.generation = 98765 - mock_response.read_handle = "test-read-handle" + mock_response.read_handle = b"test-read-handle" stream.socket_like_rpc.recv.return_value = mock_response await stream.open() @@ -146,7 +146,7 @@ async def test_open(mock_client): stream.socket_like_rpc.open.assert_awaited_once() stream.socket_like_rpc.recv.assert_awaited_once() assert stream.generation_number == 98765 - assert stream.read_handle == "test-read-handle" + assert stream.read_handle == b"test-read-handle" @pytest.mark.asyncio @@ -163,7 +163,7 @@ async def 
test_open_with_generation_set(mock_client): mock_response = mock.Mock() mock_response.metadata.generation = 98765 - mock_response.read_handle = "test-read-handle" + mock_response.read_handle = b"test-read-handle" stream.socket_like_rpc.recv.return_value = mock_response await stream.open() @@ -171,7 +171,7 @@ async def test_open_with_generation_set(mock_client): stream.socket_like_rpc.open.assert_awaited_once() stream.socket_like_rpc.recv.assert_awaited_once() assert stream.generation_number == initial_generation # Should not change - assert stream.read_handle == "test-read-handle" + assert stream.read_handle == b"test-read-handle" @pytest.mark.asyncio From 827aec01b05eb563308616ef87e535475fa538f0 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 04:36:11 +0000 Subject: [PATCH 08/44] feat: add async_multi_range_downloader --- .../asyncio/async_multi_range_downloader.py | 127 +++++++++++++ .../test_async_multi_range_downloader.py | 172 ++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py create mode 100644 tests/unit/asyncio/test_async_multi_range_downloader.py diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py new file mode 100644 index 000000000..70ea29977 --- /dev/null +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -0,0 +1,127 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from typing import Any, List, Optional, Tuple + +from google.cloud.storage._experimental.asyncio.async_read_object_stream import ( + _AsyncReadObjectStream, +) +from google.cloud.storage._experimental.asyncio.async_grpc_client import ( + AsyncGrpcClient, +) + + +class MultiRangeDownloader: + """Provides an interface for downloading multiple ranges of a GCS object concurrently.""" + + @classmethod + async def create_mrd( + cls, + client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, + ) -> MultiRangeDownloader: + """Asynchronously creates and initializes a MultiRangeDownloader. + + This factory method creates an instance of MultiRangeDownloader and + opens the underlying bidi-gRPC connection. + + Args: + client (AsyncGrpcClient): The asynchronous client to use for making API requests. + bucket_name (str): The name of the bucket containing the object. + object_name (str): The name of the object to be read. + generation_number (int, optional): If present, selects a specific + revision of this object. + + Returns: + MultiRangeDownloader: An initialized MultiRangeDownloader instance. + """ + mrd = cls(client, bucket_name, object_name, generation_number) + await mrd.open() + return mrd + + @classmethod + def create_mrd_from_read_handle( + cls, client: AsyncGrpcClient, read_handle: bytes + ) -> MultiRangeDownloader: + """Creates a MultiRangeDownloader from an existing read handle. + + Args: + client (AsyncGrpcClient): The asynchronous client to use for making API requests. + read_handle (bytes): An existing handle for reading the object. + + Raises: + NotImplementedError: This method is not yet implemented. 
+ """ + raise NotImplementedError("TODO") + + def __init__( + self, + client: AsyncGrpcClient, + bucket_name: Optional[str] = None, + object_name: Optional[str] = None, + generation_number: Optional[int] = None, + read_handle: Optional[bytes] = None, + ) -> None: + """Initializes a MultiRangeDownloader. + + Args: + client (AsyncGrpcClient): The asynchronous client to use for making API requests. + bucket_name (str, optional): The name of the bucket. Defaults to None. + object_name (str, optional): The name of the object. Defaults to None. + generation_number (int, optional): The generation number of the object. + Defaults to None. + read_handle (bytes, optional): An existing read handle. Defaults to None. + """ + self.client = client + self.bucket_name = bucket_name + self.object_name = object_name + self.generation_number = generation_number + self.read_handle = read_handle + self.read_obj_str: _AsyncReadObjectStream + + async def open(self) -> None: + """Opens the bidi-gRPC connection to read from the object. + + This method initializes and opens an `_AsyncReadObjectStream` to + establish a connection for downloading. It also retrieves the + generation number and read handle if they are not already set. + """ + self.read_obj_str = _AsyncReadObjectStream( + client=self.client, + bucket_name=self.bucket_name, + object_name=self.object_name, + generation_number=self.generation_number, + read_handle=self.read_handle, + ) + await self.read_obj_str.open() + if self.generation_number is None: + self.generation_number = self.read_obj_str.generation_number + self.read_handle = self.read_obj_str.read_handle + return + + async def download_ranges(self, read_ranges: List[Tuple[int, int]]) -> Any: + """Downloads multiple byte ranges from the object. + + Args: + read_ranges (List[Tuple[int, int]]): A list of tuples, where each tuple represents + a byte range (offset, length) to download. + + Raises: + NotImplementedError: This method is not yet implemented. 
+ """ + raise NotImplementedError("TODO") diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py new file mode 100644 index 000000000..9b75b91a9 --- /dev/null +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -0,0 +1,172 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from unittest import mock + +from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + MultiRangeDownloader, +) + + +@pytest.fixture +def mock_async_grpc_client(): + """A mock for the AsyncGrpcClient.""" + return mock.Mock(name="AsyncGrpcClient") + + +@pytest.fixture +def mock_async_read_object_stream(): + """A mock for the _AsyncReadObjectStream class.""" + with mock.patch( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + ) as mock_stream_cls: + mock_instance = mock.AsyncMock() + mock_instance.generation_number = 12345 + mock_instance.read_handle = b"test-read-handle" + mock_stream_cls.return_value = mock_instance + yield mock_stream_cls + + +def test_init(mock_async_grpc_client): + """Test the constructor of MultiRangeDownloader.""" + client = mock_async_grpc_client + bucket_name = "test-bucket" + object_name = "test-object" + generation = 123 + read_handle = b"test-handle" + + mrd = MultiRangeDownloader( + client, + bucket_name=bucket_name, + object_name=object_name, + 
generation_number=generation, + read_handle=read_handle, + ) + + assert mrd.client is client + assert mrd.bucket_name == bucket_name + assert mrd.object_name == object_name + assert mrd.generation_number == generation + assert mrd.read_handle == read_handle + assert not hasattr(mrd, "read_obj_str") + + +@pytest.mark.asyncio +async def test_open(mock_async_grpc_client, mock_async_read_object_stream): + """Test the open() method.""" + client = mock_async_grpc_client + bucket_name = "test-bucket" + object_name = "test-object" + + mrd = MultiRangeDownloader( + client, + bucket_name=bucket_name, + object_name=object_name, + ) + + await mrd.open() + + mock_async_read_object_stream.assert_called_once_with( + client=client, + bucket_name=bucket_name, + object_name=object_name, + generation_number=None, + read_handle=None, + ) + + mock_stream_instance = mock_async_read_object_stream.return_value + mock_stream_instance.open.assert_awaited_once() + + assert mrd.read_obj_str is mock_stream_instance + assert mrd.generation_number == mock_stream_instance.generation_number + assert mrd.read_handle == mock_stream_instance.read_handle + + +@pytest.mark.asyncio +async def test_open_with_generation( + mock_async_grpc_client, mock_async_read_object_stream +): + """Test open() when generation_number is already set.""" + client = mock_async_grpc_client + bucket_name = "test-bucket" + object_name = "test-object" + initial_generation = 456 + + mrd = MultiRangeDownloader( + client, + bucket_name=bucket_name, + object_name=object_name, + generation_number=initial_generation, + ) + + # The mock stream will have a different generation number to ensure we don't overwrite it. 
+ mock_async_read_object_stream.return_value.generation_number = 789 + + await mrd.open() + + mock_async_read_object_stream.assert_called_once_with( + client=client, + bucket_name=bucket_name, + object_name=object_name, + generation_number=initial_generation, + read_handle=None, + ) + + mock_stream_instance = mock_async_read_object_stream.return_value + mock_stream_instance.open.assert_awaited_once() + + assert mrd.read_obj_str is mock_stream_instance + assert mrd.generation_number == initial_generation # Should not be overwritten + assert mrd.read_handle == mock_stream_instance.read_handle + + +@pytest.mark.asyncio +async def test_create_mrd(mock_async_grpc_client): + """Test the create_mrd() factory method.""" + with mock.patch( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.MultiRangeDownloader.open", + new_callable=mock.AsyncMock, + ) as mock_open: + client = mock_async_grpc_client + bucket_name = "test-bucket" + object_name = "test-object" + generation = 123 + + mrd = await MultiRangeDownloader.create_mrd( + client, bucket_name, object_name, generation_number=generation + ) + + assert isinstance(mrd, MultiRangeDownloader) + assert mrd.client is client + assert mrd.bucket_name == bucket_name + assert mrd.object_name == object_name + assert mrd.generation_number == generation + mock_open.assert_awaited_once() + + +def test_create_mrd_from_read_handle(mock_async_grpc_client): + """Test that create_mrd_from_read_handle() raises NotImplementedError.""" + with pytest.raises(NotImplementedError): + MultiRangeDownloader.create_mrd_from_read_handle( + mock_async_grpc_client, b"handle" + ) + + +@pytest.mark.asyncio +async def test_download_ranges(mock_async_grpc_client): + """Test that download_ranges() raises NotImplementedError.""" + mrd = MultiRangeDownloader(mock_async_grpc_client) + with pytest.raises(NotImplementedError): + await mrd.download_ranges([(0, 100)]) From 5be74697652a79dff9162aa69b23d1b21b8fab0b Mon Sep 17 00:00:00 2001 From: 
Chandra Sirimala Date: Thu, 18 Sep 2025 04:41:11 +0000 Subject: [PATCH 09/44] fix: read_ranges should have buffer as well --- .../asyncio/async_multi_range_downloader.py | 12 ++++++++---- .../asyncio/test_async_multi_range_downloader.py | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 70ea29977..5d0f638f1 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -23,6 +23,8 @@ AsyncGrpcClient, ) +from io import BytesIO + class MultiRangeDownloader: """Provides an interface for downloading multiple ranges of a GCS object concurrently.""" @@ -114,12 +116,14 @@ async def open(self) -> None: self.read_handle = self.read_obj_str.read_handle return - async def download_ranges(self, read_ranges: List[Tuple[int, int]]) -> Any: - """Downloads multiple byte ranges from the object. + async def download_ranges(self, read_ranges: List[Tuple[int, int, BytesIO]]) -> Any: + """Downloads multiple byte ranges from the object into the buffers + provided by user. Args: - read_ranges (List[Tuple[int, int]]): A list of tuples, where each tuple represents - a byte range (offset, length) to download. + read_ranges (List[Tuple[int, int]]): A list of tuples, where each + tuple represents a byte range (start_byte, end_byte, buffer) to download. + Raises: NotImplementedError: This method is not yet implemented. 
diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 9b75b91a9..be57dde04 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -18,6 +18,7 @@ from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( MultiRangeDownloader, ) +from io import BytesIO @pytest.fixture @@ -169,4 +170,4 @@ async def test_download_ranges(mock_async_grpc_client): """Test that download_ranges() raises NotImplementedError.""" mrd = MultiRangeDownloader(mock_async_grpc_client) with pytest.raises(NotImplementedError): - await mrd.download_ranges([(0, 100)]) + await mrd.download_ranges([(0, 100, BytesIO())]) From b3ad5515968f57b9c2ddb2a380197626ef8bfe54 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 05:34:49 +0000 Subject: [PATCH 10/44] rename MultiRangeDownloader to AsyncMultiRangeDownloader --- .../asyncio/async_multi_range_downloader.py | 6 +++--- .../test_async_multi_range_downloader.py | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 5d0f638f1..9b41130f9 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -26,7 +26,7 @@ from io import BytesIO -class MultiRangeDownloader: +class AsyncMultiRangeDownloader: """Provides an interface for downloading multiple ranges of a GCS object concurrently.""" @classmethod @@ -36,7 +36,7 @@ async def create_mrd( bucket_name: str, object_name: str, generation_number: Optional[int] = None, - ) -> MultiRangeDownloader: + ) -> AsyncMultiRangeDownloader: """Asynchronously creates and initializes a MultiRangeDownloader. 
This factory method creates an instance of MultiRangeDownloader and @@ -59,7 +59,7 @@ async def create_mrd( @classmethod def create_mrd_from_read_handle( cls, client: AsyncGrpcClient, read_handle: bytes - ) -> MultiRangeDownloader: + ) -> AsyncMultiRangeDownloader: """Creates a MultiRangeDownloader from an existing read handle. Args: diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index be57dde04..d479e3a17 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -16,7 +16,7 @@ from unittest import mock from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( - MultiRangeDownloader, + AsyncMultiRangeDownloader, ) from io import BytesIO @@ -48,7 +48,7 @@ def test_init(mock_async_grpc_client): generation = 123 read_handle = b"test-handle" - mrd = MultiRangeDownloader( + mrd = AsyncMultiRangeDownloader( client, bucket_name=bucket_name, object_name=object_name, @@ -71,7 +71,7 @@ async def test_open(mock_async_grpc_client, mock_async_read_object_stream): bucket_name = "test-bucket" object_name = "test-object" - mrd = MultiRangeDownloader( + mrd = AsyncMultiRangeDownloader( client, bucket_name=bucket_name, object_name=object_name, @@ -105,7 +105,7 @@ async def test_open_with_generation( object_name = "test-object" initial_generation = 456 - mrd = MultiRangeDownloader( + mrd = AsyncMultiRangeDownloader( client, bucket_name=bucket_name, object_name=object_name, @@ -137,7 +137,7 @@ async def test_open_with_generation( async def test_create_mrd(mock_async_grpc_client): """Test the create_mrd() factory method.""" with mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.MultiRangeDownloader.open", + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.open", new_callable=mock.AsyncMock, ) as mock_open: client = 
mock_async_grpc_client @@ -145,11 +145,11 @@ async def test_create_mrd(mock_async_grpc_client): object_name = "test-object" generation = 123 - mrd = await MultiRangeDownloader.create_mrd( + mrd = await AsyncMultiRangeDownloader.create_mrd( client, bucket_name, object_name, generation_number=generation ) - assert isinstance(mrd, MultiRangeDownloader) + assert isinstance(mrd, AsyncMultiRangeDownloader) assert mrd.client is client assert mrd.bucket_name == bucket_name assert mrd.object_name == object_name @@ -160,7 +160,7 @@ async def test_create_mrd(mock_async_grpc_client): def test_create_mrd_from_read_handle(mock_async_grpc_client): """Test that create_mrd_from_read_handle() raises NotImplementedError.""" with pytest.raises(NotImplementedError): - MultiRangeDownloader.create_mrd_from_read_handle( + AsyncMultiRangeDownloader.create_mrd_from_read_handle( mock_async_grpc_client, b"handle" ) @@ -168,6 +168,6 @@ def test_create_mrd_from_read_handle(mock_async_grpc_client): @pytest.mark.asyncio async def test_download_ranges(mock_async_grpc_client): """Test that download_ranges() raises NotImplementedError.""" - mrd = MultiRangeDownloader(mock_async_grpc_client) + mrd = AsyncMultiRangeDownloader(mock_async_grpc_client) with pytest.raises(NotImplementedError): await mrd.download_ranges([(0, 100, BytesIO())]) From a87f2beda0dca7b4b5b51df538a65a2cd17b97d6 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 06:38:19 +0000 Subject: [PATCH 11/44] feat: implement download_ranges method --- .../asyncio/async_multi_range_downloader.py | 86 ++++++++++++++++++- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 9b41130f9..e0791b476 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py 
@@ -24,6 +24,12 @@ ) from io import BytesIO +from google.cloud import _storage_v2 +import sys +import asyncio + + +_MAX_READ_RANGES_PER_BIDI_READ_REQUEST = 100 class AsyncMultiRangeDownloader: @@ -124,8 +130,80 @@ async def download_ranges(self, read_ranges: List[Tuple[int, int, BytesIO]]) -> read_ranges (List[Tuple[int, int]]): A list of tuples, where each tuple represents a byte range (start_byte, end_byte, buffer) to download. - - Raises: - NotImplementedError: This method is not yet implemented. """ - raise NotImplementedError("TODO") + if len(read_ranges) > 1000: + raise Exception("Invalid Input - ranges cannot be more than 1000") + + read_id_to_writable_buffer_dict = {} + for i in range(0, len(read_ranges), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST): + read_range_segment = read_ranges[ + i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST + ] + + read_ranges_for_bidi_req = [] + for j, read_range in enumerate(read_range_segment): + # generate read_id + read_id = i + j + read_id_to_writable_buffer_dict[read_id] = read_range[2] + read_ranges_for_bidi_req.append( + _storage_v2.ReadRange( + read_offset=read_range[0], + read_length=read_range[1] - read_range[0], # end - start + read_id=read_id, + ) + ) + print(read_ranges_for_bidi_req) + await self.read_obj_str.send( + _storage_v2.BidiReadObjectRequest(read_ranges=read_ranges_for_bidi_req) + ) + while len(read_id_to_writable_buffer_dict) > 0: + response = await self.read_obj_str.recv() + if response is None: + print("None response received, something went wrong.") + sys.exit(1) + for object_data_range in response.object_data_ranges: + + if object_data_range.read_range is None: + raise Exception("Invalid response, read_range is None") + + data = object_data_range.checksummed_data.content + # bytes_received_in_curr_res = object_data_range.read_range.read_length + read_id = object_data_range.read_range.read_id + buffer = read_id_to_writable_buffer_dict[read_id] + buffer.write(data) + print( + "for read_id ", + read_id, + 
data, + object_data_range.checksummed_data.crc32c, + ) + if object_data_range.range_end: + del read_id_to_writable_buffer_dict[ + object_data_range.read_range.read_id + ] + + +async def test_mrd(): + client = AsyncGrpcClient()._grpc_client + mrd = await AsyncMultiRangeDownloader.create_mrd( + client, bucket_name="chandrasiri-rs", object_name="test_open9" + ) + my_buff1 = BytesIO() + my_buff2 = BytesIO() + my_buff3 = BytesIO() + my_buff4 = BytesIO() + buffers = [my_buff1, my_buff2, my_buff3, my_buff4] + await mrd.download_ranges( + [ + (0, 100, my_buff1), + (100, 200, my_buff2), + (200, 300, my_buff3), + (300, 400, my_buff4), + ] + ) + for buff in buffers: + print("downloaded bytes", buff.getbuffer().nbytes) + + +if __name__ == "__main__": + asyncio.run(test_mrd()) From c2e3c7bc7b08f6f6d1ff8b2f35b363f7753d6464 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 06:49:54 +0000 Subject: [PATCH 12/44] add BytesIO in doc string --- .../_experimental/asyncio/async_multi_range_downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index e0791b476..6fc5fdd3a 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -127,7 +127,7 @@ async def download_ranges(self, read_ranges: List[Tuple[int, int, BytesIO]]) -> provided by user. Args: - read_ranges (List[Tuple[int, int]]): A list of tuples, where each + read_ranges (List[Tuple[int, int, BytesIO]]): A list of tuples, where each tuple represents a byte range (start_byte, end_byte, buffer) to download. 
""" From 0810afc315e29a4f9caa25c08faa7840281de545 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 11:00:17 +0000 Subject: [PATCH 13/44] fix doc strings, add licence and type hints --- .../asyncio/async_abstract_object_stream.py | 53 ++++++++++++++----- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 86de7f715..03e2c5690 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -1,36 +1,61 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import abc +from typing import Any, Optional class _AsyncAbstractObjectStream(abc.ABC): - """ - Abstract class for both ReadObjectStream as well as WriteObjectStream. + """Abstract base class for asynchronous object streams. + + This class defines the common interface for both reading from and writing + to a GCS object in a streaming fashion. - Attributes will include - 1. bucket_name - 2. object_name - 3. generation_number (if given) + :type bucket_name: str + :param bucket_name: (Optional) The name of the bucket containing the object. + :type object_name: str + :param object_name: (Optional) The name of the object. 
+ :type generation_number: int + :param generation_number: (Optional) If present, selects a specific revision of + this object. """ - def __init__(self, bucket_name, object_name, generation_number=None): + def __init__( + self, + bucket_name: Optional[str] = None, + object_name: Optional[str] = None, + generation_number: Optional[int] = None, + ) -> None: super().__init__() - self.bucket_name = bucket_name - self.object_name = object_name - self.generation_number = generation_number + self.bucket_name: Optional[str] = bucket_name + self.object_name: Optional[str] = object_name + self.generation_number: Optional[int] = generation_number @abc.abstractmethod - async def open(self): + async def open(self) -> None: raise NotImplementedError("Subclasses should implement this method.") @abc.abstractmethod - async def close(self): + async def close(self) -> None: raise NotImplementedError("Subclasses should implement this method.") @abc.abstractmethod - async def send(self): + async def send(self, message: Any) -> None: raise NotImplementedError("Subclasses should implement this method.") @abc.abstractmethod - async def recv(self): + async def recv(self) -> Any: raise NotImplementedError("Subclasses should implement this method.") From a14bc6895ad704beb57438b55ede8f6487b50f9e Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 11:16:30 +0000 Subject: [PATCH 14/44] pass abstract methods --- .../_experimental/asyncio/async_abstract_object_stream.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 03e2c5690..752a058c1 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -46,16 +46,16 @@ def __init__( @abc.abstractmethod async def open(self) -> None: - raise 
NotImplementedError("Subclasses should implement this method.") + pass @abc.abstractmethod async def close(self) -> None: - raise NotImplementedError("Subclasses should implement this method.") + pass @abc.abstractmethod async def send(self, message: Any) -> None: - raise NotImplementedError("Subclasses should implement this method.") + pass @abc.abstractmethod async def recv(self) -> Any: - raise NotImplementedError("Subclasses should implement this method.") + pass From 635ad07ea962f0bb0df491ac57631d167be29369 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 11:29:33 +0000 Subject: [PATCH 15/44] add handle param --- .../_experimental/asyncio/async_abstract_object_stream.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 752a058c1..28a92ccb4 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -31,6 +31,10 @@ class _AsyncAbstractObjectStream(abc.ABC): :type generation_number: int :param generation_number: (Optional) If present, selects a specific revision of this object. + + :type handle: bytes + :param handle: (Optional) The handle for the object, could be read_handle or + write_handle, based on how the stream is used. 
""" def __init__( @@ -38,11 +42,13 @@ def __init__( bucket_name: Optional[str] = None, object_name: Optional[str] = None, generation_number: Optional[int] = None, + handle: Optional[bytes] = None, ) -> None: super().__init__() self.bucket_name: Optional[str] = bucket_name self.object_name: Optional[str] = object_name self.generation_number: Optional[int] = generation_number + self.handle: Optional[bytes] = handle @abc.abstractmethod async def open(self) -> None: From ba453d4617a6c197078e9a3783730d48d5e3b1a3 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 11:36:07 +0000 Subject: [PATCH 16/44] include handle in tests --- tests/unit/asyncio/test_async_abstract_object_stream.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/unit/asyncio/test_async_abstract_object_stream.py b/tests/unit/asyncio/test_async_abstract_object_stream.py index e0dc130ea..d81c9ef56 100644 --- a/tests/unit/asyncio/test_async_abstract_object_stream.py +++ b/tests/unit/asyncio/test_async_abstract_object_stream.py @@ -39,18 +39,23 @@ def test_init(): bucket_name = "test-bucket" object_name = "test-object" generation = 12345 + handle = b"test-handle" # Test with all parameters - stream = _ConcreteStream(bucket_name, object_name, generation_number=generation) + stream = _ConcreteStream( + bucket_name, object_name, generation_number=generation, handle=handle + ) assert stream.bucket_name == bucket_name assert stream.object_name == object_name assert stream.generation_number == generation + assert stream.handle == handle # Test with default generation_number stream_no_gen = _ConcreteStream(bucket_name, object_name) assert stream_no_gen.bucket_name == bucket_name assert stream_no_gen.object_name == object_name assert stream_no_gen.generation_number is None + assert stream_no_gen.handle is None def test_instantiation_fails_without_implementation(): From 800c6df51e19123b022b9aa336cda59f939444bf Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 
18 Sep 2025 14:59:47 +0000 Subject: [PATCH 17/44] remove unit tests for abstract class --- .../asyncio/async_abstract_object_stream.py | 8 +-- .../test_async_abstract_object_stream.py | 69 ------------------- 2 files changed, 4 insertions(+), 73 deletions(-) delete mode 100644 tests/unit/asyncio/test_async_abstract_object_stream.py diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 28a92ccb4..325089ba5 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -17,10 +17,10 @@ class _AsyncAbstractObjectStream(abc.ABC): - """Abstract base class for asynchronous object streams. + """Abstract base class to represent gRPC stream for GCS ``Object``. - This class defines the common interface for both reading from and writing - to a GCS object in a streaming fashion. + Concrete implementation of this class could be ``_AsyncReadObjectStream`` + or ``_AsyncWriteObjectStream``. :type bucket_name: str :param bucket_name: (Optional) The name of the bucket containing the object. @@ -59,7 +59,7 @@ async def close(self) -> None: pass @abc.abstractmethod - async def send(self, message: Any) -> None: + async def send(self, protobuf: Any) -> None: pass @abc.abstractmethod diff --git a/tests/unit/asyncio/test_async_abstract_object_stream.py b/tests/unit/asyncio/test_async_abstract_object_stream.py deleted file mode 100644 index d81c9ef56..000000000 --- a/tests/unit/asyncio/test_async_abstract_object_stream.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( - _AsyncAbstractObjectStream, -) - - -# A concrete implementation for testing purposes. -class _ConcreteStream(_AsyncAbstractObjectStream): - async def open(self): - pass - - async def close(self): - pass - - async def send(self): - pass - - async def recv(self): - pass - - -def test_init(): - """Test the constructor of AsyncAbstractObjectStream.""" - bucket_name = "test-bucket" - object_name = "test-object" - generation = 12345 - handle = b"test-handle" - - # Test with all parameters - stream = _ConcreteStream( - bucket_name, object_name, generation_number=generation, handle=handle - ) - assert stream.bucket_name == bucket_name - assert stream.object_name == object_name - assert stream.generation_number == generation - assert stream.handle == handle - - # Test with default generation_number - stream_no_gen = _ConcreteStream(bucket_name, object_name) - assert stream_no_gen.bucket_name == bucket_name - assert stream_no_gen.object_name == object_name - assert stream_no_gen.generation_number is None - assert stream_no_gen.handle is None - - -def test_instantiation_fails_without_implementation(): - """Test that instantiating an incomplete subclass raises TypeError.""" - - class _IncompleteStream(_AsyncAbstractObjectStream): - # Missing implementations for abstract methods like open(), close(), etc. 
- pass - - with pytest.raises(TypeError, match="Can't instantiate abstract class"): - _IncompleteStream("bucket", "object") From 18529adc0d2765721ccaa1836ef5080b775602fd Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 15:23:31 +0000 Subject: [PATCH 18/44] edit doc string for _AsyncReadObjectStream --- .../asyncio/async_read_object_stream.py | 41 +++++++++++-------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 078434073..a3818d2fa 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -22,59 +22,64 @@ """ +from typing import Any, Optional +from google.cloud import _storage_v2 +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( _AsyncAbstractObjectStream, ) class _AsyncReadObjectStream(_AsyncAbstractObjectStream): - """Provides an asynchronous, streaming interface for reading from a GCS object. + """Class representing a gRPC bidi-stream for reading data from a GCS ``Object``. - This class provides a unix socket-like interface to a GCS Object, with + This class provides a unix socket-like interface to a GCS ``Object``, with methods like ``open``, ``close``, ``send``, and ``recv``. - :type client: :class:`~google.cloud.storage.aio.Client` - :param client: The asynchronous client to use for making API requests. + :type client: :class:`~google.cloud.storage.asyncio.AsyncGrpcClient` + :param client: async grpc client to use for making API requests. :type bucket_name: str - :param bucket_name: The name of the bucket containing the object. + :param bucket_name: The name of the GCS ``bucket`` containing the object. 
:type object_name: str - :param object_name: The name of the object to be read. + :param object_name: The name of the GCS ``object`` to be read. :type generation_number: int :param generation_number: (Optional) If present, selects a specific revision of this object. - :type read_handle: object + :type read_handle: bytes :param read_handle: (Optional) An existing handle for reading the object. If provided, opening the bidi-gRPC connection will be faster. """ def __init__( self, - client, - bucket_name=None, - object_name=None, - generation_number=None, - read_handle=None, - ): + client: AsyncGrpcClient, + bucket_name: Optional[str] = None, + object_name: Optional[str] = None, + generation_number: Optional[int] = None, + read_handle: Optional[bytes] = None, + ) -> None: super().__init__( bucket_name=bucket_name, object_name=object_name, generation_number=generation_number, ) - self.client = client - self.read_handle = read_handle + self.client: AsyncGrpcClient = client + self.read_handle: Optional[bytes] = read_handle async def open(self) -> None: pass - async def close(self): + async def close(self) -> None: pass - async def send(self, bidi_read_object_request): + async def send( + self, bidi_read_object_request: _storage_v2.BidiReadObjectRequest + ) -> None: pass - async def recv(self): + async def recv(self) -> Any: pass From b4da1acd87c686d396856fa3d83a5d221b39f4f1 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 18:14:52 +0000 Subject: [PATCH 19/44] refactor unit tests for async_read_object_stream --- .../asyncio/async_read_object_stream.py | 6 +- .../asyncio/test_async_read_object_stream.py | 132 +++--------------- 2 files changed, 24 insertions(+), 114 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 47568b005..735dd7f80 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ 
b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -38,7 +38,7 @@ class _AsyncReadObjectStream(_AsyncAbstractObjectStream): This class provides a unix socket-like interface to a GCS ``Object``, with methods like ``open``, ``close``, ``send``, and ``recv``. - :type client: :class:`~google.cloud.storage.asyncio.AsyncGrpcClient` + :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` :param client: async grpc client to use for making API requests. :type bucket_name: str @@ -58,7 +58,7 @@ class _AsyncReadObjectStream(_AsyncAbstractObjectStream): def __init__( self, - client: AsyncGrpcClient, + client: AsyncGrpcClient.grpc_client, bucket_name: Optional[str] = None, object_name: Optional[str] = None, generation_number: Optional[int] = None, @@ -69,7 +69,7 @@ def __init__( object_name=object_name, generation_number=generation_number, ) - self.client: AsyncGrpcClient = client + self.client: AsyncGrpcClient.grpc_client = client self.read_handle: Optional[bytes] = read_handle self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index 3b7c5653e..b15ea60af 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -14,6 +14,7 @@ import pytest from unittest import mock +from google.cloud import _storage_v2 from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( _AsyncAbstractObjectStream, @@ -23,128 +24,37 @@ ) -def test_inheritance(): - """Test that _AsyncReadObjectStream inherits from _AsyncAbstractObjectStream.""" - assert issubclass(_AsyncReadObjectStream, _AsyncAbstractObjectStream) - - @mock.patch( "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - 
"google.cloud.storage._experimental.asyncio.async_read_object_stream._storage_v2" + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) -def test_init(mock_storage_v2, mock_async_bidi_rpc): - """Test the constructor of _AsyncReadObjectStream.""" - # Setup mock client - mock_rpc = mock.Mock(name="rpc") - mock_transport = mock.Mock(name="transport") - mock_transport.bidi_read_object = "bidi_read_object_key" - mock_transport._wrapped_methods = {"bidi_read_object_key": mock_rpc} - mock_gapic_client = mock.Mock(name="gapic_client") - mock_gapic_client._transport = mock_transport - mock_client = mock.Mock(name="client") - mock_client._client = mock_gapic_client - +def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc): + # initialize with bucket, object_name and generation number. & client. bucket_name = "test-bucket" object_name = "test-object" - generation = 12345 - read_handle = "some-handle" - - # Test with all parameters - stream = _AsyncReadObjectStream( - mock_client, + generation_number = 12345 + mock_client._client._transport.bidi_read_object = "bidi_read_object_rpc" + mock_client._client._transport._wrapped_methods = { + "bidi_read_object_rpc": mock.sentinel.A + } + + read_obj_stream = _AsyncReadObjectStream( + client=mock_client, bucket_name=bucket_name, object_name=object_name, - generation_number=generation, - read_handle=read_handle, + generation_number=generation_number, ) - - assert stream.client is mock_client - assert stream.bucket_name == bucket_name - assert stream.object_name == object_name - assert stream.generation_number == generation - assert stream.read_handle == read_handle - full_bucket_name = f"projects/_/buckets/{bucket_name}" - assert stream._full_bucket_name == full_bucket_name - assert stream.rpc is mock_rpc - - mock_storage_v2.BidiReadObjectSpec.assert_called_once_with( - bucket=full_bucket_name, object=object_name + first_bidi_read_req = _storage_v2.BidiReadObjectRequest( + 
read_object_spec=_storage_v2.BidiReadObjectSpec( + bucket=full_bucket_name, object=object_name + ), ) - mock_read_object_spec = mock_storage_v2.BidiReadObjectSpec.return_value - mock_storage_v2.BidiReadObjectRequest.assert_called_once_with( - read_object_spec=mock_read_object_spec - ) - mock_initial_request = mock_storage_v2.BidiReadObjectRequest.return_value - - expected_metadata = (("x-goog-request-params", f"bucket={full_bucket_name}"),) - assert stream.metadata == expected_metadata - mock_async_bidi_rpc.assert_called_once_with( - mock_rpc, initial_request=mock_initial_request, metadata=expected_metadata + mock.sentinel.A, + initial_request=first_bidi_read_req, + metadata=(("x-goog-request-params", f"bucket={full_bucket_name}"),), ) - assert stream.socket_like_rpc is mock_async_bidi_rpc.return_value - - # Reset mocks for the next test case - mock_storage_v2.reset_mock() - mock_async_bidi_rpc.reset_mock() - - # Test with default parameters - stream_defaults = _AsyncReadObjectStream(mock_client) - assert stream_defaults.client is mock_client - assert stream_defaults.bucket_name is None - assert stream_defaults.object_name is None - assert stream_defaults.generation_number is None - assert stream_defaults.read_handle is None - - # The following asserts the behavior with None values. 
- full_bucket_name_none = "projects/_/buckets/None" - assert stream_defaults._full_bucket_name == full_bucket_name_none - - mock_storage_v2.BidiReadObjectSpec.assert_called_once_with( - bucket=full_bucket_name_none, object=None - ) - mock_read_object_spec_none = mock_storage_v2.BidiReadObjectSpec.return_value - mock_storage_v2.BidiReadObjectRequest.assert_called_once_with( - read_object_spec=mock_read_object_spec_none - ) - mock_initial_request_none = mock_storage_v2.BidiReadObjectRequest.return_value - - expected_metadata_none = ( - ("x-goog-request-params", f"bucket={full_bucket_name_none}"), - ) - assert stream_defaults.metadata == expected_metadata_none - - mock_async_bidi_rpc.assert_called_once_with( - mock_rpc, - initial_request=mock_initial_request_none, - metadata=expected_metadata_none, - ) - - -@pytest.mark.asyncio -async def test_async_methods_are_awaitable(): - """Test that the async methods exist and are awaitable.""" - # Setup mock client to allow instantiation of the stream object. - mock_rpc = mock.Mock(name="rpc") - mock_transport = mock.Mock(name="transport") - mock_transport.bidi_read_object = "bidi_read_object_key" - mock_transport._wrapped_methods = {"bidi_read_object_key": mock_rpc} - mock_gapic_client = mock.Mock(name="gapic_client") - mock_gapic_client._transport = mock_transport - mock_client = mock.Mock(name="client") - mock_client._client = mock_gapic_client - - stream = _AsyncReadObjectStream(mock_client) - - # These methods are currently empty, but we can test they are awaitable - # and don't raise exceptions. - try: - await stream.open() - await stream.close() - await stream.send(mock.Mock()) - await stream.recv() - except Exception as e: - pytest.fail(f"Async methods should be awaitable without errors. 
Raised: {e}") + assert read_obj_stream.socket_like_rpc is mock_async_bidi_rpc.return_value From 6dec6c692779e26ddcf30588375f4d0260a7102d Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 18:31:13 +0000 Subject: [PATCH 20/44] bucket_name and object_name cannot be NONE --- .../_experimental/asyncio/async_abstract_object_stream.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 325089ba5..1ba5aef9b 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -39,14 +39,14 @@ class _AsyncAbstractObjectStream(abc.ABC): def __init__( self, - bucket_name: Optional[str] = None, - object_name: Optional[str] = None, + bucket_name: str, + object_name: str, generation_number: Optional[int] = None, handle: Optional[bytes] = None, ) -> None: super().__init__() - self.bucket_name: Optional[str] = bucket_name - self.object_name: Optional[str] = object_name + self.bucket_name: str = bucket_name + self.object_name: str = object_name self.generation_number: Optional[int] = generation_number self.handle: Optional[bytes] = handle From a15490527a4df40dbc89e631f2b4cee6acfda694 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 18 Sep 2025 18:35:32 +0000 Subject: [PATCH 21/44] bucket_name and object_name cannot be None --- .../_experimental/asyncio/async_read_object_stream.py | 4 ++-- tests/unit/asyncio/test_async_read_object_stream.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index a3818d2fa..96639e0df 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ 
b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -57,8 +57,8 @@ class _AsyncReadObjectStream(_AsyncAbstractObjectStream): def __init__( self, client: AsyncGrpcClient, - bucket_name: Optional[str] = None, - object_name: Optional[str] = None, + bucket_name: str, + object_name: str, generation_number: Optional[int] = None, read_handle: Optional[bytes] = None, ) -> None: diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index 43b42f8d8..a033851cc 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -52,10 +52,12 @@ def test_init(): assert stream.read_handle == read_handle # Test with default parameters - stream_defaults = _AsyncReadObjectStream(mock_client) + stream_defaults = _AsyncReadObjectStream( + mock_client, bucket_name=bucket_name, object_name=object_name + ) assert stream_defaults.client is mock_client - assert stream_defaults.bucket_name is None - assert stream_defaults.object_name is None + assert stream_defaults.bucket_name is bucket_name + assert stream_defaults.object_name is object_name assert stream_defaults.generation_number is None assert stream_defaults.read_handle is None @@ -64,7 +66,7 @@ def test_init(): async def test_async_methods_are_awaitable(): """Test that the async methods exist and are awaitable.""" mock_client = mock.Mock(name="client") - stream = _AsyncReadObjectStream(mock_client) + stream = _AsyncReadObjectStream(mock_client, "bucket", "object") # These methods are currently empty, but we can test they are awaitable # and don't raise exceptions. 
From 14a2abaff55d46247cd959d0d36259dd5df3278e Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Fri, 19 Sep 2025 06:13:41 +0000 Subject: [PATCH 22/44] simplyfy tests for open --- .../asyncio/test_async_read_object_stream.py | 74 +++++++++++-------- 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index e405c43f6..b98d786c6 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -14,6 +14,7 @@ import pytest from unittest import mock +from unittest.mock import AsyncMock from google.cloud import _storage_v2 from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( @@ -23,24 +24,10 @@ _AsyncReadObjectStream, ) - -@pytest.fixture -def mock_client(): - """A mock client for testing.""" - mock_rpc = mock.Mock(name="rpc") - mock_transport = mock.Mock(name="transport") - mock_transport.bidi_read_object = "bidi_read_object_key" - mock_transport._wrapped_methods = {"bidi_read_object_key": mock_rpc} - mock_gapic_client = mock.Mock(name="gapic_client") - mock_gapic_client._transport = mock_transport - mock_client = mock.Mock(name="client") - mock_client._client = mock_gapic_client - return mock_client - - -def test_inheritance(): - """Test that _AsyncReadObjectStream inherits from _AsyncAbstractObjectStream.""" - assert issubclass(_AsyncReadObjectStream, _AsyncAbstractObjectStream) +_TEST_BUCKET_NAME = "test-bucket" +_TEST_OBJECT_NAME = "test-object" +_TEST_GENERATION_NUMBER = 12345 +_TEST_READ_HANDLE = b"test-read-handle" @mock.patch( @@ -50,30 +37,55 @@ def test_inheritance(): "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc): - # initialize with bucket, object_name and generation number. & client. 
- bucket_name = "test-bucket" - object_name = "test-object" - generation_number = 12345 + mock_client._client._transport.bidi_read_object = "bidi_read_object_rpc" mock_client._client._transport._wrapped_methods = { "bidi_read_object_rpc": mock.sentinel.A } - - read_obj_stream = _AsyncReadObjectStream( - client=mock_client, - bucket_name=bucket_name, - object_name=object_name, - generation_number=generation_number, - ) - full_bucket_name = f"projects/_/buckets/{bucket_name}" + full_bucket_name = f"projects/_/buckets/{_TEST_BUCKET_NAME}" first_bidi_read_req = _storage_v2.BidiReadObjectRequest( read_object_spec=_storage_v2.BidiReadObjectSpec( - bucket=full_bucket_name, object=object_name + bucket=full_bucket_name, object=_TEST_OBJECT_NAME ), ) + + read_obj_stream = _AsyncReadObjectStream( + client=mock_client, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + generation_number=_TEST_GENERATION_NUMBER, + ) + mock_async_bidi_rpc.assert_called_once_with( mock.sentinel.A, initial_request=first_bidi_read_req, metadata=(("x-goog-request-params", f"bucket={full_bucket_name}"),), ) assert read_obj_stream.socket_like_rpc is mock_async_bidi_rpc.return_value + + +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" +) +@pytest.mark.asyncio +async def test_open(mock_client): + read_obj_stream = _AsyncReadObjectStream( + client=mock_client, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + ) + read_obj_stream.socket_like_rpc.open = AsyncMock() + + recv_response = mock.MagicMock(spec=_storage_v2.BidiReadObjectResponse) + recv_response.metadata = mock.MagicMock(spec=_storage_v2.Object) + recv_response.metadata.generation = _TEST_GENERATION_NUMBER + recv_response.read_handle = _TEST_READ_HANDLE + read_obj_stream.socket_like_rpc.recv = AsyncMock(return_value=recv_response) + + await read_obj_stream.open() + + read_obj_stream.socket_like_rpc.open.assert_called_once() + 
read_obj_stream.socket_like_rpc.recv.assert_called_once() + + assert read_obj_stream.generation_number == _TEST_GENERATION_NUMBER + assert read_obj_stream.read_handle == _TEST_READ_HANDLE From 078afca48bff53bd01c7edca409e8c69557c4c2b Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Fri, 19 Sep 2025 06:24:45 +0000 Subject: [PATCH 23/44] simply tests for send recv and close --- .../asyncio/async_read_object_stream.py | 7 +- .../asyncio/test_async_read_object_stream.py | 70 +++++++++++++++++++ 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 01906d999..01272e91b 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -104,12 +104,9 @@ async def open(self) -> None: self.read_handle = response.read_handle - return - async def close(self) -> None: """Closes the bidi-gRPC connection.""" await self.socket_like_rpc.close() - return async def send( self, bidi_read_object_request: _storage_v2.BidiReadObjectRequest @@ -122,7 +119,6 @@ async def send( the read offset and limit. """ await self.socket_like_rpc.send(bidi_read_object_request) - return async def recv(self) -> _storage_v2.BidiReadObjectResponse: """Receives a response from the stream. @@ -134,5 +130,4 @@ async def recv(self) -> _storage_v2.BidiReadObjectResponse: :class:`~google.cloud._storage_v2.types.BidiReadObjectResponse`: The response message from the server. 
""" - bidi_read_object_response = await self.socket_like_rpc.recv() - return bidi_read_object_response + return await self.socket_like_rpc.recv() diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index b98d786c6..aa99821fd 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -69,6 +69,7 @@ def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc): ) @pytest.mark.asyncio async def test_open(mock_client): + # arrange read_obj_stream = _AsyncReadObjectStream( client=mock_client, bucket_name=_TEST_BUCKET_NAME, @@ -82,10 +83,79 @@ async def test_open(mock_client): recv_response.read_handle = _TEST_READ_HANDLE read_obj_stream.socket_like_rpc.recv = AsyncMock(return_value=recv_response) + # act await read_obj_stream.open() + # assert read_obj_stream.socket_like_rpc.open.assert_called_once() read_obj_stream.socket_like_rpc.recv.assert_called_once() assert read_obj_stream.generation_number == _TEST_GENERATION_NUMBER assert read_obj_stream.read_handle == _TEST_READ_HANDLE + + +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" +) +@pytest.mark.asyncio +async def test_close(mock_client): + # arrange + read_obj_stream = _AsyncReadObjectStream( + client=mock_client, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + ) + read_obj_stream.socket_like_rpc.close = AsyncMock() + + # act + await read_obj_stream.close() + + # assert + read_obj_stream.socket_like_rpc.close.assert_called_once() + + +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" +) +@pytest.mark.asyncio +async def test_send(mock_client): + # arrange + read_obj_stream = _AsyncReadObjectStream( + client=mock_client, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + ) + read_obj_stream.socket_like_rpc.send = 
AsyncMock() + + # act + bidi_read_object_request = _storage_v2.BidiReadObjectRequest() + await read_obj_stream.send(bidi_read_object_request) + + # assert + read_obj_stream.socket_like_rpc.send.assert_called_once_with( + bidi_read_object_request + ) + + +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" +) +@pytest.mark.asyncio +async def test_recv(mock_client): + # arrange + read_obj_stream = _AsyncReadObjectStream( + client=mock_client, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + ) + bidi_read_object_response = _storage_v2.BidiReadObjectResponse() + read_obj_stream.socket_like_rpc.recv = AsyncMock( + return_value=bidi_read_object_response + ) + + # act + response = await read_obj_stream.recv() + + # assert + read_obj_stream.socket_like_rpc.recv.assert_called_once() + assert response == bidi_read_object_response From 2054989c7b83ba86d2461e046c972d61e1cbbe49 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Fri, 19 Sep 2025 06:26:06 +0000 Subject: [PATCH 24/44] minor edit - add bidi-stream in doc string --- .../_experimental/asyncio/async_abstract_object_stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 1ba5aef9b..49d7a293a 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -17,7 +17,7 @@ class _AsyncAbstractObjectStream(abc.ABC): - """Abstract base class to represent gRPC stream for GCS ``Object``. + """Abstract base class to represent gRPC bidi-stream for GCS ``Object``. Concrete implementation of this class could be ``_AsyncReadObjectStream`` or ``_AsyncWriteObjectStream``. 
From 8366f0bf8286701da0c794ad714cfcaeacf0f8f5 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Fri, 19 Sep 2025 06:39:56 +0000 Subject: [PATCH 25/44] simplify unit tests --- .../asyncio/async_multi_range_downloader.py | 12 ++-- .../test_async_multi_range_downloader.py | 66 +++++++++---------- 2 files changed, 37 insertions(+), 41 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 9b41130f9..9d964404b 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -32,7 +32,7 @@ class AsyncMultiRangeDownloader: @classmethod async def create_mrd( cls, - client: AsyncGrpcClient, + client: AsyncGrpcClient.grpc_client, bucket_name: str, object_name: str, generation_number: Optional[int] = None, @@ -43,7 +43,7 @@ async def create_mrd( opens the underlying bidi-gRPC connection. Args: - client (AsyncGrpcClient): The asynchronous client to use for making API requests. + client (AsyncGrpcClient.grpc_client): The asynchronous client to use for making API requests. bucket_name (str): The name of the bucket containing the object. object_name (str): The name of the object to be read. generation_number (int, optional): If present, selects a specific @@ -58,12 +58,12 @@ async def create_mrd( @classmethod def create_mrd_from_read_handle( - cls, client: AsyncGrpcClient, read_handle: bytes + cls, client: AsyncGrpcClient.grpc_client, read_handle: bytes ) -> AsyncMultiRangeDownloader: """Creates a MultiRangeDownloader from an existing read handle. Args: - client (AsyncGrpcClient): The asynchronous client to use for making API requests. + client (AsyncGrpcClient.grpc_client): The asynchronous client to use for making API requests. read_handle (bytes): An existing handle for reading the object. 
Raises: @@ -73,7 +73,7 @@ def create_mrd_from_read_handle( def __init__( self, - client: AsyncGrpcClient, + client: AsyncGrpcClient.grpc_client, bucket_name: Optional[str] = None, object_name: Optional[str] = None, generation_number: Optional[int] = None, @@ -82,7 +82,7 @@ def __init__( """Initializes a MultiRangeDownloader. Args: - client (AsyncGrpcClient): The asynchronous client to use for making API requests. + client (AsyncGrpcClient.grpc_client): The asynchronous client to use for making API requests. bucket_name (str, optional): The name of the bucket. Defaults to None. object_name (str, optional): The name of the object. Defaults to None. generation_number (int, optional): The generation number of the object. diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index d479e3a17..e2c54b5f7 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -20,6 +20,11 @@ ) from io import BytesIO +_TEST_BUCKET_NAME = "test-bucket" +_TEST_OBJECT_NAME = "test-object" +_TEST_GENERATION = 123456789 +_TEST_READ_HANDLE = b"test-handle" + @pytest.fixture def mock_async_grpc_client(): @@ -43,24 +48,20 @@ def mock_async_read_object_stream(): def test_init(mock_async_grpc_client): """Test the constructor of MultiRangeDownloader.""" client = mock_async_grpc_client - bucket_name = "test-bucket" - object_name = "test-object" - generation = 123 - read_handle = b"test-handle" mrd = AsyncMultiRangeDownloader( client, - bucket_name=bucket_name, - object_name=object_name, - generation_number=generation, - read_handle=read_handle, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + generation_number=_TEST_GENERATION, + read_handle=_TEST_READ_HANDLE, ) assert mrd.client is client - assert mrd.bucket_name == bucket_name - assert mrd.object_name == object_name - assert mrd.generation_number == generation - assert 
mrd.read_handle == read_handle + assert mrd.bucket_name == _TEST_BUCKET_NAME + assert mrd.object_name == _TEST_OBJECT_NAME + assert mrd.generation_number == _TEST_GENERATION + assert mrd.read_handle == _TEST_READ_HANDLE assert not hasattr(mrd, "read_obj_str") @@ -68,21 +69,19 @@ def test_init(mock_async_grpc_client): async def test_open(mock_async_grpc_client, mock_async_read_object_stream): """Test the open() method.""" client = mock_async_grpc_client - bucket_name = "test-bucket" - object_name = "test-object" mrd = AsyncMultiRangeDownloader( client, - bucket_name=bucket_name, - object_name=object_name, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, ) await mrd.open() mock_async_read_object_stream.assert_called_once_with( client=client, - bucket_name=bucket_name, - object_name=object_name, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, generation_number=None, read_handle=None, ) @@ -101,15 +100,12 @@ async def test_open_with_generation( ): """Test open() when generation_number is already set.""" client = mock_async_grpc_client - bucket_name = "test-bucket" - object_name = "test-object" - initial_generation = 456 mrd = AsyncMultiRangeDownloader( client, - bucket_name=bucket_name, - object_name=object_name, - generation_number=initial_generation, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + generation_number=_TEST_GENERATION, ) # The mock stream will have a different generation number to ensure we don't overwrite it. 
@@ -119,9 +115,9 @@ async def test_open_with_generation( mock_async_read_object_stream.assert_called_once_with( client=client, - bucket_name=bucket_name, - object_name=object_name, - generation_number=initial_generation, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + generation_number=_TEST_GENERATION, read_handle=None, ) @@ -129,7 +125,7 @@ async def test_open_with_generation( mock_stream_instance.open.assert_awaited_once() assert mrd.read_obj_str is mock_stream_instance - assert mrd.generation_number == initial_generation # Should not be overwritten + assert mrd.generation_number == _TEST_GENERATION # Should not be overwritten assert mrd.read_handle == mock_stream_instance.read_handle @@ -141,19 +137,19 @@ async def test_create_mrd(mock_async_grpc_client): new_callable=mock.AsyncMock, ) as mock_open: client = mock_async_grpc_client - bucket_name = "test-bucket" - object_name = "test-object" - generation = 123 mrd = await AsyncMultiRangeDownloader.create_mrd( - client, bucket_name, object_name, generation_number=generation + client, + _TEST_BUCKET_NAME, + _TEST_OBJECT_NAME, + generation_number=_TEST_GENERATION, ) assert isinstance(mrd, AsyncMultiRangeDownloader) assert mrd.client is client - assert mrd.bucket_name == bucket_name - assert mrd.object_name == object_name - assert mrd.generation_number == generation + assert mrd.bucket_name == _TEST_BUCKET_NAME + assert mrd.object_name == _TEST_OBJECT_NAME + assert mrd.generation_number == _TEST_GENERATION mock_open.assert_awaited_once() From 06b102c55f2b40461fe0522925446359b44fcf9e Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Fri, 19 Sep 2025 08:42:49 +0000 Subject: [PATCH 26/44] improve doc string --- .../asyncio/async_multi_range_downloader.py | 129 ++++++++++++------ 1 file changed, 84 insertions(+), 45 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 
9d964404b..a5b0ec2a4 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -27,7 +27,39 @@ class AsyncMultiRangeDownloader: - """Provides an interface for downloading multiple ranges of a GCS object concurrently.""" + """Provides an interface for downloading multiple ranges of a GCS ``Object`` + concurrently. + + + Example usage: + + .. code-block:: python + + + ``` + client = AsyncGrpcClient().grpc_client + mrd = await AsyncMultiRangeDownloader.create_mrd( + client, bucket_name="chandrasiri-rs", object_name="test_open9" + ) + my_buff1 = BytesIO() + my_buff2 = BytesIO() + my_buff3 = BytesIO() + my_buff4 = BytesIO() + buffers = [my_buff1, my_buff2, my_buff3, my_buff4] + await mrd.download_ranges( + [ + (0, 100, my_buff1), + (100, 200, my_buff2), + (200, 300, my_buff3), + (300, 400, my_buff4), + ] + ) + for buff in buffers: + print("downloaded bytes", buff.getbuffer().nbytes) + ``` + + + """ @classmethod async def create_mrd( @@ -36,72 +68,77 @@ async def create_mrd( bucket_name: str, object_name: str, generation_number: Optional[int] = None, + read_handle: Optional[bytes] = None, ) -> AsyncMultiRangeDownloader: - """Asynchronously creates and initializes a MultiRangeDownloader. + """Initializes a MultiRangeDownloader and opens the underlying bidi-gRPC + object for reading. - This factory method creates an instance of MultiRangeDownloader and - opens the underlying bidi-gRPC connection. + :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :param client: The asynchronous client to use for making API requests. - Args: - client (AsyncGrpcClient.grpc_client): The asynchronous client to use for making API requests. - bucket_name (str): The name of the bucket containing the object. - object_name (str): The name of the object to be read. 
- generation_number (int, optional): If present, selects a specific - revision of this object. + :type bucket_name: str + :param bucket_name: The name of the bucket containing the object. - Returns: - MultiRangeDownloader: An initialized MultiRangeDownloader instance. - """ - mrd = cls(client, bucket_name, object_name, generation_number) - await mrd.open() - return mrd + :type object_name: str + :param object_name: The name of the object to be read. - @classmethod - def create_mrd_from_read_handle( - cls, client: AsyncGrpcClient.grpc_client, read_handle: bytes - ) -> AsyncMultiRangeDownloader: - """Creates a MultiRangeDownloader from an existing read handle. + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific + revision of this object. - Args: - client (AsyncGrpcClient.grpc_client): The asynchronous client to use for making API requests. - read_handle (bytes): An existing handle for reading the object. + :type read_handle: bytes + :param read_handle: (Optional) An existing handle for reading the object. + If provided, opening the bidi-gRPC connection will be faster. - Raises: - NotImplementedError: This method is not yet implemented. + :rtype: :class:`~google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader` + :returns: An initialized AsyncMultiRangeDownloader instance for reading. """ - raise NotImplementedError("TODO") + mrd = cls(client, bucket_name, object_name, generation_number, read_handle) + await mrd.open() + return mrd def __init__( self, client: AsyncGrpcClient.grpc_client, - bucket_name: Optional[str] = None, - object_name: Optional[str] = None, + bucket_name: str, + object_name: str, generation_number: Optional[int] = None, read_handle: Optional[bytes] = None, ) -> None: - """Initializes a MultiRangeDownloader. - - Args: - client (AsyncGrpcClient.grpc_client): The asynchronous client to use for making API requests. 
- bucket_name (str, optional): The name of the bucket. Defaults to None. - object_name (str, optional): The name of the object. Defaults to None. - generation_number (int, optional): The generation number of the object. - Defaults to None. - read_handle (bytes, optional): An existing read handle. Defaults to None. + """Constructor for AsyncMultiRangeDownloader, clients are not adviced to + use it directly. Instead it's adviced to use the classmethod `create_mrd`. + + :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :param client: The asynchronous client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the bucket containing the object. + + :type object_name: str + :param object_name: The name of the object to be read. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific revision of + this object. + + :type read_handle: bytes + :param read_handle: (Optional) An existing read handle. """ self.client = client self.bucket_name = bucket_name self.object_name = object_name self.generation_number = generation_number self.read_handle = read_handle - self.read_obj_str: _AsyncReadObjectStream + self.read_obj_str: _AsyncReadObjectStream = None async def open(self) -> None: """Opens the bidi-gRPC connection to read from the object. - This method initializes and opens an `_AsyncReadObjectStream` to - establish a connection for downloading. It also retrieves the - generation number and read handle if they are not already set. + This method initializes and opens an `_AsyncReadObjectStream` (bidi-gRPC stream) to + for downloading ranges of data from GCS ``Object``. + + "Opening" constitutes fetching object metadata such as generation number + and read handle and sets them as attributes if not already set. 
""" self.read_obj_str = _AsyncReadObjectStream( client=self.client, @@ -120,9 +157,11 @@ async def download_ranges(self, read_ranges: List[Tuple[int, int, BytesIO]]) -> """Downloads multiple byte ranges from the object into the buffers provided by user. - Args: - read_ranges (List[Tuple[int, int]]): A list of tuples, where each - tuple represents a byte range (start_byte, end_byte, buffer) to download. + :type read_ranges: List[Tuple[int, int, "BytesIO"]] + :param read_ranges: A list of tuples, where each tuple represents a + byte range (start_byte, end_byte, buffer) to download. Buffer has to + be provided by the user, and user has to make sure appropriate + memory is available in the application to avoid out-of-memory crash. Raises: From 52494b47f6d1c2c97324217a569c453102a6d040 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Fri, 19 Sep 2025 09:39:19 +0000 Subject: [PATCH 27/44] fix unit tess in MRD --- .../asyncio/async_multi_range_downloader.py | 43 +++-- .../test_async_multi_range_downloader.py | 155 ++++-------------- 2 files changed, 51 insertions(+), 147 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index a5b0ec2a4..bc60262ed 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -30,34 +30,29 @@ class AsyncMultiRangeDownloader: """Provides an interface for downloading multiple ranges of a GCS ``Object`` concurrently. - Example usage: .. 
code-block:: python - - ``` - client = AsyncGrpcClient().grpc_client - mrd = await AsyncMultiRangeDownloader.create_mrd( - client, bucket_name="chandrasiri-rs", object_name="test_open9" - ) - my_buff1 = BytesIO() - my_buff2 = BytesIO() - my_buff3 = BytesIO() - my_buff4 = BytesIO() - buffers = [my_buff1, my_buff2, my_buff3, my_buff4] - await mrd.download_ranges( - [ - (0, 100, my_buff1), - (100, 200, my_buff2), - (200, 300, my_buff3), - (300, 400, my_buff4), - ] - ) - for buff in buffers: - print("downloaded bytes", buff.getbuffer().nbytes) - ``` - + client = AsyncGrpcClient().grpc_client + mrd = await AsyncMultiRangeDownloader.create_mrd( + client, bucket_name="chandrasiri-rs", object_name="test_open9" + ) + my_buff1 = BytesIO() + my_buff2 = BytesIO() + my_buff3 = BytesIO() + my_buff4 = BytesIO() + buffers = [my_buff1, my_buff2, my_buff3, my_buff4] + await mrd.download_ranges( + [ + (0, 100, my_buff1), + (100, 200, my_buff2), + (200, 300, my_buff3), + (300, 400, my_buff4), + ] + ) + for buff in buffers: + print("downloaded bytes", buff.getbuffer().nbytes) """ diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index e2c54b5f7..52a138746 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -14,153 +14,62 @@ import pytest from unittest import mock +from unittest.mock import AsyncMock from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) from io import BytesIO +from google.cloud import _storage_v2 + _TEST_BUCKET_NAME = "test-bucket" _TEST_OBJECT_NAME = "test-object" -_TEST_GENERATION = 123456789 +_TEST_GENERATION_NUMBER = 123456789 _TEST_READ_HANDLE = b"test-handle" -@pytest.fixture -def mock_async_grpc_client(): - """A mock for the AsyncGrpcClient.""" - return mock.Mock(name="AsyncGrpcClient") - - -@pytest.fixture -def mock_async_read_object_stream(): - 
"""A mock for the _AsyncReadObjectStream class.""" - with mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" - ) as mock_stream_cls: - mock_instance = mock.AsyncMock() - mock_instance.generation_number = 12345 - mock_instance.read_handle = b"test-read-handle" - mock_stream_cls.return_value = mock_instance - yield mock_stream_cls - - -def test_init(mock_async_grpc_client): - """Test the constructor of MultiRangeDownloader.""" - client = mock_async_grpc_client - - mrd = AsyncMultiRangeDownloader( - client, - bucket_name=_TEST_BUCKET_NAME, - object_name=_TEST_OBJECT_NAME, - generation_number=_TEST_GENERATION, - read_handle=_TEST_READ_HANDLE, - ) - - assert mrd.client is client - assert mrd.bucket_name == _TEST_BUCKET_NAME - assert mrd.object_name == _TEST_OBJECT_NAME - assert mrd.generation_number == _TEST_GENERATION - assert mrd.read_handle == _TEST_READ_HANDLE - assert not hasattr(mrd, "read_obj_str") - - +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" +) +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" +) @pytest.mark.asyncio -async def test_open(mock_async_grpc_client, mock_async_read_object_stream): - """Test the open() method.""" - client = mock_async_grpc_client +async def test_create_mrd(mock_async_grpc_client, async_read_object_stream): - mrd = AsyncMultiRangeDownloader( - client, - bucket_name=_TEST_BUCKET_NAME, - object_name=_TEST_OBJECT_NAME, - ) + # Arrange + mock_stream_instance = async_read_object_stream.return_value + mock_stream_instance.open = AsyncMock() + mock_stream_instance.generation_number = _TEST_GENERATION_NUMBER + mock_stream_instance.read_handle = _TEST_READ_HANDLE - await mrd.open() - - mock_async_read_object_stream.assert_called_once_with( - client=client, - bucket_name=_TEST_BUCKET_NAME, - object_name=_TEST_OBJECT_NAME, - generation_number=None, - 
read_handle=None, + # act + mrd = await AsyncMultiRangeDownloader.create_mrd( + mock_async_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME ) - mock_stream_instance = mock_async_read_object_stream.return_value - mock_stream_instance.open.assert_awaited_once() - - assert mrd.read_obj_str is mock_stream_instance - assert mrd.generation_number == mock_stream_instance.generation_number - assert mrd.read_handle == mock_stream_instance.read_handle - - -@pytest.mark.asyncio -async def test_open_with_generation( - mock_async_grpc_client, mock_async_read_object_stream -): - """Test open() when generation_number is already set.""" - client = mock_async_grpc_client - - mrd = AsyncMultiRangeDownloader( - client, + # Assert + async_read_object_stream.assert_called_once_with( + client=mock_async_grpc_client, bucket_name=_TEST_BUCKET_NAME, object_name=_TEST_OBJECT_NAME, - generation_number=_TEST_GENERATION, - ) - - # The mock stream will have a different generation number to ensure we don't overwrite it. 
- mock_async_read_object_stream.return_value.generation_number = 789 - - await mrd.open() - - mock_async_read_object_stream.assert_called_once_with( - client=client, - bucket_name=_TEST_BUCKET_NAME, - object_name=_TEST_OBJECT_NAME, - generation_number=_TEST_GENERATION, + generation_number=None, read_handle=None, ) + mock_stream_instance.open.assert_called_once() - mock_stream_instance = mock_async_read_object_stream.return_value - mock_stream_instance.open.assert_awaited_once() - + assert mrd.client == mock_async_grpc_client + assert mrd.bucket_name == _TEST_BUCKET_NAME + assert mrd.object_name == _TEST_OBJECT_NAME + assert mrd.generation_number == _TEST_GENERATION_NUMBER + assert mrd.read_handle == _TEST_READ_HANDLE assert mrd.read_obj_str is mock_stream_instance - assert mrd.generation_number == _TEST_GENERATION # Should not be overwritten - assert mrd.read_handle == mock_stream_instance.read_handle - - -@pytest.mark.asyncio -async def test_create_mrd(mock_async_grpc_client): - """Test the create_mrd() factory method.""" - with mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.open", - new_callable=mock.AsyncMock, - ) as mock_open: - client = mock_async_grpc_client - - mrd = await AsyncMultiRangeDownloader.create_mrd( - client, - _TEST_BUCKET_NAME, - _TEST_OBJECT_NAME, - generation_number=_TEST_GENERATION, - ) - - assert isinstance(mrd, AsyncMultiRangeDownloader) - assert mrd.client is client - assert mrd.bucket_name == _TEST_BUCKET_NAME - assert mrd.object_name == _TEST_OBJECT_NAME - assert mrd.generation_number == _TEST_GENERATION - mock_open.assert_awaited_once() - - -def test_create_mrd_from_read_handle(mock_async_grpc_client): - """Test that create_mrd_from_read_handle() raises NotImplementedError.""" - with pytest.raises(NotImplementedError): - AsyncMultiRangeDownloader.create_mrd_from_read_handle( - mock_async_grpc_client, b"handle" - ) +@mock.patch( + 
"google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" +) @pytest.mark.asyncio async def test_download_ranges(mock_async_grpc_client): """Test that download_ranges() raises NotImplementedError.""" From 0e60694e0b3f2cdab140b878dcf4d1c686bbe90a Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Fri, 19 Sep 2025 11:05:00 +0000 Subject: [PATCH 28/44] add checks for invalid inputs --- .../asyncio/async_read_object_stream.py | 15 +++++++++++---- .../unit/asyncio/test_async_read_object_stream.py | 7 +++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 96639e0df..bedfbf7ba 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -16,7 +16,7 @@ This is _experimental module for upcoming support for Rapid Storage. (https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) -APIs may not work as intented and are not stable yet. Feature is not +APIs may not work as intended and are not stable yet. Feature is not GA(Generally Available) yet, please contact your TAM(Technical Account Manager) if you want to use these APIs. @@ -36,7 +36,7 @@ class _AsyncReadObjectStream(_AsyncAbstractObjectStream): This class provides a unix socket-like interface to a GCS ``Object``, with methods like ``open``, ``close``, ``send``, and ``recv``. - :type client: :class:`~google.cloud.storage.asyncio.AsyncGrpcClient` + :type client: :class:`~google.cloud.storage.asyncio.AsyncGrpcClient.grpc_client` :param client: async grpc client to use for making API requests. 
:type bucket_name: str @@ -56,18 +56,25 @@ class _AsyncReadObjectStream(_AsyncAbstractObjectStream): def __init__( self, - client: AsyncGrpcClient, + client: AsyncGrpcClient.grpc_client, bucket_name: str, object_name: str, generation_number: Optional[int] = None, read_handle: Optional[bytes] = None, ) -> None: + if client is None: + raise ValueError("client must be provided") + if bucket_name is None: + raise ValueError("bucket_name must be provided") + if object_name is None: + raise ValueError("object_name must be provided") + super().__init__( bucket_name=bucket_name, object_name=object_name, generation_number=generation_number, ) - self.client: AsyncGrpcClient = client + self.client: AsyncGrpcClient.grpc_client = client self.read_handle: Optional[bytes] = read_handle async def open(self) -> None: diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index a033851cc..89d0571b0 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -62,6 +62,13 @@ def test_init(): assert stream_defaults.read_handle is None +def test_init_with_invalid_parameters(): + """Test the constructor of _AsyncReadObjectStream with invalid params.""" + + with pytest.raises(ValueError): + _AsyncReadObjectStream(None, bucket_name=None, object_name=None) + + @pytest.mark.asyncio async def test_async_methods_are_awaitable(): """Test that the async methods exist and are awaitable.""" From dcb6a552507a3e899f19f500e6c790c7fea92e62 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Tue, 23 Sep 2025 05:36:37 +0000 Subject: [PATCH 29/44] remove duplicated import --- .../storage/_experimental/asyncio/async_read_object_stream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 8561d499f..0ac229c5a 100644 --- 
a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -28,7 +28,6 @@ from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( _AsyncAbstractObjectStream, ) -from google.cloud import _storage_v2 from google.cloud.storage._experimental.asyncio.bidi_async import AsyncBidiRpc From 90d85977456529ed1ac30dd971b20a88b4c5256c Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Tue, 23 Sep 2025 05:38:26 +0000 Subject: [PATCH 30/44] remove unused import --- tests/unit/asyncio/test_async_read_object_stream.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index bc56a5d3f..28566084c 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -16,9 +16,6 @@ from unittest import mock from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( - _AsyncAbstractObjectStream, -) from google.cloud.storage._experimental.asyncio.async_read_object_stream import ( _AsyncReadObjectStream, ) From 521154c98c69587f49221f37e8717c365503fb5a Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 23 Sep 2025 08:50:27 +0000 Subject: [PATCH 31/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/asyncio/test_async_multi_range_downloader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 52a138746..a897520e1 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ 
b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -37,7 +37,6 @@ ) @pytest.mark.asyncio async def test_create_mrd(mock_async_grpc_client, async_read_object_stream): - # Arrange mock_stream_instance = async_read_object_stream.return_value mock_stream_instance.open = AsyncMock() From 8338ab296a677d17c557fe1878da96d83253aa70 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Tue, 23 Sep 2025 10:03:06 +0000 Subject: [PATCH 32/44] fix unit test --- tests/unit/asyncio/test_async_multi_range_downloader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 52a138746..6d707fb8f 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -73,6 +73,9 @@ async def test_create_mrd(mock_async_grpc_client, async_read_object_stream): @pytest.mark.asyncio async def test_download_ranges(mock_async_grpc_client): """Test that download_ranges() raises NotImplementedError.""" - mrd = AsyncMultiRangeDownloader(mock_async_grpc_client) + mrd = AsyncMultiRangeDownloader( + mock_async_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME + ) + with pytest.raises(NotImplementedError): await mrd.download_ranges([(0, 100, BytesIO())]) From ac8e6511aefd475827a36aeaa2a707efcc81725c Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Tue, 23 Sep 2025 10:46:14 +0000 Subject: [PATCH 33/44] remove unused import --- tests/unit/asyncio/test_async_multi_range_downloader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 1dacaab50..edcd3fcc4 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -20,7 +20,6 @@ AsyncMultiRangeDownloader, ) from io import BytesIO -from google.cloud import 
_storage_v2 _TEST_BUCKET_NAME = "test-bucket" From 64690638aa605ecc3404083cf7fd9010cce9ba29 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 24 Sep 2025 05:26:06 +0000 Subject: [PATCH 34/44] implement basic functionality for download_ranges --- .../asyncio/async_multi_range_downloader.py | 120 +++++++++----- .../asyncio/async_read_object_stream.py | 3 +- .../test_async_multi_range_downloader.py | 155 ++++++++++++++---- 3 files changed, 202 insertions(+), 76 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index dfcfc2081..d16f5cf3e 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import Any, List, Optional, Tuple +from typing import List, Optional, Tuple from google.cloud.storage._experimental.asyncio.async_read_object_stream import ( _AsyncReadObjectStream, @@ -25,13 +25,34 @@ from io import BytesIO from google.cloud import _storage_v2 -import sys import asyncio _MAX_READ_RANGES_PER_BIDI_READ_REQUEST = 100 +class Result: + def __init__(self, bytes_requested: int): + # only while instantiation, should not be edited later. + self._bytes_requested: int = bytes_requested + self._bytes_written: int = 0 + + @property + def bytes_requested(self) -> int: + return self._bytes_requested + + @property + def bytes_written(self) -> int: + return self._bytes_written + + @bytes_written.setter + def bytes_written(self, value: int): + self._bytes_written = value + + def __repr__(self): + return f"bytes_requested: {self._bytes_requested}, bytes_written: {self._bytes_written}" + + class AsyncMultiRangeDownloader: """Provides an interface for downloading multiple ranges of a GCS ``Object`` concurrently. 
@@ -154,89 +175,100 @@ async def open(self) -> None: self.read_handle = self.read_obj_str.read_handle return - async def download_ranges(self, read_ranges: List[Tuple[int, int, BytesIO]]) -> Any: + async def download_ranges( + self, read_ranges: List[Tuple[int, int, BytesIO]] + ) -> List[Result]: """Downloads multiple byte ranges from the object into the buffers provided by user. :type read_ranges: List[Tuple[int, int, "BytesIO"]] :param read_ranges: A list of tuples, where each tuple represents a - byte range (start_byte, end_byte, buffer) to download. Buffer has to - be provided by the user, and user has to make sure appropriate + byte range (start_byte, end_byte) and a writable buffer. Buffer has + to be provided by the user, and user has to make sure appropriate memory is available in the application to avoid out-of-memory crash. """ if len(read_ranges) > 1000: - raise Exception("Invalid Input - ranges cannot be more than 1000") + raise Exception( + "Invalid input - length of read_ranges cannot be more than 1000" + ) read_id_to_writable_buffer_dict = {} + exception = None + results = [] for i in range(0, len(read_ranges), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST): - read_range_segment = read_ranges[ + read_ranges_segment = read_ranges[ i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST ] read_ranges_for_bidi_req = [] - for j, read_range in enumerate(read_range_segment): - # generate read_id + for j, read_range in enumerate(read_ranges_segment): read_id = i + j read_id_to_writable_buffer_dict[read_id] = read_range[2] + bytes_requested = read_range[1] + results.append(Result(bytes_requested)) read_ranges_for_bidi_req.append( _storage_v2.ReadRange( read_offset=read_range[0], - read_length=read_range[1] - read_range[0], # end - start + read_length=bytes_requested, read_id=read_id, ) ) - print(read_ranges_for_bidi_req) await self.read_obj_str.send( _storage_v2.BidiReadObjectRequest(read_ranges=read_ranges_for_bidi_req) ) + while len(read_id_to_writable_buffer_dict) > 0: - 
response = await self.read_obj_str.recv() - if response is None: - print("None response received, something went wrong.") - sys.exit(1) - for object_data_range in response.object_data_ranges: - - if object_data_range.read_range is None: - raise Exception("Invalid response, read_range is None") - - data = object_data_range.checksummed_data.content - # bytes_received_in_curr_res = object_data_range.read_range.read_length - read_id = object_data_range.read_range.read_id - buffer = read_id_to_writable_buffer_dict[read_id] - buffer.write(data) - print( - "for read_id ", - read_id, - data, - object_data_range.checksummed_data.crc32c, - ) - if object_data_range.range_end: - del read_id_to_writable_buffer_dict[ - object_data_range.read_range.read_id - ] + try: + response = await self.read_obj_str.recv() + + if response is None: + raise Exception("None response received, something went wrong.") + + for object_data_range in response.object_data_ranges: + if object_data_range.read_range is None: + raise Exception("Invalid response, read_range is None") + + data = object_data_range.checksummed_data.content + read_id = object_data_range.read_range.read_id + buffer = read_id_to_writable_buffer_dict[read_id] + buffer.write(data) + results[read_id].bytes_written += len(data) + + if object_data_range.range_end: + del read_id_to_writable_buffer_dict[ + object_data_range.read_range.read_id + ] + except Exception as exc: + exception = exc + break + return results, exception async def test_mrd(): client = AsyncGrpcClient()._grpc_client mrd = await AsyncMultiRangeDownloader.create_mrd( - client, bucket_name="chandrasiri-rs", object_name="test_open9" + client, bucket_name="chandrasiri-rs", object_name="test_open10" ) - my_buff1 = BytesIO() + my_buff1 = open("my_fav_file.txt", "wb") my_buff2 = BytesIO() my_buff3 = BytesIO() my_buff4 = BytesIO() - buffers = [my_buff1, my_buff2, my_buff3, my_buff4] - await mrd.download_ranges( + results_arr, error_obj = await mrd.download_ranges( [ (0, 100, 
my_buff1), - (100, 200, my_buff2), - (200, 300, my_buff3), - (300, 400, my_buff4), + (100, 20, my_buff2), + (200, 123, my_buff3), + (300, 789, my_buff4), ] ) - for buff in buffers: - print("downloaded bytes", buff.getbuffer().nbytes) + if error_obj: + print("*" * 80) + print(error_obj) + print("*" * 80) + + for result in results_arr: + print("downloaded bytes", result) if __name__ == "__main__": diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 316a6750a..c2cce1b50 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -132,4 +132,5 @@ async def recv(self) -> _storage_v2.BidiReadObjectResponse: :class:`~google.cloud._storage_v2.types.BidiReadObjectResponse`: The response message from the server. """ - return await self.socket_like_rpc.recv() + val = await self.socket_like_rpc.recv() + return val diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index edcd3fcc4..7314c3120 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -15,6 +15,7 @@ import pytest from unittest import mock from unittest.mock import AsyncMock +from google.cloud import _storage_v2 from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, @@ -28,41 +29,127 @@ _TEST_READ_HANDLE = b"test-handle" -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" -) -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" -) -@pytest.mark.asyncio -async def test_create_mrd(mock_async_grpc_client, async_read_object_stream): - # Arrange - mock_stream_instance = 
async_read_object_stream.return_value - mock_stream_instance.open = AsyncMock() - mock_stream_instance.generation_number = _TEST_GENERATION_NUMBER - mock_stream_instance.read_handle = _TEST_READ_HANDLE - - # act - mrd = await AsyncMultiRangeDownloader.create_mrd( - mock_async_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME - ) +class TestAsyncMultiRangeDownloader: - # Assert - async_read_object_stream.assert_called_once_with( - client=mock_async_grpc_client, + # helper method + @pytest.mark.asyncio + async def _make_mock_mrd( + self, + mock_grpc_client, + mock_cls_async_read_object_stream, bucket_name=_TEST_BUCKET_NAME, object_name=_TEST_OBJECT_NAME, - generation_number=None, - read_handle=None, + generation_number=_TEST_GENERATION_NUMBER, + read_handle=_TEST_READ_HANDLE, + ): + mock_stream = mock_cls_async_read_object_stream.return_value + mock_stream.open = AsyncMock() + mock_stream.generation_number = _TEST_GENERATION_NUMBER + mock_stream.read_handle = _TEST_READ_HANDLE + + mrd = await AsyncMultiRangeDownloader.create_mrd( + mock_grpc_client, bucket_name, object_name, generation_number, read_handle + ) + + return mrd + + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + ) + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) - mock_stream_instance.open.assert_called_once() + @pytest.mark.asyncio + async def test_create_mrd( + self, mock_grpc_client, mock_cls_async_read_object_stream + ): + # Arrange & Act + mrd = await self._make_mock_mrd( + mock_grpc_client, mock_cls_async_read_object_stream + ) - assert mrd.client == mock_async_grpc_client - assert mrd.bucket_name == _TEST_BUCKET_NAME - assert mrd.object_name == _TEST_OBJECT_NAME - assert mrd.generation_number == _TEST_GENERATION_NUMBER - assert mrd.read_handle == _TEST_READ_HANDLE - assert mrd.read_obj_str is mock_stream_instance + # Assert + 
mock_cls_async_read_object_stream.assert_called_once_with( + client=mock_grpc_client, + bucket_name=_TEST_BUCKET_NAME, + object_name=_TEST_OBJECT_NAME, + generation_number=_TEST_GENERATION_NUMBER, + read_handle=_TEST_READ_HANDLE, + ) + + mrd.read_obj_str.open.assert_called_once() + + assert mrd.client == mock_grpc_client + assert mrd.bucket_name == _TEST_BUCKET_NAME + assert mrd.object_name == _TEST_OBJECT_NAME + assert mrd.generation_number == _TEST_GENERATION_NUMBER + assert mrd.read_handle == _TEST_READ_HANDLE + + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + ) + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + ) + @pytest.mark.asyncio + async def test_download_ranges( + self, mock_grpc_client, mock_cls_async_read_object_stream + ): + # Arrange + mock_mrd = await self._make_mock_mrd( + mock_grpc_client, mock_cls_async_read_object_stream + ) + mock_mrd.read_obj_str.send = AsyncMock() + mock_mrd.read_obj_str.recv = AsyncMock() + mock_mrd.read_obj_str.recv.return_value = _storage_v2.BidiReadObjectResponse( + object_data_ranges=[ + _storage_v2.ObjectRangeData( + checksummed_data=_storage_v2.ChecksummedData( + content=b"these_are_18_chars", crc32c=123 + ), + range_end=True, + read_range=_storage_v2.ReadRange( + read_offset=0, read_length=18, read_id=0 + ), + ) + ], + ) + + # Act + buffer = BytesIO() + results, error_obj = await mock_mrd.download_ranges([(0, 18, buffer)]) + + # Assert + mock_mrd.read_obj_str.send.assert_called_once_with( + _storage_v2.BidiReadObjectRequest( + read_ranges=[ + _storage_v2.ReadRange(read_offset=0, read_length=18, read_id=0) + ] + ) + ) + assert len(results) == 1 + assert results[0].bytes_requested == 18 + assert results[0].bytes_written == 18 + assert buffer.getvalue() == b"these_are_18_chars" + assert error_obj is None + + +""" +test cases + + +* returns a awaitable which when awaited, fullfills into 
List[Result] object. + +1. if read_ranges > 1000 -> Raise exception + 1. A ? why Exception ? why not ValueError? + 1.B read_ranges == 0 => Raise exception. + +2. read_ranges 0, 1, 3, 45, 101, 500, 1000, 1001 + +3. None response received, raise error ? + return the result object. + +""" @mock.patch( @@ -74,6 +161,12 @@ async def test_download_ranges(mock_async_grpc_client): mrd = AsyncMultiRangeDownloader( mock_async_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME ) - with pytest.raises(NotImplementedError): await mrd.download_ranges([(0, 100, BytesIO())]) + + +""" +1. possible issues + you're keep on sending w/o receiving ? + +""" From 130046f886fa8cce62d7f524627b534dbba112d7 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 24 Sep 2025 05:47:29 +0000 Subject: [PATCH 35/44] doc string for further testcase --- .../test_async_multi_range_downloader.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index b814cdfbe..8e64788d2 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -132,3 +132,101 @@ async def test_download_ranges( assert results[0].bytes_written == 18 assert buffer.getvalue() == b"these_are_18_chars" assert error_obj is None + + +""" +test cases + + +* returns a awaitable which when awaited, fullfills into List[Result] object. + +1. if read_ranges > 1000 -> Raise exception + 1. A ? why Exception ? why not ValueError? + 1.B read_ranges == 0 => Raise exception. + +2. read_ranges length - 0, 1, 3, 45, 101, 500, 1000, 1001 + ranges downloaded should be + * 1KiB + 10Kib + 1 MiB + 10 Mib + 100 MiB + 1 Gib + 10Gib + +3. None response received, raise error ? + return the result object. + +4. memory/ cpu used by the application +""" + +""" +1. possible issues + you're keep on sending w/o receiving ? + + +success + +1. 
create mRD + download ranges , 1 -range, .. n ranges + + success, not error obj + assert on data ? + less than 10 bytes. + + larger bytes should always be on system-tests.s + + + +failure + 1. if > 1000 ranges , fail + create error obj, code details + + 2. type.googleapis.com/google.storage.v2.BidiReadObjectError => raise error + assert on error_obj , code , details + +3. how to simulate connection interrupt ? + after n response , (n+1)th response should raise error. + assert on error_obj , code , details, [results should be partial] + + * actual test ? + + * storage test-bench ? tests conformation tests ? + + + + + +""" + + +""" +test cases + + + + +* returns a awaitable which when awaited, fullfills into List[Result] object. + + +1. if read_ranges > 1000 -> Raise exception + 1. A ? why Exception ? why not ValueError? + 1.B read_ranges == 0 => Raise exception. + + +2. read_ranges 0, 1, 3, 45, 101, 500, 1000, 1001 + + +3. None response received, raise error ? + return the result object. + + +""" + + +""" +1. possible issues + you're keep on sending w/o receiving ? + + +""" From 0c61e873c81e9d823fdd14a80b6af2d85a31ef2f Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 24 Sep 2025 05:48:56 +0000 Subject: [PATCH 36/44] remove unwanted comments --- .../test_async_multi_range_downloader.py | 98 ------------------- 1 file changed, 98 deletions(-) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 8e64788d2..b814cdfbe 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -132,101 +132,3 @@ async def test_download_ranges( assert results[0].bytes_written == 18 assert buffer.getvalue() == b"these_are_18_chars" assert error_obj is None - - -""" -test cases - - -* returns a awaitable which when awaited, fullfills into List[Result] object. - -1. if read_ranges > 1000 -> Raise exception - 1. A ? why Exception ? 
why not ValueError? - 1.B read_ranges == 0 => Raise exception. - -2. read_ranges length - 0, 1, 3, 45, 101, 500, 1000, 1001 - ranges downloaded should be - * 1KiB - 10Kib - 1 MiB - 10 Mib - 100 MiB - 1 Gib - 10Gib - -3. None response received, raise error ? - return the result object. - -4. memory/ cpu used by the application -""" - -""" -1. possible issues - you're keep on sending w/o receiving ? - - -success - -1. create mRD - download ranges , 1 -range, .. n ranges - - success, not error obj - assert on data ? - less than 10 bytes. - - larger bytes should always be on system-tests.s - - - -failure - 1. if > 1000 ranges , fail - create error obj, code details - - 2. type.googleapis.com/google.storage.v2.BidiReadObjectError => raise error - assert on error_obj , code , details - -3. how to simulate connection interrupt ? - after n response , (n+1)th response should raise error. - assert on error_obj , code , details, [results should be partial] - - * actual test ? - - * storage test-bench ? tests conformation tests ? - - - - - -""" - - -""" -test cases - - - - -* returns a awaitable which when awaited, fullfills into List[Result] object. - - -1. if read_ranges > 1000 -> Raise exception - 1. A ? why Exception ? why not ValueError? - 1.B read_ranges == 0 => Raise exception. - - -2. read_ranges 0, 1, 3, 45, 101, 500, 1000, 1001 - - -3. None response received, raise error ? - return the result object. - - -""" - - -""" -1. possible issues - you're keep on sending w/o receiving ? 
- - -""" From 027758c1363ebff86749cd281a5e8c3fc505d62b Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 24 Sep 2025 07:06:25 +0000 Subject: [PATCH 37/44] remove testing code --- .../asyncio/async_multi_range_downloader.py | 31 ------------------- .../asyncio/async_read_object_stream.py | 3 +- 2 files changed, 1 insertion(+), 33 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index d114de778..0691be5be 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -25,7 +25,6 @@ from io import BytesIO from google.cloud import _storage_v2 -import asyncio _MAX_READ_RANGES_PER_BIDI_READ_REQUEST = 100 @@ -243,33 +242,3 @@ async def download_ranges( exception = exc break return results, exception - - -async def test_mrd(): - client = AsyncGrpcClient()._grpc_client - mrd = await AsyncMultiRangeDownloader.create_mrd( - client, bucket_name="chandrasiri-rs", object_name="test_open10" - ) - my_buff1 = open("my_fav_file.txt", "wb") - my_buff2 = BytesIO() - my_buff3 = BytesIO() - my_buff4 = BytesIO() - results_arr, error_obj = await mrd.download_ranges( - [ - (0, 100, my_buff1), - (100, 20, my_buff2), - (200, 123, my_buff3), - (300, 789, my_buff4), - ] - ) - if error_obj: - print("*" * 80) - print(error_obj) - print("*" * 80) - - for result in results_arr: - print("downloaded bytes", result) - - -if __name__ == "__main__": - asyncio.run(test_mrd()) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index c2cce1b50..316a6750a 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -132,5 +132,4 @@ async def recv(self) -> 
_storage_v2.BidiReadObjectResponse: :class:`~google.cloud._storage_v2.types.BidiReadObjectResponse`: The response message from the server. """ - val = await self.socket_like_rpc.recv() - return val + return await self.socket_like_rpc.recv() From 570983f2086da3a7ed1710348c9b931f0661b4c2 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 24 Sep 2025 07:08:37 +0000 Subject: [PATCH 38/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/asyncio/test_async_multi_range_downloader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index b814cdfbe..843d432b1 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -30,7 +30,6 @@ class TestAsyncMultiRangeDownloader: - # helper method @pytest.mark.asyncio async def _make_mock_mrd( From 99ffe2d367a8a61a5dcc4b0880de7ed795599a35 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Wed, 24 Sep 2025 07:16:08 +0000 Subject: [PATCH 39/44] fix doc strings --- .../asyncio/async_multi_range_downloader.py | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 0691be5be..c003d3d66 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -31,8 +31,14 @@ class Result: + """An instance of this class will be populated and returned for each + `read_range` provided to ``download_ranges`` method.
+ + """ + def __init__(self, bytes_requested: int): # only while instantiation, should not be edited later. + # hence there's no setter, only getter is provided. self._bytes_requested: int = bytes_requested self._bytes_written: int = 0 @@ -64,21 +70,29 @@ class AsyncMultiRangeDownloader: mrd = await AsyncMultiRangeDownloader.create_mrd( client, bucket_name="chandrasiri-rs", object_name="test_open9" ) - my_buff1 = BytesIO() + my_buff1 = open('my_fav_file.txt', 'wb') my_buff2 = BytesIO() my_buff3 = BytesIO() - my_buff4 = BytesIO() - buffers = [my_buff1, my_buff2, my_buff3, my_buff4] - await mrd.download_ranges( + my_buff4 = any_object_which_provides_BytesIO_like_interface() + results_arr, error_obj = await mrd.download_ranges( [ (0, 100, my_buff1), - (100, 200, my_buff2), - (200, 300, my_buff3), - (300, 400, my_buff4), + (100, 20, my_buff2), + (200, 123, my_buff3), + (300, 789, my_buff4), ] ) - for buff in buffers: - print("downloaded bytes", buff.getbuffer().nbytes) + if error_obj: + print("Error occurred: ") + print(error_obj) + print( + "please issue call to `download_ranges` with updated" + "`read_ranges` based on diff of (bytes_requested - bytes_written)" + ) + + for result in results_arr: + print("downloaded bytes", result) + """ From fdaf1ae376705a5f108ec1fae12fec4c44bb3ff4 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Thu, 25 Sep 2025 11:51:46 +0000 Subject: [PATCH 40/44] update doc string to describe `read_ranges` format --- .../_experimental/asyncio/async_multi_range_downloader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index c003d3d66..5a8b2cee0 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -76,6 +76,7 @@ class AsyncMultiRangeDownloader: my_buff4 = 
any_object_which_provides_BytesIO_like_interface() results_arr, error_obj = await mrd.download_ranges( [ + # (start_byte, bytes_to_read, writeable_buffer) (0, 100, my_buff1), (100, 20, my_buff2), (200, 123, my_buff3), @@ -196,7 +197,7 @@ async def download_ranges( :type read_ranges: List[Tuple[int, int, "BytesIO"]] :param read_ranges: A list of tuples, where each tuple represents a - byte range (start_byte, end_byte) and a writable buffer. Buffer has + byte range (start_byte, bytes_to_read, writeable_buffer). Buffer has to be provided by the user, and user has to make sure appropriate memory is available in the application to avoid out-of-memory crash. From 112cb682f95b95fe3a8bb1104b6b58fe4ccead56 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Fri, 26 Sep 2025 11:26:41 +0000 Subject: [PATCH 41/44] add test case for ranges > 1000 --- .../asyncio/async_multi_range_downloader.py | 2 +- .../test_async_multi_range_downloader.py | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 5a8b2cee0..762f3e617 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -203,7 +203,7 @@ async def download_ranges( """ if len(read_ranges) > 1000: - raise Exception( + raise ValueError( "Invalid input - length of read_ranges cannot be more than 1000" ) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 843d432b1..ecea39aaa 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -131,3 +131,33 @@ async def test_download_ranges( assert results[0].bytes_written == 18 assert buffer.getvalue() == b"these_are_18_chars" assert 
error_obj is None + + def create_read_ranges(self, num_ranges): + ranges = [] + for i in range(num_ranges): + ranges.append( + _storage_v2.ReadRange(read_offset=i, read_length=1, read_id=i) + ) + return ranges + + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + ) + @pytest.mark.asyncio + async def test_downloading_ranges_with_more_than_1000_should_throw_error( + self, mock_grpc_client + ): + # Arrange + mrd = AsyncMultiRangeDownloader( + mock_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME + ) + + # Act + Assert + with pytest.raises(ValueError) as exc: + await mrd.download_ranges(self.create_read_ranges(1001)) + + # Assert + assert ( + str(exc.value) + == "Invalid input - length of read_ranges cannot be more than 1000" + ) From b56efd9b2db46c2ec43e833d9a64f76ee8535202 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 29 Sep 2025 07:27:57 +0000 Subject: [PATCH 42/44] don't return exception object , raise instead --- .../asyncio/async_multi_range_downloader.py | 45 +++++++++---------- .../test_async_multi_range_downloader.py | 3 +- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 762f3e617..5f0a8b5b2 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -208,7 +208,6 @@ async def download_ranges( ) read_id_to_writable_buffer_dict = {} - exception = None results = [] for i in range(0, len(read_ranges), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST): read_ranges_segment = read_ranges[ @@ -233,27 +232,23 @@ async def download_ranges( ) while len(read_id_to_writable_buffer_dict) > 0: - try: - response = await self.read_obj_str.recv() - - if response is None: - raise Exception("None response received, something went 
wrong.") - - for object_data_range in response.object_data_ranges: - if object_data_range.read_range is None: - raise Exception("Invalid response, read_range is None") - - data = object_data_range.checksummed_data.content - read_id = object_data_range.read_range.read_id - buffer = read_id_to_writable_buffer_dict[read_id] - buffer.write(data) - results[read_id].bytes_written += len(data) - - if object_data_range.range_end: - del read_id_to_writable_buffer_dict[ - object_data_range.read_range.read_id - ] - except Exception as exc: - exception = exc - break - return results, exception + response = await self.read_obj_str.recv() + + if response is None: + raise Exception("None response received, something went wrong.") + + for object_data_range in response.object_data_ranges: + if object_data_range.read_range is None: + raise Exception("Invalid response, read_range is None") + + data = object_data_range.checksummed_data.content + read_id = object_data_range.read_range.read_id + buffer = read_id_to_writable_buffer_dict[read_id] + buffer.write(data) + results[read_id].bytes_written += len(data) + + if object_data_range.range_end: + del read_id_to_writable_buffer_dict[ + object_data_range.read_range.read_id + ] + return results diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index ecea39aaa..b57bc92ca 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -116,7 +116,7 @@ async def test_download_ranges( # Act buffer = BytesIO() - results, error_obj = await mock_mrd.download_ranges([(0, 18, buffer)]) + results = await mock_mrd.download_ranges([(0, 18, buffer)]) # Assert mock_mrd.read_obj_str.send.assert_called_once_with( @@ -130,7 +130,6 @@ async def test_download_ranges( assert results[0].bytes_requested == 18 assert results[0].bytes_written == 18 assert buffer.getvalue() == b"these_are_18_chars" - assert error_obj 
is None def create_read_ranges(self, num_ranges): ranges = [] From 890eac19015b7e9fdfd7809c33f787864d38a742 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 29 Sep 2025 07:28:30 +0000 Subject: [PATCH 43/44] don't return exception object , raise instead --- .../_experimental/asyncio/async_multi_range_downloader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 5f0a8b5b2..e76eb75a9 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -191,7 +191,7 @@ async def open(self) -> None: async def download_ranges( self, read_ranges: List[Tuple[int, int, BytesIO]] - ) -> Tuple[List[Result], Exception]: + ) -> List[Result]: """Downloads multiple byte ranges from the object into the buffers provided by user. @@ -201,6 +201,9 @@ async def download_ranges( to be provided by the user, and user has to make sure appropriate memory is available in the application to avoid out-of-memory crash. + :rtype: :class:`~google.cloud.storage._experimental.asyncio.async_multi_range_downloader.Result` + :returns: An initialized AsyncMultiRangeDownloader instance for reading. 
+ """ if len(read_ranges) > 1000: raise ValueError( From 84b63f18fd16a431e233af979b8c6577597f2a95 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 29 Sep 2025 07:30:29 +0000 Subject: [PATCH 44/44] correct doc string rtype --- .../_experimental/asyncio/async_multi_range_downloader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index e76eb75a9..a458a5e43 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -201,8 +201,9 @@ async def download_ranges( to be provided by the user, and user has to make sure appropriate memory is available in the application to avoid out-of-memory crash. - :rtype: :class:`~google.cloud.storage._experimental.asyncio.async_multi_range_downloader.Result` - :returns: An initialized AsyncMultiRangeDownloader instance for reading. + :rtype: List[:class:`~google.cloud.storage._experimental.asyncio.async_multi_range_downloader.Result`] + :returns: A list of ``Result`` objects, where each object corresponds + to a requested range. """ if len(read_ranges) > 1000: