From 579f1be7cd86928aa7d5dc55b1200720cd8ac98a Mon Sep 17 00:00:00 2001 From: robpats Date: Sat, 30 Aug 2025 10:10:11 +0000 Subject: [PATCH 1/9] Copy the download progress of compressed data to StreamReader.total_compressed_bytes --- aiohttp/http_parser.py | 2 ++ aiohttp/streams.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 84b59afc486..6dc31fa5b63 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -936,6 +936,7 @@ class DeflateBuffer: def __init__(self, out: StreamReader, encoding: Optional[str]) -> None: self.out = out self.size = 0 + self.out.total_compressed_bytes = self.size self.encoding = encoding self._started_decoding = False @@ -969,6 +970,7 @@ def feed_data(self, chunk: bytes) -> None: return self.size += len(chunk) + self.out.total_compressed_bytes = self.size # RFC1950 # bits 0..3 = CM = 0b1000 = 8 = "deflate" diff --git a/aiohttp/streams.py b/aiohttp/streams.py index db22f162396..3d706eee079 100644 --- a/aiohttp/streams.py +++ b/aiohttp/streams.py @@ -132,6 +132,7 @@ class StreamReader(AsyncStreamReaderMixin): "_eof_callbacks", "_eof_counter", "total_bytes", + "total_compressed_bytes", ) def __init__( @@ -159,6 +160,7 @@ def __init__( self._eof_callbacks: List[Callable[[], None]] = [] self._eof_counter = 0 self.total_bytes = 0 + self.total_compressed_bytes = None def __repr__(self) -> str: info = [self.__class__.__name__] From 5d547ab2ea423c1781bcd5185fdbe1e20e27eb3d Mon Sep 17 00:00:00 2001 From: robpats Date: Sun, 31 Aug 2025 12:15:15 +0000 Subject: [PATCH 2/9] Add StreamReader.total_raw_bytes to report the download progress --- aiohttp/streams.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/aiohttp/streams.py b/aiohttp/streams.py index 3d706eee079..20d7d65963a 100644 --- a/aiohttp/streams.py +++ b/aiohttp/streams.py @@ -252,6 +252,12 @@ async def wait_eof(self) -> None: finally: self._eof_waiter = None + @property + def total_raw_bytes(self) -> int: + if self.total_compressed_bytes is None: + return self.total_bytes + return self.total_compressed_bytes + def unread_data(self, data: bytes) -> None: """rollback reading some data from stream, inserting it to buffer head.""" warnings.warn( From efc469d0ed7cc4596720267a9240f9d3feab1bc4 Mon Sep 17 00:00:00 2001 From: robpats Date: Sat, 6 Sep 2025 12:05:11 +0000 Subject: [PATCH 3/9] Add test for StreamReader.total_raw_bytes --- tests/test_client_functional.py | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index e671aef180a..c25ade69973 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -5586,3 +5586,39 @@ async def handler(request: web.Request) -> web.Response: finally: await asyncio.to_thread(f.close) + + +async def test_stream_reader_total_raw_bytes(aiohttp_client: AiohttpClient) -> None: + """ Test whether StreamReader.total_raw_bytes returns the number of bytes downloaded """ + source_data = b"@dKal^pH>1h|YW1:c2J$" * 4096 + + async def handler(request: web.Request) -> web.Response: + response = web.Response(body=source_data) + response.enable_compression() + return response + + app = web.Application() + app.router.add_get("/", handler) + + client = await aiohttp_client(app) + + # Check for decompressed data + async with client.get("/", headers={"Accept-Encoding": "gzip"}, auto_decompress=True) as resp: + assert resp.headers["Content-Encoding"] == "gzip" + data = await resp.content.read() + assert len(data) == len(source_data) + assert resp.content.total_raw_bytes == int(resp.headers["Content-Length"]) + + # Check for compressed data + async with client.get("/", headers={"Accept-Encoding": "gzip"}, auto_decompress=False) as resp: + assert resp.headers["Content-Encoding"] == "gzip" + data = await resp.content.read() + assert resp.content.total_raw_bytes == len(data) + assert resp.content.total_raw_bytes == int(resp.headers["Content-Length"]) + + # Check for non-compressed data + async with client.get("/", headers={"Accept-Encoding": "identity"}, auto_decompress=True) as resp: + assert "Content-Encoding" not in resp.headers + data = await resp.content.read() + assert resp.content.total_raw_bytes == len(data) + assert resp.content.total_raw_bytes == int(resp.headers["Content-Length"]) From 20135e8f37bab51653e21fe3e8dce2b0de203e28 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 7 Sep 2025 05:32:51 +0000 Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_client_functional.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index c25ade69973..83e4266875d 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -5589,7 +5589,7 @@ async def handler(request: web.Request) -> web.Response: async def test_stream_reader_total_raw_bytes(aiohttp_client: AiohttpClient) -> None: - """ Test whether StreamReader.total_raw_bytes returns the number of bytes downloaded """ + """Test whether StreamReader.total_raw_bytes returns the number of bytes downloaded""" source_data = b"@dKal^pH>1h|YW1:c2J$" * 4096 async def handler(request: web.Request) -> web.Response: @@ -5603,21 +5603,27 @@ async def handler(request: web.Request) -> web.Response: client = await aiohttp_client(app) # Check for decompressed data - async with client.get("/", headers={"Accept-Encoding": "gzip"}, auto_decompress=True) as resp: + async with client.get( + "/", headers={"Accept-Encoding": "gzip"}, auto_decompress=True + ) as resp: assert resp.headers["Content-Encoding"] == "gzip" data = await resp.content.read() assert len(data) == len(source_data) assert resp.content.total_raw_bytes == int(resp.headers["Content-Length"]) # Check for compressed data - async with client.get("/", headers={"Accept-Encoding": "gzip"}, auto_decompress=False) as resp: + async with client.get( + "/", headers={"Accept-Encoding": "gzip"}, auto_decompress=False + ) as resp: assert resp.headers["Content-Encoding"] == "gzip" data = await resp.content.read() assert resp.content.total_raw_bytes == len(data) assert resp.content.total_raw_bytes == int(resp.headers["Content-Length"]) # Check for non-compressed data - async with client.get("/", headers={"Accept-Encoding": "identity"}, auto_decompress=True) as resp: + async with client.get( + "/", headers={"Accept-Encoding": "identity"}, auto_decompress=True + ) as resp: assert "Content-Encoding" not in resp.headers data = await resp.content.read() assert resp.content.total_raw_bytes == len(data) From b8fa5349d7b5b888cc571e9a1cdcb0d1ac940046 Mon Sep 17 00:00:00 2001 From: robpats Date: Sun, 7 Sep 2025 06:02:05 +0000 Subject: [PATCH 5/9] Add documentation for StreamReader.total_raw_bytes --- CHANGES/11483.feature.rst | 2 ++ CONTRIBUTORS.txt | 1 + docs/streams.rst | 9 ++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 CHANGES/11483.feature.rst diff --git a/CHANGES/11483.feature.rst b/CHANGES/11483.feature.rst new file mode 100644 index 00000000000..a8ef8b62c44 --- /dev/null +++ b/CHANGES/11483.feature.rst @@ -0,0 +1,2 @@ +Added ``StreamReader.total_raw_bytes`` to check the number of bytes downloaded +-- by :user:`robpats`. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 935782fe357..f06a94593c4 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -286,6 +286,7 @@ Pahaz Blinov Panagiotis Kolokotronis Pankaj Pandey Parag Jain +Patrick Lee Pau Freixes Paul Colomiets Paul J. Dorn diff --git a/docs/streams.rst b/docs/streams.rst index 6b65b59475b..2729c9d4d96 100644 --- a/docs/streams.rst +++ b/docs/streams.rst @@ -20,7 +20,7 @@ Streaming API :attr:`aiohttp.ClientResponse.content` properties for accessing raw BODY data. -Reading Methods +Reading Attributes and Methods --------------- .. method:: StreamReader.read(n=-1) @@ -109,6 +109,13 @@ Reading Methods to the end of a HTTP chunk. +.. attribute:: StreamReader.total_raw_bytes + + The number of bytes of raw data downloaded. + + Readonly :class:`int` property. + + Asynchronous Iteration Support ------------------------------ From 3b595e4d833923006d38c726d75b24cf76747079 Mon Sep 17 00:00:00 2001 From: robpats <127686110+robpats@users.noreply.github.com> Date: Fri, 19 Sep 2025 19:32:31 +0000 Subject: [PATCH 6/9] Update docs/streams.rst Co-authored-by: Sam Bull --- docs/streams.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/streams.rst b/docs/streams.rst index 2729c9d4d96..9ad79f54b47 100644 --- a/docs/streams.rst +++ b/docs/streams.rst @@ -111,7 +111,7 @@ Reading Attributes and Methods .. attribute:: StreamReader.total_raw_bytes - The number of bytes of raw data downloaded. + The number of bytes of raw data downloaded (before decompression). Readonly :class:`int` property. From 14172ebbcdc81ed085a44feb7edd955df7d55b36 Mon Sep 17 00:00:00 2001 From: robpats <127686110+robpats@users.noreply.github.com> Date: Fri, 19 Sep 2025 19:33:05 +0000 Subject: [PATCH 7/9] Update tests/test_client_functional.py --- tests/test_client_functional.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index 83e4266875d..3433226db49 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -5607,6 +5607,7 @@ async def handler(request: web.Request) -> web.Response: "/", headers={"Accept-Encoding": "gzip"}, auto_decompress=True ) as resp: assert resp.headers["Content-Encoding"] == "gzip" + assert int(resp.headers["Content-Length"]) < len(source_data) data = await resp.content.read() assert len(data) == len(source_data) assert resp.content.total_raw_bytes == int(resp.headers["Content-Length"]) From 614db452c3e3c3dc30b0779a6a145f370ca35def Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sun, 21 Sep 2025 16:52:26 +0100 Subject: [PATCH 8/9] Update aiohttp/streams.py --- aiohttp/streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/streams.py b/aiohttp/streams.py index 20d7d65963a..1b675a1b73d 100644 --- a/aiohttp/streams.py +++ b/aiohttp/streams.py @@ -160,7 +160,7 @@ def __init__( self._eof_callbacks: List[Callable[[], None]] = [] self._eof_counter = 0 self.total_bytes = 0 - self.total_compressed_bytes = None + self.total_compressed_bytes: Optional[int] = None def __repr__(self) -> str: info = [self.__class__.__name__] From 77107787f74c6dc60cedb63ab89d0e62002b547a Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Sun, 21 Sep 2025 16:56:00 +0100 Subject: [PATCH 9/9] Update docs/streams.rst --- docs/streams.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/streams.rst b/docs/streams.rst index 9ad79f54b47..8cb573d8edf 100644 --- a/docs/streams.rst +++ b/docs/streams.rst @@ -21,7 +21,7 @@ Streaming API BODY data. Reading Attributes and Methods ---------------- +------------------------------ .. method:: StreamReader.read(n=-1) :async: