Skip to content

Commit 34ec67b

Browse files
authored
Add trust_env (#97)
1 parent b84b70c commit 34ec67b

9 files changed

Lines changed: 145 additions & 33 deletions

File tree

CHANGES.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
Changes
22
=======
33

4+
0.9.0 (unreleased)
5+
------------------
6+
7+
* Added an opt-in ``trust_env`` parameter to :class:`~zyte_api.AsyncZyteAPI`
8+
and :class:`~zyte_api.ZyteAPI`, and an opt-in ``--trust-env`` CLI flag, to
9+
allow honoring environment-based network settings (e.g. ``HTTP_PROXY`` and
10+
``HTTPS_PROXY``).
11+
412
0.8.2 (2026-02-10)
513
------------------
614

tests/test_async.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22

33
import asyncio
44
from typing import TYPE_CHECKING, Any
5-
from unittest.mock import AsyncMock
5+
from unittest.mock import AsyncMock, patch
66

77
import pytest
88

99
from zyte_api import AggressiveRetryFactory, AsyncZyteAPI, RequestError
10+
from zyte_api._utils import create_session
1011
from zyte_api.aio.client import AsyncClient
1112
from zyte_api.apikey import NoApiKey
1213
from zyte_api.errors import ParsedError
@@ -54,6 +55,23 @@ def test_api_key(client_cls):
5455
client_cls()
5556

5657

58+
@pytest.mark.asyncio
59+
async def test_session_inherits_client_trust_env(mockserver):
60+
client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/"), trust_env=True)
61+
async with client.session() as session:
62+
assert session._session._trust_env is True
63+
64+
65+
@pytest.mark.asyncio
66+
async def test_get_creates_session_with_client_trust_env(mockserver):
67+
client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/"), trust_env=True)
68+
with patch(
69+
"zyte_api._async.create_session", wraps=create_session
70+
) as create_session_mock:
71+
await client.get({"url": "https://a.example"})
72+
assert create_session_mock.call_args.kwargs["trust_env"] is True
73+
74+
5775
@pytest.mark.parametrize(
5876
("client_cls", "get_method"),
5977
(

tests/test_auth.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414

1515

1616
def run_zyte_api(args, env, mockserver):
17+
base_env = {
18+
key: value
19+
for key, value in environ.items()
20+
if key not in {"ZYTE_API_KEY", "ZYTE_API_ETH_KEY"}
21+
}
1722
with NamedTemporaryFile("w") as url_list:
1823
url_list.write("https://a.example\n")
1924
url_list.flush()
@@ -29,7 +34,7 @@ def run_zyte_api(args, env, mockserver):
2934
],
3035
capture_output=True,
3136
check=False,
32-
env={**environ, **env},
37+
env={**base_env, **env},
3338
)
3439

3540

tests/test_main.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import pytest
1212

1313
from zyte_api import RequestError
14-
from zyte_api.__main__ import run
14+
from zyte_api.__main__ import _get_argument_parser, run
1515

1616
if TYPE_CHECKING:
1717
from collections.abc import Iterable
@@ -108,6 +108,7 @@ async def test_run(queries, expected_response, store_errors, exception):
108108
api_url = "https://example.com"
109109
api_key = "fake_key"
110110
retry_errors = True
111+
trust_env = True
111112

112113
# Create a mock for AsyncZyteAPI
113114
async_client_mock = Mock()
@@ -138,8 +139,15 @@ async def test_run(queries, expected_response, store_errors, exception):
138139
api_key=api_key,
139140
retry_errors=retry_errors,
140141
store_errors=store_errors,
142+
trust_env=trust_env,
141143
)
142144

145+
assert async_client_mock.call_args.kwargs["trust_env"] is True
146+
create_session_mock.assert_called_once_with(
147+
connection_pool_size=n_conn,
148+
trust_env=True,
149+
)
150+
143151
assert get_json_content(temporary_file) == expected_response
144152
tmp_path.unlink()
145153

@@ -218,6 +226,12 @@ def test_empty_input(mockserver):
218226
assert result.stderr == b"No input queries found. Is the input file empty?\n"
219227

220228

229+
def test_trust_env_flag_parsing() -> None:
230+
parser = _get_argument_parser()
231+
args = parser.parse_args(["--trust-env", "--api-key", "a", "README.rst"])
232+
assert args.trust_env is True
233+
234+
221235
def test_intype_txt_implicit(mockserver):
222236
result = _run(input_="https://a.example", mockserver=mockserver)
223237
assert not result.returncode

tests/test_sync.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from types import GeneratorType
44
from typing import TYPE_CHECKING, Any
5-
from unittest.mock import AsyncMock
5+
from unittest.mock import AsyncMock, patch
66

77
import pytest
88

@@ -19,6 +19,12 @@ def test_api_key():
1919
ZyteAPI()
2020

2121

22+
def test_trust_env_is_forwarded():
23+
with patch("zyte_api._sync.AsyncZyteAPI") as async_client:
24+
ZyteAPI(api_key="a", trust_env=True)
25+
assert async_client.call_args.kwargs["trust_env"] is True
26+
27+
2228
def test_get(mockserver):
2329
client = ZyteAPI(api_key="a", api_url=mockserver.urljoin("/"))
2430
expected_result = {

tests/test_utils.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@ async def test_create_session_custom_connector():
1212
custom_connector = TCPConnector(limit=1850)
1313
session = create_session(connector=custom_connector)
1414
assert session.connector == custom_connector
15+
await session.close()
16+
17+
18+
@pytest.mark.asyncio
19+
async def test_create_session_trust_env_disabled_by_default():
20+
session = create_session()
21+
assert session._trust_env is False
22+
await session.close()
23+
24+
25+
@pytest.mark.asyncio
26+
async def test_create_session_trust_env_can_be_enabled():
27+
session = create_session(trust_env=True)
28+
assert session._trust_env is True
29+
await session.close()
1530

1631

1732
@pytest.mark.parametrize(
@@ -121,4 +136,5 @@ async def test_deprecated_create_session():
121136
DeprecationWarning,
122137
match=r"^zyte_api\.aio\.client\.create_session is deprecated",
123138
):
124-
_create_session()
139+
session = _create_session()
140+
await session.close()

zyte_api/__main__.py

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import logging
99
import random
1010
import sys
11+
from contextlib import nullcontext
12+
from pathlib import Path
1113
from typing import IO, Any, Literal
1214
from warnings import warn
1315

@@ -42,6 +44,7 @@ async def run(
4244
retry_errors: bool = True,
4345
store_errors: bool | None = None,
4446
eth_key: str | None = None,
47+
trust_env: bool = False,
4548
) -> None:
4649
if stop_on_errors is not _UNSET:
4750
warn(
@@ -65,9 +68,15 @@ def write_output(content: Any) -> None:
6568
elif eth_key:
6669
auth_kwargs["eth_key"] = eth_key
6770
client = AsyncZyteAPI(
68-
n_conn=n_conn, api_url=api_url, retrying=retrying, **auth_kwargs
71+
n_conn=n_conn,
72+
api_url=api_url,
73+
retrying=retrying,
74+
trust_env=trust_env,
75+
**auth_kwargs,
6976
)
70-
async with create_session(connection_pool_size=n_conn) as session:
77+
async with create_session(
78+
connection_pool_size=n_conn, trust_env=trust_env
79+
) as session:
7180
result_iter = client.iter(
7281
queries=queries,
7382
session=session,
@@ -128,7 +137,6 @@ def _get_argument_parser(program_name: str = "zyte-api") -> argparse.ArgumentPar
128137
)
129138
p.add_argument(
130139
"INPUT",
131-
type=argparse.FileType("r", encoding="utf8"),
132140
help=(
133141
"Path to an input file (see 'Command-line client > Input file' in "
134142
"the docs for details)."
@@ -151,8 +159,7 @@ def _get_argument_parser(program_name: str = "zyte-api") -> argparse.ArgumentPar
151159
p.add_argument(
152160
"--output",
153161
"-o",
154-
default=sys.stdout,
155-
type=argparse.FileType("w", encoding="utf8"),
162+
default=None,
156163
help=(
157164
"Path for the output file. Results are written into the output "
158165
"file in JSON Lines format.\n"
@@ -225,6 +232,14 @@ def _get_argument_parser(program_name: str = "zyte-api") -> argparse.ArgumentPar
225232
),
226233
action="store_true",
227234
)
235+
p.add_argument(
236+
"--trust-env",
237+
help=(
238+
"Enable environment-based network settings such as HTTP_PROXY and "
239+
"HTTPS_PROXY for Zyte API requests."
240+
),
241+
action="store_true",
242+
)
228243
return p
229244

230245

@@ -234,7 +249,15 @@ def _main(program_name: str = "zyte-api") -> None:
234249
args = p.parse_args()
235250
logging.basicConfig(stream=sys.stderr, level=getattr(logging, args.loglevel))
236251

237-
queries = read_input(args.INPUT, args.intype)
252+
if args.INPUT == "-":
253+
with nullcontext(sys.stdin) as input_fp:
254+
queries = read_input(input_fp, args.intype)
255+
else:
256+
try:
257+
with Path(args.INPUT).open(encoding="utf8") as input_fp:
258+
queries = read_input(input_fp, args.intype)
259+
except OSError as e:
260+
p.error(f"Cannot open input file {args.INPUT!r}: {e}")
238261
if not queries:
239262
print("No input queries found. Is the input file empty?", file=sys.stderr)
240263
sys.exit(-1)
@@ -245,23 +268,28 @@ def _main(program_name: str = "zyte-api") -> None:
245268
queries = queries[: args.limit]
246269

247270
logger.info(
248-
f"Loaded {len(queries)} urls from {args.INPUT.name}; shuffled: {args.shuffle}"
271+
f"Loaded {len(queries)} urls from {args.INPUT}; shuffled: {args.shuffle}"
249272
)
250273
logger.info(f"Running Zyte API (connections: {args.n_conn})")
251274

252-
loop = asyncio.get_event_loop()
253-
coro = run(
254-
queries,
255-
out=args.output,
256-
n_conn=args.n_conn,
257-
api_url=args.api_url,
258-
api_key=args.api_key,
259-
eth_key=args.eth_key,
260-
retry_errors=not args.dont_retry_errors,
261-
store_errors=args.store_errors,
262-
)
263-
loop.run_until_complete(coro)
264-
loop.close()
275+
run_kwargs = {
276+
"n_conn": args.n_conn,
277+
"api_url": args.api_url,
278+
"api_key": args.api_key,
279+
"eth_key": args.eth_key,
280+
"retry_errors": not args.dont_retry_errors,
281+
"store_errors": args.store_errors,
282+
"trust_env": args.trust_env,
283+
}
284+
if args.output is None or args.output == "-":
285+
with nullcontext(sys.stdout) as out:
286+
asyncio.run(run(queries, out=out, **run_kwargs))
287+
else:
288+
try:
289+
with Path(args.output).open("w", encoding="utf8") as out:
290+
asyncio.run(run(queries, out=out, **run_kwargs))
291+
except OSError as e:
292+
p.error(f"Cannot open output file {args.output!r}: {e}")
265293

266294

267295
if __name__ == "__main__":

zyte_api/_async.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def _post_func(
4545
class _AsyncSession:
4646
def __init__(self, client: AsyncZyteAPI, **session_kwargs: Any):
4747
self._client: AsyncZyteAPI = client
48+
session_kwargs.setdefault("trust_env", client.trust_env)
4849
self._session: aiohttp.ClientSession = create_session(
4950
client.n_conn, **session_kwargs
5051
)
@@ -123,6 +124,7 @@ def __init__(
123124
retrying: AsyncRetrying | None = None,
124125
user_agent: str | None = None,
125126
eth_key: str | None = None,
127+
trust_env: bool = False,
126128
):
127129
if retrying is not None and not isinstance(retrying, AsyncRetrying):
128130
raise ValueError(
@@ -134,6 +136,7 @@ def __init__(
134136
self.agg_stats = AggStats()
135137
self.retrying = retrying or zyte_api_retrying
136138
self.user_agent = user_agent or USER_AGENT
139+
self.trust_env = trust_env
137140
self._semaphore = asyncio.Semaphore(n_conn)
138141
self._auth: str | _x402Handler
139142
self.auth: AuthInfo
@@ -190,6 +193,10 @@ async def get(
190193
) -> dict[str, Any]:
191194
"""Asynchronous equivalent to :meth:`ZyteAPI.get`."""
192195
retrying = retrying or self.retrying
196+
owned_session: aiohttp.ClientSession | None = None
197+
if session is None:
198+
owned_session = create_session(self.n_conn, trust_env=self.trust_env)
199+
session = owned_session
193200
post = _post_func(session)
194201

195202
url = self.api_url + endpoint
@@ -257,14 +264,18 @@ async def request() -> dict[str, Any]:
257264
request = retrying.wraps(request)
258265

259266
try:
260-
# Try to make a request
261-
result = await request()
262-
self.agg_stats.n_success += 1
263-
except Exception:
264-
self.agg_stats.n_fatal_errors += 1
265-
raise
266-
267-
return result
267+
try:
268+
# Try to make a request
269+
result = await request()
270+
self.agg_stats.n_success += 1
271+
except Exception:
272+
self.agg_stats.n_fatal_errors += 1
273+
raise
274+
275+
return result
276+
finally:
277+
if owned_session is not None:
278+
await owned_session.close()
268279

269280
def iter(
270281
self,

zyte_api/_sync.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ class ZyteAPI:
104104
*user_agent* is the user agent string reported to Zyte API. Defaults to
105105
``python-zyte-api/<VERSION>``.
106106
107+
*trust_env* controls whether :mod:`aiohttp` honors environment-based
108+
network settings (e.g. ``HTTP_PROXY`` and ``HTTPS_PROXY``). Defaults to
109+
``False``.
110+
107111
.. tip:: To change the ``User-Agent`` header sent to a target website, use
108112
:http:`request:customHttpRequestHeaders` instead.
109113
"""
@@ -117,6 +121,7 @@ def __init__(
117121
retrying: AsyncRetrying | None = None,
118122
user_agent: str | None = None,
119123
eth_key: str | None = None,
124+
trust_env: bool = False,
120125
):
121126
self._async_client = AsyncZyteAPI(
122127
api_key=api_key,
@@ -125,6 +130,7 @@ def __init__(
125130
retrying=retrying,
126131
user_agent=user_agent,
127132
eth_key=eth_key,
133+
trust_env=trust_env,
128134
)
129135

130136
def get(

0 commit comments

Comments
 (0)