Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.6
rev: v0.13.1
hooks:
- id: ruff
- id: ruff-check
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/adamchainz/blacken-docs
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# -- Project information -----------------------------------------------------

project = "python-zyte-api"
copyright = "2021, Zyte Group Ltd"
project_copyright = "2021, Zyte Group Ltd"
author = "Zyte Group Ltd"

# The short X.Y version
Expand Down
44 changes: 36 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,35 @@ exclude_also = [
"if TYPE_CHECKING:",
]

[tool.mypy]
allow_untyped_defs = false
implicit_reexport = false

[[tool.mypy.overrides]]
module = "runstats"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "tests.*"
allow_untyped_defs = true

[tool.pytest.ini_options]
filterwarnings = [
"ignore:The zyte_api\\.aio module is deprecated:DeprecationWarning"
]

[tool.ruff.lint]
extend-select = [
# flake8-builtins
"A",
# flake8-async
"ASYNC",
# flake8-bugbear
"B",
# flake8-comprehensions
"C4",
# flake8-commas
"COM",
# pydocstyle
"D",
# flake8-future-annotations
Expand Down Expand Up @@ -84,6 +102,8 @@ extend-select = [
"T10",
# flake8-type-checking
"TC",
# flake8-tidy-imports
"TID",
# pyupgrade
"UP",
# pycodestyle warnings
Expand All @@ -92,6 +112,8 @@ extend-select = [
"YTT",
]
ignore = [
# Trailing comma missing
"COM812",
# Missing docstring in public module
"D100",
# Missing docstring in public class
Expand Down Expand Up @@ -144,21 +166,27 @@ ignore = [
"S101",
]

[tool.ruff.lint.flake8-pytest-style]
parametrize-values-type = "tuple"

[tool.ruff.lint.flake8-tidy-imports]
banned-module-level-imports = ["twisted.internet.reactor"]

[tool.ruff.lint.flake8-type-checking]
runtime-evaluated-decorators = ["attr.s"]

[tool.ruff.lint.isort]
split-on-trailing-comma = false

[tool.ruff.lint.per-file-ignores]
"zyte_api/__init__.py" = ["F401"]
"zyte_api/aio/errors.py" = ["F401"]
"zyte_api/aio/retry.py" = ["F401"]
"tests/*" = ["S"]
"docs/**" = ["B006"]
# Skip PEP 604 suggestions for files with attr classes
"zyte_api/errors.py" = ["UP007"]
"zyte_api/stats.py" = ["UP007"]

[tool.ruff.lint.flake8-pytest-style]
parametrize-values-type = "tuple"

[tool.ruff.lint.flake8-type-checking]
runtime-evaluated-decorators = ["attr.s"]
"zyte_api/errors.py" = ["UP007", "UP045"]
"zyte_api/stats.py" = ["UP007", "UP045"]

[tool.ruff.lint.pydocstyle]
convention = "pep257"
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
author_email="opensource@zyte.com",
url="https://github.com/zytedata/python-zyte-api",
packages=find_packages(exclude=["tests", "examples"]),
package_data={
"zyte_api": ["py.typed"],
},
include_package_data=True,
entry_points={
"console_scripts": ["zyte-api=zyte_api.__main__:_main"],
},
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

@pytest.fixture(scope="session")
def mockserver():
    """Yield a running ``MockServer`` shared by the whole test session.

    The server is entered as a context manager, so it is torn down when the
    session-scoped fixture is finalized.
    """
    # Deliberate function-scope import; the noqa silences ruff's PLC0415
    # (import outside top level).
    from .mockserver import MockServer  # noqa: PLC0415

    with MockServer() as server:
        yield server
16 changes: 10 additions & 6 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import Any
from urllib.parse import urlparse

from twisted.internet import reactor
from twisted.internet.task import deferLater
from twisted.web.resource import Resource
from twisted.web.server import NOT_DONE_YET, Site
Expand All @@ -22,11 +21,11 @@


# https://github.com/scrapy/scrapy/blob/02b97f98e74a994ad3e4d74e7ed55207e508a576/tests/mockserver.py#L27C1-L33C19
def getarg(request, name, default=None, type_=None):
    """Return the first value of argument *name* from ``request.args``.

    Args:
        request: Object exposing an ``args`` mapping from argument name to a
            sequence of values.
        name: Key to look up in ``request.args``.
        default: Value returned, unconverted, when *name* is absent.
        type_: Optional callable applied to the raw value before it is
            returned. The trailing underscore keeps the ``type`` builtin
            from being shadowed.

    Returns:
        The first value stored under *name* (converted with *type_* when one
        is given), or *default* when *name* is not in ``request.args``.
    """
    if name not in request.args:
        return default
    value = request.args[name][0]
    if type_ is not None:
        value = type_(value)
    return value

Expand All @@ -41,6 +40,8 @@ class DropResource(Resource):
isLeaf = True

def deferRequest(self, request, delay, f, *a, **kw):
from twisted.internet import reactor

def _cancelrequest(_):
# silence CancelledError
d.addErrback(lambda _: None)
Expand All @@ -56,7 +57,7 @@ def render_POST(self, request):
return NOT_DONE_YET

def _delayedRender(self, request):
abort = getarg(request, b"abort", 0, type=int)
abort = getarg(request, b"abort", 0, type_=int)
request.write(b"this connection will be dropped\n")
tr = request.channel.transport
try:
Expand Down Expand Up @@ -107,6 +108,7 @@ def render_POST(self, request):
)

request_data = json.loads(request.content.read())
response_data: dict[str, Any]

url = request_data["url"]
domain = urlparse(url).netloc
Expand Down Expand Up @@ -214,7 +216,7 @@ def render_POST(self, request):
}
return json.dumps(response_data).encode()

response_data: dict[str, Any] = {
response_data = {
"url": url,
}

Expand Down Expand Up @@ -269,6 +271,8 @@ def urljoin(self, path):


def main():
from twisted.internet import reactor

parser = argparse.ArgumentParser()
parser.add_argument("resource")
parser.add_argument("--port", type=int)
Expand Down
20 changes: 15 additions & 5 deletions tests/test_async.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from __future__ import annotations

import asyncio
from typing import TYPE_CHECKING, Any
from unittest.mock import AsyncMock

import pytest
Expand All @@ -9,6 +12,9 @@
from zyte_api.errors import ParsedError
from zyte_api.utils import USER_AGENT

if TYPE_CHECKING:
from tests.mockserver import MockServer


@pytest.mark.parametrize(
"client_cls",
Expand Down Expand Up @@ -218,7 +224,7 @@ async def test_semaphore(client_cls, get_method, iter_method, mockserver):


@pytest.mark.asyncio
async def test_session_context_manager(mockserver):
async def test_session_context_manager(mockserver: MockServer) -> None:
client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/"))
queries = [
{"url": "https://a.example", "httpResponseBody": True},
Expand All @@ -236,11 +242,13 @@ async def test_session_context_manager(mockserver):
"httpResponseBody": "PGh0bWw+PGJvZHk+SGVsbG88aDE+V29ybGQhPC9oMT48L2JvZHk+PC9odG1sPg==",
},
]
actual_results = []
actual_results: list[dict[str, Any] | Exception] = []
async with client.session() as session:
assert session._session.connector is not None
assert session._session.connector.limit == client.n_conn
actual_results.append(await session.get(queries[0]))
for future in session.iter(queries[1:]):
result: dict[str, Any] | Exception
try:
result = await future
except Exception as e:
Expand All @@ -266,7 +274,7 @@ async def test_session_context_manager(mockserver):


@pytest.mark.asyncio
async def test_session_no_context_manager(mockserver):
async def test_session_no_context_manager(mockserver: MockServer) -> None:
client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/"))
queries = [
{"url": "https://a.example", "httpResponseBody": True},
Expand All @@ -284,8 +292,10 @@ async def test_session_no_context_manager(mockserver):
"httpResponseBody": "PGh0bWw+PGJvZHk+SGVsbG88aDE+V29ybGQhPC9oMT48L2JvZHk+PC9odG1sPg==",
},
]
actual_results = []
actual_results: list[dict[str, Any] | Exception] = []
result: dict[str, Any] | Exception
session = client.session()
assert session._session.connector is not None
assert session._session.connector.limit == client.n_conn
actual_results.append(await session.get(queries[0]))
for future in session.iter(queries[1:]):
Expand Down Expand Up @@ -318,4 +328,4 @@ def test_retrying_class():
"""A descriptive exception is raised when creating a client with an
AsyncRetrying subclass or similar instead of an instance of it."""
with pytest.raises(ValueError, match="must be an instance of AsyncRetrying"):
AsyncZyteAPI(api_key="foo", retrying=AggressiveRetryFactory)
AsyncZyteAPI(api_key="foo", retrying=AggressiveRetryFactory) # type: ignore[arg-type]
37 changes: 25 additions & 12 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
from __future__ import annotations

import json
import subprocess
from json import JSONDecodeError
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any
from unittest.mock import AsyncMock, Mock, patch

import pytest

from zyte_api import RequestError
from zyte_api.__main__ import run
from zyte_api.aio.errors import RequestError

if TYPE_CHECKING:
from collections.abc import Iterable

from tests.mockserver import MockServer


class MockRequestError(RequestError):
Expand Down Expand Up @@ -41,7 +49,7 @@ def get_json_content(file_object):
pass


def forbidden_domain_response():
def forbidden_domain_response() -> dict[str, Any]:
return {
"type": "/download/temporary-error",
"title": "Temporary Downloading Error",
Expand Down Expand Up @@ -95,7 +103,7 @@ async def fake_exception(value=True):
@pytest.mark.asyncio
async def test_run(queries, expected_response, store_errors, exception):
tmp_path = Path("temporary_file.jsonl")
temporary_file = tmp_path.open("w")
temporary_file = tmp_path.open("w") # noqa: ASYNC230
n_conn = 5
api_url = "https://example.com"
api_key = "fake_key"
Expand Down Expand Up @@ -177,10 +185,12 @@ async def test_run_stop_on_errors_true(mockserver):
assert exc_info.value.query == query


def _run(*, input, mockserver, cli_params=None):
def _run(
*, input_: str, mockserver: MockServer, cli_params: Iterable[str] | None = None
) -> subprocess.CompletedProcess[bytes]:
cli_params = cli_params or ()
with NamedTemporaryFile("w") as url_list:
url_list.write(input)
url_list.write(input_)
url_list.flush()
# Note: Using “python -m zyte_api” instead of “zyte-api” enables
# coverage tracking to work.
Expand All @@ -202,14 +212,14 @@ def _run(*, input, mockserver, cli_params=None):


def test_empty_input(mockserver):
    """Running the CLI on an empty input file fails with a clear message."""
    result = _run(input_="", mockserver=mockserver)
    # A non-zero return code signals failure; nothing is written to stdout.
    assert result.returncode
    assert result.stdout == b""
    assert result.stderr == b"No input queries found. Is the input file empty?\n"


def test_intype_txt_implicit(mockserver):
result = _run(input="https://a.example", mockserver=mockserver)
result = _run(input_="https://a.example", mockserver=mockserver)
assert not result.returncode
assert (
result.stdout
Expand All @@ -219,7 +229,9 @@ def test_intype_txt_implicit(mockserver):

def test_intype_txt_explicit(mockserver):
result = _run(
input="https://a.example", mockserver=mockserver, cli_params=["--intype", "txt"]
input_="https://a.example",
mockserver=mockserver,
cli_params=["--intype", "txt"],
)
assert not result.returncode
assert (
Expand All @@ -230,7 +242,8 @@ def test_intype_txt_explicit(mockserver):

def test_intype_jsonl_implicit(mockserver):
result = _run(
input='{"url": "https://a.example", "browserHtml": true}', mockserver=mockserver
input_='{"url": "https://a.example", "browserHtml": true}',
mockserver=mockserver,
)
assert not result.returncode
assert (
Expand All @@ -241,7 +254,7 @@ def test_intype_jsonl_implicit(mockserver):

def test_intype_jsonl_explicit(mockserver):
result = _run(
input='{"url": "https://a.example", "browserHtml": true}',
input_='{"url": "https://a.example", "browserHtml": true}',
mockserver=mockserver,
cli_params=["--intype", "jl"],
)
Expand All @@ -255,7 +268,7 @@ def test_intype_jsonl_explicit(mockserver):
@pytest.mark.flaky(reruns=16)
def test_limit_and_shuffle(mockserver):
result = _run(
input="https://a.example\nhttps://b.example",
input_="https://a.example\nhttps://b.example",
mockserver=mockserver,
cli_params=["--limit", "1", "--shuffle"],
)
Expand All @@ -268,7 +281,7 @@ def test_limit_and_shuffle(mockserver):

def test_run_non_json_response(mockserver):
result = _run(
input="https://nonjson.example",
input_="https://nonjson.example",
mockserver=mockserver,
)
assert not result.returncode
Expand Down
Loading