Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 105 additions & 16 deletions scraper/carriers/fedex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import asyncio
import logging
import re
from datetime import datetime
Expand Down Expand Up @@ -38,17 +39,6 @@
"clearance delay": TrackingStatus.EXCEPTION,
}

DESCRIPTION_MAPPING: dict[str, TrackingStatus] = {
"Picked Up": TrackingStatus.PRE_TRANSIT,
"Shipment information sent to FedEx": TrackingStatus.PRE_TRANSIT,
"In transit": TrackingStatus.IN_TRANSIT,
"At local FedEx facility": TrackingStatus.IN_TRANSIT,
"On FedEx vehicle for delivery": TrackingStatus.OUT_FOR_DELIVERY,
"Out for Delivery": TrackingStatus.OUT_FOR_DELIVERY,
"Delivered": TrackingStatus.DELIVERED,
"Delivery exception": TrackingStatus.EXCEPTION,
}


class FedExProvider(CarrierProvider):
"""FedEx tracking provider via web scraping."""
Expand Down Expand Up @@ -84,11 +74,62 @@ async def async_track(
tracking_number=tracking_number,
)

url = self.tracking_url(tracking_number)
html = await self._get_page_content(browser, url, WAIT_SELECTOR)
self._parse_tracking_page(html, result)
result.last_updated = datetime.now()
return result
context = await browser.new_context()
page = await context.new_page()

try:
_api_data: dict | None = None
_api_event = asyncio.Event()

async def _on_response(response) -> None:
nonlocal _api_data
url = response.url
if _api_data is None and (
"trackingCal" in url
or "api.fedex.com/track/" in url
):
try:
_api_data = await response.json()
_LOGGER.info("FedEx: captured API response from %s", url)
_api_event.set()
except Exception as exc:
_LOGGER.warning("FedEx: failed to parse %s: %s", url, exc)

page.on("response", _on_response)

# Warm-up: visit homepage to establish session/cookies before
# hitting the tracking page (mitigates bot detection)
await page.goto(
"https://www.fedex.com/en-us/home.html",
wait_until="domcontentloaded",
timeout=30000,
)

tracking_url = self.tracking_url(tracking_number)
# domcontentloaded so we don't block on networkidle before the
# tracking API fires; the asyncio.Event below handles the wait
await page.goto(tracking_url, wait_until="domcontentloaded", timeout=30000)

# Wait for the tracking API response — may fire during or after goto
if not _api_event.is_set():
try:
await asyncio.wait_for(_api_event.wait(), timeout=45.0)
except asyncio.TimeoutError:
_LOGGER.warning("FedEx: API response not captured for %s", tracking_number)

if _api_data is not None:
# JSON path: skip wait_for_selector entirely
self._parse_tracking_json(_api_data, result)
else:
# HTML fallback
await page.wait_for_selector(WAIT_SELECTOR, timeout=45000)
html = await page.content()
self._parse_tracking_page(html, result)

result.last_updated = datetime.now()
return result
finally:
await context.close()

def _parse_tracking_page(self, html: str, result: TrackingResult) -> None:
"""Parse the FedEx tracking page HTML."""
Expand Down Expand Up @@ -123,6 +164,54 @@ def _parse_tracking_page(self, html: str, result: TrackingResult) -> None:
if event:
result.events.append(event)

def _parse_tracking_json(self, data: dict, result: TrackingResult) -> None:
    """Parse FedEx tracking JSON from the api.fedex.com/track/v2/shipments API.

    Populates ``result`` in place from the first package in the payload:
    ``raw_status``/``status`` from ``mainStatus``, ``estimated_delivery``
    from ``estDeliveryDt`` and one ``TrackingEvent`` per ``scanEventList``
    entry. If the payload has no usable first package, ``result`` is left
    untouched.

    JSON ``null`` values are treated the same as missing keys so that a
    partially populated response cannot raise out of the parser.

    Args:
        data: Decoded JSON body of the tracking API response.
        result: Tracking result to update.
    """
    try:
        pkg = data["output"]["packages"][0]
    except (KeyError, IndexError, TypeError):
        # TypeError covers JSON null (None) or a non-container at any level.
        return
    if not isinstance(pkg, dict):
        return

    # mainStatus is the human-readable status ("Picked up", "Delivered", etc.)
    main_status = pkg.get("mainStatus") or ""
    if main_status:
        result.raw_status = main_status
        result.status = self._map_status(main_status)

    # estDeliveryDt is ISO-8601 — strip the timezone to keep a naive datetime.
    est_dt = pkg.get("estDeliveryDt") or ""
    if est_dt:
        try:
            result.estimated_delivery = datetime.fromisoformat(est_dt).replace(
                tzinfo=None
            )
        except (TypeError, ValueError):
            # Best effort: an unparseable estimate leaves the field unset.
            pass

    # "or []" guards against an explicit null scanEventList.
    for scan in pkg.get("scanEventList") or []:
        if not isinstance(scan, dict):
            continue

        description = scan.get("status") or ""
        location = scan.get("scanLocation") or ""
        date_str = scan.get("date") or ""  # "YYYY-MM-DD"
        time_str = scan.get("time") or ""  # "HH:MM:SS"

        # Fall back to "now" when the event carries no parseable date.
        timestamp = datetime.now()
        if date_str:
            combined = f"{date_str}T{time_str}" if time_str else date_str
            try:
                # Drop any UTC offset so every timestamp on the result is
                # naive, matching estimated_delivery and the fallback above.
                timestamp = datetime.fromisoformat(combined).replace(tzinfo=None)
            except (TypeError, ValueError):
                try:
                    # The time part may be malformed; retry with the date only.
                    timestamp = datetime.strptime(date_str, "%Y-%m-%d")
                except (TypeError, ValueError):
                    pass

        result.events.append(
            TrackingEvent(
                timestamp=timestamp,
                location=location,
                description=description,
                status=self._map_status(description),
            )
        )

def _map_status(self, raw_status: str) -> TrackingStatus:
"""Map a raw status string to TrackingStatus."""
lower = raw_status.lower()
Expand Down
148 changes: 141 additions & 7 deletions scraper/tests/test_carriers_fedex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock

import pytest

from scraper.carriers.base import TrackingResult
from scraper.carriers.fedex import DESCRIPTION_MAPPING, STATUS_MAPPING, FedExProvider
from scraper.carriers.fedex import STATUS_MAPPING, FedExProvider
from scraper.const import Carrier, TrackingStatus


Expand All @@ -16,6 +18,50 @@ def provider():

VALID_TRACKING_12 = "123456789012"

# Sample tracking-API payload for a delivered package: two scan events,
# newest first, both at the same location.
FEDEX_API_JSON_DELIVERED = {
    "output": {
        "packages": [
            {
                "mainStatus": "Delivered",
                "estDeliveryDt": "2025-01-15T00:00:00+00:00",
                "scanEventList": [
                    # Final delivery scan.
                    {
                        "date": "2025-01-15",
                        "time": "10:00:00",
                        "status": "Delivered",
                        "scanLocation": "Springfield, IL 62701 US",
                    },
                    # Earlier same-day scan, loaded onto the delivery vehicle.
                    {
                        "date": "2025-01-15",
                        "time": "06:00:00",
                        "status": "On FedEx vehicle for delivery",
                        "scanLocation": "Springfield, IL 62701 US",
                    },
                ],
            },
        ],
    },
}

# Sample tracking-API payload for a package still in transit: a single
# departure scan and a future estimated delivery date.
FEDEX_API_JSON_IN_TRANSIT = {
    "output": {
        "packages": [
            {
                "mainStatus": "In transit",
                "estDeliveryDt": "2025-01-16T00:00:00+00:00",
                "scanEventList": [
                    {
                        "date": "2025-01-14",
                        "time": "14:30:00",
                        "status": "Departed FedEx location",
                        "scanLocation": "Memphis, TN 38118 US",
                    },
                ],
            },
        ],
    },
}


class TestValidateTrackingNumber:
def test_valid_12_digit(self, provider):
Expand Down Expand Up @@ -81,16 +127,109 @@ def test_not_found_stays_unknown(self, provider, fedex_not_found_html):
assert result.events == []


class TestParseTrackingJson:
    """Tests for ``FedExProvider._parse_tracking_json`` against sample payloads."""

    @staticmethod
    def _parsed(provider, payload):
        """Run the JSON parser on *payload* and return the populated result."""
        outcome = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
        provider._parse_tracking_json(payload, outcome)
        return outcome

    def test_delivered_status(self, provider):
        outcome = self._parsed(provider, FEDEX_API_JSON_DELIVERED)
        assert outcome.status == TrackingStatus.DELIVERED
        assert outcome.raw_status == "Delivered"

    def test_delivered_estimated_delivery(self, provider):
        eta = self._parsed(provider, FEDEX_API_JSON_DELIVERED).estimated_delivery
        assert eta is not None
        assert eta.month == 1
        assert eta.day == 15
        assert eta.year == 2025

    def test_delivered_events(self, provider):
        events = self._parsed(provider, FEDEX_API_JSON_DELIVERED).events
        assert len(events) == 2
        # Events keep payload order, so the delivery scan comes first.
        newest = events[0]
        assert newest.description == "Delivered"
        assert newest.location == "Springfield, IL 62701 US"
        assert newest.status == TrackingStatus.DELIVERED

    def test_in_transit_status(self, provider):
        outcome = self._parsed(provider, FEDEX_API_JSON_IN_TRANSIT)
        assert outcome.status == TrackingStatus.IN_TRANSIT

    def test_in_transit_events(self, provider):
        events = self._parsed(provider, FEDEX_API_JSON_IN_TRANSIT).events
        assert len(events) == 1
        assert events[0].description == "Departed FedEx location"

    def test_empty_package_list(self, provider):
        # An empty package list must leave the result in its initial state.
        outcome = self._parsed(provider, {"output": {"packages": []}})
        assert outcome.status == TrackingStatus.UNKNOWN
        assert outcome.events == []

    def test_missing_key(self, provider):
        # A payload without "output" must not raise.
        outcome = self._parsed(provider, {})
        assert outcome.status == TrackingStatus.UNKNOWN


class TestAsyncTrack:
@pytest.mark.asyncio
async def test_successful_tracking(
async def test_json_path_two_goto_calls(self, provider, mock_browser):
"""When API response is intercepted, result is populated via JSON path."""
browser, mock_page = mock_browser

captured_callbacks: list = []
mock_page.on = MagicMock(
side_effect=lambda event, cb: captured_callbacks.append(cb)
if event == "response"
else None
)

mock_response = AsyncMock()
mock_response.url = "https://www.fedex.com/trackingCal/track"
mock_response.json = AsyncMock(return_value=FEDEX_API_JSON_DELIVERED)

call_count = 0

async def goto_side_effect(url, **kwargs):
nonlocal call_count
call_count += 1
if call_count == 2 and captured_callbacks:
await captured_callbacks[0](mock_response)

mock_page.goto.side_effect = goto_side_effect

result = await provider.async_track(VALID_TRACKING_12, browser)

assert mock_page.goto.call_count == 2
# First call = homepage warm-up
first_url = mock_page.goto.call_args_list[0].args[0]
assert "home" in first_url
# Second call = tracking URL
second_url = mock_page.goto.call_args_list[1].args[0]
assert VALID_TRACKING_12 in second_url

assert result.carrier == Carrier.FEDEX
assert result.status == TrackingStatus.DELIVERED
assert len(result.events) == 2
assert result.last_updated is not None
# wait_for_selector should NOT be called in the JSON path
mock_page.wait_for_selector.assert_not_called()

@pytest.mark.asyncio
async def test_html_fallback_when_no_api_response(
self, provider, mock_browser, fedex_delivered_html
):
"""When no API response is intercepted, falls back to HTML parsing."""
browser, mock_page = mock_browser
mock_page.content.return_value = fedex_delivered_html

result = await provider.async_track(VALID_TRACKING_12, browser)

assert mock_page.goto.call_count == 2
mock_page.wait_for_selector.assert_called_once()
assert result.carrier == Carrier.FEDEX
assert result.status == TrackingStatus.DELIVERED
assert len(result.events) == 3
Expand Down Expand Up @@ -121,8 +260,3 @@ def test_pre_transit(self):
def test_exception(self):
assert STATUS_MAPPING["delivery exception"] == TrackingStatus.EXCEPTION

def test_description_delivered(self):
assert DESCRIPTION_MAPPING["Delivered"] == TrackingStatus.DELIVERED

def test_description_in_transit(self):
assert DESCRIPTION_MAPPING["In transit"] == TrackingStatus.IN_TRANSIT
Loading