diff --git a/scraper/carriers/fedex.py b/scraper/carriers/fedex.py
index f7b7181..4f0596b 100644
--- a/scraper/carriers/fedex.py
+++ b/scraper/carriers/fedex.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import asyncio
 import logging
 import re
 from datetime import datetime
@@ -38,17 +39,6 @@
     "clearance delay": TrackingStatus.EXCEPTION,
 }
 
-DESCRIPTION_MAPPING: dict[str, TrackingStatus] = {
-    "Picked Up": TrackingStatus.PRE_TRANSIT,
-    "Shipment information sent to FedEx": TrackingStatus.PRE_TRANSIT,
-    "In transit": TrackingStatus.IN_TRANSIT,
-    "At local FedEx facility": TrackingStatus.IN_TRANSIT,
-    "On FedEx vehicle for delivery": TrackingStatus.OUT_FOR_DELIVERY,
-    "Out for Delivery": TrackingStatus.OUT_FOR_DELIVERY,
-    "Delivered": TrackingStatus.DELIVERED,
-    "Delivery exception": TrackingStatus.EXCEPTION,
-}
-
 
 class FedExProvider(CarrierProvider):
     """FedEx tracking provider via web scraping."""
@@ -84,11 +74,68 @@ async def async_track(
             tracking_number=tracking_number,
         )
 
-        url = self.tracking_url(tracking_number)
-        html = await self._get_page_content(browser, url, WAIT_SELECTOR)
-        self._parse_tracking_page(html, result)
-        result.last_updated = datetime.now()
-        return result
+        context = await browser.new_context()
+        page = await context.new_page()
+
+        try:
+            _api_data: dict | None = None
+            _api_event = asyncio.Event()
+
+            async def _on_response(response) -> None:
+                nonlocal _api_data
+                url = response.url
+                if _api_data is not None or not (
+                    "trackingCal" in url
+                    or "api.fedex.com/track/" in url
+                ):
+                    return
+                try:
+                    payload = await response.json()
+                except Exception as exc:
+                    _LOGGER.warning("FedEx: failed to parse %s: %s", url, exc)
+                    return
+                # Re-check after the await: a concurrent handler may have
+                # captured a payload while this body was being read.
+                if _api_data is None:
+                    _api_data = payload
+                    _LOGGER.info("FedEx: captured API response from %s", url)
+                    _api_event.set()
+
+            page.on("response", _on_response)
+
+            # Warm-up: visit homepage to establish session/cookies before
+            # hitting the tracking page (mitigates bot detection)
+            await page.goto(
+                "https://www.fedex.com/en-us/home.html",
+                wait_until="domcontentloaded",
+                timeout=30000,
+            )
+
+            tracking_url = self.tracking_url(tracking_number)
+            # domcontentloaded so we don't block on networkidle before the
+            # tracking API fires; the asyncio.Event below handles the wait
+            await page.goto(tracking_url, wait_until="domcontentloaded", timeout=30000)
+
+            # Wait for the tracking API response — may fire during or after goto
+            if not _api_event.is_set():
+                try:
+                    await asyncio.wait_for(_api_event.wait(), timeout=45.0)
+                except asyncio.TimeoutError:
+                    _LOGGER.warning("FedEx: API response not captured for %s", tracking_number)
+
+            if _api_data is not None:
+                # JSON path: skip wait_for_selector entirely
+                self._parse_tracking_json(_api_data, result)
+            else:
+                # HTML fallback
+                await page.wait_for_selector(WAIT_SELECTOR, timeout=45000)
+                html = await page.content()
+                self._parse_tracking_page(html, result)
+
+            result.last_updated = datetime.now()
+            return result
+        finally:
+            await context.close()
 
     def _parse_tracking_page(self, html: str, result: TrackingResult) -> None:
         """Parse the FedEx tracking page HTML."""
@@ -123,6 +170,65 @@ def _parse_tracking_page(self, html: str, result: TrackingResult) -> None:
                 if event:
                     result.events.append(event)
 
+    def _parse_tracking_json(self, data: dict, result: TrackingResult) -> None:
+        """Parse FedEx tracking JSON from the api.fedex.com/track/v2/shipments API.
+
+        Fills ``result`` in place from the first package in the payload. A
+        payload without a usable ``output.packages[0]`` object leaves
+        ``result`` unchanged.
+        """
+        try:
+            pkg = data["output"]["packages"][0]
+        except (KeyError, IndexError, TypeError):
+            # TypeError covers a JSON null / non-object at any level
+            return
+        if not isinstance(pkg, dict):
+            return
+
+        # mainStatus is the human-readable status ("Picked up", "Delivered", etc.)
+        main_status = pkg.get("mainStatus") or ""
+        if main_status:
+            result.raw_status = main_status
+            result.status = self._map_status(main_status)
+
+        # estDeliveryDt is ISO-8601 — reliable for parsing; strip timezone for naive datetime
+        est_dt = pkg.get("estDeliveryDt") or ""
+        if est_dt:
+            try:
+                result.estimated_delivery = datetime.fromisoformat(est_dt).replace(tzinfo=None)
+            except (TypeError, ValueError):
+                _LOGGER.debug("FedEx: unparseable estDeliveryDt %r", est_dt)
+
+        # "or" fallbacks: .get() defaults do not cover keys that are present
+        # but hold an explicit JSON null
+        for scan in pkg.get("scanEventList") or []:
+            description = scan.get("status") or ""
+            location = scan.get("scanLocation") or ""
+            date_str = scan.get("date") or ""  # "YYYY-MM-DD"
+            time_str = scan.get("time") or ""  # "HH:MM:SS"
+
+            timestamp = datetime.now()
+            if date_str:
+                combined = f"{date_str}T{time_str}" if time_str else date_str
+                try:
+                    # Keep event timestamps naive, like estimated_delivery
+                    timestamp = datetime.fromisoformat(combined).replace(tzinfo=None)
+                except ValueError:
+                    try:
+                        timestamp = datetime.strptime(date_str, "%Y-%m-%d")
+                    except ValueError:
+                        pass
+
+            status = self._map_status(description)
+            result.events.append(
+                TrackingEvent(
+                    timestamp=timestamp,
+                    location=location,
+                    description=description,
+                    status=status,
+                )
+            )
+
     def _map_status(self, raw_status: str) -> TrackingStatus:
         """Map a raw status string to TrackingStatus."""
         lower = raw_status.lower()
diff --git a/scraper/tests/test_carriers_fedex.py b/scraper/tests/test_carriers_fedex.py
index e50cf0f..8261c31 100644
--- a/scraper/tests/test_carriers_fedex.py
+++ b/scraper/tests/test_carriers_fedex.py
@@ -2,10 +2,12 @@
 
 from __future__ import annotations
 
+from unittest.mock import AsyncMock, MagicMock
+
 import pytest
 
 from scraper.carriers.base import TrackingResult
-from scraper.carriers.fedex import DESCRIPTION_MAPPING, STATUS_MAPPING, FedExProvider
+from scraper.carriers.fedex import STATUS_MAPPING, FedExProvider
 from scraper.const import Carrier, TrackingStatus
 
 
@@ -16,6 +18,50 @@ def provider():
 
 VALID_TRACKING_12 = "123456789012"
 
+FEDEX_API_JSON_DELIVERED = {
+    "output": {
+        "packages": [
+            {
+                "mainStatus": "Delivered",
+                "estDeliveryDt": "2025-01-15T00:00:00+00:00",
+                "scanEventList": [
+                    {
+                        "date": "2025-01-15",
+                        "time": "10:00:00",
+                        "status": "Delivered",
+                        "scanLocation": "Springfield, IL 62701 US",
+                    },
+                    {
+                        "date": "2025-01-15",
+                        "time": "06:00:00",
+                        "status": "On FedEx vehicle for delivery",
+                        "scanLocation": "Springfield, IL 62701 US",
+                    },
+                ],
+            }
+        ]
+    }
+}
+
+FEDEX_API_JSON_IN_TRANSIT = {
+    "output": {
+        "packages": [
+            {
+                "mainStatus": "In transit",
+                "estDeliveryDt": "2025-01-16T00:00:00+00:00",
+                "scanEventList": [
+                    {
+                        "date": "2025-01-14",
+                        "time": "14:30:00",
+                        "status": "Departed FedEx location",
+                        "scanLocation": "Memphis, TN 38118 US",
+                    }
+                ],
+            }
+        ]
+    }
+}
+
 
 class TestValidateTrackingNumber:
     def test_valid_12_digit(self, provider):
@@ -81,16 +127,121 @@ def test_not_found_stays_unknown(self, provider, fedex_not_found_html):
         assert result.events == []
 
 
+class TestParseTrackingJson:
+    def test_delivered_status(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        provider._parse_tracking_json(FEDEX_API_JSON_DELIVERED, result)
+        assert result.status == TrackingStatus.DELIVERED
+        assert result.raw_status == "Delivered"
+
+    def test_delivered_estimated_delivery(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        provider._parse_tracking_json(FEDEX_API_JSON_DELIVERED, result)
+        assert result.estimated_delivery is not None
+        assert result.estimated_delivery.month == 1
+        assert result.estimated_delivery.day == 15
+        assert result.estimated_delivery.year == 2025
+
+    def test_delivered_events(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        provider._parse_tracking_json(FEDEX_API_JSON_DELIVERED, result)
+        assert len(result.events) == 2
+        assert result.events[0].description == "Delivered"
+        assert result.events[0].location == "Springfield, IL 62701 US"
+        assert result.events[0].status == TrackingStatus.DELIVERED
+
+    def test_in_transit_status(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        provider._parse_tracking_json(FEDEX_API_JSON_IN_TRANSIT, result)
+        assert result.status == TrackingStatus.IN_TRANSIT
+
+    def test_in_transit_events(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        provider._parse_tracking_json(FEDEX_API_JSON_IN_TRANSIT, result)
+        assert len(result.events) == 1
+        assert result.events[0].description == "Departed FedEx location"
+
+    def test_empty_package_list(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        data = {"output": {"packages": []}}
+        provider._parse_tracking_json(data, result)
+        assert result.status == TrackingStatus.UNKNOWN
+        assert result.events == []
+
+    def test_missing_key(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        provider._parse_tracking_json({}, result)
+        assert result.status == TrackingStatus.UNKNOWN
+
+    def test_null_fields_do_not_raise(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        data = {"output": {"packages": [{"mainStatus": None, "scanEventList": None}]}}
+        provider._parse_tracking_json(data, result)
+        assert result.status == TrackingStatus.UNKNOWN
+        assert result.events == []
+
+    def test_null_output(self, provider):
+        result = TrackingResult(carrier=Carrier.FEDEX, tracking_number="TEST")
+        provider._parse_tracking_json({"output": None}, result)
+        assert result.status == TrackingStatus.UNKNOWN
+
+
 class TestAsyncTrack:
     @pytest.mark.asyncio
-    async def test_successful_tracking(
+    async def test_json_path_two_goto_calls(self, provider, mock_browser):
+        """When API response is intercepted, result is populated via JSON path."""
+        browser, mock_page = mock_browser
+
+        captured_callbacks: list = []
+        mock_page.on = MagicMock(
+            side_effect=lambda event, cb: captured_callbacks.append(cb)
+            if event == "response"
+            else None
+        )
+
+        mock_response = AsyncMock()
+        mock_response.url = "https://www.fedex.com/trackingCal/track"
+        mock_response.json = AsyncMock(return_value=FEDEX_API_JSON_DELIVERED)
+
+        call_count = 0
+
+        async def goto_side_effect(url, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 2 and captured_callbacks:
+                await captured_callbacks[0](mock_response)
+
+        mock_page.goto.side_effect = goto_side_effect
+
+        result = await provider.async_track(VALID_TRACKING_12, browser)
+
+        assert mock_page.goto.call_count == 2
+        # First call = homepage warm-up
+        first_url = mock_page.goto.call_args_list[0].args[0]
+        assert "home" in first_url
+        # Second call = tracking URL
+        second_url = mock_page.goto.call_args_list[1].args[0]
+        assert VALID_TRACKING_12 in second_url
+
+        assert result.carrier == Carrier.FEDEX
+        assert result.status == TrackingStatus.DELIVERED
+        assert len(result.events) == 2
+        assert result.last_updated is not None
+        # wait_for_selector should NOT be called in the JSON path
+        mock_page.wait_for_selector.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_html_fallback_when_no_api_response(
         self, provider, mock_browser, fedex_delivered_html
     ):
+        """When no API response is intercepted, falls back to HTML parsing."""
         browser, mock_page = mock_browser
         mock_page.content.return_value = fedex_delivered_html
 
         result = await provider.async_track(VALID_TRACKING_12, browser)
 
+        assert mock_page.goto.call_count == 2
+        mock_page.wait_for_selector.assert_called_once()
         assert result.carrier == Carrier.FEDEX
         assert result.status == TrackingStatus.DELIVERED
         assert len(result.events) == 3
@@ -121,8 +272,3 @@ def test_pre_transit(self):
     def test_exception(self):
         assert STATUS_MAPPING["delivery exception"] == TrackingStatus.EXCEPTION
 
-    def test_description_delivered(self):
-        assert DESCRIPTION_MAPPING["Delivered"] == TrackingStatus.DELIVERED
-
-    def test_description_in_transit(self):
-        assert DESCRIPTION_MAPPING["In transit"] == TrackingStatus.IN_TRANSIT