diff --git a/src/crawlee/proxy_configuration.py b/src/crawlee/proxy_configuration.py index 7a46d1de68..55adc671d8 100644 --- a/src/crawlee/proxy_configuration.py +++ b/src/crawlee/proxy_configuration.py @@ -241,6 +241,9 @@ def add_error(self, domain: str, tier: int) -> None: def predict_tier(self, domain: str) -> int: histogram = self._histogram_by_domain[domain] + max_tier = len(histogram) - 1 + + self._current_tier_by_domain[domain] = max(0, min(max_tier, self._current_tier_by_domain[domain])) current_tier = self._current_tier_by_domain[domain] for index, value in enumerate(histogram): @@ -250,13 +253,15 @@ def predict_tier(self, domain: str) -> int: histogram[index] -= 1 left = histogram[current_tier - 1] if current_tier > 0 else float('inf') - right = histogram[current_tier + 1] if current_tier < len(histogram) - 1 else float('inf') + right = histogram[current_tier + 1] if current_tier < max_tier else float('inf') if histogram[current_tier] > min(left, right): self._current_tier_by_domain[domain] = current_tier - 1 if left <= right else current_tier + 1 elif histogram[current_tier] == left: self._current_tier_by_domain[domain] -= 1 + self._current_tier_by_domain[domain] = max(0, min(max_tier, self._current_tier_by_domain[domain])) + return self._current_tier_by_domain[domain] diff --git a/tests/unit/proxy_configuration/test_tiers.py b/tests/unit/proxy_configuration/test_tiers.py index 59db9a43d7..64f6c40e9b 100644 --- a/tests/unit/proxy_configuration/test_tiers.py +++ b/tests/unit/proxy_configuration/test_tiers.py @@ -1,7 +1,9 @@ from __future__ import annotations +from yarl import URL + from crawlee import Request -from crawlee.proxy_configuration import ProxyConfiguration +from crawlee.proxy_configuration import ProxyConfiguration, _ProxyTierTracker async def test_rotates_proxies_uniformly_with_no_request() -> None: @@ -176,3 +178,15 @@ async def test_none_proxy_rotates_proxies_uniformly_with_no_request() -> None: # Proxy rotation starts from the beginning of 
the proxy list after last proxy in tier was used. No proxy used again. info = await config.new_proxy_info(None, None, None) assert info is None, 'First entry in tired_proxy_urls is None. config.new_proxy_info is expected to generate None.' + + +def test_predict_tier_bounds_with_single_tier() -> None: + """With a single tier, predict_tier should always return 0.""" + tracker = _ProxyTierTracker([[URL('http://proxy:1111')]]) + tracker.add_error('example.com', 0) + + # Each call mutates internal state (decaying histogram, potentially shifting tiers). The error score starts + # at 10 and decays by 1 per call, so 20 iterations cover the full decay to zero and beyond. + for _ in range(20): + tier = tracker.predict_tier('example.com') + assert tier == 0