Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/crawlee/proxy_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ def add_error(self, domain: str, tier: int) -> None:

def predict_tier(self, domain: str) -> int:
histogram = self._histogram_by_domain[domain]
max_tier = len(histogram) - 1

self._current_tier_by_domain[domain] = max(0, min(max_tier, self._current_tier_by_domain[domain]))
current_tier = self._current_tier_by_domain[domain]

for index, value in enumerate(histogram):
Expand All @@ -250,13 +253,15 @@ def predict_tier(self, domain: str) -> int:
histogram[index] -= 1

left = histogram[current_tier - 1] if current_tier > 0 else float('inf')
right = histogram[current_tier + 1] if current_tier < len(histogram) - 1 else float('inf')
right = histogram[current_tier + 1] if current_tier < max_tier else float('inf')

if histogram[current_tier] > min(left, right):
self._current_tier_by_domain[domain] = current_tier - 1 if left <= right else current_tier + 1
elif histogram[current_tier] == left:
self._current_tier_by_domain[domain] -= 1

self._current_tier_by_domain[domain] = max(0, min(max_tier, self._current_tier_by_domain[domain]))

return self._current_tier_by_domain[domain]


Expand Down
16 changes: 15 additions & 1 deletion tests/unit/proxy_configuration/test_tiers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

from yarl import URL

from crawlee import Request
from crawlee.proxy_configuration import ProxyConfiguration
from crawlee.proxy_configuration import ProxyConfiguration, _ProxyTierTracker


async def test_rotates_proxies_uniformly_with_no_request() -> None:
Expand Down Expand Up @@ -176,3 +178,15 @@ async def test_none_proxy_rotates_proxies_uniformly_with_no_request() -> None:
# Proxy rotation starts from the beginning of the proxy list after last proxy in tier was used. No proxy used again.
info = await config.new_proxy_info(None, None, None)
assert info is None, 'First entry in tired_proxy_urls is None. config.new_proxy_info is expected to generate None.'


def test_predict_tier_bounds_with_single_tier() -> None:
    """A tracker that owns exactly one proxy tier must keep predicting tier 0."""
    domain = 'example.com'
    only_tier = [URL('http://proxy:1111')]
    single_tier_tracker = _ProxyTierTracker([only_tier])
    single_tier_tracker.add_error(domain, 0)

    # predict_tier is stateful: a call can decay histogram entries and can move the domain's current tier.
    # The error score is expected to start at 10 and decay by 1 per call (verify against add_error), so
    # polling 20 times spans the whole decay and several calls after it.
    poll_count = 20
    for _ in range(poll_count):
        predicted = single_tier_tracker.predict_tier(domain)
        assert predicted == 0
Loading