Move transformation function to from client to sdk

Pijukatel · Pijukatel · commit caf9e23ab571 · 2025-08-20T16:35:29.000+02:00
Update leftover id-based variable names
diff --git a/pyproject.toml b/pyproject.toml
@@ -34,7 +34,7 @@ keywords = [
     "scraping",
 ]
 dependencies = [
-    "apify-client@git+https://github.com/apify/apify-client-python.git@remove-request-id",
+    "apify-client>=2.0.0,<3.0.0",
     "apify-shared>=2.0.0,<3.0.0",
     "crawlee@git+https://github.com/apify/crawlee-python.git@remove-request-id",
     "cachetools>=5.5.0",
diff --git a/src/apify/storage_clients/_apify/_request_queue_client.py b/src/apify/storage_clients/_apify/_request_queue_client.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
 
 import asyncio
+import re
+from base64 import b64encode
 from collections import deque
 from datetime import datetime, timedelta, timezone
+from hashlib import sha256
 from logging import getLogger
 from typing import TYPE_CHECKING, Final
 
@@ -320,16 +323,16 @@ async def add_batch_of_requests(
         return api_response
 
     @override
-    async def get_request(self, request_unique_key: str) -> Request | None:
-        """Get a request by ID.
+    async def get_request(self, unique_key: str) -> Request | None:
+        """Get a request by unique key.
 
         Args:
-            request_unique_key: Unique key of the request to get.
+            unique_key: Unique key of the request to get.
 
         Returns:
             The request or None if not found.
         """
-        response = await self._api_client.get_request_by_unique_key(request_unique_key)
+        response = await self._api_client.get_request(unique_key_to_request_id(unique_key))
 
         if response is None:
             return None
@@ -357,23 +360,23 @@ async def fetch_next_request(self) -> Request | None:
                 return None
 
             # Get the next request ID from the queue head
-            next_request_id = self._queue_head.popleft()
+            next_unique_key = self._queue_head.popleft()
 
-        request = await self._get_or_hydrate_request(next_request_id)
+        request = await self._get_or_hydrate_request(next_unique_key)
 
         # Handle potential inconsistency where request might not be in the main table yet
         if request is None:
             logger.debug(
                 'Cannot find a request from the beginning of queue, will be retried later',
-                extra={'nextRequestId': next_request_id},
+                extra={'nextRequestUniqueKey': next_unique_key},
             )
             return None
 
         # If the request was already handled, skip it
         if request.handled_at is not None:
             logger.debug(
                 'Request fetched from the beginning of queue was already handled',
-                extra={'nextRequestId': next_request_id},
+                extra={'nextRequestUniqueKey': next_unique_key},
             )
             return None
 
@@ -382,7 +385,7 @@ async def fetch_next_request(self) -> Request | None:
         if request is None:
             logger.debug(
                 'Request fetched from the beginning of queue was not found in the RQ',
-                extra={'nextRequestId': next_request_id},
+                extra={'nextRequestUniqueKey': next_unique_key},
             )
             return None
 
@@ -509,29 +512,29 @@ async def _ensure_head_is_non_empty(self) -> None:
         # Fetch requests from the API and populate the queue head
         await self._list_head(lock_time=self._DEFAULT_LOCK_TIME)
 
-    async def _get_or_hydrate_request(self, request_id: str) -> Request | None:
-        """Get a request by ID, either from cache or by fetching from API.
+    async def _get_or_hydrate_request(self, unique_key: str) -> Request | None:
+        """Get a request by unique key, either from cache or by fetching from API.
 
         Args:
-            request_id: The ID of the request to get.
+            unique_key: Unique keu of the request to get.
 
         Returns:
             The request if found and valid, otherwise None.
         """
         # First check if the request is in our cache
-        cached_entry = self._requests_cache.get(request_id)
+        cached_entry = self._requests_cache.get(unique_key)
 
         if cached_entry and cached_entry.hydrated:
             # If we have the request hydrated in cache, check if lock is expired
             if cached_entry.lock_expires_at and cached_entry.lock_expires_at < datetime.now(tz=timezone.utc):
                 # Try to prolong the lock if it's expired
                 try:
                     lock_secs = int(self._DEFAULT_LOCK_TIME.total_seconds())
-                    response = await self._prolong_request_lock(request_id, lock_secs=lock_secs)
+                    response = await self._prolong_request_lock(unique_key, lock_secs=lock_secs)
                     cached_entry.lock_expires_at = response.lock_expires_at
                 except Exception:
                     # If prolonging the lock fails, we lost the request
-                    logger.debug(f'Failed to prolong lock for request {request_id}, returning None')
+                    logger.debug(f'Failed to prolong lock for request {unique_key}, returning None')
                     return None
 
             return cached_entry.hydrated
@@ -540,30 +543,29 @@ async def _get_or_hydrate_request(self, request_id: str) -> Request | None:
         try:
             # Try to acquire or prolong the lock
             lock_secs = int(self._DEFAULT_LOCK_TIME.total_seconds())
-            await self._prolong_request_lock(request_id, lock_secs=lock_secs)
+            await self._prolong_request_lock(unique_key, lock_secs=lock_secs)
 
             # Fetch the request data
-            request = await self.get_request(request_id)
+            request = await self.get_request(unique_key)
 
             # If request is not found, release lock and return None
             if not request:
-                await self._delete_request_lock(request_id)
+                await self._delete_request_lock(unique_key)
                 return None
 
             # Update cache with hydrated request
             cache_key = request.unique_key
             self._cache_request(
                 cache_key,
                 ProcessedRequest(
-                    id=request_id,
                     unique_key=request.unique_key,
                     was_already_present=True,
                     was_already_handled=request.handled_at is not None,
                 ),
                 hydrated_request=request,
             )
         except Exception as exc:
-            logger.debug(f'Error fetching or locking request {request_id}: {exc!s}')
+            logger.debug(f'Error fetching or locking request {unique_key}: {exc!s}')
             return None
         else:
             return request
@@ -613,8 +615,8 @@ async def _list_head(
             logger.debug(f'Using cached queue head with {len(self._queue_head)} requests')
             # Create a list of requests from the cached queue head
             items = []
-            for request_id in list(self._queue_head)[:limit]:
-                cached_request = self._requests_cache.get(request_id)
+            for unique_key in list(self._queue_head)[:limit]:
+                cached_request = self._requests_cache.get(unique_key)
                 if cached_request and cached_request.hydrated:
                     items.append(cached_request.hydrated)
 
@@ -671,28 +673,28 @@ async def _list_head(
             )
             self._queue_head.append(request.unique_key)
 
-        for leftover_request_id in leftover_buffer:
+        for leftover_unique_key in leftover_buffer:
             # After adding new requests to the forefront, any existing leftover locked request is kept in the end.
-            self._queue_head.append(leftover_request_id)
+            self._queue_head.append(leftover_unique_key)
         return RequestQueueHead.model_validate(response)
 
     async def _prolong_request_lock(
         self,
-        request_unique_key: str,
+        unique_key: str,
         *,
         lock_secs: int,
     ) -> ProlongRequestLockResponse:
         """Prolong the lock on a specific request in the queue.
 
         Args:
-            request_unique_key: Unique key of the request whose lock is to be prolonged.
+            unique_key: Unique key of the request whose lock is to be prolonged.
             lock_secs: The additional amount of time, in seconds, that the request will remain locked.
 
         Returns:
             A response containing the time at which the lock will expire.
         """
-        response = await self._api_client.prolong_request_lock_by_unique_key(
-            request_unique_key=request_unique_key,
+        response = await self._api_client.prolong_request_lock(
+            request_id=unique_key_to_request_id(unique_key),
             # All requests reaching this code were the tip of the queue at the moment when they were fetched,
             # so if their lock expires, they should be put back to the forefront as their handling is long overdue.
             forefront=True,
@@ -705,37 +707,37 @@ async def _prolong_request_lock(
 
         # Update the cache with the new lock expiration
         for cached_request in self._requests_cache.values():
-            if cached_request.unique_key == request_unique_key:
+            if cached_request.unique_key == unique_key:
                 cached_request.lock_expires_at = result.lock_expires_at
                 break
 
         return result
 
     async def _delete_request_lock(
         self,
-        request_unique_key: str,
+        unique_key: str,
         *,
         forefront: bool = False,
     ) -> None:
         """Delete the lock on a specific request in the queue.
 
         Args:
-            request_unique_key: Unique key of the request to delete the lock.
+            unique_key: Unique key of the request to delete the lock.
             forefront: Whether to put the request in the beginning or the end of the queue after the lock is deleted.
         """
         try:
-            await self._api_client.delete_request_lock_by_unique_key(
-                request_unique_key=request_unique_key,
+            await self._api_client.delete_request_lock(
+                request_id=unique_key_to_request_id(unique_key),
                 forefront=forefront,
             )
 
             # Update the cache to remove the lock
             for cached_request in self._requests_cache.values():
-                if cached_request.unique_key == request_unique_key:
+                if cached_request.unique_key == unique_key:
                     cached_request.lock_expires_at = None
                     break
         except Exception as err:
-            logger.debug(f'Failed to delete request lock for request {request_unique_key}', exc_info=err)
+            logger.debug(f'Failed to delete request lock for request {unique_key}', exc_info=err)
 
     def _cache_request(
         self,
@@ -758,3 +760,26 @@ def _cache_request(
             hydrated=hydrated_request,
             lock_expires_at=None,
         )
+
+
+def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
+    """Generate a deterministic request ID based on a unique key.
+
+    Args:
+        unique_key: The unique key to convert into a request ID.
+        request_id_length: The length of the request ID.
+
+    Returns:
+        A URL-safe, truncated request ID based on the unique key.
+    """
+    # Encode the unique key and compute its SHA-256 hash
+    hashed_key = sha256(unique_key.encode('utf-8')).digest()
+
+    # Encode the hash in base64 and decode it to get a string
+    base64_encoded = b64encode(hashed_key).decode('utf-8')
+
+    # Remove characters that are not URL-safe ('+', '/', or '=')
+    url_safe_key = re.sub(r'(\+|\/|=)', '', base64_encoded)
+
+    # Truncate the key to the desired length
+    return url_safe_key[:request_id_length]
diff --git a/tests/unit/storage_clients/test_apify_request_queue_client.py b/tests/unit/storage_clients/test_apify_request_queue_client.py
@@ -0,0 +1,38 @@
+import pytest
+
+from apify.storage_clients._apify._request_queue_client import unique_key_to_request_id
+
+
+def test_unique_key_to_request_id_length() -> None:
+    unique_key = 'exampleKey123'
+    request_id = unique_key_to_request_id(unique_key, request_id_length=15)
+    assert len(request_id) == 15, 'Request ID should have the correct length.'
+
+
+def test_unique_key_to_request_id_consistency() -> None:
+    unique_key = 'consistentKey'
+    request_id_1 = unique_key_to_request_id(unique_key)
+    request_id_2 = unique_key_to_request_id(unique_key)
+    assert request_id_1 == request_id_2, 'The same unique key should generate consistent request IDs.'
+
+
+@pytest.mark.parametrize(
+    ('unique_key', 'expected_request_id'),
+    [
+        ('abc', 'ungWv48BzpBQUDe'),
+        ('uniqueKey', 'xiWPs083cree7mH'),
+        ('', '47DEQpj8HBSaTIm'),
+        ('测试中文', 'lKPdJkdvw8MXEUp'),
+        ('test+/=', 'XZRQjhoG0yjfnYD'),
+    ],
+    ids=[
+        'basic_abc',
+        'keyword_uniqueKey',
+        'empty_string',
+        'non_ascii_characters',
+        'url_unsafe_characters',
+    ],
+)
+def test_unique_key_to_request_id_matches_known_values(unique_key: str, expected_request_id: str) -> None:
+    request_id = unique_key_to_request_id(unique_key)
+    assert request_id == expected_request_id, f'Unique key "{unique_key}" should produce the expected request ID.'
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -34,7 +34,7 @@ keywords = [`
`34`	`34`	`"scraping",`
`35`	`35`	`]`
`36`	`36`	`dependencies = [`
`37`		`- "apify-client@git+https://github.com/apify/apify-client-python.git@remove-request-id",`
	`37`	`+ "apify-client>=2.0.0,<3.0.0",`
`38`	`38`	`"apify-shared>=2.0.0,<3.0.0",`
`39`	`39`	`"crawlee@git+https://github.com/apify/crawlee-python.git@remove-request-id",`
`40`	`40`	`"cachetools>=5.5.0",`