Skip to content

Commit 8861c5e

Browse files
committed
Merge remote-tracking branch 'origin/master' into no-locking-queue
2 parents da2f5df + dbea7d9 commit 8861c5e

File tree

7 files changed

+84
-17
lines changed

7 files changed

+84
-17
lines changed

docs/04_upgrading/upgrading_to_v3.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ Some changes in the related model classes:
6969
## Removed Actor.config property
7070
- `Actor.config` property has been removed. Use `Actor.configuration` instead.
7171

72+
## Default storage ids in configuration changed to None
73+
- `Configuration.default_key_value_store_id` changed from `'default'` to `None`.
74+
- `Configuration.default_dataset_id` changed from `'default'` to `None`.
75+
- `Configuration.default_request_queue_id` changed from `'default'` to `None`.
76+
77+
Previously using the default storage without specifying its `id` in `Configuration` would lead to using specific storage with id `'default'`. Now it will use newly created unnamed storage with `'id'` assigned by the Apify platform, consecutive calls to get the default storage will return the same storage.
78+
7279
## Actor initialization and ServiceLocator changes
7380

7481
`Actor` initialization and global `service_locator` services setup is more strict and predictable.
@@ -102,9 +109,6 @@ async def main():
102109
)
103110
```
104111

105-
## Removed Actor.config property
106-
- `Actor.config` property has been removed. Use `Actor.configuration` instead.
107-
108112
### Changes in storage clients
109113

110114
## Explicit control over storage clients used in Actor

src/apify/_configuration.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,37 +142,37 @@ class Configuration(CrawleeConfiguration):
142142
] = None
143143

144144
default_dataset_id: Annotated[
145-
str,
145+
str | None,
146146
Field(
147147
validation_alias=AliasChoices(
148148
'actor_default_dataset_id',
149149
'apify_default_dataset_id',
150150
),
151151
description='Default dataset ID used by the Apify storage client when no ID or name is provided.',
152152
),
153-
] = 'default'
153+
] = None
154154

155155
default_key_value_store_id: Annotated[
156-
str,
156+
str | None,
157157
Field(
158158
validation_alias=AliasChoices(
159159
'actor_default_key_value_store_id',
160160
'apify_default_key_value_store_id',
161161
),
162162
description='Default key-value store ID for the Apify storage client when no ID or name is provided.',
163163
),
164-
] = 'default'
164+
] = None
165165

166166
default_request_queue_id: Annotated[
167-
str,
167+
str | None,
168168
Field(
169169
validation_alias=AliasChoices(
170170
'actor_default_request_queue_id',
171171
'apify_default_request_queue_id',
172172
),
173173
description='Default request queue ID for the Apify storage client when no ID or name is provided.',
174174
),
175-
] = 'default'
175+
] = None
176176

177177
disable_outdated_warning: Annotated[
178178
bool,

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,10 @@ async def open(
124124
)
125125
apify_datasets_client = apify_client_async.datasets()
126126

127-
# Normalize 'default' alias to None
128-
alias = None if alias == 'default' else alias
127+
# Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
128+
# storage aliased as `__default__`
129+
if not any([alias, name, id, configuration.default_dataset_id]):
130+
alias = '__default__'
129131

130132
if alias:
131133
# Check if there is pre-existing alias mapping in the default KVS.
@@ -150,6 +152,11 @@ async def open(
150152
# If none are provided, try to get the default storage ID from environment variables.
151153
elif id is None:
152154
id = configuration.default_dataset_id
155+
if not id:
156+
raise ValueError(
157+
'Dataset "id", "name", or "alias" must be specified, '
158+
'or a default dataset ID must be set in the configuration.'
159+
)
153160

154161
# Now create the client for the determined ID
155162
apify_dataset_client = apify_client_async.dataset(dataset_id=id)

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,10 @@ async def open(
115115
)
116116
apify_kvss_client = apify_client_async.key_value_stores()
117117

118-
# Normalize 'default' alias to None
119-
alias = None if alias == 'default' else alias
118+
# Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
119+
# unnamed storage aliased as `__default__`
120+
if not any([alias, name, id, configuration.default_key_value_store_id]):
121+
alias = '__default__'
120122

121123
if alias:
122124
# Check if there is pre-existing alias mapping in the default KVS.
@@ -142,6 +144,11 @@ async def open(
142144
# If none are provided, try to get the default storage ID from environment variables.
143145
elif id is None:
144146
id = configuration.default_key_value_store_id
147+
if not id:
148+
raise ValueError(
149+
'KeyValueStore "id", "name", or "alias" must be specified, '
150+
'or a default KeyValueStore ID must be set in the configuration.'
151+
)
145152

146153
# Now create the client for the determined ID
147154
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,10 @@ async def open(
157157
)
158158
apify_rqs_client = apify_client_async.request_queues()
159159

160-
# Normalize 'default' alias to None
161-
alias = None if alias == 'default' else alias
160+
# Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to
161+
# unnamed storage aliased as `__default__`
162+
if not any([alias, name, id, configuration.default_request_queue_id]):
163+
alias = '__default__'
162164

163165
if alias:
164166
# Check if there is pre-existing alias mapping in the default KVS.
@@ -183,6 +185,11 @@ async def open(
183185
# If none are provided, try to get the default storage ID from environment variables.
184186
elif id is None:
185187
id = configuration.default_request_queue_id
188+
if not id:
189+
raise ValueError(
190+
'RequestQueue "id", "name", or "alias" must be specified, '
191+
'or a default default_request_queue_id ID must be set in the configuration.'
192+
)
186193

187194
# Use suitable client_key to make `hadMultipleClients` response of Apify API useful.
188195
# It should persist across migrated or resurrected Actor runs on the Apify platform.

src/apify/storage_clients/_apify/_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ async def _get_alias_map(cls) -> dict[str, str]:
7676
Returns:
7777
Map of aliases and storage ids.
7878
"""
79-
if not cls._alias_map:
79+
if not cls._alias_map and Configuration.get_global_configuration().is_at_home:
8080
default_kvs_client = await _get_default_kvs_client()
8181

8282
record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
@@ -156,7 +156,8 @@ async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
156156
min_delay_between_retries_millis=500,
157157
timeout_secs=360,
158158
)
159-
159+
if not configuration.default_key_value_store_id:
160+
raise ValueError("'Configuration.default_key_value_store_id' must be set.")
160161
return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)
161162

162163

tests/integration/test_apify_storages.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,47 @@ async def test_alias_concurrent_creation_local(
3535
await storage.drop()
3636

3737

38+
@pytest.mark.parametrize(
39+
'storage_type',
40+
[Dataset, KeyValueStore, RequestQueue],
41+
)
42+
async def test_unnamed_default_without_config(
43+
storage_type: Dataset | KeyValueStore | RequestQueue, apify_token: str
44+
) -> None:
45+
"""Test that default Apify storage used locally is unnamed storage."""
46+
service_locator.set_configuration(Configuration(token=apify_token))
47+
service_locator.set_storage_client(ApifyStorageClient())
48+
49+
# Open storage and make sure it has no name and it has id
50+
storage = await storage_type.open()
51+
assert storage.name is None
52+
assert storage.id
53+
54+
# Make sure the same instance is returned when opened again without name or alias
55+
storage_again = await storage_type.open()
56+
assert storage is storage_again
57+
58+
await storage.drop()
59+
60+
61+
@pytest.mark.parametrize(
62+
'storage_type',
63+
[Dataset, KeyValueStore, RequestQueue],
64+
)
65+
async def test_aliases_not_stored_on_platform_when_local(
66+
storage_type: Dataset | KeyValueStore | RequestQueue, apify_token: str
67+
) -> None:
68+
"""Test that default Apify storage used locally is not persisting aliases to Apify based default KVS."""
69+
service_locator.set_configuration(Configuration(token=apify_token))
70+
service_locator.set_storage_client(ApifyStorageClient())
71+
async with Actor(configure_logging=False):
72+
await storage_type.open(alias='test')
73+
default_kvs = await Actor.open_key_value_store(force_cloud=True)
74+
75+
# The default KVS should be empty
76+
assert len(await default_kvs.list_keys()) == 0
77+
78+
3879
async def test_actor_full_explicit_storage_init(apify_token: str) -> None:
3980
service_locator.set_configuration(Configuration(token=apify_token))
4081
service_locator.set_storage_client(

0 commit comments

Comments
 (0)