Skip to content

Commit 4c6dac1

Browse files
authored
Add server-side validation for fleet configuration subtypes (#3848)
Forbids mixing cloud and SSH fleet configuration properties. Default values (mostly None) are ignored for compatibility. Closes: #2223
1 parent e86c432 commit 4c6dac1

4 files changed

Lines changed: 249 additions & 120 deletions

File tree

mkdocs/docs/reference/dstack.yml/fleet.md

Lines changed: 71 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2,76 +2,93 @@
22

33
The `fleet` configuration type allows creating and updating fleets.
44

5-
## Root reference
65

7-
#SCHEMA# dstack._internal.core.models.fleets.FleetConfiguration
8-
overrides:
9-
show_root_heading: false
10-
type:
11-
required: true
6+
=== "Backend fleet"
127

13-
### `ssh_config` { data-toc-label="ssh_config" }
8+
## Root reference
149

15-
#SCHEMA# dstack._internal.core.models.fleets.SSHParams
16-
overrides:
17-
show_root_heading: false
18-
item_id_prefix: ssh_config-
10+
#SCHEMA# dstack._internal.core.models.fleets.BackendFleetConfiguration
11+
overrides:
12+
show_root_heading: false
13+
type:
14+
required: true
15+
nodes:
16+
required: true
1917

20-
#### `ssh_config.proxy_jump` { #ssh_config-proxy_jump data-toc-label="proxy_jump" }
18+
### `resources`
2119

22-
#SCHEMA# dstack._internal.core.models.fleets.SSHProxyParams
23-
overrides:
24-
show_root_heading: false
25-
item_id_prefix: proxy_jump-
20+
#SCHEMA# dstack._internal.core.models.resources.ResourcesSpec
21+
overrides:
22+
show_root_heading: false
23+
type:
24+
required: true
25+
item_id_prefix: resources-
2626

27-
#### `ssh_config.hosts[n]` { #ssh_config-hosts data-toc-label="hosts" }
27+
#### `resources.cpu` { #resources-cpu data-toc-label="cpu" }
2828

29-
#SCHEMA# dstack._internal.core.models.fleets.SSHHostParams
30-
overrides:
31-
show_root_heading: false
29+
#SCHEMA# dstack._internal.core.models.resources.CPUSpec
30+
overrides:
31+
show_root_heading: false
32+
type:
33+
required: true
3234

33-
##### `ssh_config.hosts[n].proxy_jump` { #proxy_jump data-toc-label="hosts[n].proxy_jump" }
35+
#### `resources.gpu` { #resources-gpu data-toc-label="gpu" }
3436

35-
#SCHEMA# dstack._internal.core.models.fleets.SSHProxyParams
36-
overrides:
37-
show_root_heading: false
38-
item_id_prefix: hosts-proxy_jump-
37+
#SCHEMA# dstack._internal.core.models.resources.GPUSpec
38+
overrides:
39+
show_root_heading: false
40+
type:
41+
required: true
3942

40-
### `resources`
43+
#### `resources.disk` { #resources-disk data-toc-label="disk" }
4144

42-
#SCHEMA# dstack._internal.core.models.resources.ResourcesSpec
43-
overrides:
44-
show_root_heading: false
45-
type:
46-
required: true
47-
item_id_prefix: resources-
45+
#SCHEMA# dstack._internal.core.models.resources.DiskSpec
46+
overrides:
47+
show_root_heading: false
48+
type:
49+
required: true
4850

49-
#### `resources.cpu` { #resources-cpu data-toc-label="cpu" }
51+
### `retry`
5052

51-
#SCHEMA# dstack._internal.core.models.resources.CPUSpec
52-
overrides:
53-
show_root_heading: false
54-
type:
55-
required: true
53+
#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry
54+
overrides:
55+
show_root_heading: false
5656

57-
#### `resources.gpu` { #resources-gpu data-toc-label="gpu" }
57+
=== "SSH fleet"
5858

59-
#SCHEMA# dstack._internal.core.models.resources.GPUSpec
60-
overrides:
61-
show_root_heading: false
62-
type:
63-
required: true
59+
## Root reference
6460

65-
#### `resources.disk` { #resources-disk data-toc-label="disk" }
61+
#SCHEMA# dstack._internal.core.models.fleets.SSHFleetConfiguration
62+
overrides:
63+
show_root_heading: false
64+
type:
65+
required: true
66+
ssh_config:
67+
required: true
6668

67-
#SCHEMA# dstack._internal.core.models.resources.DiskSpec
68-
overrides:
69-
show_root_heading: false
70-
type:
71-
required: true
69+
### `ssh_config` { data-toc-label="ssh_config" }
7270

73-
### `retry`
71+
#SCHEMA# dstack._internal.core.models.fleets.SSHParams
72+
overrides:
73+
show_root_heading: false
74+
item_id_prefix: ssh_config-
7475

75-
#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry
76-
overrides:
77-
show_root_heading: false
76+
#### `ssh_config.proxy_jump` { #ssh_config-proxy_jump data-toc-label="proxy_jump" }
77+
78+
#SCHEMA# dstack._internal.core.models.fleets.SSHProxyParams
79+
overrides:
80+
show_root_heading: false
81+
item_id_prefix: proxy_jump-
82+
83+
#### `ssh_config.hosts[n]` { #ssh_config-hosts data-toc-label="hosts" }
84+
85+
#SCHEMA# dstack._internal.core.models.fleets.SSHHostParams
86+
overrides:
87+
show_root_heading: false
88+
89+
##### `ssh_config.hosts[n].proxy_jump` { #proxy_jump data-toc-label="hosts[n].proxy_jump" }
90+
91+
#SCHEMA# dstack._internal.core.models.fleets.SSHProxyParams
92+
overrides:
93+
show_root_heading: false
94+
item_id_prefix: hosts-proxy_jump-

src/dstack/_internal/core/models/fleets.py

Lines changed: 76 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ class SSHHostParams(CoreModel):
8484
"The amount of blocks to split the instance into, a number or `auto`."
8585
" `auto` means as many as possible."
8686
" The number of GPUs and CPUs must be divisible by the number of blocks."
87-
" Defaults to the top-level `blocks` value."
87+
" Defaults to the top-level `blocks` value"
8888
),
8989
ge=1,
9090
),
@@ -130,7 +130,7 @@ class SSHParams(CoreModel):
130130
" If not specified, `dstack` will use IPs from the first found internal network."
131131
)
132132
),
133-
]
133+
] = None
134134

135135
@validator("network")
136136
def validate_network(cls, value):
@@ -206,50 +206,13 @@ def _post_validate_ranges(cls, values):
206206
return values
207207

208208

209-
class InstanceGroupParamsConfig(CoreConfig):
210-
@staticmethod
211-
def schema_extra(schema: Dict[str, Any]):
212-
add_extra_schema_types(
213-
schema["properties"]["nodes"],
214-
extra_types=[{"type": "integer"}, {"type": "string"}],
215-
)
216-
add_extra_schema_types(
217-
schema["properties"]["idle_duration"],
218-
extra_types=[{"type": "string"}],
219-
)
220-
221-
222-
class InstanceGroupParams(CoreModel):
223-
env: Annotated[
224-
Env,
225-
Field(description="The mapping or the list of environment variables"),
226-
] = Env()
227-
ssh_config: Annotated[
228-
Optional[SSHParams],
229-
Field(description="The parameters for adding instances via SSH"),
230-
] = None
231-
232-
nodes: Annotated[
233-
Optional[FleetNodesSpec], Field(description="The number of instances in cloud fleet")
234-
] = None
209+
class CommonFleetConfigurationProps(CoreModel):
210+
type: Literal["fleet"] = "fleet"
211+
name: Annotated[Optional[str], Field(description="The fleet name")] = None
235212
placement: Annotated[
236213
Optional[InstanceGroupPlacement],
237214
Field(description="The placement of instances: `any` or `cluster`"),
238215
] = None
239-
reservation: Annotated[
240-
Optional[str],
241-
Field(
242-
description=(
243-
"The existing reservation to use for instance provisioning."
244-
" Supports AWS Capacity Reservations, AWS Capacity Blocks, and GCP reservations"
245-
)
246-
),
247-
] = None
248-
resources: Annotated[
249-
Optional[ResourcesSpec],
250-
Field(description="The resources requirements"),
251-
] = None
252-
253216
blocks: Annotated[
254217
Union[Literal["auto"], int],
255218
Field(
@@ -263,6 +226,22 @@ class InstanceGroupParams(CoreModel):
263226
),
264227
] = 1
265228

229+
230+
class BackendFleetConfiguraionProps(CoreModel):
231+
nodes: Annotated[Optional[FleetNodesSpec], Field(description="The number of instances")] = None
232+
reservation: Annotated[
233+
Optional[str],
234+
Field(
235+
description=(
236+
"The existing reservation to use for instance provisioning."
237+
" Supports AWS Capacity Reservations, AWS Capacity Blocks, and GCP reservations"
238+
)
239+
),
240+
] = None
241+
resources: Annotated[
242+
Optional[ResourcesSpec],
243+
Field(description="The resources requirements"),
244+
] = None
266245
backends: Annotated[
267246
Optional[List[BackendType]],
268247
Field(description="The backends to consider for provisioning (e.g., `[aws, gcp]`)"),
@@ -314,6 +293,16 @@ class InstanceGroupParams(CoreModel):
314293
)
315294
),
316295
] = None
296+
tags: Annotated[
297+
Optional[Dict[str, str]],
298+
Field(
299+
description=(
300+
"The custom tags to associate with the resource."
301+
" The tags are also propagated to the underlying backend resources."
302+
" If there is a conflict with backend-level tags, does not override them"
303+
)
304+
),
305+
] = None
317306

318307
@validator("nodes", pre=True)
319308
def parse_nodes(cls, v: Optional[Union[dict, str]]) -> Optional[dict]:
@@ -329,35 +318,61 @@ def parse_nodes(cls, v: Optional[Union[dict, str]]) -> Optional[dict]:
329318
parse_idle_duration
330319
)
331320

332-
333-
class FleetProps(CoreModel):
334-
type: Literal["fleet"] = "fleet"
335-
name: Annotated[Optional[str], Field(description="The fleet name")] = None
321+
_validate_tags = validator("tags", pre=True, allow_reuse=True)(tags_validator)
336322

337323

338-
class FleetConfigurationConfig(InstanceGroupParamsConfig):
324+
class BackendFleetConfigurationPropsConfig(CoreConfig):
339325
@staticmethod
340326
def schema_extra(schema: Dict[str, Any]):
341-
InstanceGroupParamsConfig.schema_extra(schema)
327+
add_extra_schema_types(
328+
schema["properties"]["nodes"],
329+
extra_types=[{"type": "integer"}, {"type": "string"}],
330+
)
331+
add_extra_schema_types(
332+
schema["properties"]["idle_duration"],
333+
extra_types=[{"type": "string"}],
334+
)
335+
336+
337+
class SSHFleetConfigurationProps(CoreModel):
    # Properties that belong to the SSH fleet subtype only.
    # This model is mixed into both `FleetConfiguration` and `SSHFleetConfiguration`.
    # The server-side subtype validation iterates over these fields and rejects
    # a backend fleet configuration in which any of them differs from its default,
    # so the declared defaults below are part of the validation contract.

    # Parameters of the hosts to add via SSH; `None` when not specified.
    ssh_config: Annotated[
        Optional[SSHParams],
        Field(description="The parameters for adding instances via SSH"),
    ] = None
    # Environment variables; defaults to `Env()`.
    env: Annotated[
        Env,
        Field(description="The mapping or the list of environment variables"),
    ] = Env()
346+
347+
348+
class FleetConfigurationConfig(BackendFleetConfigurationPropsConfig):
    # Model config that `FleetConfiguration` receives via `generate_dual_core_model`.
    @staticmethod
    def schema_extra(schema: dict[str, Any]):
        # Delegate to the parent hook, which registers the extra accepted schema
        # types for the `nodes` and `idle_duration` properties.
        BackendFleetConfigurationPropsConfig.schema_extra(schema)
342352

343353

344354
class FleetConfiguration(
345-
InstanceGroupParams,
346-
FleetProps,
355+
SSHFleetConfigurationProps,
356+
BackendFleetConfiguraionProps,
357+
CommonFleetConfigurationProps,
347358
generate_dual_core_model(FleetConfigurationConfig),
348359
):
349-
tags: Annotated[
350-
Optional[Dict[str, str]],
351-
Field(
352-
description=(
353-
"The custom tags to associate with the resource."
354-
" The tags are also propagated to the underlying backend resources."
355-
" If there is a conflict with backend-level tags, does not override them"
356-
)
357-
),
358-
] = None
360+
pass
359361

360-
_validate_tags = validator("tags", pre=True, allow_reuse=True)(tags_validator)
362+
363+
# Backend-fleet view of the configuration: backend-specific props plus the common
# props, without the SSH-specific ones. The reference docs render their
# "Backend fleet" schema from this class.
class BackendFleetConfiguration(
    BackendFleetConfiguraionProps,
    CommonFleetConfigurationProps,
    generate_dual_core_model(BackendFleetConfigurationPropsConfig),
):
    """For the documentation only"""
369+
370+
371+
# SSH-fleet view of the configuration: SSH-specific props plus the common props,
# without the backend-specific ones. The reference docs render their "SSH fleet"
# schema from this class.
# NOTE(review): unlike `BackendFleetConfiguration`, no `generate_dual_core_model(...)`
# base is listed here - confirm this is intentional.
class SSHFleetConfiguration(
    SSHFleetConfigurationProps,
    CommonFleetConfigurationProps,
):
    """For the documentation only"""
361376

362377

363378
class FleetSpecConfig(CoreConfig):

src/dstack/_internal/server/services/fleets.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@
2020
from dstack._internal.core.models.envs import Env
2121
from dstack._internal.core.models.fleets import (
2222
ApplyFleetPlanInput,
23+
BackendFleetConfiguraionProps,
2324
Fleet,
2425
FleetConfiguration,
2526
FleetPlan,
2627
FleetSpec,
2728
FleetStatus,
2829
InstanceGroupPlacement,
30+
SSHFleetConfigurationProps,
2931
SSHHostParams,
3032
SSHParams,
3133
)
@@ -1370,10 +1372,7 @@ def _remove_fleet_spec_sensitive_info(spec: FleetSpec):
13701372
def _validate_fleet_spec_and_set_defaults(spec: FleetSpec):
13711373
if spec.configuration.name is not None:
13721374
validate_dstack_resource_name(spec.configuration.name)
1373-
if spec.configuration.ssh_config is None and spec.configuration.nodes is None:
1374-
raise ServerClientError("No ssh_config or nodes specified")
1375-
if spec.configuration.ssh_config is not None and spec.configuration.nodes is not None:
1376-
raise ServerClientError("ssh_config and nodes are mutually exclusive")
1375+
_validate_fleet_configuration_subtype_specific_fields(spec.configuration)
13771376
if spec.configuration.ssh_config is not None:
13781377
_validate_all_ssh_params_specified(spec.configuration.ssh_config)
13791378
if spec.configuration.ssh_config.ssh_key is not None:
@@ -1385,6 +1384,31 @@ def _validate_fleet_spec_and_set_defaults(spec: FleetSpec):
13851384
_set_fleet_spec_defaults(spec)
13861385

13871386

1387+
def _validate_fleet_configuration_subtype_specific_fields(conf: FleetConfiguration):
    """
    Reject fleet configurations that mix SSH fleet and backend fleet properties.

    The fleet subtype is inferred from which of `ssh_config` / `nodes` is set.
    Every field declared by the *other* subtype's props model must still be equal
    to its declared default; default values are tolerated for compatibility.

    Raises:
        ServerClientError: if neither or both of `ssh_config` and `nodes` are set,
            or if any field of the other subtype has a non-default value.

    Returns:
        The `conf` object that was passed in.
    """
    has_ssh_config = conf.ssh_config is not None
    has_nodes = conf.nodes is not None
    if not has_ssh_config and not has_nodes:
        raise ServerClientError("No ssh_config or nodes specified")
    if has_ssh_config and has_nodes:
        raise ServerClientError("ssh_config and nodes are mutually exclusive")

    # Select the props model of the opposite subtype: its fields are the ones
    # that are not allowed for the subtype being validated.
    if has_ssh_config:
        subtype, foreign_props = "SSH", BackendFleetConfiguraionProps
    else:
        subtype, foreign_props = "Backend", SSHFleetConfigurationProps

    unsupported = [
        field.name
        for field in foreign_props.__fields__.values()
        if getattr(conf, field.name) != field.default
    ]
    if unsupported:
        raise ServerClientError(
            f"{subtype} fleet configuration does not support the following fields:"
            f" {unsupported}"
        )
    return conf
1410+
1411+
13881412
def _set_fleet_spec_defaults(spec: FleetSpec):
13891413
if spec.configuration.resources is not None:
13901414
set_resources_defaults(spec.configuration.resources)

0 commit comments

Comments
 (0)