Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions mkdocs/docs/concepts/backends.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,22 @@ There are two ways to configure AWS: using an access key or using the default cr

The `iam:*` permissions are only needed if you specify `iam_instance_profile` to assign to EC2 instances.

The following additional permissions are required when running [multi-EFA instance types](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-eni.html#network-cards) with `public_ips: true`:

```
{
"Effect": "Allow",
"Action": [
"ec2:AllocateAddress",
"ec2:AssociateAddress",
"ec2:DescribeAddresses",
"ec2:DisassociateAddress",
"ec2:ReleaseAddress"
],
"Resource": "*"
}
```

You can also limit permissions to specific resources in your account:

```
Expand Down
4 changes: 0 additions & 4 deletions mkdocs/docs/concepts/fleets.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,6 @@ This property ensures that instances are interconnected. This is required for ru

Fast interconnect is supported on the `aws`, `gcp`, `nebius`, `crusoe`, and `kubernetes` backends. Some backends may require additional configuration.

=== "AWS"
On AWS, `dstack` requires `public_ips` to be set to `false` in the backend configuration.
Refer to the [AWS](../examples/clusters/aws.md) example for more details.

=== "GCP"
On GCP, you may need to configure `extra_vpcs` and `roce_vpcs` in the `gcp` backend configuration.
Refer to the [GCP](../examples/clusters/gcp.md) examples for more details.
Expand Down
6 changes: 0 additions & 6 deletions mkdocs/docs/examples/clusters/aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,10 @@ projects:
creds:
type: default
regions: ["us-west-2"]

public_ips: false
vpc_name: my-custom-vpc
```

</div>

!!! info "Multiple network interfaces"
To use P4, P5, or P6 instances, set `public_ips` to `false` — this allows AWS to attach multiple network interfaces for EFA. In this case, the `dstack` server can reach your VPC’s private subnets.

!!! info "VPC"
If you use a custom VPC, verify that it permits all internal traffic between nodes for EFA to function properly.

Expand Down
169 changes: 167 additions & 2 deletions src/dstack/_internal/core/backends/aws/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ class AWSVolumeBackendData(CoreModel):
iops: int


class AWSInstanceBackendData(CoreModel):
    # AWS-specific per-instance data. It is stored as JSON in the provisioning data's
    # `backend_data` and parsed back when the instance is terminated.

    eip_allocation_id: Optional[str] = None
    """Allocation ID of the Elastic IP attached to a multi-ENI instance (`public_ips: true`)."""


def _ec2client_cache_methodkey(self, ec2_client, *args, **kwargs):
    """
    Builds a cache key from the trailing call arguments only.

    `self` and `ec2_client` are accepted but deliberately left out of the key, so calls
    that differ only in those two arguments map to the same key.
    """
    cache_key = hashkey(*args, **kwargs)
    return cache_key

Expand Down Expand Up @@ -227,6 +233,12 @@ def terminate_instance(
logger.debug("Skipping instance %s termination. Instance not found.", instance_id)
else:
raise e
instance_backend_data = _parse_instance_backend_data(backend_data)
if instance_backend_data.eip_allocation_id is not None:
_release_eip(
ec2_client=ec2_client,
allocation_id=instance_backend_data.eip_allocation_id,
)

def create_instance(
self,
Expand Down Expand Up @@ -395,6 +407,7 @@ def update_provisioning_data(
project_ssh_private_key: str,
):
ec2_resource = self.session.resource("ec2", region_name=provisioning_data.region)
ec2_client = self.session.client("ec2", region_name=provisioning_data.region)
instance = ec2_resource.Instance(provisioning_data.instance_id) # pyright: ignore[reportAttributeAccessIssue]
try:
instance.load()
Expand Down Expand Up @@ -422,8 +435,24 @@ def update_provisioning_data(
f"Failed to get instance IP address. Unknown instance state {state}."
)

hostname = _get_instance_ip(instance, self.config.allocate_public_ips)
provisioning_data.hostname = hostname
if self.config.allocate_public_ips and instance.public_ip_address is None:
# AWS can't auto-assign a public IPv4 to multi-ENI instances (multi-EFA instances).
# When `public_ips: true` and no public IP is present after launch, attach an Elastic IP to the primary ENI.
# This check assumes that a running instance has already received its auto-assigned public IP, if it was ever going to get one.
public_ip, allocation_id = _allocate_and_associate_eip(
ec2_client=ec2_client,
instance=instance,
project_name=_get_project_name_from_instance_tags(instance),
backend_tags=self.config.tags,
)
provisioning_data.backend_data = AWSInstanceBackendData(
eip_allocation_id=allocation_id
).json()
provisioning_data.hostname = public_ip
else:
provisioning_data.hostname = _get_instance_ip(
instance, self.config.allocate_public_ips
)
provisioning_data.internal_ip = instance.private_ip_address
provisioning_data.ssh_port = 22

Expand Down Expand Up @@ -1263,3 +1292,139 @@ def _get_instance_ip(instance: Any, public_ip: bool) -> str:
def _get_volume_price(size: int, iops: int) -> float:
# https://aws.amazon.com/ebs/pricing/
return size * 0.08 + (iops - 3000) * 0.005


def _parse_instance_backend_data(backend_data: Optional[str]) -> "AWSInstanceBackendData":
    """
    Parses serialized instance backend data into `AWSInstanceBackendData`.

    Missing (`None`) or invalid data yields an empty `AWSInstanceBackendData`; a
    validation failure is logged rather than raised.
    """
    if backend_data is not None:
        try:
            return AWSInstanceBackendData.parse_raw(backend_data)
        except ValidationError:
            logger.exception("Failed to parse AWS instance backend_data; treating as empty")
    # Reached when there is nothing to parse or when parsing failed above.
    return AWSInstanceBackendData()


def _get_project_name_from_instance_tags(instance: Any) -> Optional[str]:
for tag in instance.tags or []:
if tag.get("Key") == "dstack_project":
return tag.get("Value")
return None


def _allocate_and_associate_eip(
    ec2_client: botocore.client.BaseClient,
    instance: Any,
    project_name: Optional[str],
    backend_tags: Optional[Dict[str, str]],
) -> Tuple[str, str]:
    """
    Allocates an Elastic IP and associates it with the primary ENI of `instance`.

    The primary ENI is resolved before anything is allocated, so an instance without one
    fails without leaving an Elastic IP behind. If the association fails, the freshly
    allocated address is released on a best-effort basis before the error is raised.

    Args:
        ec2_client: EC2 client used for the allocate/associate/release calls; its region
            name is included in error messages.
        instance: Object exposing `instance_id` and `network_interfaces_attribute`.
        project_name: Value for the `dstack_project` tag; the tag is omitted if `None`.
        backend_tags: Extra tags to add. They never override the tags set here.

    Returns:
        `(public_ip, allocation_id)` of the allocated Elastic IP.

    Raises:
        ProvisioningError: If the instance has no primary ENI, or if allocating or
            associating the Elastic IP fails. The original AWS error is chained as the
            cause.
    """
    primary_nic_id = _get_primary_network_interface_id(instance)
    tags = {
        "owner": "dstack",
        "dstack_instance": instance.instance_id,
    }
    if project_name is not None:
        tags["dstack_project"] = project_name
    if backend_tags:
        # `setdefault` keeps the dstack-managed tags above authoritative.
        for k, v in backend_tags.items():
            tags.setdefault(k, v)
    tags = aws_resources.filter_invalid_tags(tags)

    try:
        allocate_response = ec2_client.allocate_address(
            Domain="vpc",
            TagSpecifications=[
                {
                    "ResourceType": "elastic-ip",
                    "Tags": aws_resources.make_tags(tags),
                }
            ],
        )
    except botocore.exceptions.ClientError as e:
        code = e.response.get("Error", {}).get("Code", "")
        region = ec2_client.meta.region_name
        if code == "AddressLimitExceeded":
            # Quota exhaustion gets an actionable message instead of the raw AWS error.
            raise ProvisioningError(
                f"Elastic IP quota exceeded in {region}. "
                "Raise the EC2 'EC2-VPC Elastic IPs' quota in Service Quotas, "
                "or reduce concurrent multi-EFA instances."
            ) from e
        raise ProvisioningError(f"Failed to allocate Elastic IP in {region}: {e}") from e

    allocation_id = allocate_response["AllocationId"]
    public_ip = allocate_response["PublicIp"]
    try:
        ec2_client.associate_address(
            AllocationId=allocation_id,
            NetworkInterfaceId=primary_nic_id,
            AllowReassociation=False,
        )
    except botocore.exceptions.ClientError as e:
        # Best-effort release; on failure the EIP leaks until manually released.
        logger.warning(
            "Failed to associate EIP %s to instance %s; releasing.",
            allocation_id,
            instance.instance_id,
        )
        try:
            ec2_client.release_address(AllocationId=allocation_id)
        except botocore.exceptions.ClientError:
            logger.exception(
                "Failed to release just-allocated EIP %s; release it manually.",
                allocation_id,
            )
        # Chain the association error explicitly so the traceback names it as the cause
        # rather than as an error raised while handling another one.
        raise ProvisioningError(
            f"Failed to associate Elastic IP {allocation_id} to instance "
            f"{instance.instance_id}: {e}"
        ) from e
    return public_ip, allocation_id


def _get_primary_network_interface_id(instance: Any) -> str:
for nic in instance.network_interfaces_attribute or []:
attachment = nic.get("Attachment") or {}
if attachment.get("DeviceIndex") == 0:
return nic["NetworkInterfaceId"]
raise ProvisioningError(
f"Instance {instance.instance_id} has no primary network interface (DeviceIndex=0)"
)


def _release_eip(ec2_client: botocore.client.BaseClient, allocation_id: str) -> None:
    """
    Releases the Elastic IP identified by `allocation_id`.

    If the address is still associated, it is disassociated first: `TerminateInstances`
    only initiates shutdown, and AWS auto-disassociates only once the instance reaches
    `terminated`. Disassociating explicitly avoids the `InvalidIPAddress.InUse` race and
    the retry loop. An address that no longer exists is treated as already released;
    any other AWS error propagates to the caller.
    """
    already_released_codes = ("InvalidAllocationID.NotFound", "InvalidAddress.NotFound")

    def _error_code(error) -> str:
        # Extracts the AWS error code, or "" when the response carries none.
        return error.response.get("Error", {}).get("Code", "")

    try:
        described = ec2_client.describe_addresses(AllocationIds=[allocation_id])
    except botocore.exceptions.ClientError as e:
        if _error_code(e) not in already_released_codes:
            raise
        logger.debug("Skipping EIP %s release. Already released.", allocation_id)
        return

    found = described.get("Addresses", [])
    if not found:
        return

    association_id = found[0].get("AssociationId")
    if association_id is not None:
        try:
            ec2_client.disassociate_address(AssociationId=association_id)
        except botocore.exceptions.ClientError as e:
            # AWS may have auto-disassociated between our Describe and Disassociate
            # if the instance just reached `terminated`. Tolerated.
            if _error_code(e) != "InvalidAssociationID.NotFound":
                raise

    try:
        ec2_client.release_address(AllocationId=allocation_id)
    except botocore.exceptions.ClientError as e:
        # The address disappearing concurrently counts as a successful release.
        if _error_code(e) not in already_released_codes:
            raise
22 changes: 9 additions & 13 deletions src/dstack/_internal/core/backends/aws/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,27 +196,23 @@ def create_instances_struct(
# AWS allows specifying either NetworkInterfaces for specific subnet_id
# or instance-level SecurityGroupIds in case of no specific subnet_id, not both.
if subnet_id is not None:
# If the instance type supports multiple cards, we request multiple interfaces only if not allocate_public_ip
# due to the limitation: "AssociatePublicIpAddress [...] You cannot specify more than one
# network interface in the request".
# Error message: "(InvalidParameterCombination) when calling the RunInstances operation:
# The associatePublicIPAddress parameter cannot be specified when launching with
# multiple network interfaces".
# See: https://stackoverflow.com/questions/49882121
# If we need more than one card, we should either use Elastic IP (AWS-recommended way) or
# create the instance with one interface and add the rest later (the latter is not tested
# and may or may not work).
# AWS does not auto-assign a public IPv4 to instances launched with multiple network
# interfaces ("AssociatePublicIpAddress [...] You cannot specify more than one network
# interface in the request"). For multi-EFA instance types (e.g. p4d, p5, trn1), we
# therefore launch all EFA NICs without `AssociatePublicIpAddress` and, when
# `public_ips: true`, attach an Elastic IP after launch in `update_provisioning_data`.
multi_eni = max_efa_interfaces > 1
struct["NetworkInterfaces"] = [
{
"AssociatePublicIpAddress": allocate_public_ip,
"AssociatePublicIpAddress": allocate_public_ip and not multi_eni,
"DeviceIndex": 0,
"SubnetId": subnet_id,
"Groups": [security_group_id],
"InterfaceType": "efa" if max_efa_interfaces > 0 else "interface",
},
]

if max_efa_interfaces > 1 and allocate_public_ip is False:
if multi_eni:
for i in range(1, max_efa_interfaces):
# Set to efa-only to use interfaces exclusively for GPU-to-GPU communication
interface_type = "efa-only"
Expand All @@ -226,7 +222,7 @@ def create_instances_struct(
interface_type = "efa" if i % 4 == 0 else "efa-only"
struct["NetworkInterfaces"].append(
{
"AssociatePublicIpAddress": allocate_public_ip,
"AssociatePublicIpAddress": False,
"NetworkCardIndex": i,
"DeviceIndex": 1,
"SubnetId": subnet_id,
Expand Down
Loading