From 46bb6b04d83bd3bde9ca894bedb71364f7506ecf Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Wed, 20 May 2026 18:02:02 +0200 Subject: [PATCH 1/5] [Test] Add caching to flexible instance type retrieval to minimize the number of EC2 requests. --- tests/integration-tests/conftest.py | 4 ++-- tests/integration-tests/utils.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration-tests/conftest.py b/tests/integration-tests/conftest.py index fd9b4ad20a..29236902fc 100644 --- a/tests/integration-tests/conftest.py +++ b/tests/integration-tests/conftest.py @@ -82,10 +82,10 @@ generate_stack_name, get_architecture_supported_by_instance_type, get_arn_partition, + get_flexible_instance_types, get_instance_info, get_metadata, get_network_interfaces_count, - get_similar_instance_types, get_vpc_snakecase_value, random_alphanumeric, to_pascal_case, @@ -698,7 +698,7 @@ def inject_placement_group_settings(vpc_stack, instance, region, kwargs): def inject_flexible_instance_types_settings(instance, region, kwargs): - kwargs["flexible_instance_types"] = list({instance, *get_similar_instance_types(instance, region, 5)}) + kwargs["flexible_instance_types"] = get_flexible_instance_types(instance, region) def inject_additional_image_configs_settings(image_config, request): diff --git a/tests/integration-tests/utils.py b/tests/integration-tests/utils.py index 18d14eec8d..ed944a29c7 100644 --- a/tests/integration-tests/utils.py +++ b/tests/integration-tests/utils.py @@ -1074,6 +1074,12 @@ def get_similar_instance_types(instance_type: str, region: str = None, max_items return similar_instances +@cache +def get_flexible_instance_types(instance, region): + """Return ``instance`` plus up to 5 similar instance types available in ``region``""" + return list({instance, *get_similar_instance_types(instance, region, 5)}) + + @cache def get_flexible_gpu_instance_types(instance, region): """Return a list of NVIDIA GPU instance types compatible with 
``instance``'s architecture.""" From 3a3f8c4f846e4a9756f1fbb7539a05acaecb1c26 Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Thu, 21 May 2026 09:13:53 +0200 Subject: [PATCH 2/5] [Test] Add log line about retrieval of similar instance types to facilitate troubleshooting. --- tests/integration-tests/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration-tests/utils.py b/tests/integration-tests/utils.py index ed944a29c7..5ef40a02ea 100644 --- a/tests/integration-tests/utils.py +++ b/tests/integration-tests/utils.py @@ -1071,6 +1071,8 @@ def get_similar_instance_types(instance_type: str, region: str = None, max_items if max_items and len(similar_instances) >= max_items: return similar_instances + logging.info(f"Retrieved instance types equivalent to {instance_type} in {region}: {similar_instances}") + return similar_instances From 8cfab1a07c755c7c5747abf254bea9d0e90e87e7 Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Thu, 21 May 2026 12:02:38 +0200 Subject: [PATCH 3/5] [Test] Cache the results of get_similar_instance_types to reduce the number of calls to retrieve similar instance types. 
--- tests/integration-tests/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration-tests/utils.py b/tests/integration-tests/utils.py index 5ef40a02ea..5fb2fc1f2f 100644 --- a/tests/integration-tests/utils.py +++ b/tests/integration-tests/utils.py @@ -1026,6 +1026,7 @@ def _get_gpu_spec(instance_type_data): return frozenset((gpu.get("Manufacturer", ""), gpu.get("Count", 0)) for gpu in gpu_info.get("Gpus", [])) +@cache def get_similar_instance_types(instance_type: str, region: str = None, max_items: int = None): ec2 = boto3.client("ec2", region_name=region) @@ -1076,13 +1077,11 @@ def get_similar_instance_types(instance_type: str, region: str = None, max_items return similar_instances -@cache def get_flexible_instance_types(instance, region): """Return ``instance`` plus up to 5 similar instance types available in ``region``""" return list({instance, *get_similar_instance_types(instance, region, 5)}) -@cache def get_flexible_gpu_instance_types(instance, region): """Return a list of NVIDIA GPU instance types compatible with ``instance``'s architecture.""" architecture = get_architecture_supported_by_instance_type(instance, region) From 0990d5afc536e39a4bd00afec8c18504397f4ead Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Thu, 21 May 2026 12:05:35 +0200 Subject: [PATCH 4/5] [Test] Make the capacity helper specify the region when retrieving the similar instance types, so that it can reuse the cached value and reduce calls to EC2.
--- tests/integration-tests/tests/common/capacity_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests/tests/common/capacity_helpers.py b/tests/integration-tests/tests/common/capacity_helpers.py index 1fb351986a..e2e475e5f8 100644 --- a/tests/integration-tests/tests/common/capacity_helpers.py +++ b/tests/integration-tests/tests/common/capacity_helpers.py @@ -35,7 +35,7 @@ def resolve_instance_with_capacity(region, az_id, instance_type, os, minutes=50, if instance_type not in DEFAULT_INSTANCE_TYPES: return instance_type - candidates = [instance_type] + get_similar_instance_types(instance_type) + candidates = [instance_type] + get_similar_instance_types(instance_type, region) ec2_client = boto3.client("ec2", region_name=region) instance_platform = "Red Hat Enterprise Linux" if "rhel" in os else "Linux/UNIX" From 8eea674f81a5f68c75a887f7ce725a4503234f4b Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Thu, 21 May 2026 12:41:09 +0200 Subject: [PATCH 5/5] [Test] Make the utility functions that retrieve flexible instance types reuse the regional cached value to reduce EC2 calls --- tests/integration-tests/utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration-tests/utils.py b/tests/integration-tests/utils.py index 5fb2fc1f2f..93ce6dba0a 100644 --- a/tests/integration-tests/utils.py +++ b/tests/integration-tests/utils.py @@ -1047,6 +1047,7 @@ def get_similar_instance_types(instance_type: str, region: str = None, max_items # Now query for similar instances using filters paginator = ec2.get_paginator("describe_instance_types") similar_instances = [] + reached_max_items = False for page in paginator.paginate( Filters=[ @@ -1070,7 +1071,10 @@ def get_similar_instance_types(instance_type: str, region: str = None, max_items ): similar_instances.append(instance["InstanceType"]) if max_items and len(similar_instances) >= max_items: - return similar_instances +
reached_max_items = True + break + if reached_max_items: + break logging.info(f"Retrieved instance types equivalent to {instance_type} in {region}: {similar_instances}") @@ -1079,14 +1083,14 @@ def get_similar_instance_types(instance_type: str, region: str = None, max_items def get_flexible_instance_types(instance, region): """Return ``instance`` plus up to 5 similar instance types available in ``region``""" - return list({instance, *get_similar_instance_types(instance, region, 5)}) + return list({instance, *get_similar_instance_types(instance, region)[:5]}) def get_flexible_gpu_instance_types(instance, region): """Return a list of NVIDIA GPU instance types compatible with ``instance``'s architecture.""" architecture = get_architecture_supported_by_instance_type(instance, region) gpu_instance_type = "g4dn.2xlarge" if architecture == "x86_64" else "g5g.2xlarge" - return list({gpu_instance_type, *get_similar_instance_types(gpu_instance_type, region, 5)}) + return list({gpu_instance_type, *get_similar_instance_types(gpu_instance_type, region)[:5]}) def verify_cluster_node_config_version_in_ddb(region, cluster_name, instance_id, expected_version):