From ddeab1ad3579db35abc4154f0e23fdea9147b5f9 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 14:19:52 +0000 Subject: [PATCH 01/16] Update cnl hlsm workflow --- .../azure/cli/command_modules/acs/_consts.py | 1 + .../command_modules/acs/addonconfiguration.py | 38 +++-- .../azure/cli/command_modules/acs/custom.py | 46 ++++-- .../acs/managed_cluster_decorator.py | 138 +++++++++++++++--- .../latest/test_managed_cluster_decorator.py | 2 + 5 files changed, 180 insertions(+), 45 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_consts.py b/src/azure-cli/azure/cli/command_modules/acs/_consts.py index 0e18af74918..d9394879dc9 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_consts.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_consts.py @@ -154,6 +154,7 @@ # monitoring CONST_MONITORING_ADDON_NAME = "omsagent" +CONST_MONITORING_ADDON_NAME_CAMELCASE = "omsAgent" CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID = "logAnalyticsWorkspaceResourceID" CONST_MONITORING_USING_AAD_MSI_AUTH = "useAADAuth" diff --git a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py index 61da2361dce..93071c5ca79 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py +++ b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py @@ -5,6 +5,7 @@ import json import os import re +import time from azure.cli.command_modules.acs._client_factory import get_resource_groups_client, get_resources_client from azure.cli.core.util import get_file_json @@ -22,7 +23,7 @@ from azure.cli.core.azclierror import AzCLIError, CLIError, InvalidArgumentValueError, ArgumentUsageError from azure.cli.core.profiles import ResourceType from azure.cli.core.util import send_raw_request -from azure.core.exceptions import HttpResponseError +from azure.core.exceptions import HttpResponseError, ResourceExistsError from azure.mgmt.core.tools import parse_resource_id, resource_id from knack.log import get_logger @@ -325,18 +326,33 @@ def ensure_default_log_analytics_workspace_for_monitoring( location=workspace_region, properties={"sku": {"name": "standalone"}} ) - async_poller = resources.begin_create_or_update_by_id( - default_workspace_resource_id, "2015-11-01-preview", generic_resource - ) + # Retry with backoff for workspace provisioning conflicts (409 Conflict) + _MAX_RETRY_TIMES = 3 + _RETRY_SLEEP_SECONDS = 30 + for retry_count in range(_MAX_RETRY_TIMES): + try: + async_poller = resources.begin_create_or_update_by_id( + default_workspace_resource_id, "2015-11-01-preview", generic_resource + ) - ws_resource_id = "" - while True: - result = async_poller.result(15) - if async_poller.done(): - ws_resource_id = result.id - break + ws_resource_id = "" + while True: + result = async_poller.result(15) + if async_poller.done(): + ws_resource_id = result.id + break + + return ws_resource_id + except (HttpResponseError, ResourceExistsError) as ex: + if retry_count >= (_MAX_RETRY_TIMES - 1): + raise ex + logger.warning( + "Workspace creation conflict (attempt %d/%d), retrying in %ds...", + retry_count + 1, _MAX_RETRY_TIMES, _RETRY_SLEEP_SECONDS + ) + time.sleep(_RETRY_SLEEP_SECONDS) - return ws_resource_id + return default_workspace_resource_id def sanitize_loganalytics_ws_resource_id(workspace_resource_id): diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index afb442031aa..2018bbd3e7a 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -59,6 +59,7 @@ CONST_INGRESS_APPGW_WATCH_NAMESPACE, CONST_KUBE_DASHBOARD_ADDON_NAME, CONST_MONITORING_ADDON_NAME, + CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, CONST_MONITORING_USING_AAD_MSI_AUTH, CONST_NODEPOOL_MODE_USER, @@ -1512,19 +1513,31 @@ def _remove_nulls(managed_clusters): return managed_clusters +def _get_monitoring_addon_key_custom(addon_profiles): + """Return the key present in addon_profiles for the monitoring addon (omsagent or omsAgent).""" + if addon_profiles is None: + return CONST_MONITORING_ADDON_NAME + if CONST_MONITORING_ADDON_NAME in addon_profiles: + return CONST_MONITORING_ADDON_NAME + if CONST_MONITORING_ADDON_NAME_CAMELCASE in addon_profiles: + return CONST_MONITORING_ADDON_NAME_CAMELCASE + return CONST_MONITORING_ADDON_NAME + + # pylint: disable=line-too-long def aks_disable_addons(cmd, client, resource_group_name, name, addons, no_wait=False): instance = client.get(resource_group_name, name) subscription_id = get_subscription_id(cmd.cli_ctx) + monitoring_addon_key = _get_monitoring_addon_key_custom(instance.addon_profiles) try: - if addons == "monitoring" and CONST_MONITORING_ADDON_NAME in instance.addon_profiles and \ - instance.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled and \ - CONST_MONITORING_USING_AAD_MSI_AUTH in instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config and \ - str(instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == 'true': + if addons == "monitoring" and monitoring_addon_key in instance.addon_profiles and \ + instance.addon_profiles[monitoring_addon_key].enabled and \ + CONST_MONITORING_USING_AAD_MSI_AUTH in instance.addon_profiles[monitoring_addon_key].config and \ + str(instance.addon_profiles[monitoring_addon_key].config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == 'true': # remove the DCR association because otherwise the DCR can't be deleted ensure_container_insights_for_monitoring( cmd, - instance.addon_profiles[CONST_MONITORING_ADDON_NAME], + instance.addon_profiles[monitoring_addon_key], subscription_id, resource_group_name, name, @@ -1614,12 +1627,13 @@ def aks_enable_addons(cmd, client, resource_group_name, name, addons, if need_pull_for_result: if enable_monitoring: - if CONST_MONITORING_USING_AAD_MSI_AUTH in instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config and \ - str(instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == 'true': + monitoring_addon_key = _get_monitoring_addon_key_custom(instance.addon_profiles) + if CONST_MONITORING_USING_AAD_MSI_AUTH in instance.addon_profiles[monitoring_addon_key].config and \ + str(instance.addon_profiles[monitoring_addon_key].config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == 'true': if msi_auth: # create a Data Collection Rule (DCR) and associate it with the cluster ensure_container_insights_for_monitoring( - cmd, instance.addon_profiles[CONST_MONITORING_ADDON_NAME], + cmd, instance.addon_profiles[monitoring_addon_key], subscription_id, resource_group_name, name, @@ -1650,7 +1664,7 @@ def aks_enable_addons(cmd, client, resource_group_name, name, addons, raise ArgumentUsageError( "--ampls-resource-id supported only in MSI auth mode.") ensure_container_insights_for_monitoring( - cmd, instance.addon_profiles[CONST_MONITORING_ADDON_NAME], subscription_id, resource_group_name, name, instance.location, aad_route=False) + cmd, instance.addon_profiles[monitoring_addon_key], subscription_id, resource_group_name, name, instance.location, aad_route=False) # adding a wait here since we rely on the result for role assignment result = LongRunningOperation(cmd.cli_ctx)( @@ -1743,6 +1757,15 @@ def _update_addons(cmd, instance, subscription_id, resource_group_name, name, ad addon_profile.config = { CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID: workspace_resource_id} addon_profile.config[CONST_MONITORING_USING_AAD_MSI_AUTH] = "true" if enable_msi_auth_for_monitoring else "false" + + # Preserve enableRetinaNetworkFlags (CNL) if it was set on the existing addon + # or if container network logs are being enabled simultaneously + existing_cnl = None + existing_addon = addon_profiles.get(addon) or addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) + if existing_addon and existing_addon.config: + existing_cnl = existing_addon.config.get("enableRetinaNetworkFlags") + if existing_cnl is not None: + addon_profile.config["enableRetinaNetworkFlags"] = existing_cnl elif addon == (CONST_VIRTUAL_NODE_ADDON_NAME + os_type): if addon_profile.enabled: raise CLIError('The virtual-node addon is already enabled for this managed cluster.\n' @@ -4078,8 +4101,9 @@ def is_monitoring_addon_enabled(addons, instance): break addon_profiles = instance.addon_profiles or {} - monitoring_addon_enabled = is_monitoring_addon and CONST_MONITORING_ADDON_NAME in addon_profiles and addon_profiles[ - CONST_MONITORING_ADDON_NAME].enabled + monitoring_addon_key = _get_monitoring_addon_key_custom(addon_profiles) + monitoring_addon_enabled = is_monitoring_addon and monitoring_addon_key in addon_profiles and addon_profiles[ + monitoring_addon_key].enabled except Exception as ex: # pylint: disable=broad-except logger.debug("failed to check monitoring addon enabled: %s", ex) return monitoring_addon_enabled diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index fa1794e4edc..d5a39f640e5 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -160,6 +160,24 @@ ManagedClusterIngressProfileNginx = TypeVar("ManagedClusterIngressProfileNginx") ServiceMeshProfile = TypeVar("ServiceMeshProfile") +def _get_monitoring_addon_key(addon_profiles, addon_consts): + """Return the key present in addon_profiles for the monitoring addon. + + The API response may return the monitoring addon key as either "omsagent" + (lowercase) or "omsAgent" (camelCase). This helper checks both variants + and returns the one that exists, falling back to the lowercase constant. + """ + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + CONST_MONITORING_ADDON_NAME_CAMELCASE = addon_consts.get("CONST_MONITORING_ADDON_NAME_CAMELCASE") + if addon_profiles is None: + return CONST_MONITORING_ADDON_NAME + if CONST_MONITORING_ADDON_NAME in addon_profiles: + return CONST_MONITORING_ADDON_NAME + if CONST_MONITORING_ADDON_NAME_CAMELCASE and CONST_MONITORING_ADDON_NAME_CAMELCASE in addon_profiles: + return CONST_MONITORING_ADDON_NAME_CAMELCASE + return CONST_MONITORING_ADDON_NAME + + # TODO # 1. remove enable_rbac related implementation # 2. add validation for all/some of the parameters involved in the getter of outbound_type/enable_addons @@ -2642,10 +2660,12 @@ def get_container_network_logs(self, mc: ManagedCluster) -> Union[bool, None]: monitoring_via_enable_addons = enable_addons and "monitoring" in enable_addons # Check if monitoring is already enabled on the cluster + addon_consts = self.get_addon_consts() + monitoring_addon_key = _get_monitoring_addon_key(mc.addon_profiles, addon_consts) monitoring_on_cluster = ( mc.addon_profiles and - mc.addon_profiles.get("omsagent") and - mc.addon_profiles["omsagent"].enabled + mc.addon_profiles.get(monitoring_addon_key) and + mc.addon_profiles[monitoring_addon_key].enabled ) # Check if ACNS is being enabled or already enabled @@ -2868,6 +2888,7 @@ def get_addon_consts(self) -> Dict[str, str]: CONST_INGRESS_APPGW_SUBNET_CIDR, CONST_INGRESS_APPGW_SUBNET_ID, CONST_INGRESS_APPGW_WATCH_NAMESPACE, CONST_KUBE_DASHBOARD_ADDON_NAME, CONST_MONITORING_ADDON_NAME, + CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, CONST_MONITORING_USING_AAD_MSI_AUTH, CONST_OPEN_SERVICE_MESH_ADDON_NAME, CONST_ROTATION_POLL_INTERVAL, @@ -2911,6 +2932,9 @@ def get_addon_consts(self) -> Dict[str, str]: addon_consts[ "CONST_MONITORING_ADDON_NAME" ] = CONST_MONITORING_ADDON_NAME + addon_consts[ + "CONST_MONITORING_ADDON_NAME_CAMELCASE" + ] = CONST_MONITORING_ADDON_NAME_CAMELCASE addon_consts[ "CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID" ] = CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID @@ -3115,33 +3139,48 @@ def get_enable_msi_auth_for_monitoring(self) -> Union[bool, None]: """ # determine the value of constants addon_consts = self.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") # read the original value passed by the command enable_msi_auth_for_monitoring = self.raw_param.get("enable_msi_auth_for_monitoring") + + # Use helper to find the correct monitoring addon key (handles omsagent/omsAgent variants) + monitoring_addon_key = _get_monitoring_addon_key( + self.mc.addon_profiles if self.mc else None, addon_consts + ) + + # For non-MSI clusters (service principal), MSI auth is not available. + # But if the cluster is MSI-based (client_id == "msi"), check the existing + # addon config — the addon may already have useAADAuth=true. if ( self.mc and self.mc.service_principal_profile and - self.mc.service_principal_profile.client_id is not None + self.mc.service_principal_profile.client_id is not None and + self.mc.service_principal_profile.client_id != "msi" ): return False + # try to read the property value corresponding to the parameter from the `mc` object if ( self.mc and self.mc.addon_profiles and - CONST_MONITORING_ADDON_NAME in self.mc.addon_profiles and + monitoring_addon_key in self.mc.addon_profiles and self.mc.addon_profiles.get( - CONST_MONITORING_ADDON_NAME + monitoring_addon_key ).config.get(CONST_MONITORING_USING_AAD_MSI_AUTH) is not None ): - enable_msi_auth_for_monitoring = ( + existing_msi_auth = ( safe_lower( - self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME).config.get( + self.mc.addon_profiles.get(monitoring_addon_key).config.get( CONST_MONITORING_USING_AAD_MSI_AUTH ) ) == "true" ) + # If the existing addon already has useAADAuth=true, honor that + # even if enable_msi_auth_for_monitoring was not explicitly set + if existing_msi_auth: + return True + enable_msi_auth_for_monitoring = existing_msi_auth sku_name = self.get_sku_name() if sku_name == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: @@ -7563,10 +7602,10 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: elif self.context.raw_param.get("enable_addons") is not None: # Create the DCR Association here addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + monitoring_addon_key = _get_monitoring_addon_key(cluster.addon_profiles, addon_consts) self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, - cluster.addon_profiles[CONST_MONITORING_ADDON_NAME], + cluster.addon_profiles[monitoring_addon_key], self.context.get_subscription_id(), self.context.get_resource_group_name(), self.context.get_name(), @@ -7854,7 +7893,8 @@ def check_raw_parameters(self): self.context.get_load_balancer_idle_timeout() is None and self.context.get_load_balancer_outbound_ports() is None and self.context.get_nat_gateway_managed_outbound_ip_count() is None and - self.context.get_nat_gateway_idle_timeout() is None + self.context.get_nat_gateway_idle_timeout() is None and + self.context.raw_param.get("enable_high_log_scale_mode") is None ) if not is_changed and is_default: @@ -8411,22 +8451,63 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus """ self._ensure_mc(mc) + addon_consts = self.context.get_addon_consts() + CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") + monitoring_addon_key = _get_monitoring_addon_key(mc.addon_profiles, addon_consts) + + enable_high_log_scale_mode = self.context.get_enable_high_log_scale_mode() + enable_cnl = self.context.raw_param.get("enable_container_network_logs") + # Trigger validation for high log scale mode when container network logs are enabled. # This ensures proper error messages are raised before cluster update if the user # explicitly disables high log scale mode while enabling container network logs. - if self.context.raw_param.get("enable_container_network_logs"): + if enable_cnl: self.context.get_enable_high_log_scale_mode() + # Validate HLSM on the update path + if enable_high_log_scale_mode is True and not enable_cnl: + # HLSM requires monitoring addon with MSI auth to be enabled + monitoring_addon_profile = mc.addon_profiles.get(monitoring_addon_key) if mc.addon_profiles else None + if ( + not monitoring_addon_profile or + not monitoring_addon_profile.enabled or + safe_lower( + (monitoring_addon_profile.config or {}).get(CONST_MONITORING_USING_AAD_MSI_AUTH) + ) != "true" + ): + raise RequiredArgumentMissingError( + "--enable-high-log-scale-mode requires the monitoring addon to be enabled with MSI auth " + "(useAADAuth=true). Please enable the monitoring addon with --enable-addons monitoring first." + ) + + if enable_high_log_scale_mode is False: + # Check if CNL is already enabled on the cluster — cannot disable HLSM while CNL is active + monitoring_addon_profile = mc.addon_profiles.get(monitoring_addon_key) if mc.addon_profiles else None + if monitoring_addon_profile and monitoring_addon_profile.config: + existing_cnl = safe_lower( + monitoring_addon_profile.config.get("enableRetinaNetworkFlags") + ) + if existing_cnl == "true": + raise MutuallyExclusiveArgumentError( + "Cannot disable --enable-high-log-scale-mode while container network logs are enabled. " + "Please disable container network logs first with --disable-container-network-logs." + ) + container_network_logs_enabled = self.context.get_container_network_logs(mc) if container_network_logs_enabled is not None: if mc.addon_profiles: - addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - monitoring_addon_profile = mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) + monitoring_addon_profile = mc.addon_profiles.get(monitoring_addon_key) if monitoring_addon_profile: config = monitoring_addon_profile.config or {} config["enableRetinaNetworkFlags"] = str(container_network_logs_enabled) - mc.addon_profiles[CONST_MONITORING_ADDON_NAME].config = config + mc.addon_profiles[monitoring_addon_key].config = config + + # When CNL or HLSM flags are provided, mark that monitoring postprocessing is needed + # so the DCR gets updated with the correct streams + if container_network_logs_enabled is not None or enable_high_log_scale_mode is not None: + self.context.set_intermediate( + "monitoring_addon_postprocessing_required", True, overwrite_exists=True + ) return mc def update_http_proxy_config(self, mc: ManagedCluster) -> ManagedCluster: @@ -8607,9 +8688,10 @@ def update_addon_profiles(self, mc: ManagedCluster) -> ManagedCluster: azure_keyvault_secrets_provider_addon_profile = None if mc.addon_profiles is not None: + monitoring_addon_key = _get_monitoring_addon_key(mc.addon_profiles, addon_consts) monitoring_addon_enabled = ( - CONST_MONITORING_ADDON_NAME in mc.addon_profiles and - mc.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled + monitoring_addon_key in mc.addon_profiles and + mc.addon_profiles[monitoring_addon_key].enabled ) ingress_appgw_addon_enabled = ( CONST_INGRESS_APPGW_ADDON_NAME in mc.addon_profiles and @@ -9810,6 +9892,9 @@ def check_is_postprocessing_required(self, mc: ManagedCluster) -> bool: from azure.cli.command_modules.acs._consts import CONST_AZURE_KEYVAULT_SECRETS_PROVIDER_ADDON_NAME # some addons require post cluster creation role assigment monitoring_addon_enabled = self.context.get_intermediate("monitoring_addon_enabled", default_value=False) + monitoring_addon_postprocessing_required = self.context.get_intermediate( + "monitoring_addon_postprocessing_required", default_value=False + ) ingress_appgw_addon_enabled = self.context.get_intermediate("ingress_appgw_addon_enabled", default_value=False) virtual_node_addon_enabled = self.context.get_intermediate("virtual_node_addon_enabled", default_value=False) enable_managed_identity = check_is_msi_cluster(mc) @@ -9827,6 +9912,7 @@ def check_is_postprocessing_required(self, mc: ManagedCluster) -> bool: # pylint: disable=too-many-boolean-expressions if ( monitoring_addon_enabled or + monitoring_addon_postprocessing_required or ingress_appgw_addon_enabled or virtual_node_addon_enabled or (enable_managed_identity and attach_acr) or @@ -9853,6 +9939,9 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: """ # monitoring addon monitoring_addon_enabled = self.context.get_intermediate("monitoring_addon_enabled", default_value=False) + monitoring_addon_postprocessing_required = self.context.get_intermediate( + "monitoring_addon_postprocessing_required", default_value=False + ) if monitoring_addon_enabled: enable_msi_auth_for_monitoring = self.context.get_enable_msi_auth_for_monitoring() if not enable_msi_auth_for_monitoring: @@ -9873,21 +9962,24 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: cluster, cluster_resource_id, self.cmd ) elif ( - self.context.raw_param.get("enable_addons") is not None + self.context.raw_param.get("enable_addons") is not None or + monitoring_addon_postprocessing_required ): - # Create the DCR Association here + # Create/update the DCR and DCRA here addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + monitoring_addon_key = _get_monitoring_addon_key(cluster.addon_profiles, addon_consts) + # When CNL/HLSM flags changed, also update the DCR + needs_dcr_update = monitoring_addon_postprocessing_required self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, - cluster.addon_profiles[CONST_MONITORING_ADDON_NAME], + cluster.addon_profiles[monitoring_addon_key], self.context.get_subscription_id(), self.context.get_resource_group_name(), self.context.get_name(), self.context.get_location(), remove_monitoring=False, aad_route=self.context.get_enable_msi_auth_for_monitoring(), - create_dcr=False, + create_dcr=needs_dcr_update, create_dcra=True, enable_syslog=self.context.get_enable_syslog(), data_collection_settings=self.context.get_data_collection_settings(), diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 0c75d9a6054..ee07dfca452 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -23,6 +23,7 @@ CONST_INGRESS_APPGW_WATCH_NAMESPACE, CONST_KUBE_DASHBOARD_ADDON_NAME, CONST_MONITORING_ADDON_NAME, + CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, CONST_OPEN_SERVICE_MESH_ADDON_NAME, CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING, @@ -2531,6 +2532,7 @@ def test_get_addon_consts(self): "CONST_INGRESS_APPGW_WATCH_NAMESPACE": CONST_INGRESS_APPGW_WATCH_NAMESPACE, "CONST_KUBE_DASHBOARD_ADDON_NAME": CONST_KUBE_DASHBOARD_ADDON_NAME, "CONST_MONITORING_ADDON_NAME": CONST_MONITORING_ADDON_NAME, + "CONST_MONITORING_ADDON_NAME_CAMELCASE": CONST_MONITORING_ADDON_NAME_CAMELCASE, "CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID": CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, "CONST_OPEN_SERVICE_MESH_ADDON_NAME": CONST_OPEN_SERVICE_MESH_ADDON_NAME, "CONST_VIRTUAL_NODE_ADDON_NAME": CONST_VIRTUAL_NODE_ADDON_NAME, From 0621156f16754f39623ef934a78e3898f5cd09d2 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 15:20:12 +0000 Subject: [PATCH 02/16] Fix update workflow --- .../azure/cli/command_modules/acs/_help.py | 3 + .../azure/cli/command_modules/acs/_params.py | 2 + .../azure/cli/command_modules/acs/custom.py | 2 + .../acs/tests/latest/test_aks_commands.py | 36 ++++ .../latest/test_managed_cluster_decorator.py | 189 ++++++++++++++++++ 5 files changed, 232 insertions(+) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index c8d4eeb32a4..833b9b5ad4b 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -1112,6 +1112,9 @@ type: string short-summary: Set transit encryption type for ACNS security. long-summary: Configures pod-to-pod encryption for Cilium-based clusters. Once enabled, all traffic between Cilium managed pods will be encrypted when it leaves the node boundary. Valid values are "WireGuard" and "None". When creating a cluster, this option must be used together with "--enable-acns"; when updating a cluster, it can be used on its own to modify the transit encryption type for an existing ACNS-enabled cluster. + - name: --enable-high-log-scale-mode + type: bool + short-summary: Enable High Log Scale Mode for Container Logs. Auto-enabled when --enable-container-network-logs is specified. - name: --nrg-lockdown-restriction-level type: string short-summary: Restriction level on the managed node resource group. diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index c999e4c581f..1901346e5a3 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -690,6 +690,8 @@ def load_arguments(self, _): help="Set the datapath acceleration mode for Azure Container Networking Solution (ACNS). Valid values are 'BpfVeth' and 'None'." ) c.argument('acns_transit_encryption_type', arg_type=get_enum_type(transit_encryption_types)) + # monitoring addons + c.argument('enable_high_log_scale_mode', arg_type=get_three_state_flag()) # private cluster parameters c.argument('enable_apiserver_vnet_integration', action='store_true') c.argument('apiserver_subnet_id', validator=validate_apiserver_subnet_id) diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index 2018bbd3e7a..b27623c0b8c 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1169,6 +1169,8 @@ def aks_update( disable_container_network_logs=None, acns_datapath_acceleration_mode=None, acns_transit_encryption_type=None, + # monitoring addons + enable_high_log_scale_mode=None, # network isoalted cluster bootstrap_artifact_source=None, bootstrap_container_registry_resource_id=None, diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 63094d2808b..15acbbcb180 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -13326,6 +13326,42 @@ def test_aks_create_acns_with_flow_logs( ], ) + # update: disable container network logs + disable_cnl_cmd = ( + "aks update --resource-group={resource_group} --name={name} " + "--disable-container-network-logs " + ) + self.cmd( + disable_cnl_cmd, + checks=[ + self.check("provisioningState", "Succeeded"), + ], + ) + + # update: enable high log scale mode independently via aks update + enable_hlsm_cmd = ( + "aks update --resource-group={resource_group} --name={name} " + "--enable-high-log-scale-mode " + ) + self.cmd( + enable_hlsm_cmd, + checks=[ + self.check("provisioningState", "Succeeded"), + ], + ) + + # update: re-enable container network logs (should auto-enable HLSM) + enable_cnl_cmd = ( + "aks update --resource-group={resource_group} --name={name} " + "--enable-container-network-logs " + ) + self.cmd( + enable_cnl_cmd, + checks=[ + self.check("provisioningState", "Succeeded"), + ], + ) + # delete self.cmd( "aks delete -g {resource_group} -n {name} --yes --no-wait", diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index ee07dfca452..62f9cce5725 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -14829,6 +14829,195 @@ def test_enable_container_network_logs(self): ): dec_8.set_up_addon_profiles(mc_8) + # Case 9: UPDATE - enable HLSM only (no CNL), monitoring with MSI auth enabled + dec_9 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_9 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_9.context.attach_mc(mc_9) + dec_mc_9 = dec_9.update_monitoring_profile_flow_logs(mc_9) + # HLSM should be enabled but CNL remains unset — no enableRetinaNetworkFlags change + # The monitoring_addon_postprocessing_required intermediate should be set + self.assertTrue( + dec_9.context.get_intermediate("monitoring_addon_postprocessing_required") + ) + + # Case 10: UPDATE - disable HLSM while CNL is active -> should ERROR + dec_10 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": False, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_10 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + "enableRetinaNetworkFlags": "True", + }, + ) + }, + ) + dec_10.context.attach_mc(mc_10) + with self.assertRaises(MutuallyExclusiveArgumentError): + dec_10.update_monitoring_profile_flow_logs(mc_10) + + # Case 11: UPDATE - enable CNL + HLSM=true together + dec_11 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_container_network_logs": True, + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_11 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_11.context.attach_mc(mc_11) + dec_mc_11 = dec_11.update_monitoring_profile_flow_logs(mc_11) + self.assertEqual( + dec_mc_11.addon_profiles["omsagent"].config["enableRetinaNetworkFlags"], + "True", + ) + self.assertTrue( + dec_11.context.get_intermediate("monitoring_addon_postprocessing_required") + ) + + # Case 12: UPDATE - enable HLSM without monitoring addon -> should ERROR + dec_12 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_12 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + ) + dec_12.context.attach_mc(mc_12) + with self.assertRaises(RequiredArgumentMissingError): + dec_12.update_monitoring_profile_flow_logs(mc_12) + + # Case 13: UPDATE - enable HLSM without MSI auth -> should ERROR + dec_13 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_13 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "false"}, + ) + }, + ) + dec_13.context.attach_mc(mc_13) + with self.assertRaises(RequiredArgumentMissingError): + dec_13.update_monitoring_profile_flow_logs(mc_13) + + # Case 14: UPDATE - enable CNL + HLSM=false -> should ERROR + dec_14 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_container_network_logs": True, + "enable_high_log_scale_mode": False, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_14 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_14.context.attach_mc(mc_14) + with self.assertRaises(MutuallyExclusiveArgumentError): + dec_14.update_monitoring_profile_flow_logs(mc_14) + if __name__ == "__main__": unittest.main() From 362af9c8755b3595d03e70ae63c91542bc4abf09 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 16:01:30 +0000 Subject: [PATCH 03/16] Updates + tests --- .../azure/cli/command_modules/acs/_helpers.py | 16 ++ .../command_modules/acs/addonconfiguration.py | 27 ++- .../azure/cli/command_modules/acs/custom.py | 25 ++- .../acs/managed_cluster_decorator.py | 52 ++---- .../acs/tests/latest/test_aks_commands.py | 7 + .../acs/tests/latest/test_custom.py | 161 ++++++++++++++++++ .../acs/tests/latest/test_helpers.py | 39 +++++ .../latest/test_managed_cluster_decorator.py | 112 ++++++++++++ 8 files changed, 381 insertions(+), 58 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_helpers.py b/src/azure-cli/azure/cli/command_modules/acs/_helpers.py index 13c9c66b4f0..433a5a7f8fa 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_helpers.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_helpers.py @@ -28,6 +28,22 @@ ManagedCluster = TypeVar("ManagedCluster") +def get_monitoring_addon_key(addon_profiles, monitoring_addon_name, monitoring_addon_name_camelcase=None): + """Return the key present in addon_profiles for the monitoring addon. + + The API response may return the monitoring addon key as either "omsagent" + (lowercase) or "omsAgent" (camelCase). This helper checks both variants + and returns the one that exists, falling back to the lowercase constant. + """ + if addon_profiles is None: + return monitoring_addon_name + if monitoring_addon_name in addon_profiles: + return monitoring_addon_name + if monitoring_addon_name_camelcase and monitoring_addon_name_camelcase in addon_profiles: + return monitoring_addon_name_camelcase + return monitoring_addon_name + + def format_parameter_name_to_option_name(parameter_name: str) -> str: """Convert a name in parameter format to option format. diff --git a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py index 93071c5ca79..6af52cbf175 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py +++ b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py @@ -4,6 +4,7 @@ # -------------------------------------------------------------------------------------------- import json import os +import random import re import time @@ -326,9 +327,9 @@ def ensure_default_log_analytics_workspace_for_monitoring( location=workspace_region, properties={"sku": {"name": "standalone"}} ) - # Retry with backoff for workspace provisioning conflicts (409 Conflict) + # Retry with exponential backoff + jitter for 409 Conflict during workspace provisioning _MAX_RETRY_TIMES = 3 - _RETRY_SLEEP_SECONDS = 30 + _BASE_SLEEP_SECONDS = 5 for retry_count in range(_MAX_RETRY_TIMES): try: async_poller = resources.begin_create_or_update_by_id( @@ -343,14 +344,26 @@ def ensure_default_log_analytics_workspace_for_monitoring( break return ws_resource_id - except (HttpResponseError, ResourceExistsError) as ex: + except ResourceExistsError: + # ResourceExistsError is a subclass of HttpResponseError, so must be caught first if retry_count >= (_MAX_RETRY_TIMES - 1): - raise ex + raise + sleep_seconds = _BASE_SLEEP_SECONDS * (2 ** retry_count) + random.uniform(0, 2) + logger.warning( + "Workspace already exists (attempt %d/%d), retrying in %.1fs...", + retry_count + 1, _MAX_RETRY_TIMES, sleep_seconds + ) + time.sleep(sleep_seconds) + except HttpResponseError as ex: + is_conflict = hasattr(ex, 'status_code') and ex.status_code == 409 + if not is_conflict or retry_count >= (_MAX_RETRY_TIMES - 1): + raise + sleep_seconds = _BASE_SLEEP_SECONDS * (2 ** retry_count) + random.uniform(0, 2) logger.warning( - "Workspace creation conflict (attempt %d/%d), retrying in %ds...", - retry_count + 1, _MAX_RETRY_TIMES, _RETRY_SLEEP_SECONDS + "Workspace creation conflict (attempt %d/%d), retrying in %.1fs...", + retry_count + 1, _MAX_RETRY_TIMES, sleep_seconds ) - time.sleep(_RETRY_SLEEP_SECONDS) + time.sleep(sleep_seconds) return default_workspace_resource_id diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index b27623c0b8c..12a1d64508a 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -81,7 +81,7 @@ CONST_VIRTUAL_MACHINES, ) from azure.cli.command_modules.acs._polling import RunCommandLocationPolling -from azure.cli.command_modules.acs._helpers import get_snapshot_by_snapshot_id, check_is_private_link_cluster, build_etag_kwargs +from azure.cli.command_modules.acs._helpers import get_snapshot_by_snapshot_id, get_monitoring_addon_key, check_is_private_link_cluster, build_etag_kwargs from azure.cli.command_modules.acs._resourcegroup import get_rg_location from azure.cli.command_modules.acs.managednamespace import aks_managed_namespace_add, aks_managed_namespace_update from azure.cli.command_modules.acs._validators import extract_comma_separated_string @@ -1515,22 +1515,13 @@ def _remove_nulls(managed_clusters): return managed_clusters -def _get_monitoring_addon_key_custom(addon_profiles): - """Return the key present in addon_profiles for the monitoring addon (omsagent or omsAgent).""" - if addon_profiles is None: - return CONST_MONITORING_ADDON_NAME - if CONST_MONITORING_ADDON_NAME in addon_profiles: - return CONST_MONITORING_ADDON_NAME - if CONST_MONITORING_ADDON_NAME_CAMELCASE in addon_profiles: - return CONST_MONITORING_ADDON_NAME_CAMELCASE - return CONST_MONITORING_ADDON_NAME - - # pylint: disable=line-too-long def aks_disable_addons(cmd, client, resource_group_name, name, addons, no_wait=False): instance = client.get(resource_group_name, name) subscription_id = get_subscription_id(cmd.cli_ctx) - monitoring_addon_key = _get_monitoring_addon_key_custom(instance.addon_profiles) + monitoring_addon_key = get_monitoring_addon_key( + instance.addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE + ) try: if addons == "monitoring" and monitoring_addon_key in instance.addon_profiles and \ instance.addon_profiles[monitoring_addon_key].enabled and \ @@ -1629,7 +1620,9 @@ def aks_enable_addons(cmd, client, resource_group_name, name, addons, if need_pull_for_result: if enable_monitoring: - monitoring_addon_key = _get_monitoring_addon_key_custom(instance.addon_profiles) + monitoring_addon_key = get_monitoring_addon_key( + instance.addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE + ) if CONST_MONITORING_USING_AAD_MSI_AUTH in instance.addon_profiles[monitoring_addon_key].config and \ str(instance.addon_profiles[monitoring_addon_key].config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == 'true': if msi_auth: @@ -4103,7 +4096,9 @@ def is_monitoring_addon_enabled(addons, instance): break addon_profiles = instance.addon_profiles or {} - monitoring_addon_key = _get_monitoring_addon_key_custom(addon_profiles) + monitoring_addon_key = get_monitoring_addon_key( + addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE + ) monitoring_addon_enabled = is_monitoring_addon and monitoring_addon_key in addon_profiles and addon_profiles[ monitoring_addon_key].enabled except Exception as ex: # pylint: disable=broad-except diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index d5a39f640e5..8eb833b3988 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -63,6 +63,7 @@ check_is_apiserver_vnet_integration_cluster, check_is_private_cluster, format_parameter_name_to_option_name, + get_monitoring_addon_key, get_user_assigned_identity_by_resource_id, get_shared_control_plane_identity, map_azure_error_to_cli_error, @@ -160,22 +161,14 @@ ManagedClusterIngressProfileNginx = TypeVar("ManagedClusterIngressProfileNginx") ServiceMeshProfile = TypeVar("ServiceMeshProfile") -def _get_monitoring_addon_key(addon_profiles, addon_consts): - """Return the key present in addon_profiles for the monitoring addon. - The API response may return the monitoring addon key as either "omsagent" - (lowercase) or "omsAgent" (camelCase). This helper checks both variants - and returns the one that exists, falling back to the lowercase constant. - """ - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - CONST_MONITORING_ADDON_NAME_CAMELCASE = addon_consts.get("CONST_MONITORING_ADDON_NAME_CAMELCASE") - if addon_profiles is None: - return CONST_MONITORING_ADDON_NAME - if CONST_MONITORING_ADDON_NAME in addon_profiles: - return CONST_MONITORING_ADDON_NAME - if CONST_MONITORING_ADDON_NAME_CAMELCASE and CONST_MONITORING_ADDON_NAME_CAMELCASE in addon_profiles: - return CONST_MONITORING_ADDON_NAME_CAMELCASE - return CONST_MONITORING_ADDON_NAME +def _get_monitoring_addon_key_from_consts(addon_profiles, addon_consts): + """Thin wrapper around get_monitoring_addon_key that unpacks addon_consts dict.""" + return get_monitoring_addon_key( + addon_profiles, + addon_consts.get("CONST_MONITORING_ADDON_NAME"), + addon_consts.get("CONST_MONITORING_ADDON_NAME_CAMELCASE"), + ) # TODO @@ -2661,7 +2654,7 @@ def get_container_network_logs(self, mc: ManagedCluster) -> Union[bool, None]: # Check if monitoring is already enabled on the cluster addon_consts = self.get_addon_consts() - monitoring_addon_key = _get_monitoring_addon_key(mc.addon_profiles, addon_consts) + monitoring_addon_key = _get_monitoring_addon_key_from_consts(mc.addon_profiles, addon_consts) monitoring_on_cluster = ( mc.addon_profiles and mc.addon_profiles.get(monitoring_addon_key) and @@ -3145,21 +3138,16 @@ def get_enable_msi_auth_for_monitoring(self) -> Union[bool, None]: enable_msi_auth_for_monitoring = self.raw_param.get("enable_msi_auth_for_monitoring") # Use helper to find the correct monitoring addon key (handles omsagent/omsAgent variants) - monitoring_addon_key = _get_monitoring_addon_key( + monitoring_addon_key = _get_monitoring_addon_key_from_consts( self.mc.addon_profiles if self.mc else None, addon_consts ) - # For non-MSI clusters (service principal), MSI auth is not available. - # But if the cluster is MSI-based (client_id == "msi"), check the existing - # addon config — the addon may already have useAADAuth=true. if ( self.mc and self.mc.service_principal_profile and - self.mc.service_principal_profile.client_id is not None and - self.mc.service_principal_profile.client_id != "msi" + self.mc.service_principal_profile.client_id is not None ): return False - # try to read the property value corresponding to the parameter from the `mc` object if ( self.mc and @@ -3169,18 +3157,13 @@ def get_enable_msi_auth_for_monitoring(self) -> Union[bool, None]: monitoring_addon_key ).config.get(CONST_MONITORING_USING_AAD_MSI_AUTH) is not None ): - existing_msi_auth = ( + enable_msi_auth_for_monitoring = ( safe_lower( self.mc.addon_profiles.get(monitoring_addon_key).config.get( CONST_MONITORING_USING_AAD_MSI_AUTH ) ) == "true" ) - # If the existing addon already has useAADAuth=true, honor that - # even if enable_msi_auth_for_monitoring was not explicitly set - if existing_msi_auth: - return True - enable_msi_auth_for_monitoring = existing_msi_auth sku_name = self.get_sku_name() if sku_name == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: @@ -7602,7 +7585,7 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: elif self.context.raw_param.get("enable_addons") is not None: # Create the DCR Association here addon_consts = self.context.get_addon_consts() - monitoring_addon_key = _get_monitoring_addon_key(cluster.addon_profiles, addon_consts) + monitoring_addon_key = _get_monitoring_addon_key_from_consts(cluster.addon_profiles, addon_consts) self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, cluster.addon_profiles[monitoring_addon_key], @@ -8453,7 +8436,7 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus addon_consts = self.context.get_addon_consts() CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") - monitoring_addon_key = _get_monitoring_addon_key(mc.addon_profiles, addon_consts) + monitoring_addon_key = _get_monitoring_addon_key_from_consts(mc.addon_profiles, addon_consts) enable_high_log_scale_mode = self.context.get_enable_high_log_scale_mode() enable_cnl = self.context.raw_param.get("enable_container_network_logs") @@ -8673,9 +8656,6 @@ def update_addon_profiles(self, mc: ManagedCluster) -> ManagedCluster: # determine the value of constants addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get( - "CONST_MONITORING_ADDON_NAME" - ) CONST_INGRESS_APPGW_ADDON_NAME = addon_consts.get( "CONST_INGRESS_APPGW_ADDON_NAME" ) @@ -8688,7 +8668,7 @@ def update_addon_profiles(self, mc: ManagedCluster) -> ManagedCluster: azure_keyvault_secrets_provider_addon_profile = None if mc.addon_profiles is not None: - monitoring_addon_key = _get_monitoring_addon_key(mc.addon_profiles, addon_consts) + monitoring_addon_key = _get_monitoring_addon_key_from_consts(mc.addon_profiles, addon_consts) monitoring_addon_enabled = ( monitoring_addon_key in mc.addon_profiles and mc.addon_profiles[monitoring_addon_key].enabled @@ -9967,7 +9947,7 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: ): # Create/update the DCR and DCRA here addon_consts = self.context.get_addon_consts() - monitoring_addon_key = _get_monitoring_addon_key(cluster.addon_profiles, addon_consts) + monitoring_addon_key = _get_monitoring_addon_key_from_consts(cluster.addon_profiles, addon_consts) # When CNL/HLSM flags changed, also update the DCR needs_dcr_update = monitoring_addon_postprocessing_required self.context.external_functions.ensure_container_insights_for_monitoring( diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 15acbbcb180..b6414c9a84a 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -13323,6 +13323,8 @@ def test_aks_create_acns_with_flow_logs( checks=[ self.check("provisioningState", "Succeeded"), self.check("networkProfile.advancedNetworking.observability.enabled", True), + self.check("addonProfiles.omsagent.enabled", True), + self.check("addonProfiles.omsagent.config.enableRetinaNetworkFlags", "True"), ], ) @@ -13335,6 +13337,8 @@ def test_aks_create_acns_with_flow_logs( disable_cnl_cmd, checks=[ self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + self.check("addonProfiles.omsagent.config.enableRetinaNetworkFlags", "False"), ], ) @@ -13347,6 +13351,7 @@ def test_aks_create_acns_with_flow_logs( enable_hlsm_cmd, checks=[ self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), ], ) @@ -13359,6 +13364,8 @@ def test_aks_create_acns_with_flow_logs( enable_cnl_cmd, checks=[ self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + self.check("addonProfiles.omsagent.config.enableRetinaNetworkFlags", "True"), ], ) diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py index df6c68a8b96..59071f51ed3 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py @@ -45,6 +45,7 @@ ) from azure.cli.core.util import CLIError from azure.cli.core.profiles import ResourceType +from azure.core.exceptions import HttpResponseError, ResourceExistsError from azure.mgmt.containerservice.models import ( ManagedClusterAddonProfile, ) @@ -1172,5 +1173,165 @@ def test_unknown_delos_region_defaults_to_deloscloudgermanycentral(self, mock_ge self.assertIn(f'DefaultWorkspace-{subscription_id}-DELOSC', result) +class TestWorkspaceCreationRetry(unittest.TestCase): + """Tests for the retry logic in ensure_default_log_analytics_workspace_for_monitoring.""" + + def setUp(self): + self.cli = MockCLI() + self.cmd = MockCmd(self.cli) + self.cmd.cli_ctx.cloud.name = 'AzureCloud' + + def _make_mocks(self): + """Create standard mocks for resource group and resources clients.""" + mock_rg_client = mock.Mock() + mock_rg_client.check_existence.return_value = False + mock_rg_client.create_or_update = mock.Mock() + + mock_poller = mock.Mock() + mock_result = mock.Mock() + mock_result.id = '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws' + mock_poller.result.return_value = mock_result + mock_poller.done.return_value = True + + mock_resources_client = mock.Mock() + mock_resources_client.begin_create_or_update_by_id.return_value = mock_poller + + self.cmd.get_models = mock.Mock(return_value=mock.Mock) + return mock_rg_client, mock_resources_client + + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') + def test_retry_on_409_conflict(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): + """409 Conflict should be retried with exponential backoff.""" + mock_get_rg_location.return_value = 'eastus' + mock_rg_client, mock_resources_client = self._make_mocks() + mock_get_rg_client.return_value = mock_rg_client + mock_get_resources_client.return_value = mock_resources_client + + # First call raises 409, second succeeds + conflict_error = HttpResponseError(response=mock.Mock(status_code=409)) + conflict_error.status_code = 409 + mock_poller_success = mock.Mock() + mock_result = mock.Mock() + mock_result.id = '/subscriptions/sub/rg/ws' + mock_poller_success.result.return_value = mock_result + mock_poller_success.done.return_value = True + mock_resources_client.begin_create_or_update_by_id.side_effect = [ + conflict_error, mock_poller_success + ] + + result = ensure_default_log_analytics_workspace_for_monitoring( + self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' + ) + + self.assertEqual(result, '/subscriptions/sub/rg/ws') + self.assertEqual(mock_sleep.call_count, 1) + # Verify backoff: first retry sleep should be base (5) * 2^0 + jitter + sleep_arg = mock_sleep.call_args[0][0] + self.assertGreaterEqual(sleep_arg, 5.0) + self.assertLessEqual(sleep_arg, 7.0) # 5 + up to 2s jitter + + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') + def test_non_409_http_error_not_retried(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): + """Non-409 HttpResponseError should be raised immediately, not retried.""" + mock_get_rg_location.return_value = 'eastus' + mock_rg_client, mock_resources_client = self._make_mocks() + mock_get_rg_client.return_value = mock_rg_client + mock_get_resources_client.return_value = mock_resources_client + + bad_request_error = HttpResponseError(response=mock.Mock(status_code=400)) + bad_request_error.status_code = 400 + mock_resources_client.begin_create_or_update_by_id.side_effect = bad_request_error + + with self.assertRaises(HttpResponseError): + ensure_default_log_analytics_workspace_for_monitoring( + self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' + ) + + mock_sleep.assert_not_called() + + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') + def test_resource_exists_error_retried(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): + """ResourceExistsError should be retried.""" + mock_get_rg_location.return_value = 'eastus' + mock_rg_client, mock_resources_client = self._make_mocks() + mock_get_rg_client.return_value = mock_rg_client + mock_get_resources_client.return_value = mock_resources_client + + exists_error = ResourceExistsError("already exists") + mock_poller_success = mock.Mock() + mock_result = mock.Mock() + mock_result.id = '/subscriptions/sub/rg/ws' + mock_poller_success.result.return_value = mock_result + mock_poller_success.done.return_value = True + mock_resources_client.begin_create_or_update_by_id.side_effect = [ + exists_error, mock_poller_success + ] + + result = ensure_default_log_analytics_workspace_for_monitoring( + self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' + ) + + self.assertEqual(result, '/subscriptions/sub/rg/ws') + self.assertEqual(mock_sleep.call_count, 1) + + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') + def test_409_conflict_exhausts_retries(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): + """409 Conflict that persists through all retries should raise.""" + mock_get_rg_location.return_value = 'eastus' + mock_rg_client, mock_resources_client = self._make_mocks() + mock_get_rg_client.return_value = mock_rg_client + mock_get_resources_client.return_value = mock_resources_client + + conflict_error = HttpResponseError(response=mock.Mock(status_code=409)) + conflict_error.status_code = 409 + mock_resources_client.begin_create_or_update_by_id.side_effect = conflict_error + + with self.assertRaises(HttpResponseError): + ensure_default_log_analytics_workspace_for_monitoring( + self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' + ) + + # Should have slept twice (retries 0 and 1), then raised on retry 2 + self.assertEqual(mock_sleep.call_count, 2) + + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') + @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') + def test_exponential_backoff_increasing_sleep(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): + """Sleep durations should increase exponentially across retries.""" + mock_get_rg_location.return_value = 'eastus' + mock_rg_client, mock_resources_client = self._make_mocks() + mock_get_rg_client.return_value = mock_rg_client + mock_get_resources_client.return_value = mock_resources_client + + conflict_error = HttpResponseError(response=mock.Mock(status_code=409)) + conflict_error.status_code = 409 + mock_resources_client.begin_create_or_update_by_id.side_effect = conflict_error + + with self.assertRaises(HttpResponseError): + ensure_default_log_analytics_workspace_for_monitoring( + self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' + ) + + self.assertEqual(mock_sleep.call_count, 2) + first_sleep = mock_sleep.call_args_list[0][0][0] + second_sleep = mock_sleep.call_args_list[1][0][0] + # Second sleep should be larger (base doubles: 5->10, plus jitter) + self.assertGreater(second_sleep, first_sleep) + + if __name__ == "__main__": unittest.main() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py index dccfe1edec8..f74b0e566f5 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py @@ -13,6 +13,7 @@ check_is_private_cluster, check_is_private_link_cluster, format_parameter_name_to_option_name, + get_monitoring_addon_key, get_property_from_dict_or_object, get_snapshot, get_snapshot_by_snapshot_id, @@ -347,5 +348,43 @@ def test_get_user_assigned_identity(self): get_user_assigned_identity("mock_cli_ctx", "mock_sub_id", "mock_rg", "mock_identity_name") +class TestGetMonitoringAddonKey(unittest.TestCase): + """Tests for the shared get_monitoring_addon_key helper.""" + + def test_returns_default_when_addon_profiles_is_none(self): + result = get_monitoring_addon_key(None, "omsagent", "omsAgent") + self.assertEqual(result, "omsagent") + + def test_returns_lowercase_key_when_present(self): + addon_profiles = {"omsagent": object()} + result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + self.assertEqual(result, "omsagent") + + def test_returns_camelcase_key_when_lowercase_absent(self): + addon_profiles = {"omsAgent": object()} + result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + self.assertEqual(result, "omsAgent") + + def test_prefers_lowercase_when_both_present(self): + addon_profiles = {"omsagent": object(), "omsAgent": object()} + result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + self.assertEqual(result, "omsagent") + + def test_returns_default_when_neither_key_present(self): + addon_profiles = {"someOtherAddon": object()} + result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + self.assertEqual(result, "omsagent") + + def test_returns_default_when_camelcase_is_none(self): + addon_profiles = {"omsAgent": object()} + result = get_monitoring_addon_key(addon_profiles, "omsagent", None) + self.assertEqual(result, "omsagent") + + def test_returns_default_when_camelcase_not_provided(self): + addon_profiles = {"omsAgent": object()} + result = get_monitoring_addon_key(addon_profiles, "omsagent") + self.assertEqual(result, "omsagent") + + if __name__ == "__main__": unittest.main() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 62f9cce5725..2b2f66abe80 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -8635,6 +8635,58 @@ def test_postprocessing_after_mc_created(self): assignee_principal_type=None, ) + # Case 5: Create with enable_high_log_scale_mode=True + CNL + # Verifies HLSM is passed through to ensure_container_insights_for_monitoring + dec_5 = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "resource_group_name": "test_rg_name", + "name": "test_name", + "enable_msi_auth_for_monitoring": True, + "enable_addons": "monitoring", + "enable_container_network_logs": True, + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + monitoring_addon_profile_5 = self.models.ManagedClusterAddonProfile( + enabled=True, + config={}, + ) + mc_5 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: monitoring_addon_profile_5, + }, + ) + dec_5.context.attach_mc(mc_5) + dec_5.context.set_intermediate("monitoring_addon_enabled", True) + mock_profile_5 = Mock(get_subscription_id=Mock(return_value="1234-5678-9012")) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.Profile", return_value=mock_profile_5 + ), patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.ensure_container_insights_for_monitoring" + ) as mock_ensure_5: + dec_5.postprocessing_after_mc_created(mc_5) + mock_ensure_5.assert_called_once_with( + self.cmd, + monitoring_addon_profile_5, + "1234-5678-9012", + "test_rg_name", + "test_name", + "test_location", + remove_monitoring=False, + aad_route=True, + create_dcr=False, + create_dcra=True, + enable_syslog=None, + data_collection_settings=None, + is_private_cluster=None, + ampls_resource_id=None, + enable_high_log_scale_mode=True, + ) + def test_put_mc(self): dec_1 = AKSManagedClusterCreateDecorator( self.cmd, @@ -12583,6 +12635,57 @@ def test_postprocessing_after_mc_created(self): assignee_principal_type=None, ) + # Case 5: Update with HLSM=True via monitoring_addon_postprocessing_required + # Verifies enable_high_log_scale_mode=True is passed and create_dcr=True for DCR update + dec_5 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "resource_group_name": "test_rg_name", + "name": "test_name", + "enable_msi_auth_for_monitoring": True, + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + monitoring_addon_profile_5 = self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + mc_5 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: monitoring_addon_profile_5, + }, + ) + dec_5.context.attach_mc(mc_5) + dec_5.context.set_intermediate("monitoring_addon_enabled", True) + dec_5.context.set_intermediate("monitoring_addon_postprocessing_required", True) + mock_profile_5 = Mock(get_subscription_id=Mock(return_value="1234-5678-9012")) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.Profile", return_value=mock_profile_5 + ), patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.ensure_container_insights_for_monitoring" + ) as mock_ensure_5: + dec_5.postprocessing_after_mc_created(mc_5) + mock_ensure_5.assert_called_once_with( + self.cmd, + monitoring_addon_profile_5, + "1234-5678-9012", + "test_rg_name", + "test_name", + "test_location", + remove_monitoring=False, + aad_route=True, + create_dcr=True, + create_dcra=True, + enable_syslog=None, + data_collection_settings=None, + is_private_cluster=None, + ampls_resource_id=None, + enable_high_log_scale_mode=True, + ) + def test_put_mc(self): dec_1 = AKSManagedClusterUpdateDecorator( self.cmd, @@ -14862,6 +14965,13 @@ def test_enable_container_network_logs(self): self.assertTrue( dec_9.context.get_intermediate("monitoring_addon_postprocessing_required") ) + # Verify HLSM is resolved to True + self.assertEqual(dec_9.context.get_enable_high_log_scale_mode(), True) + # Verify CNL flag was NOT added to addon config (HLSM alone doesn't set it) + self.assertNotIn( + "enableRetinaNetworkFlags", + dec_mc_9.addon_profiles["omsagent"].config or {}, + ) # Case 10: UPDATE - disable HLSM while CNL is active -> should ERROR dec_10 = AKSManagedClusterUpdateDecorator( @@ -14932,6 +15042,8 @@ def test_enable_container_network_logs(self): self.assertTrue( dec_11.context.get_intermediate("monitoring_addon_postprocessing_required") ) + # Verify HLSM is resolved to True + self.assertEqual(dec_11.context.get_enable_high_log_scale_mode(), True) # Case 12: UPDATE - enable HLSM without monitoring addon -> should ERROR dec_12 = AKSManagedClusterUpdateDecorator( From e7ac4ffee2ba9ba5bde36a1fc3e6090f15da2402 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 16:24:53 +0000 Subject: [PATCH 04/16] Tests --- .../azure/cli/command_modules/acs/custom.py | 11 +- .../acs/tests/latest/test_custom.py | 78 ++++++++ .../latest/test_managed_cluster_decorator.py | 177 ++++++++++++++++++ 3 files changed, 261 insertions(+), 5 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index 12a1d64508a..b5e6ddf42c5 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1749,16 +1749,17 @@ def _update_addons(cmd, instance, subscription_id, resource_group_name, name, ad "--enable_msi_auth_for_monitoring is not supported in %s cloud and continuing monitoring enablement without this flag.", cloud_name) enable_msi_auth_for_monitoring = False + # Capture existing CNL flag before overwriting config + existing_cnl = None + existing_addon = addon_profiles.get(addon) or addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) + if existing_addon and existing_addon.config: + existing_cnl = existing_addon.config.get("enableRetinaNetworkFlags") + addon_profile.config = { CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID: workspace_resource_id} addon_profile.config[CONST_MONITORING_USING_AAD_MSI_AUTH] = "true" if enable_msi_auth_for_monitoring else "false" # Preserve enableRetinaNetworkFlags (CNL) if it was set on the existing addon - # or if container network logs are being enabled simultaneously - existing_cnl = None - existing_addon = addon_profiles.get(addon) or addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) - if existing_addon and existing_addon.config: - existing_cnl = existing_addon.config.get("enableRetinaNetworkFlags") if existing_cnl is not None: addon_profile.config["enableRetinaNetworkFlags"] = existing_cnl elif addon == (CONST_VIRTUAL_NODE_ADDON_NAME + os_type): diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py index 59071f51ed3..684b03e4a7f 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py @@ -17,6 +17,8 @@ CONST_HTTP_APPLICATION_ROUTING_ADDON_NAME, CONST_KUBE_DASHBOARD_ADDON_NAME, CONST_MONITORING_ADDON_NAME, + CONST_MONITORING_ADDON_NAME_CAMELCASE, + CONST_MONITORING_USING_AAD_MSI_AUTH, ) from azure.cli.command_modules.acs.addonconfiguration import ( ensure_default_log_analytics_workspace_for_monitoring, @@ -25,6 +27,7 @@ _get_command_context, _update_addons, aks_stop, + is_monitoring_addon_enabled, k8s_install_kubectl, k8s_install_kubelogin, merge_kubernetes_configurations, @@ -613,6 +616,44 @@ def test_update_addons(self, rg_def, get_resource_groups_client, get_resources_c addon_profile = instance.addon_profiles['ingressApplicationGateway'] self.assertFalse(addon_profile.enabled) + # monitoring enable preserves enableRetinaNetworkFlags (CNL) from camelCase addon key + # Scenario: cluster has monitoring under "omsAgent" (camelCase) key with CNL enabled; + # CLI enables monitoring (uses "omsagent" key), CNL should be preserved from old key + instance = mock.MagicMock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME_CAMELCASE: ManagedClusterAddonProfile( + enabled=False, + config={ + 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', + CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', + 'enableRetinaNetworkFlags': 'True', + }, + ), + } + instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', + 'clitest000001', 'clitest000001', 'monitoring', enable=True) + monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] + self.assertTrue(monitoring_profile.enabled) + self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') + + # monitoring enable preserves enableRetinaNetworkFlags from lowercase addon key + instance = mock.MagicMock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile( + enabled=False, + config={ + 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', + CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', + 'enableRetinaNetworkFlags': 'True', + }, + ), + } + instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', + 'clitest000001', 'clitest000001', 'monitoring', enable=True) + monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] + self.assertTrue(monitoring_profile.enabled) + self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') + @mock.patch('azure.cli.command_modules.acs.custom._urlretrieve') @mock.patch('azure.cli.command_modules.acs.custom.logger') def test_k8s_install_kubectl_emit_warnings(self, logger_mock, mock_url_retrieve): @@ -1333,5 +1374,42 @@ def test_exponential_backoff_increasing_sleep(self, mock_get_rg_location, mock_g self.assertGreater(second_sleep, first_sleep) +class TestIsMonitoringAddonEnabled(unittest.TestCase): + """Tests for the is_monitoring_addon_enabled helper in custom.py.""" + + def test_monitoring_enabled_with_lowercase_key(self): + instance = mock.Mock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile(enabled=True, config={}), + } + self.assertTrue(is_monitoring_addon_enabled("monitoring", instance)) + + def test_monitoring_enabled_with_camelcase_key(self): + instance = mock.Mock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME_CAMELCASE: ManagedClusterAddonProfile(enabled=True, config={}), + } + self.assertTrue(is_monitoring_addon_enabled("monitoring", instance)) + + def test_monitoring_disabled_with_camelcase_key(self): + instance = mock.Mock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME_CAMELCASE: ManagedClusterAddonProfile(enabled=False, config={}), + } + self.assertFalse(is_monitoring_addon_enabled("monitoring", instance)) + + def test_no_monitoring_addon_at_all(self): + instance = mock.Mock() + instance.addon_profiles = {} + self.assertFalse(is_monitoring_addon_enabled("monitoring", instance)) + + def test_non_monitoring_addon(self): + instance = mock.Mock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile(enabled=True, config={}), + } + self.assertFalse(is_monitoring_addon_enabled("http_application_routing", instance)) + + if __name__ == "__main__": unittest.main() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 2b2f66abe80..f8d682d727d 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -2829,6 +2829,27 @@ def test_get_enable_msi_auth_for_monitoring(self): ctx_1.attach_mc(mc) self.assertEqual(ctx_1.get_enable_msi_auth_for_monitoring(), True) + # camelCase key (omsAgent) should also be resolved + ctx_2 = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict( + { + "enable_msi_auth_for_monitoring": False, + } + ), + self.models, + DecoratorMode.CREATE, + ) + addon_profiles_2 = { + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + } + mc_2 = self.models.ManagedCluster(location="test_location", addon_profiles=addon_profiles_2) + ctx_2.attach_mc(mc_2) + self.assertEqual(ctx_2.get_enable_msi_auth_for_monitoring(), True) + def test_get_virtual_node_addon_os_type(self): # default ctx_1 = AKSManagedClusterContext(self.cmd, AKSManagedClusterParamDict({}), self.models, DecoratorMode.CREATE) @@ -12497,6 +12518,10 @@ def test_check_is_postprocessing_required(self): self.assertEqual(dec_1.check_is_postprocessing_required(mc_1), True) dec_1.context.remove_intermediate("monitoring_addon_enabled") + dec_1.context.set_intermediate("monitoring_addon_postprocessing_required", True) + self.assertEqual(dec_1.check_is_postprocessing_required(mc_1), True) + + dec_1.context.remove_intermediate("monitoring_addon_postprocessing_required") dec_1.context.set_intermediate("ingress_appgw_addon_enabled", True) self.assertEqual(dec_1.check_is_postprocessing_required(mc_1), True) @@ -12686,6 +12711,56 @@ def test_postprocessing_after_mc_created(self): enable_high_log_scale_mode=True, ) + # Case 6: Update postprocessing with camelCase addon key (omsAgent) + dec_6 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "resource_group_name": "test_rg_name", + "name": "test_name", + "enable_msi_auth_for_monitoring": True, + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + monitoring_addon_profile_6 = self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + mc_6 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME_CAMELCASE: monitoring_addon_profile_6, + }, + ) + dec_6.context.attach_mc(mc_6) + dec_6.context.set_intermediate("monitoring_addon_enabled", True) + dec_6.context.set_intermediate("monitoring_addon_postprocessing_required", True) + mock_profile_6 = Mock(get_subscription_id=Mock(return_value="1234-5678-9012")) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.Profile", return_value=mock_profile_6 + ), patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.ensure_container_insights_for_monitoring" + ) as mock_ensure_6: + dec_6.postprocessing_after_mc_created(mc_6) + mock_ensure_6.assert_called_once_with( + self.cmd, + monitoring_addon_profile_6, + "1234-5678-9012", + "test_rg_name", + "test_name", + "test_location", + remove_monitoring=False, + aad_route=True, + create_dcr=True, + create_dcra=True, + enable_syslog=None, + data_collection_settings=None, + is_private_cluster=None, + ampls_resource_id=None, + enable_high_log_scale_mode=True, + ) + def test_put_mc(self): dec_1 = AKSManagedClusterUpdateDecorator( self.cmd, @@ -15130,6 +15205,108 @@ def test_enable_container_network_logs(self): with self.assertRaises(MutuallyExclusiveArgumentError): dec_14.update_monitoring_profile_flow_logs(mc_14) + # Case 15: UPDATE - enable HLSM with camelCase key (omsAgent) + dec_15 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_15 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_15.context.attach_mc(mc_15) + dec_mc_15 = dec_15.update_monitoring_profile_flow_logs(mc_15) + self.assertTrue( + dec_15.context.get_intermediate("monitoring_addon_postprocessing_required") + ) + self.assertEqual(dec_15.context.get_enable_high_log_scale_mode(), True) + + # Case 16: UPDATE - enable CNL with camelCase key (omsAgent) + dec_16 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_container_network_logs": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_16 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_16.context.attach_mc(mc_16) + dec_mc_16 = dec_16.update_monitoring_profile_flow_logs(mc_16) + self.assertEqual( + dec_mc_16.addon_profiles[CONST_MONITORING_ADDON_NAME_CAMELCASE].config["enableRetinaNetworkFlags"], + "True", + ) + self.assertTrue( + dec_16.context.get_intermediate("monitoring_addon_postprocessing_required") + ) + + # Case 17: UPDATE - disable HLSM with camelCase key, CNL active -> should ERROR + dec_17 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": False, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_17 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + "enableRetinaNetworkFlags": "True", + }, + ) + }, + ) + dec_17.context.attach_mc(mc_17) + with self.assertRaises(MutuallyExclusiveArgumentError): + dec_17.update_monitoring_profile_flow_logs(mc_17) + if __name__ == "__main__": unittest.main() From 4306187c6a5c2a6287840abbb038c5aefc487d49 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 20:15:04 +0000 Subject: [PATCH 05/16] Fix update workflow --- .../azure/cli/command_modules/acs/custom.py | 14 ++- .../acs/managed_cluster_decorator.py | 25 ++++ .../acs/tests/latest/test_custom.py | 84 +++++++++++++ .../latest/test_managed_cluster_decorator.py | 118 ++++++++++++++++++ 4 files changed, 240 insertions(+), 1 deletion(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index b5e6ddf42c5..46c5e463b27 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1836,7 +1836,19 @@ def _update_addons(cmd, instance, subscription_id, resource_group_name, name, ad else: raise CLIError( "The addon {} is not installed.".format(addon)) - addon_profiles[addon].config = None + # When disabling the monitoring addon, preserve enableRetinaNetworkFlags (CNL) + # so that re-enabling the addon later restores the CNL setting. + monitoring_addon_key = get_monitoring_addon_key( + addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE + ) + if addon == monitoring_addon_key and addon_profiles[addon].config: + existing_cnl = addon_profiles[addon].config.get("enableRetinaNetworkFlags") + if existing_cnl is not None: + addon_profiles[addon].config = {"enableRetinaNetworkFlags": existing_cnl} + else: + addon_profiles[addon].config = None + else: + addon_profiles[addon].config = None addon_profiles[addon].enabled = enable instance.addon_profiles = addon_profiles diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 8eb833b3988..75f8fffb4d4 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -9941,6 +9941,31 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: self.context.external_functions.add_monitoring_role_assignment( cluster, cluster_resource_id, self.cmd ) + # If CNL/HLSM flags changed, also update the DCR even though MSI auth + # was not detected via get_enable_msi_auth_for_monitoring (which returns + # False for existing MSI clusters where client_id=="msi"). + if monitoring_addon_postprocessing_required: + addon_consts = self.context.get_addon_consts() + monitoring_addon_key = _get_monitoring_addon_key_from_consts( + cluster.addon_profiles, addon_consts + ) + self.context.external_functions.ensure_container_insights_for_monitoring( + self.cmd, + cluster.addon_profiles[monitoring_addon_key], + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + remove_monitoring=False, + aad_route=True, + create_dcr=True, + create_dcra=False, + enable_syslog=self.context.get_enable_syslog(), + data_collection_settings=self.context.get_data_collection_settings(), + is_private_cluster=self.context.get_enable_private_cluster(), + ampls_resource_id=self.context.get_ampls_resource_id(), + enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), + ) elif ( self.context.raw_param.get("enable_addons") is not None or monitoring_addon_postprocessing_required diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py index 684b03e4a7f..5d7366e7977 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py @@ -654,6 +654,90 @@ def test_update_addons(self, rg_def, get_resource_groups_client, get_resources_c self.assertTrue(monitoring_profile.enabled) self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') + # monitoring disable preserves enableRetinaNetworkFlags (CNL) in config + instance = mock.MagicMock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile( + enabled=True, + config={ + 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', + CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', + 'enableRetinaNetworkFlags': 'True', + }, + ), + } + instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', + 'clitest000001', 'clitest000001', 'monitoring', enable=False) + monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] + self.assertFalse(monitoring_profile.enabled) + self.assertIsNotNone(monitoring_profile.config) + self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') + # Workspace and auth keys should NOT be preserved (only CNL flag) + self.assertIsNone(monitoring_profile.config.get('logAnalyticsWorkspaceResourceID')) + + # monitoring disable → enable cycle preserves CNL (Test 6b scenario) + instance = mock.MagicMock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile( + enabled=True, + config={ + 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/old-ws', + CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', + 'enableRetinaNetworkFlags': 'True', + }, + ), + } + # Step 1: disable monitoring + instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', + 'clitest000001', 'clitest000001', 'monitoring', enable=False) + self.assertFalse(instance.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled) + self.assertEqual(instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config.get('enableRetinaNetworkFlags'), 'True') + # Step 2: re-enable monitoring with new workspace + instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', + 'clitest000001', 'clitest000001', 'monitoring', enable=True, + workspace_resource_id='/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/new-ws') + monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] + self.assertTrue(monitoring_profile.enabled) + self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') + self.assertIn('new-ws', monitoring_profile.config.get('logAnalyticsWorkspaceResourceID')) + + # monitoring disable without CNL sets config to None + instance = mock.MagicMock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile( + enabled=True, + config={ + 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', + CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', + }, + ), + } + instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', + 'clitest000001', 'clitest000001', 'monitoring', enable=False) + monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] + self.assertFalse(monitoring_profile.enabled) + self.assertIsNone(monitoring_profile.config) + + # monitoring disable preserves CNL with camelCase key (omsAgent) + instance = mock.MagicMock() + instance.addon_profiles = { + CONST_MONITORING_ADDON_NAME_CAMELCASE: ManagedClusterAddonProfile( + enabled=True, + config={ + 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', + CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', + 'enableRetinaNetworkFlags': 'True', + }, + ), + } + # The addon key normalization in _update_addons remaps camelCase to lowercase + instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', + 'clitest000001', 'clitest000001', 'monitoring', enable=False) + monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] + self.assertFalse(monitoring_profile.enabled) + self.assertIsNotNone(monitoring_profile.config) + self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') + @mock.patch('azure.cli.command_modules.acs.custom._urlretrieve') @mock.patch('azure.cli.command_modules.acs.custom.logger') def test_k8s_install_kubectl_emit_warnings(self, logger_mock, mock_url_retrieve): diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index f8d682d727d..29c5e135177 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -12761,6 +12761,124 @@ def test_postprocessing_after_mc_created(self): enable_high_log_scale_mode=True, ) + # Case 7: Update with CNL enabled on an MSI cluster where monitoring was already enabled. + # In the update flow, update_addon_profiles sets monitoring_addon_enabled=True + # (because the addon is already present and enabled on the cluster). + # get_enable_msi_auth_for_monitoring() returns False for MSI clusters where + # service_principal_profile.client_id="msi", so the code enters the + # "if not enable_msi_auth_for_monitoring:" branch. The fix ensures that when + # monitoring_addon_postprocessing_required=True, the DCR is still updated + # with aad_route=True inside that branch. + dec_7 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "resource_group_name": "test_rg_name", + "name": "test_name", + "enable_msi_auth_for_monitoring": False, + "enable_container_network_logs": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + monitoring_addon_profile_7 = self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + "enableRetinaNetworkFlags": "True", + }, + ) + mc_7 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: monitoring_addon_profile_7, + }, + service_principal_profile=self.models.ManagedClusterServicePrincipalProfile( + client_id="msi" + ), + ) + dec_7.context.attach_mc(mc_7) + # monitoring_addon_enabled is True — set by update_addon_profiles because addon already exists + dec_7.context.set_intermediate("monitoring_addon_enabled", True) + dec_7.context.set_intermediate("monitoring_addon_postprocessing_required", True) + mock_profile_7 = Mock(get_subscription_id=Mock(return_value="1234-5678-9012")) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.Profile", return_value=mock_profile_7 + ), patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.ensure_container_insights_for_monitoring" + ) as mock_ensure_7: + dec_7.postprocessing_after_mc_created(mc_7) + mock_ensure_7.assert_called_once_with( + self.cmd, + monitoring_addon_profile_7, + "1234-5678-9012", + "test_rg_name", + "test_name", + "test_location", + remove_monitoring=False, + aad_route=True, + create_dcr=True, + create_dcra=False, + enable_syslog=None, + data_collection_settings=None, + is_private_cluster=None, + ampls_resource_id=None, + enable_high_log_scale_mode=True, + ) + + # Case 8: Update with HLSM-only on an MSI cluster where monitoring was already enabled + dec_8 = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "resource_group_name": "test_rg_name", + "name": "test_name", + "enable_msi_auth_for_monitoring": False, + "enable_high_log_scale_mode": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + monitoring_addon_profile_8 = self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + mc_8 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: monitoring_addon_profile_8, + }, + service_principal_profile=self.models.ManagedClusterServicePrincipalProfile( + client_id="msi" + ), + ) + dec_8.context.attach_mc(mc_8) + # monitoring_addon_enabled is True — set by update_addon_profiles because addon already exists + dec_8.context.set_intermediate("monitoring_addon_enabled", True) + dec_8.context.set_intermediate("monitoring_addon_postprocessing_required", True) + mock_profile_8 = Mock(get_subscription_id=Mock(return_value="1234-5678-9012")) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.Profile", return_value=mock_profile_8 + ), patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.ensure_container_insights_for_monitoring" + ) as mock_ensure_8: + dec_8.postprocessing_after_mc_created(mc_8) + mock_ensure_8.assert_called_once_with( + self.cmd, + monitoring_addon_profile_8, + "1234-5678-9012", + "test_rg_name", + "test_name", + "test_location", + remove_monitoring=False, + aad_route=True, + create_dcr=True, + create_dcra=False, + enable_syslog=None, + data_collection_settings=None, + is_private_cluster=None, + ampls_resource_id=None, + enable_high_log_scale_mode=True, + ) + def test_put_mc(self): dec_1 = AKSManagedClusterUpdateDecorator( self.cmd, From a8806a2bda47bcec83c21a78c189d39d16a52280 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 20:21:27 +0000 Subject: [PATCH 06/16] Add missing_parameter_test_coverage exclusion for aks update --enable-high-log-scale-mode --- .../azure/cli/command_modules/acs/linter_exclusions.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml index e0e238260ed..7821855f33d 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml +++ b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml @@ -191,6 +191,7 @@ aks update: enable_high_log_scale_mode: rule_exclusions: - option_length_too_long + - missing_parameter_test_coverage disable_acns_observability: rule_exclusions: - option_length_too_long From 84889c0b7c591a1efc33fa251d8e4042686f103d Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 20:32:20 +0000 Subject: [PATCH 07/16] Remove workspace creation retry logic and related tests --- .../command_modules/acs/addonconfiguration.py | 51 ++---- .../acs/tests/latest/test_custom.py | 162 +----------------- 2 files changed, 12 insertions(+), 201 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py index 6af52cbf175..61da2361dce 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py +++ b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py @@ -4,9 +4,7 @@ # -------------------------------------------------------------------------------------------- import json import os -import random import re -import time from azure.cli.command_modules.acs._client_factory import get_resource_groups_client, get_resources_client from azure.cli.core.util import get_file_json @@ -24,7 +22,7 @@ from azure.cli.core.azclierror import AzCLIError, CLIError, InvalidArgumentValueError, ArgumentUsageError from azure.cli.core.profiles import ResourceType from azure.cli.core.util import send_raw_request -from azure.core.exceptions import HttpResponseError, ResourceExistsError +from azure.core.exceptions import HttpResponseError from azure.mgmt.core.tools import parse_resource_id, resource_id from knack.log import get_logger @@ -327,45 +325,18 @@ def ensure_default_log_analytics_workspace_for_monitoring( location=workspace_region, properties={"sku": {"name": "standalone"}} ) - # Retry with exponential backoff + jitter for 409 Conflict during workspace provisioning - _MAX_RETRY_TIMES = 3 - _BASE_SLEEP_SECONDS = 5 - for retry_count in range(_MAX_RETRY_TIMES): - try: - async_poller = resources.begin_create_or_update_by_id( - default_workspace_resource_id, "2015-11-01-preview", generic_resource - ) - - ws_resource_id = "" - while True: - result = async_poller.result(15) - if async_poller.done(): - ws_resource_id = result.id - break + async_poller = resources.begin_create_or_update_by_id( + default_workspace_resource_id, "2015-11-01-preview", generic_resource + ) - return ws_resource_id - except ResourceExistsError: - # ResourceExistsError is a subclass of HttpResponseError, so must be caught first - if retry_count >= (_MAX_RETRY_TIMES - 1): - raise - sleep_seconds = _BASE_SLEEP_SECONDS * (2 ** retry_count) + random.uniform(0, 2) - logger.warning( - "Workspace already exists (attempt %d/%d), retrying in %.1fs...", - retry_count + 1, _MAX_RETRY_TIMES, sleep_seconds - ) - time.sleep(sleep_seconds) - except HttpResponseError as ex: - is_conflict = hasattr(ex, 'status_code') and ex.status_code == 409 - if not is_conflict or retry_count >= (_MAX_RETRY_TIMES - 1): - raise - sleep_seconds = _BASE_SLEEP_SECONDS * (2 ** retry_count) + random.uniform(0, 2) - logger.warning( - "Workspace creation conflict (attempt %d/%d), retrying in %.1fs...", - retry_count + 1, _MAX_RETRY_TIMES, sleep_seconds - ) - time.sleep(sleep_seconds) + ws_resource_id = "" + while True: + result = async_poller.result(15) + if async_poller.done(): + ws_resource_id = result.id + break - return default_workspace_resource_id + return ws_resource_id def sanitize_loganalytics_ws_resource_id(workspace_resource_id): diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py index 5d7366e7977..0dd4f4ecdde 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py @@ -48,7 +48,7 @@ ) from azure.cli.core.util import CLIError from azure.cli.core.profiles import ResourceType -from azure.core.exceptions import HttpResponseError, ResourceExistsError +from azure.core.exceptions import HttpResponseError from azure.mgmt.containerservice.models import ( ManagedClusterAddonProfile, ) @@ -1298,166 +1298,6 @@ def test_unknown_delos_region_defaults_to_deloscloudgermanycentral(self, mock_ge self.assertIn(f'DefaultWorkspace-{subscription_id}-DELOSC', result) -class TestWorkspaceCreationRetry(unittest.TestCase): - """Tests for the retry logic in ensure_default_log_analytics_workspace_for_monitoring.""" - - def setUp(self): - self.cli = MockCLI() - self.cmd = MockCmd(self.cli) - self.cmd.cli_ctx.cloud.name = 'AzureCloud' - - def _make_mocks(self): - """Create standard mocks for resource group and resources clients.""" - mock_rg_client = mock.Mock() - mock_rg_client.check_existence.return_value = False - mock_rg_client.create_or_update = mock.Mock() - - mock_poller = mock.Mock() - mock_result = mock.Mock() - mock_result.id = '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws' - mock_poller.result.return_value = mock_result - mock_poller.done.return_value = True - - mock_resources_client = mock.Mock() - mock_resources_client.begin_create_or_update_by_id.return_value = mock_poller - - self.cmd.get_models = mock.Mock(return_value=mock.Mock) - return mock_rg_client, mock_resources_client - - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') - def test_retry_on_409_conflict(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): - """409 Conflict should be retried with exponential backoff.""" - mock_get_rg_location.return_value = 'eastus' - mock_rg_client, mock_resources_client = self._make_mocks() - mock_get_rg_client.return_value = mock_rg_client - mock_get_resources_client.return_value = mock_resources_client - - # First call raises 409, second succeeds - conflict_error = HttpResponseError(response=mock.Mock(status_code=409)) - conflict_error.status_code = 409 - mock_poller_success = mock.Mock() - mock_result = mock.Mock() - mock_result.id = '/subscriptions/sub/rg/ws' - mock_poller_success.result.return_value = mock_result - mock_poller_success.done.return_value = True - mock_resources_client.begin_create_or_update_by_id.side_effect = [ - conflict_error, mock_poller_success - ] - - result = ensure_default_log_analytics_workspace_for_monitoring( - self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' - ) - - self.assertEqual(result, '/subscriptions/sub/rg/ws') - self.assertEqual(mock_sleep.call_count, 1) - # Verify backoff: first retry sleep should be base (5) * 2^0 + jitter - sleep_arg = mock_sleep.call_args[0][0] - self.assertGreaterEqual(sleep_arg, 5.0) - self.assertLessEqual(sleep_arg, 7.0) # 5 + up to 2s jitter - - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') - def test_non_409_http_error_not_retried(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): - """Non-409 HttpResponseError should be raised immediately, not retried.""" - mock_get_rg_location.return_value = 'eastus' - mock_rg_client, mock_resources_client = self._make_mocks() - mock_get_rg_client.return_value = mock_rg_client - mock_get_resources_client.return_value = mock_resources_client - - bad_request_error = HttpResponseError(response=mock.Mock(status_code=400)) - bad_request_error.status_code = 400 - mock_resources_client.begin_create_or_update_by_id.side_effect = bad_request_error - - with self.assertRaises(HttpResponseError): - ensure_default_log_analytics_workspace_for_monitoring( - self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' - ) - - mock_sleep.assert_not_called() - - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') - def test_resource_exists_error_retried(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): - """ResourceExistsError should be retried.""" - mock_get_rg_location.return_value = 'eastus' - mock_rg_client, mock_resources_client = self._make_mocks() - mock_get_rg_client.return_value = mock_rg_client - mock_get_resources_client.return_value = mock_resources_client - - exists_error = ResourceExistsError("already exists") - mock_poller_success = mock.Mock() - mock_result = mock.Mock() - mock_result.id = '/subscriptions/sub/rg/ws' - mock_poller_success.result.return_value = mock_result - mock_poller_success.done.return_value = True - mock_resources_client.begin_create_or_update_by_id.side_effect = [ - exists_error, mock_poller_success - ] - - result = ensure_default_log_analytics_workspace_for_monitoring( - self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' - ) - - self.assertEqual(result, '/subscriptions/sub/rg/ws') - self.assertEqual(mock_sleep.call_count, 1) - - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') - def test_409_conflict_exhausts_retries(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): - """409 Conflict that persists through all retries should raise.""" - mock_get_rg_location.return_value = 'eastus' - mock_rg_client, mock_resources_client = self._make_mocks() - mock_get_rg_client.return_value = mock_rg_client - mock_get_resources_client.return_value = mock_resources_client - - conflict_error = HttpResponseError(response=mock.Mock(status_code=409)) - conflict_error.status_code = 409 - mock_resources_client.begin_create_or_update_by_id.side_effect = conflict_error - - with self.assertRaises(HttpResponseError): - ensure_default_log_analytics_workspace_for_monitoring( - self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' - ) - - # Should have slept twice (retries 0 and 1), then raised on retry 2 - self.assertEqual(mock_sleep.call_count, 2) - - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.time.sleep') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resources_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_resource_groups_client') - @mock.patch('azure.cli.command_modules.acs.addonconfiguration.get_rg_location') - def test_exponential_backoff_increasing_sleep(self, mock_get_rg_location, mock_get_rg_client, mock_get_resources_client, mock_sleep): - """Sleep durations should increase exponentially across retries.""" - mock_get_rg_location.return_value = 'eastus' - mock_rg_client, mock_resources_client = self._make_mocks() - mock_get_rg_client.return_value = mock_rg_client - mock_get_resources_client.return_value = mock_resources_client - - conflict_error = HttpResponseError(response=mock.Mock(status_code=409)) - conflict_error.status_code = 409 - mock_resources_client.begin_create_or_update_by_id.side_effect = conflict_error - - with self.assertRaises(HttpResponseError): - ensure_default_log_analytics_workspace_for_monitoring( - self.cmd, '00000000-0000-0000-0000-000000000000', 'test-rg' - ) - - self.assertEqual(mock_sleep.call_count, 2) - first_sleep = mock_sleep.call_args_list[0][0][0] - second_sleep = mock_sleep.call_args_list[1][0][0] - # Second sleep should be larger (base doubles: 5->10, plus jitter) - self.assertGreater(second_sleep, first_sleep) - - class TestIsMonitoringAddonEnabled(unittest.TestCase): """Tests for the is_monitoring_addon_enabled helper in custom.py.""" From c80c61c0e079804bbef2e5fc2a739399859723e1 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 25 Mar 2026 10:55:49 +0000 Subject: [PATCH 08/16] Fix live tests --- .../command_modules/acs/addonconfiguration.py | 37 ++++++++++++------- .../acs/tests/latest/test_aks_commands.py | 3 ++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py index 61da2361dce..5eb18ed59d7 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py +++ b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py @@ -4,7 +4,9 @@ # -------------------------------------------------------------------------------------------- import json import os +import random import re +import time from azure.cli.command_modules.acs._client_factory import get_resource_groups_client, get_resources_client from azure.cli.core.util import get_file_json @@ -657,24 +659,31 @@ def ensure_container_insights_for_monitoring( ) resources = get_resources_client(cmd.cli_ctx, cluster_subscription) - for _ in range(3): + dcr_creation_body = json.loads( + dcr_creation_body_with_syslog if enable_syslog else dcr_creation_body_without_syslog + ) + max_retries = 3 + max_total_delay = 30 + total_delay = 0 + for attempt in range(max_retries): try: - if enable_syslog: - resources.begin_create_or_update_by_id( - dcr_resource_id, - "2022-06-01", - json.loads(dcr_creation_body_with_syslog) - ) - else: - resources.begin_create_or_update_by_id( - dcr_resource_id, - "2022-06-01", - json.loads(dcr_creation_body_without_syslog) - ) + resources.begin_create_or_update_by_id( + dcr_resource_id, + "2022-06-01", + dcr_creation_body + ) error = None break - except CLIError as e: + except (CLIError, HttpResponseError) as e: error = e + if attempt < max_retries - 1 and total_delay < max_total_delay: + delay = min(2 ** attempt + random.uniform(0, 1), max_total_delay - total_delay) + logger.warning( + "DCR creation attempt %d/%d failed, retrying in %.1f seconds: %s", + attempt + 1, max_retries, delay, e + ) + time.sleep(delay) + total_delay += delay else: raise error diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index b6414c9a84a..e18e87a8ef6 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -9186,6 +9186,9 @@ def enable_monitoring_existing_cluster_aad_auth(self, resource_group, resource_g self.check('properties.dataCollectionRuleId', f'{dcr_resource_id}') ]) + # wait for any in-progress cluster operation to finish before disabling + self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') + # make sure monitoring can be smoothly disabled self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') From 0029fc113495dcfa6e507100f5269dd68db5cabd Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 25 Mar 2026 13:07:03 +0000 Subject: [PATCH 09/16] PR comments + fix live tests --- .../azure/cli/command_modules/acs/_helpers.py | 19 +++++++-- .../command_modules/acs/linter_exclusions.yml | 1 - .../acs/managed_cluster_decorator.py | 41 +++---------------- .../acs/tests/latest/test_aks_commands.py | 5 +++ .../acs/tests/latest/test_helpers.py | 11 +++++ 5 files changed, 37 insertions(+), 40 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_helpers.py b/src/azure-cli/azure/cli/command_modules/acs/_helpers.py index 433a5a7f8fa..4e1019ca592 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_helpers.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_helpers.py @@ -29,18 +29,29 @@ def get_monitoring_addon_key(addon_profiles, monitoring_addon_name, monitoring_addon_name_camelcase=None): - """Return the key present in addon_profiles for the monitoring addon. + """Return the canonical key for the monitoring addon, normalizing non-standard casing. - The API response may return the monitoring addon key as either "omsagent" - (lowercase) or "omsAgent" (camelCase). This helper checks both variants - and returns the one that exists, falling back to the lowercase constant. + The API response may return the monitoring addon key in any casing (e.g. + "omsagent", "omsAgent", "oMSaGent"). This helper performs a + case-insensitive lookup and, when a non-standard key is found, re-keys + addon_profiles in-place so that subsequent code always uses the canonical + ``monitoring_addon_name`` (lowercase) form. """ if addon_profiles is None: return monitoring_addon_name + # Exact match on the canonical lowercase name – preferred form. if monitoring_addon_name in addon_profiles: return monitoring_addon_name + # Exact match on the known camelCase variant. if monitoring_addon_name_camelcase and monitoring_addon_name_camelcase in addon_profiles: return monitoring_addon_name_camelcase + # Case-insensitive fallback: catch any other casing the server may return. + target_lower = monitoring_addon_name.lower() + for key in list(addon_profiles): + if key.lower() == target_lower: + # Normalize: move the profile to the canonical key. + addon_profiles[monitoring_addon_name] = addon_profiles.pop(key) + return monitoring_addon_name return monitoring_addon_name diff --git a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml index 7821855f33d..e0e238260ed 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml +++ b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml @@ -191,7 +191,6 @@ aks update: enable_high_log_scale_mode: rule_exclusions: - option_length_too_long - - missing_parameter_test_coverage disable_acns_observability: rule_exclusions: - option_length_too_long diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 75f8fffb4d4..10ecb05fd03 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -9941,40 +9941,11 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: self.context.external_functions.add_monitoring_role_assignment( cluster, cluster_resource_id, self.cmd ) - # If CNL/HLSM flags changed, also update the DCR even though MSI auth - # was not detected via get_enable_msi_auth_for_monitoring (which returns - # False for existing MSI clusters where client_id=="msi"). - if monitoring_addon_postprocessing_required: - addon_consts = self.context.get_addon_consts() - monitoring_addon_key = _get_monitoring_addon_key_from_consts( - cluster.addon_profiles, addon_consts - ) - self.context.external_functions.ensure_container_insights_for_monitoring( - self.cmd, - cluster.addon_profiles[monitoring_addon_key], - self.context.get_subscription_id(), - self.context.get_resource_group_name(), - self.context.get_name(), - self.context.get_location(), - remove_monitoring=False, - aad_route=True, - create_dcr=True, - create_dcra=False, - enable_syslog=self.context.get_enable_syslog(), - data_collection_settings=self.context.get_data_collection_settings(), - is_private_cluster=self.context.get_enable_private_cluster(), - ampls_resource_id=self.context.get_ampls_resource_id(), - enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), - ) - elif ( - self.context.raw_param.get("enable_addons") is not None or - monitoring_addon_postprocessing_required - ): - # Create/update the DCR and DCRA here + if ( + enable_msi_auth_for_monitoring and self.context.raw_param.get("enable_addons") is not None + ) or monitoring_addon_postprocessing_required: addon_consts = self.context.get_addon_consts() monitoring_addon_key = _get_monitoring_addon_key_from_consts(cluster.addon_profiles, addon_consts) - # When CNL/HLSM flags changed, also update the DCR - needs_dcr_update = monitoring_addon_postprocessing_required self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, cluster.addon_profiles[monitoring_addon_key], @@ -9983,9 +9954,9 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: self.context.get_name(), self.context.get_location(), remove_monitoring=False, - aad_route=self.context.get_enable_msi_auth_for_monitoring(), - create_dcr=needs_dcr_update, - create_dcra=True, + aad_route=True, + create_dcr=monitoring_addon_postprocessing_required, + create_dcra=enable_msi_auth_for_monitoring, enable_syslog=self.context.get_enable_syslog(), data_collection_settings=self.context.get_data_collection_settings(), is_private_cluster=self.context.get_enable_private_cluster(), diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index e18e87a8ef6..51ae4c1ddec 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -9094,6 +9094,9 @@ def create_new_cluster_with_monitoring_aad_auth(self, resource_group, resource_g self.check('properties.provisioningState', f'Succeeded') ]) + # wait for any in-progress cluster operation to finish before disabling + self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') + # make sure monitoring can be smoothly disabled self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') @@ -9241,6 +9244,8 @@ def test_aks_create_with_monitoring_legacy_auth(self, resource_group, resource_g except Exception as err: pass # this is expected + # wait for any in-progress cluster operation to finish before disabling + self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') # make sure monitoring can be smoothly disabled self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py index f74b0e566f5..df4feb621db 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py @@ -375,6 +375,17 @@ def test_returns_default_when_neither_key_present(self): result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") self.assertEqual(result, "omsagent") + def test_normalizes_nonstandard_casing(self): + """A key like 'oMSaGent' should be re-keyed to the canonical form.""" + profile = object() + addon_profiles = {"oMSaGent": profile} + result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + self.assertEqual(result, "omsagent") + # The dict should now contain the canonical key, not the old one. + self.assertIn("omsagent", addon_profiles) + self.assertNotIn("oMSaGent", addon_profiles) + self.assertIs(addon_profiles["omsagent"], profile) + def test_returns_default_when_camelcase_is_none(self): addon_profiles = {"omsAgent": object()} result = get_monitoring_addon_key(addon_profiles, "omsagent", None) From 6497baca54c214b3f4402222e626cbd0a105e469 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 25 Mar 2026 13:30:23 +0000 Subject: [PATCH 10/16] Fix lint + cleanup --- .../command_modules/acs/addonconfiguration.py | 17 ++--------------- .../acs/tests/latest/test_aks_commands.py | 7 ++----- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py index 5eb18ed59d7..745ad2e6a49 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py +++ b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py @@ -4,9 +4,7 @@ # -------------------------------------------------------------------------------------------- import json import os -import random import re -import time from azure.cli.command_modules.acs._client_factory import get_resource_groups_client, get_resources_client from azure.cli.core.util import get_file_json @@ -662,10 +660,7 @@ def ensure_container_insights_for_monitoring( dcr_creation_body = json.loads( dcr_creation_body_with_syslog if enable_syslog else dcr_creation_body_without_syslog ) - max_retries = 3 - max_total_delay = 30 - total_delay = 0 - for attempt in range(max_retries): + for _ in range(3): try: resources.begin_create_or_update_by_id( dcr_resource_id, @@ -674,16 +669,8 @@ def ensure_container_insights_for_monitoring( ) error = None break - except (CLIError, HttpResponseError) as e: + except CLIError as e: error = e - if attempt < max_retries - 1 and total_delay < max_total_delay: - delay = min(2 ** attempt + random.uniform(0, 1), max_total_delay - total_delay) - logger.warning( - "DCR creation attempt %d/%d failed, retrying in %.1f seconds: %s", - attempt + 1, max_retries, delay, e - ) - time.sleep(delay) - total_delay += delay else: raise error diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 51ae4c1ddec..925054b1599 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -13351,12 +13351,9 @@ def test_aks_create_acns_with_flow_logs( ) # update: enable high log scale mode independently via aks update - enable_hlsm_cmd = ( - "aks update --resource-group={resource_group} --name={name} " - "--enable-high-log-scale-mode " - ) self.cmd( - enable_hlsm_cmd, + "aks update --resource-group={resource_group} --name={name} " + "--enable-high-log-scale-mode ", checks=[ self.check("provisioningState", "Succeeded"), self.check("addonProfiles.omsagent.enabled", True), From 190f319d3bafe169fdb7a83debef0881ca9742df Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 25 Mar 2026 15:59:01 +0000 Subject: [PATCH 11/16] Fix tests + workspaces --- .../azure/cli/command_modules/acs/custom.py | 5 + .../acs/tests/latest/test_aks_commands.py | 3 + .../acs/tests/latest/test_custom.py | 136 ++++++++++++++++++ 3 files changed, 144 insertions(+) diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index 46c5e463b27..af5d56af60f 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1623,6 +1623,11 @@ def aks_enable_addons(cmd, client, resource_group_name, name, addons, monitoring_addon_key = get_monitoring_addon_key( instance.addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE ) + # Auto-enable HLSM if CNL is active and HLSM was not explicitly set + if enable_high_log_scale_mode is None: + cnl_flag = instance.addon_profiles[monitoring_addon_key].config.get("enableRetinaNetworkFlags") + if cnl_flag is not None and str(cnl_flag).lower() == "true": + enable_high_log_scale_mode = True if CONST_MONITORING_USING_AAD_MSI_AUTH in instance.addon_profiles[monitoring_addon_key].config and \ str(instance.addon_profiles[monitoring_addon_key].config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == 'true': if msi_auth: diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 925054b1599..4ff9ba10acf 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -9096,6 +9096,7 @@ def create_new_cluster_with_monitoring_aad_auth(self, resource_group, resource_g # wait for any in-progress cluster operation to finish before disabling self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') + time.sleep(30) # make sure monitoring can be smoothly disabled self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') @@ -9191,6 +9192,7 @@ def enable_monitoring_existing_cluster_aad_auth(self, resource_group, resource_g # wait for any in-progress cluster operation to finish before disabling self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') + time.sleep(30) # make sure monitoring can be smoothly disabled self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') @@ -9246,6 +9248,7 @@ def test_aks_create_with_monitoring_legacy_auth(self, resource_group, resource_g # wait for any in-progress cluster operation to finish before disabling self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') + time.sleep(30) # make sure monitoring can be smoothly disabled self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py index 0dd4f4ecdde..da8c4874c34 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py @@ -26,6 +26,7 @@ from azure.cli.command_modules.acs.custom import ( _get_command_context, _update_addons, + aks_enable_addons, aks_stop, is_monitoring_addon_enabled, k8s_install_kubectl, @@ -1335,5 +1336,140 @@ def test_non_monitoring_addon(self): self.assertFalse(is_monitoring_addon_enabled("http_application_routing", instance)) +class TestAksEnableAddonsAutoHLSM(unittest.TestCase): + """Tests for auto-detection of HLSM when CNL is active in aks_enable_addons.""" + + def setUp(self): + self.cli = MockCLI() + self.cmd = MockCmd(self.cli) + + def _build_instance(self, cnl_flag=None, addon_key=CONST_MONITORING_ADDON_NAME): + """Build a mock cluster instance with monitoring addon.""" + config = { + 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', + CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', + } + if cnl_flag is not None: + config['enableRetinaNetworkFlags'] = cnl_flag + instance = mock.MagicMock() + instance.addon_profiles = { + addon_key: ManagedClusterAddonProfile(enabled=True, config=config), + } + instance.service_principal_profile.client_id = "msi" + instance.api_server_access_profile = None + instance.location = "eastus" + return instance + + @mock.patch("azure.cli.command_modules.acs.custom.ensure_container_insights_for_monitoring") + @mock.patch("azure.cli.command_modules.acs.custom.LongRunningOperation") + @mock.patch("azure.cli.command_modules.acs.custom._update_addons") + @mock.patch("azure.cli.command_modules.acs.custom.get_subscription_id", return_value="00000000-0000-0000-0000-000000000000") + def test_hlsm_auto_enabled_when_cnl_active(self, _mock_sub, mock_update, mock_lro, mock_ensure): + """When CNL is active and HLSM not set, HLSM should auto-enable.""" + instance = self._build_instance(cnl_flag="True") + mock_update.return_value = instance + mock_lro.return_value = lambda x: instance + client = mock.Mock() + client.get.return_value = instance + + aks_enable_addons(self.cmd, client, "rg", "cluster", "monitoring") + + mock_ensure.assert_called_once() + _, kwargs = mock_ensure.call_args + self.assertTrue(kwargs.get("enable_high_log_scale_mode")) + + @mock.patch("azure.cli.command_modules.acs.custom.ensure_container_insights_for_monitoring") + @mock.patch("azure.cli.command_modules.acs.custom.LongRunningOperation") + @mock.patch("azure.cli.command_modules.acs.custom._update_addons") + @mock.patch("azure.cli.command_modules.acs.custom.get_subscription_id", return_value="00000000-0000-0000-0000-000000000000") + def test_hlsm_not_auto_enabled_when_cnl_inactive(self, _mock_sub, mock_update, mock_lro, mock_ensure): + """When CNL is not active and HLSM not set, HLSM should remain None.""" + instance = self._build_instance(cnl_flag=None) + mock_update.return_value = instance + mock_lro.return_value = lambda x: instance + client = mock.Mock() + client.get.return_value = instance + + aks_enable_addons(self.cmd, client, "rg", "cluster", "monitoring") + + mock_ensure.assert_called_once() + _, kwargs = mock_ensure.call_args + self.assertIsNone(kwargs.get("enable_high_log_scale_mode")) + + @mock.patch("azure.cli.command_modules.acs.custom.ensure_container_insights_for_monitoring") + @mock.patch("azure.cli.command_modules.acs.custom.LongRunningOperation") + @mock.patch("azure.cli.command_modules.acs.custom._update_addons") + @mock.patch("azure.cli.command_modules.acs.custom.get_subscription_id", return_value="00000000-0000-0000-0000-000000000000") + def test_hlsm_explicit_true_not_overridden(self, _mock_sub, mock_update, mock_lro, mock_ensure): + """When HLSM is explicitly True, auto-detection should not change it.""" + instance = self._build_instance(cnl_flag="True") + mock_update.return_value = instance + mock_lro.return_value = lambda x: instance + client = mock.Mock() + client.get.return_value = instance + + aks_enable_addons(self.cmd, client, "rg", "cluster", "monitoring", + enable_high_log_scale_mode=True) + + mock_ensure.assert_called_once() + _, kwargs = mock_ensure.call_args + self.assertTrue(kwargs.get("enable_high_log_scale_mode")) + + @mock.patch("azure.cli.command_modules.acs.custom.ensure_container_insights_for_monitoring") + @mock.patch("azure.cli.command_modules.acs.custom.LongRunningOperation") + @mock.patch("azure.cli.command_modules.acs.custom._update_addons") + @mock.patch("azure.cli.command_modules.acs.custom.get_subscription_id", return_value="00000000-0000-0000-0000-000000000000") + def test_hlsm_explicit_false_not_overridden_by_cnl(self, _mock_sub, mock_update, mock_lro, mock_ensure): + """When HLSM is explicitly False, auto-detection should not override even with CNL active.""" + instance = self._build_instance(cnl_flag="True") + mock_update.return_value = instance + mock_lro.return_value = lambda x: instance + client = mock.Mock() + client.get.return_value = instance + + aks_enable_addons(self.cmd, client, "rg", "cluster", "monitoring", + enable_high_log_scale_mode=False) + + mock_ensure.assert_called_once() + _, kwargs = mock_ensure.call_args + self.assertFalse(kwargs.get("enable_high_log_scale_mode")) + + @mock.patch("azure.cli.command_modules.acs.custom.ensure_container_insights_for_monitoring") + @mock.patch("azure.cli.command_modules.acs.custom.LongRunningOperation") + @mock.patch("azure.cli.command_modules.acs.custom._update_addons") + @mock.patch("azure.cli.command_modules.acs.custom.get_subscription_id", return_value="00000000-0000-0000-0000-000000000000") + def test_hlsm_auto_enabled_with_cnl_lowercase_true(self, _mock_sub, mock_update, mock_lro, mock_ensure): + """CNL flag value 'true' (lowercase) should also trigger auto-HLSM.""" + instance = self._build_instance(cnl_flag="true") + mock_update.return_value = instance + mock_lro.return_value = lambda x: instance + client = mock.Mock() + client.get.return_value = instance + + aks_enable_addons(self.cmd, client, "rg", "cluster", "monitoring") + + mock_ensure.assert_called_once() + _, kwargs = mock_ensure.call_args + self.assertTrue(kwargs.get("enable_high_log_scale_mode")) + + @mock.patch("azure.cli.command_modules.acs.custom.ensure_container_insights_for_monitoring") + @mock.patch("azure.cli.command_modules.acs.custom.LongRunningOperation") + @mock.patch("azure.cli.command_modules.acs.custom._update_addons") + @mock.patch("azure.cli.command_modules.acs.custom.get_subscription_id", return_value="00000000-0000-0000-0000-000000000000") + def test_hlsm_not_auto_enabled_when_cnl_false(self, _mock_sub, mock_update, mock_lro, mock_ensure): + """When CNL flag is 'false', HLSM should not auto-enable.""" + instance = self._build_instance(cnl_flag="false") + mock_update.return_value = instance + mock_lro.return_value = lambda x: instance + client = mock.Mock() + client.get.return_value = instance + + aks_enable_addons(self.cmd, client, "rg", "cluster", "monitoring") + + mock_ensure.assert_called_once() + _, kwargs = mock_ensure.call_args + self.assertIsNone(kwargs.get("enable_high_log_scale_mode")) + + if __name__ == "__main__": unittest.main() From 3b02c51e224fecdf27a06ec4c24d80593dd9ea4b Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 25 Mar 2026 17:45:08 +0000 Subject: [PATCH 12/16] Fix live tests --- .../acs/tests/latest/test_aks_commands.py | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 4ff9ba10acf..1ae50121a77 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -9095,11 +9095,19 @@ def create_new_cluster_with_monitoring_aad_auth(self, resource_group, resource_g ]) # wait for any in-progress cluster operation to finish before disabling - self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') - time.sleep(30) + self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated --interval 30 --timeout 600') + time.sleep(60) - # make sure monitoring can be smoothly disabled - self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') + # make sure monitoring can be smoothly disabled, retry on 409 (in-progress addon operation) + for attempt in range(6): + try: + self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') + break + except Exception: + if attempt < 5: + time.sleep(60) + else: + raise # delete self.cmd(f'aks delete -g {resource_group} -n {aks_name} --yes --no-wait', checks=[self.is_empty()]) @@ -9191,11 +9199,19 @@ def enable_monitoring_existing_cluster_aad_auth(self, resource_group, resource_g ]) # wait for any in-progress cluster operation to finish before disabling - self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') - time.sleep(30) + self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated --interval 30 --timeout 600') + time.sleep(60) - # make sure monitoring can be smoothly disabled - self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') + # make sure monitoring can be smoothly disabled, retry on 409 (in-progress addon operation) + for attempt in range(6): + try: + self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') + break + except Exception: + if attempt < 5: + time.sleep(60) + else: + raise # delete self.cmd(f'aks delete -g {resource_group} -n {aks_name} --yes --no-wait', checks=[self.is_empty()]) @@ -9247,11 +9263,19 @@ def test_aks_create_with_monitoring_legacy_auth(self, resource_group, resource_g pass # this is expected # wait for any in-progress cluster operation to finish before disabling - self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated') - time.sleep(30) + self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated --interval 30 --timeout 600') + time.sleep(60) - # make sure monitoring can be smoothly disabled - self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') + # make sure monitoring can be smoothly disabled, retry on 409 (in-progress addon operation) + for attempt in range(6): + try: + self.cmd(f'aks disable-addons -a monitoring -g={resource_group} -n={aks_name}') + break + except Exception: + if attempt < 5: + time.sleep(60) + else: + raise # delete self.cmd(f'aks delete -g {resource_group} -n {aks_name} --yes --no-wait', checks=[self.is_empty()]) From 21bfae51ef92e4555381092ab3e0d41a66df74ac Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 25 Mar 2026 21:38:43 +0000 Subject: [PATCH 13/16] PR comments --- .../azure/cli/command_modules/acs/_consts.py | 1 - .../azure/cli/command_modules/acs/_helpers.py | 7 +- .../command_modules/acs/addonconfiguration.py | 20 +++-- .../azure/cli/command_modules/acs/custom.py | 42 +++-------- .../acs/managed_cluster_decorator.py | 6 -- .../acs/tests/latest/test_custom.py | 73 +++---------------- .../acs/tests/latest/test_helpers.py | 33 ++++----- .../latest/test_managed_cluster_decorator.py | 14 ++-- 8 files changed, 54 insertions(+), 142 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_consts.py b/src/azure-cli/azure/cli/command_modules/acs/_consts.py index d9394879dc9..0e18af74918 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_consts.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_consts.py @@ -154,7 +154,6 @@ # monitoring CONST_MONITORING_ADDON_NAME = "omsagent" -CONST_MONITORING_ADDON_NAME_CAMELCASE = "omsAgent" CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID = "logAnalyticsWorkspaceResourceID" CONST_MONITORING_USING_AAD_MSI_AUTH = "useAADAuth" diff --git a/src/azure-cli/azure/cli/command_modules/acs/_helpers.py b/src/azure-cli/azure/cli/command_modules/acs/_helpers.py index 4e1019ca592..4a41bcb356e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_helpers.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_helpers.py @@ -28,7 +28,7 @@ ManagedCluster = TypeVar("ManagedCluster") -def get_monitoring_addon_key(addon_profiles, monitoring_addon_name, monitoring_addon_name_camelcase=None): +def get_monitoring_addon_key(addon_profiles, monitoring_addon_name): """Return the canonical key for the monitoring addon, normalizing non-standard casing. The API response may return the monitoring addon key in any casing (e.g. @@ -42,10 +42,7 @@ def get_monitoring_addon_key(addon_profiles, monitoring_addon_name, monitoring_a # Exact match on the canonical lowercase name – preferred form. if monitoring_addon_name in addon_profiles: return monitoring_addon_name - # Exact match on the known camelCase variant. - if monitoring_addon_name_camelcase and monitoring_addon_name_camelcase in addon_profiles: - return monitoring_addon_name_camelcase - # Case-insensitive fallback: catch any other casing the server may return. + # Case-insensitive fallback: catch any casing the server may return. target_lower = monitoring_addon_name.lower() for key in list(addon_profiles): if key.lower() == target_lower: diff --git a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py index 745ad2e6a49..61da2361dce 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py +++ b/src/azure-cli/azure/cli/command_modules/acs/addonconfiguration.py @@ -657,16 +657,20 @@ def ensure_container_insights_for_monitoring( ) resources = get_resources_client(cmd.cli_ctx, cluster_subscription) - dcr_creation_body = json.loads( - dcr_creation_body_with_syslog if enable_syslog else dcr_creation_body_without_syslog - ) for _ in range(3): try: - resources.begin_create_or_update_by_id( - dcr_resource_id, - "2022-06-01", - dcr_creation_body - ) + if enable_syslog: + resources.begin_create_or_update_by_id( + dcr_resource_id, + "2022-06-01", + json.loads(dcr_creation_body_with_syslog) + ) + else: + resources.begin_create_or_update_by_id( + dcr_resource_id, + "2022-06-01", + json.loads(dcr_creation_body_without_syslog) + ) error = None break except CLIError as e: diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index af5d56af60f..950904c4372 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -59,7 +59,6 @@ CONST_INGRESS_APPGW_WATCH_NAMESPACE, CONST_KUBE_DASHBOARD_ADDON_NAME, CONST_MONITORING_ADDON_NAME, - CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, CONST_MONITORING_USING_AAD_MSI_AUTH, CONST_NODEPOOL_MODE_USER, @@ -1520,7 +1519,7 @@ def aks_disable_addons(cmd, client, resource_group_name, name, addons, no_wait=F instance = client.get(resource_group_name, name) subscription_id = get_subscription_id(cmd.cli_ctx) monitoring_addon_key = get_monitoring_addon_key( - instance.addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE + instance.addon_profiles, CONST_MONITORING_ADDON_NAME ) try: if addons == "monitoring" and monitoring_addon_key in instance.addon_profiles and \ @@ -1621,16 +1620,17 @@ def aks_enable_addons(cmd, client, resource_group_name, name, addons, if need_pull_for_result: if enable_monitoring: monitoring_addon_key = get_monitoring_addon_key( - instance.addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE + instance.addon_profiles, CONST_MONITORING_ADDON_NAME ) - # Auto-enable HLSM if CNL is active and HLSM was not explicitly set - if enable_high_log_scale_mode is None: - cnl_flag = instance.addon_profiles[monitoring_addon_key].config.get("enableRetinaNetworkFlags") - if cnl_flag is not None and str(cnl_flag).lower() == "true": - enable_high_log_scale_mode = True if CONST_MONITORING_USING_AAD_MSI_AUTH in instance.addon_profiles[monitoring_addon_key].config and \ str(instance.addon_profiles[monitoring_addon_key].config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == 'true': if msi_auth: + # Auto-enable HLSM when CNL is active and HLSM not explicitly set + if enable_high_log_scale_mode is None: + addon_config = instance.addon_profiles[monitoring_addon_key].config or {} + cnl_flag = addon_config.get("enableRetinaNetworkFlags", "").lower() + if cnl_flag == "true": + enable_high_log_scale_mode = True # create a Data Collection Rule (DCR) and associate it with the cluster ensure_container_insights_for_monitoring( cmd, instance.addon_profiles[monitoring_addon_key], @@ -1754,19 +1754,9 @@ def _update_addons(cmd, instance, subscription_id, resource_group_name, name, ad "--enable_msi_auth_for_monitoring is not supported in %s cloud and continuing monitoring enablement without this flag.", cloud_name) enable_msi_auth_for_monitoring = False - # Capture existing CNL flag before overwriting config - existing_cnl = None - existing_addon = addon_profiles.get(addon) or addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) - if existing_addon and existing_addon.config: - existing_cnl = existing_addon.config.get("enableRetinaNetworkFlags") - addon_profile.config = { CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID: workspace_resource_id} addon_profile.config[CONST_MONITORING_USING_AAD_MSI_AUTH] = "true" if enable_msi_auth_for_monitoring else "false" - - # Preserve enableRetinaNetworkFlags (CNL) if it was set on the existing addon - if existing_cnl is not None: - addon_profile.config["enableRetinaNetworkFlags"] = existing_cnl elif addon == (CONST_VIRTUAL_NODE_ADDON_NAME + os_type): if addon_profile.enabled: raise CLIError('The virtual-node addon is already enabled for this managed cluster.\n' @@ -1841,19 +1831,7 @@ def _update_addons(cmd, instance, subscription_id, resource_group_name, name, ad else: raise CLIError( "The addon {} is not installed.".format(addon)) - # When disabling the monitoring addon, preserve enableRetinaNetworkFlags (CNL) - # so that re-enabling the addon later restores the CNL setting. - monitoring_addon_key = get_monitoring_addon_key( - addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE - ) - if addon == monitoring_addon_key and addon_profiles[addon].config: - existing_cnl = addon_profiles[addon].config.get("enableRetinaNetworkFlags") - if existing_cnl is not None: - addon_profiles[addon].config = {"enableRetinaNetworkFlags": existing_cnl} - else: - addon_profiles[addon].config = None - else: - addon_profiles[addon].config = None + addon_profiles[addon].config = None addon_profiles[addon].enabled = enable instance.addon_profiles = addon_profiles @@ -4115,7 +4093,7 @@ def is_monitoring_addon_enabled(addons, instance): addon_profiles = instance.addon_profiles or {} monitoring_addon_key = get_monitoring_addon_key( - addon_profiles, CONST_MONITORING_ADDON_NAME, CONST_MONITORING_ADDON_NAME_CAMELCASE + addon_profiles, CONST_MONITORING_ADDON_NAME ) monitoring_addon_enabled = is_monitoring_addon and monitoring_addon_key in addon_profiles and addon_profiles[ monitoring_addon_key].enabled diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 10ecb05fd03..21de7adc88c 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -167,10 +167,8 @@ def _get_monitoring_addon_key_from_consts(addon_profiles, addon_consts): return get_monitoring_addon_key( addon_profiles, addon_consts.get("CONST_MONITORING_ADDON_NAME"), - addon_consts.get("CONST_MONITORING_ADDON_NAME_CAMELCASE"), ) - # TODO # 1. remove enable_rbac related implementation # 2. add validation for all/some of the parameters involved in the getter of outbound_type/enable_addons @@ -2881,7 +2879,6 @@ def get_addon_consts(self) -> Dict[str, str]: CONST_INGRESS_APPGW_SUBNET_CIDR, CONST_INGRESS_APPGW_SUBNET_ID, CONST_INGRESS_APPGW_WATCH_NAMESPACE, CONST_KUBE_DASHBOARD_ADDON_NAME, CONST_MONITORING_ADDON_NAME, - CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, CONST_MONITORING_USING_AAD_MSI_AUTH, CONST_OPEN_SERVICE_MESH_ADDON_NAME, CONST_ROTATION_POLL_INTERVAL, @@ -2925,9 +2922,6 @@ def get_addon_consts(self) -> Dict[str, str]: addon_consts[ "CONST_MONITORING_ADDON_NAME" ] = CONST_MONITORING_ADDON_NAME - addon_consts[ - "CONST_MONITORING_ADDON_NAME_CAMELCASE" - ] = CONST_MONITORING_ADDON_NAME_CAMELCASE addon_consts[ "CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID" ] = CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py index da8c4874c34..4844bb7ca3c 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py @@ -17,7 +17,6 @@ CONST_HTTP_APPLICATION_ROUTING_ADDON_NAME, CONST_KUBE_DASHBOARD_ADDON_NAME, CONST_MONITORING_ADDON_NAME, - CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_MONITORING_USING_AAD_MSI_AUTH, ) from azure.cli.command_modules.acs.addonconfiguration import ( @@ -617,12 +616,10 @@ def test_update_addons(self, rg_def, get_resource_groups_client, get_resources_c addon_profile = instance.addon_profiles['ingressApplicationGateway'] self.assertFalse(addon_profile.enabled) - # monitoring enable preserves enableRetinaNetworkFlags (CNL) from camelCase addon key - # Scenario: cluster has monitoring under "omsAgent" (camelCase) key with CNL enabled; - # CLI enables monitoring (uses "omsagent" key), CNL should be preserved from old key + # monitoring enable with camelCase addon key does NOT preserve enableRetinaNetworkFlags instance = mock.MagicMock() instance.addon_profiles = { - CONST_MONITORING_ADDON_NAME_CAMELCASE: ManagedClusterAddonProfile( + "omsAgent": ManagedClusterAddonProfile( enabled=False, config={ 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', @@ -635,27 +632,9 @@ def test_update_addons(self, rg_def, get_resource_groups_client, get_resources_c 'clitest000001', 'clitest000001', 'monitoring', enable=True) monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] self.assertTrue(monitoring_profile.enabled) - self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') + self.assertIsNone(monitoring_profile.config.get('enableRetinaNetworkFlags')) - # monitoring enable preserves enableRetinaNetworkFlags from lowercase addon key - instance = mock.MagicMock() - instance.addon_profiles = { - CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile( - enabled=False, - config={ - 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', - CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', - 'enableRetinaNetworkFlags': 'True', - }, - ), - } - instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', - 'clitest000001', 'clitest000001', 'monitoring', enable=True) - monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] - self.assertTrue(monitoring_profile.enabled) - self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') - - # monitoring disable preserves enableRetinaNetworkFlags (CNL) in config + # monitoring disable sets config to None instance = mock.MagicMock() instance.addon_profiles = { CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile( @@ -671,38 +650,9 @@ def test_update_addons(self, rg_def, get_resource_groups_client, get_resources_c 'clitest000001', 'clitest000001', 'monitoring', enable=False) monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] self.assertFalse(monitoring_profile.enabled) - self.assertIsNotNone(monitoring_profile.config) - self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') - # Workspace and auth keys should NOT be preserved (only CNL flag) - self.assertIsNone(monitoring_profile.config.get('logAnalyticsWorkspaceResourceID')) - - # monitoring disable → enable cycle preserves CNL (Test 6b scenario) - instance = mock.MagicMock() - instance.addon_profiles = { - CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile( - enabled=True, - config={ - 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/old-ws', - CONST_MONITORING_USING_AAD_MSI_AUTH: 'true', - 'enableRetinaNetworkFlags': 'True', - }, - ), - } - # Step 1: disable monitoring - instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', - 'clitest000001', 'clitest000001', 'monitoring', enable=False) - self.assertFalse(instance.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled) - self.assertEqual(instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config.get('enableRetinaNetworkFlags'), 'True') - # Step 2: re-enable monitoring with new workspace - instance = _update_addons(MockCmd(self.cli), instance, '00000000-0000-0000-0000-000000000000', - 'clitest000001', 'clitest000001', 'monitoring', enable=True, - workspace_resource_id='/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/new-ws') - monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] - self.assertTrue(monitoring_profile.enabled) - self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') - self.assertIn('new-ws', monitoring_profile.config.get('logAnalyticsWorkspaceResourceID')) + self.assertIsNone(monitoring_profile.config) - # monitoring disable without CNL sets config to None + # monitoring disable without CNL also sets config to None instance = mock.MagicMock() instance.addon_profiles = { CONST_MONITORING_ADDON_NAME: ManagedClusterAddonProfile( @@ -719,10 +669,10 @@ def test_update_addons(self, rg_def, get_resource_groups_client, get_resources_c self.assertFalse(monitoring_profile.enabled) self.assertIsNone(monitoring_profile.config) - # monitoring disable preserves CNL with camelCase key (omsAgent) + # monitoring disable with camelCase key (omsAgent) sets config to None instance = mock.MagicMock() instance.addon_profiles = { - CONST_MONITORING_ADDON_NAME_CAMELCASE: ManagedClusterAddonProfile( + "omsAgent": ManagedClusterAddonProfile( enabled=True, config={ 'logAnalyticsWorkspaceResourceID': '/subscriptions/sub/resourceGroups/rg/providers/Microsoft.OperationalInsights/workspaces/ws', @@ -736,8 +686,7 @@ def test_update_addons(self, rg_def, get_resource_groups_client, get_resources_c 'clitest000001', 'clitest000001', 'monitoring', enable=False) monitoring_profile = instance.addon_profiles[CONST_MONITORING_ADDON_NAME] self.assertFalse(monitoring_profile.enabled) - self.assertIsNotNone(monitoring_profile.config) - self.assertEqual(monitoring_profile.config.get('enableRetinaNetworkFlags'), 'True') + self.assertIsNone(monitoring_profile.config) @mock.patch('azure.cli.command_modules.acs.custom._urlretrieve') @mock.patch('azure.cli.command_modules.acs.custom.logger') @@ -1312,14 +1261,14 @@ def test_monitoring_enabled_with_lowercase_key(self): def test_monitoring_enabled_with_camelcase_key(self): instance = mock.Mock() instance.addon_profiles = { - CONST_MONITORING_ADDON_NAME_CAMELCASE: ManagedClusterAddonProfile(enabled=True, config={}), + "omsAgent": ManagedClusterAddonProfile(enabled=True, config={}), } self.assertTrue(is_monitoring_addon_enabled("monitoring", instance)) def test_monitoring_disabled_with_camelcase_key(self): instance = mock.Mock() instance.addon_profiles = { - CONST_MONITORING_ADDON_NAME_CAMELCASE: ManagedClusterAddonProfile(enabled=False, config={}), + "omsAgent": ManagedClusterAddonProfile(enabled=False, config={}), } self.assertFalse(is_monitoring_addon_enabled("monitoring", instance)) diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py index df4feb621db..f5211a1b94d 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_helpers.py @@ -352,50 +352,43 @@ class TestGetMonitoringAddonKey(unittest.TestCase): """Tests for the shared get_monitoring_addon_key helper.""" def test_returns_default_when_addon_profiles_is_none(self): - result = get_monitoring_addon_key(None, "omsagent", "omsAgent") + result = get_monitoring_addon_key(None, "omsagent") self.assertEqual(result, "omsagent") def test_returns_lowercase_key_when_present(self): addon_profiles = {"omsagent": object()} - result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + result = get_monitoring_addon_key(addon_profiles, "omsagent") self.assertEqual(result, "omsagent") - def test_returns_camelcase_key_when_lowercase_absent(self): - addon_profiles = {"omsAgent": object()} - result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") - self.assertEqual(result, "omsAgent") + def test_normalizes_camelcase_key(self): + profile = object() + addon_profiles = {"omsAgent": profile} + result = get_monitoring_addon_key(addon_profiles, "omsagent") + self.assertEqual(result, "omsagent") + self.assertIn("omsagent", addon_profiles) + self.assertNotIn("omsAgent", addon_profiles) def test_prefers_lowercase_when_both_present(self): addon_profiles = {"omsagent": object(), "omsAgent": object()} - result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + result = get_monitoring_addon_key(addon_profiles, "omsagent") self.assertEqual(result, "omsagent") - def test_returns_default_when_neither_key_present(self): + def test_returns_default_when_key_not_present(self): addon_profiles = {"someOtherAddon": object()} - result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + result = get_monitoring_addon_key(addon_profiles, "omsagent") self.assertEqual(result, "omsagent") def test_normalizes_nonstandard_casing(self): """A key like 'oMSaGent' should be re-keyed to the canonical form.""" profile = object() addon_profiles = {"oMSaGent": profile} - result = get_monitoring_addon_key(addon_profiles, "omsagent", "omsAgent") + result = get_monitoring_addon_key(addon_profiles, "omsagent") self.assertEqual(result, "omsagent") # The dict should now contain the canonical key, not the old one. self.assertIn("omsagent", addon_profiles) self.assertNotIn("oMSaGent", addon_profiles) self.assertIs(addon_profiles["omsagent"], profile) - def test_returns_default_when_camelcase_is_none(self): - addon_profiles = {"omsAgent": object()} - result = get_monitoring_addon_key(addon_profiles, "omsagent", None) - self.assertEqual(result, "omsagent") - - def test_returns_default_when_camelcase_not_provided(self): - addon_profiles = {"omsAgent": object()} - result = get_monitoring_addon_key(addon_profiles, "omsagent") - self.assertEqual(result, "omsagent") - if __name__ == "__main__": unittest.main() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 29c5e135177..c94cc6718ec 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -23,7 +23,6 @@ CONST_INGRESS_APPGW_WATCH_NAMESPACE, CONST_KUBE_DASHBOARD_ADDON_NAME, CONST_MONITORING_ADDON_NAME, - CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, CONST_OPEN_SERVICE_MESH_ADDON_NAME, CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING, @@ -2532,7 +2531,6 @@ def test_get_addon_consts(self): "CONST_INGRESS_APPGW_WATCH_NAMESPACE": CONST_INGRESS_APPGW_WATCH_NAMESPACE, "CONST_KUBE_DASHBOARD_ADDON_NAME": CONST_KUBE_DASHBOARD_ADDON_NAME, "CONST_MONITORING_ADDON_NAME": CONST_MONITORING_ADDON_NAME, - "CONST_MONITORING_ADDON_NAME_CAMELCASE": CONST_MONITORING_ADDON_NAME_CAMELCASE, "CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID": CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, "CONST_OPEN_SERVICE_MESH_ADDON_NAME": CONST_OPEN_SERVICE_MESH_ADDON_NAME, "CONST_VIRTUAL_NODE_ADDON_NAME": CONST_VIRTUAL_NODE_ADDON_NAME, @@ -2841,7 +2839,7 @@ def test_get_enable_msi_auth_for_monitoring(self): DecoratorMode.CREATE, ) addon_profiles_2 = { - CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + "omsAgent": self.models.ManagedClusterAddonProfile( enabled=True, config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, ) @@ -12730,7 +12728,7 @@ def test_postprocessing_after_mc_created(self): mc_6 = self.models.ManagedCluster( location="test_location", addon_profiles={ - CONST_MONITORING_ADDON_NAME_CAMELCASE: monitoring_addon_profile_6, + "omsAgent": monitoring_addon_profile_6, }, ) dec_6.context.attach_mc(mc_6) @@ -15343,7 +15341,7 @@ def test_enable_container_network_logs(self): ), ), addon_profiles={ - CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + "omsAgent": self.models.ManagedClusterAddonProfile( enabled=True, config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, ) @@ -15376,7 +15374,7 @@ def test_enable_container_network_logs(self): ), ), addon_profiles={ - CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + "omsAgent": self.models.ManagedClusterAddonProfile( enabled=True, config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, ) @@ -15385,7 +15383,7 @@ def test_enable_container_network_logs(self): dec_16.context.attach_mc(mc_16) dec_mc_16 = dec_16.update_monitoring_profile_flow_logs(mc_16) self.assertEqual( - dec_mc_16.addon_profiles[CONST_MONITORING_ADDON_NAME_CAMELCASE].config["enableRetinaNetworkFlags"], + dec_mc_16.addon_profiles[CONST_MONITORING_ADDON_NAME].config["enableRetinaNetworkFlags"], "True", ) self.assertTrue( @@ -15412,7 +15410,7 @@ def test_enable_container_network_logs(self): ), ), addon_profiles={ - CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + "omsAgent": self.models.ManagedClusterAddonProfile( enabled=True, config={ CONST_MONITORING_USING_AAD_MSI_AUTH: "true", From bb15da4fb6be9f07862ff801c553977e426e9822 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 25 Mar 2026 21:44:17 +0000 Subject: [PATCH 14/16] Lint --- src/azure-cli/azure/cli/command_modules/acs/custom.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index 950904c4372..3d4aed5d91a 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1626,11 +1626,10 @@ def aks_enable_addons(cmd, client, resource_group_name, name, addons, str(instance.addon_profiles[monitoring_addon_key].config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == 'true': if msi_auth: # Auto-enable HLSM when CNL is active and HLSM not explicitly set - if enable_high_log_scale_mode is None: - addon_config = instance.addon_profiles[monitoring_addon_key].config or {} - cnl_flag = addon_config.get("enableRetinaNetworkFlags", "").lower() - if cnl_flag == "true": - enable_high_log_scale_mode = True + if enable_high_log_scale_mode is None and \ + (instance.addon_profiles[monitoring_addon_key].config or {}).get( + "enableRetinaNetworkFlags", "").lower() == "true": + enable_high_log_scale_mode = True # create a Data Collection Rule (DCR) and associate it with the cluster ensure_container_insights_for_monitoring( cmd, instance.addon_profiles[monitoring_addon_key], From efad51eb6ed075e743c3f983a835cae04d3af3f2 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Thu, 26 Mar 2026 11:17:29 +0000 Subject: [PATCH 15/16] Update waiting during test --- .../acs/tests/latest/test_aks_commands.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 1ae50121a77..804662df3d1 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -9096,7 +9096,8 @@ def create_new_cluster_with_monitoring_aad_auth(self, resource_group, resource_g # wait for any in-progress cluster operation to finish before disabling self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated --interval 30 --timeout 600') - time.sleep(60) + if self.is_live or self.in_recording: + time.sleep(60) # make sure monitoring can be smoothly disabled, retry on 409 (in-progress addon operation) for attempt in range(6): @@ -9105,7 +9106,8 @@ def create_new_cluster_with_monitoring_aad_auth(self, resource_group, resource_g break except Exception: if attempt < 5: - time.sleep(60) + if self.is_live or self.in_recording: + time.sleep(60) else: raise @@ -9200,7 +9202,8 @@ def enable_monitoring_existing_cluster_aad_auth(self, resource_group, resource_g # wait for any in-progress cluster operation to finish before disabling self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated --interval 30 --timeout 600') - time.sleep(60) + if self.is_live or self.in_recording: + time.sleep(60) # make sure monitoring can be smoothly disabled, retry on 409 (in-progress addon operation) for attempt in range(6): @@ -9209,7 +9212,8 @@ def enable_monitoring_existing_cluster_aad_auth(self, resource_group, resource_g break except Exception: if attempt < 5: - time.sleep(60) + if self.is_live or self.in_recording: + time.sleep(60) else: raise @@ -9264,7 +9268,8 @@ def test_aks_create_with_monitoring_legacy_auth(self, resource_group, resource_g # wait for any in-progress cluster operation to finish before disabling self.cmd(f'aks wait -g {resource_group} -n {aks_name} --updated --interval 30 --timeout 600') - time.sleep(60) + if self.is_live or self.in_recording: + time.sleep(60) # make sure monitoring can be smoothly disabled, retry on 409 (in-progress addon operation) for attempt in range(6): @@ -9273,7 +9278,8 @@ def test_aks_create_with_monitoring_legacy_auth(self, resource_group, resource_g break except Exception: if attempt < 5: - time.sleep(60) + if self.is_live or self.in_recording: + time.sleep(60) else: raise From b299c57a9b2b5750163254e63210206e1be092cf Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Thu, 26 Mar 2026 13:47:10 +0000 Subject: [PATCH 16/16] Fix flaky live test --- .../command_modules/acs/tests/latest/test_aks_commands.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 804662df3d1..3214f0d5bf1 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -7884,6 +7884,11 @@ def test_aks_update_with_azuremonitormetrics(self, resource_group, resource_grou self.check('azureMonitorProfile.metrics.enabled', True), ]) + # wait for cluster to fully settle before issuing next update + self.cmd('aks wait --resource-group={resource_group} --name={name} --updated --interval 30 --timeout 600') + if self.is_live or self.in_recording: + time.sleep(60) + # update: disable-azure-monitor-metrics update_cmd = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ '--disable-azure-monitor-metrics'