Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/vm-repair/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
Release History
===============

2.2.1
++++++
Migrating telemetry from Application Insights SDK to azure.cli.core telemetry pipeline
Adding PII scrubbing for error messages and stack traces
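Fixing if/elif bug in command_helper_class.py telemetry dispatch, where run-command telemetry was also reported a second time through the generic command telemetry path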
Removing unused opencensus dependency

2.2.0
++++++
Adding `--tags` parameter to `vm repair create` and `vm repair repair-and-restore` commands to allow users to tag the repair VM for organizational requirements
Expand Down
30 changes: 21 additions & 9 deletions src/vm-repair/azext_vm_repair/command_helper_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
import inspect
from knack.log import get_logger

from azure.cli.core.commands.client_factory import get_subscription_id

from .telemetry import _track_command_telemetry, _track_run_command_telemetry, _track_command_telemetry_repair_and_restore
from .telemetry import _track_command_telemetry, _track_run_command_telemetry, _track_command_telemetry_repair_and_restore, _generate_user_hash

from .repair_utils import _get_function_param_dict

Expand Down Expand Up @@ -65,9 +63,18 @@ def __init__(self, logger, cmd, command_name):
# Error stack trace
self.error_stack_trace = ''

# Exception type for telemetry (e.g. 'SkuNotAvailableError')
self.exception_type = ''

# Return dict
self.return_dict = {}

# Extra telemetry context (VM properties, feature flags, etc.)
self.telemetry_context = {}

# Pseudonymous caller hash
self.telemetry_context['UserHash'] = _generate_user_hash(cmd)

# Verbose flag for command
self.is_verbose = any(handler.level == logging.INFO for handler in get_logger().handlers)

Expand All @@ -87,12 +94,17 @@ def __del__(self):
self.cmd.cli_ctx.get_progress_controller().end()
# Track telemetry data
elapsed_time = timeit.default_timer() - self.start_time
if self.command_name == VM_REPAIR_RUN_COMMAND:
_track_run_command_telemetry(self.logger, self.command_name, self.command_params, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, get_subscription_id(self.cmd.cli_ctx), self.return_dict, self.script.run_id, self.script.status, self.script.output, self.script.run_time)
if self.command_name == VM_REPAIR_AND_RESTORE_COMMAND:
_track_command_telemetry_repair_and_restore(self.logger, self.command_name, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, get_subscription_id(self.cmd.cli_ctx))
else:
_track_command_telemetry(self.logger, self.command_name, self.command_params, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, get_subscription_id(self.cmd.cli_ctx), self.return_dict)
if self.exception_type:
self.telemetry_context['ExceptionType'] = self.exception_type
try:
if self.command_name == VM_REPAIR_RUN_COMMAND:
_track_run_command_telemetry(self.logger, self.command_name, self.command_params, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, self.return_dict, self.script.run_id, self.script.status, self.script.output, self.script.run_time, context=self.telemetry_context)
elif self.command_name == VM_REPAIR_AND_RESTORE_COMMAND:
_track_command_telemetry_repair_and_restore(self.logger, self.command_name, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, context=self.telemetry_context)
else:
_track_command_telemetry(self.logger, self.command_name, self.command_params, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, self.return_dict, context=self.telemetry_context)
except Exception: # pylint: disable=broad-except
self.logger.debug('Failed to send telemetry for %s', self.command_name)

def set_status_success(self):
""" Set command status to success """
Expand Down
34 changes: 34 additions & 0 deletions src/vm-repair/azext_vm_repair/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,17 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern
# Fetching the architecture of the source VM.
architecture_type = _fetch_architecture(source_vm)

# Enrich telemetry with VM context
command.telemetry_context.update({
'OsType': 'Linux' if is_linux else 'Windows',
'HyperVGeneration': str(vm_hypervgen),
'Architecture': str(architecture_type),
'IsManagedDisk': str(is_managed),
'IsEncrypted': str(bool(encrypt_recovery_key)),
'EnableNested': str(bool(enable_nested)),
'AssociatePublicIp': str(bool(associate_public_ip)),
})

# Checking if the source VM's OS is Linux and if it uses a managed disk.
if is_linux and _uses_managed_disk(source_vm):
# Setting the OS type to 'Linux'.
Expand Down Expand Up @@ -422,34 +433,42 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern
command.error_stack_trace = traceback.format_exc()
command.error_message = "Command interrupted by user input."
command.message = "Command interrupted by user input. Cleaning up resources."
command.exception_type = 'KeyboardInterrupt'
except AzCommandError as azCommandError:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(azCommandError)
command.message = "Repair create failed. Cleaning up created resources."
command.exception_type = 'AzCommandError'
except SkuDoesNotSupportHyperV as skuDoesNotSupportHyperV:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(skuDoesNotSupportHyperV)
command.message = "provided sku does not support nested VM in hyperv. Please run command without --enabled-nested or provide a valid --size parameter. Cleaning up created resources."
command.exception_type = 'SkuDoesNotSupportHyperV'
except ScriptReturnsError as scriptReturnsError:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(scriptReturnsError)
command.message = "Error returned from script when enabling hyperv."
command.exception_type = 'ScriptReturnsError'
except SkuNotAvailableError as skuNotAvailableError:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(skuNotAvailableError)
command.message = "Please check if the current subscription can create more VM resources. Cleaning up created resources."
command.exception_type = 'SkuNotAvailableError'
except UnmanagedDiskCopyError as unmanagedDiskCopyError:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(unmanagedDiskCopyError)
command.message = "Repair create failed. Please try again at another time. Cleaning up created resources."
command.exception_type = 'UnmanagedDiskCopyError'
except WindowsOsNotAvailableError:
command.error_stack_trace = traceback.format_exc()
command.error_message = 'Compatible Windows OS image not available.'
command.message = 'A compatible Windows OS image is not available at this time, please check subscription.'
command.exception_type = 'WindowsOsNotAvailableError'
except Exception as exception:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(exception)
command.message = 'An unexpected error occurred. Try running again with the --debug flag to debug.'
command.exception_type = type(exception).__name__

finally:
if command.error_stack_trace:
Expand Down Expand Up @@ -546,16 +565,19 @@ def restore(cmd, vm_name, resource_group_name, disk_name=None, repair_vm_id=None
command.error_stack_trace = traceback.format_exc()
command.error_message = "Command interrupted by user input."
command.message = "Command interrupted by user input. If the restore command fails at retry, please rerun the repair process from \'az vm repair create\'."
command.exception_type = 'KeyboardInterrupt'
except AzCommandError as azCommandError:
# Capture the stack trace and set the error message if an Azure command error occurs
command.error_stack_trace = traceback.format_exc()
command.error_message = str(azCommandError)
command.message = "Repair restore failed. If the restore command fails at retry, please rerun the repair process from \'az vm repair create\'."
command.exception_type = 'AzCommandError'
except Exception as exception:
# Capture the stack trace and set the error message if an unexpected error occurs
command.error_stack_trace = traceback.format_exc()
command.error_message = str(exception)
command.message = 'An unexpected error occurred. Try running again with the --debug flag to debug.'
command.exception_type = type(exception).__name__
finally:
# Log the stack trace if an error has occurred
if command.error_stack_trace:
Expand Down Expand Up @@ -707,22 +729,27 @@ def run(cmd, vm_name, resource_group_name, run_id=None, repair_vm_id=None, custo
command.error_stack_trace = traceback.format_exc()
command.error_message = "Command interrupted by user input."
command.message = "Repair run failed. Command interrupted by user input."
command.exception_type = 'KeyboardInterrupt'
except AzCommandError as azCommandError:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(azCommandError)
command.message = "Repair run failed."
command.exception_type = 'AzCommandError'
except requests.exceptions.RequestException as exception:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(exception)
command.message = "Failed to fetch run script data from GitHub. Please check this repository is reachable: https://github.com/Azure/repair-script-library"
command.exception_type = 'RequestException'
except RunScriptNotFoundForIdError as exception:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(exception)
command.message = "Repair run failed. Run ID not found."
command.exception_type = 'RunScriptNotFoundForIdError'
except Exception as exception:
command.error_stack_trace = traceback.format_exc()
command.error_message = str(exception)
command.message = 'An unexpected error occurred. Try running again with the --debug flag to debug.'
command.exception_type = type(exception).__name__
finally:
if command.error_stack_trace:
logger.debug(command.error_stack_trace)
Expand Down Expand Up @@ -918,26 +945,31 @@ def reset_nic(cmd, vm_name, resource_group_name, yes=False):
command.error_stack_trace = traceback.format_exc()
command.error_message = "Command interrupted by user input."
command.message = "Command interrupted by user input."
command.exception_type = 'KeyboardInterrupt'
except AzCommandError as azCommandError:
command.set_status_error()
command.error_stack_trace = traceback.format_exc()
command.error_message = str(azCommandError)
command.message = "Reset NIC failed."
command.exception_type = 'AzCommandError'
except SupportingResourceNotFoundError as resourceError:
command.set_status_error()
command.error_stack_trace = traceback.format_exc()
command.error_message = str(resourceError)
command.message = "Reset NIC could not be initiated."
command.exception_type = 'SupportingResourceNotFoundError'
except CommandCanceledByUserError as canceledError:
command.set_status_error()
command.error_stack_trace = traceback.format_exc()
command.error_message = str(canceledError)
command.message = VM_OFF_MESSAGE
command.exception_type = 'CommandCanceledByUserError'
except Exception as exception:
command.set_status_error()
command.error_stack_trace = traceback.format_exc()
command.error_message = str(exception)
command.message = 'An unexpected error occurred. Try running again with the --debug flag to debug.'
command.exception_type = type(exception).__name__
else:
command.set_status_success()
command.message = 'VM guest NIC reset complete. The VM is in running state.'
Expand Down Expand Up @@ -1017,6 +1049,7 @@ def repair_and_restore(cmd, vm_name, resource_group_name, repair_password=None,
command.error_stack_trace = traceback.format_exc()
command.error_message = "Command failed when running fstab script."
command.message = "Command failed when running fstab script."
command.exception_type = 'FstabScriptError'

# If the resource group existed before, confirm before cleaning up resources
# Otherwise, clean up resources without confirmation
Expand Down Expand Up @@ -1117,6 +1150,7 @@ def repair_button(cmd, vm_name, resource_group_name, button_command, repair_pass
command.error_stack_trace = traceback.format_exc()
command.error_message = "Command failed when running script."
command.message = "Command failed when running script."
command.exception_type = 'ButtonScriptError'
if existing_rg:
_clean_up_resources(repair_group_name, confirm=True)
else:
Expand Down
Loading
Loading