diff --git a/src/Runner.Worker/Dap/DapDebugger.cs b/src/Runner.Worker/Dap/DapDebugger.cs index 89879e2ff2c..3b3ec7cbfaf 100644 --- a/src/Runner.Worker/Dap/DapDebugger.cs +++ b/src/Runner.Worker/Dap/DapDebugger.cs @@ -243,6 +243,26 @@ public async Task OnJobCompletedAsync() { if (_state != DapSessionState.NotStarted) { + // Pause so the user can inspect final job state before we tear down, + // but only if the user was stepping through (not if they hit continue). + if (IsActive && _pauseOnNextStep) + { + try + { + if (_jobContext != null) + { + Trace.Info("Job completed — pausing for inspection"); + SendStoppedEvent("completed", "Job completed — inspect variables before the session ends."); + + await WaitForCommandAsync(_jobContext.CancellationToken); + } + } + catch (Exception ex) + { + Trace.Warning($"DAP job-completed pause error: {ex.Message}"); + } + } + try { OnJobCompleted(); @@ -252,8 +272,6 @@ public async Task OnJobCompletedAsync() Trace.Warning($"DAP OnJobCompleted error: {ex.Message}"); } } - - await StopAsync(); } public async Task StopAsync() @@ -1302,6 +1320,13 @@ private async Task WaitForCommandAsync(CancellationToken cancellationToken) _commandTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); } + // If cancellation already fired before we created the new TCS, + // the registration callback targeted the old one. Unblock now. + if (cancellationToken.IsCancellationRequested) + { + _commandTcs.TrySetResult(DapCommand.Disconnect); + } + Trace.Info("Waiting for debugger command..."); var command = await _commandTcs.Task; diff --git a/src/Runner.Worker/Dap/IDapDebugger.cs b/src/Runner.Worker/Dap/IDapDebugger.cs index 533626a4287..07ebcab6926 100644 --- a/src/Runner.Worker/Dap/IDapDebugger.cs +++ b/src/Runner.Worker/Dap/IDapDebugger.cs @@ -22,5 +22,6 @@ public interface IDapDebugger : IRunnerService Task OnStepStartingAsync(IStep step); void OnStepCompleted(IStep step); Task OnJobCompletedAsync(); + Task StopAsync(); } } diff --git a/src/Runner.Worker/JobExtension.cs b/src/Runner.Worker/JobExtension.cs index efcba0f531c..838009fc9c2 100644 --- a/src/Runner.Worker/JobExtension.cs +++ b/src/Runner.Worker/JobExtension.cs @@ -16,6 +16,7 @@ using GitHub.Runner.Common; using GitHub.Runner.Common.Util; using GitHub.Runner.Sdk; +using GitHub.Runner.Worker.Dap; using GitHub.Services.Common; using Newtonsoft.Json; using Pipelines = GitHub.DistributedTask.Pipelines; @@ -50,6 +51,7 @@ public sealed class JobExtension : RunnerService, IJobExtension private Task _diskSpaceCheckTask = null; private CancellationTokenSource _serviceConnectivityCheckToken = new(); private Task _serviceConnectivityCheckTask = null; + private IDapDebugger _dapDebugger; // Download all required actions. // Make sure all condition inputs are valid. @@ -67,6 +69,7 @@ public async Task> InitializeJob(IExecutionContext jobContext, Pipel List preJobSteps = new(); List jobSteps = new(); + var initSucceeded = false; using (var register = jobContext.CancellationToken.Register(() => { context.CancelToken(); })) { try @@ -481,6 +484,41 @@ public async Task> InitializeJob(IExecutionContext jobContext, Pipel Trace.Info($"Start checking service connectivity in background."); _serviceConnectivityCheckTask = CheckServiceConnectivityAsync(context, _serviceConnectivityCheckToken.Token); + // Start the DAP debugger and wait for a client connection inside + // "Set up job" so the step stays in-progress while we wait. + if (jobContext.Global.Debugger?.Enabled == true) + { + Trace.Info("Debugger enabled — starting inside Set up job"); + context.Output("Starting debugger…"); + + try + { + _dapDebugger = HostContext.GetService(); + await _dapDebugger.StartAsync(jobContext); + + context.Output("Waiting for debugger client to connect…"); + + await _dapDebugger.WaitUntilReadyAsync(); + context.Output("Debugger connected."); + AddDebuggerConnectionTelemetry(jobContext, "Connected"); + } + catch (OperationCanceledException) when (jobContext.CancellationToken.IsCancellationRequested) + { + Trace.Info("Job was cancelled before debugger client connected."); + AddDebuggerConnectionTelemetry(jobContext, "Canceled"); + context.Error("Job was cancelled before debugger client connected."); + throw; + } + catch (Exception ex) + { + Trace.Error($"DAP debugger failed: {ex.Message}"); + AddDebuggerConnectionTelemetry(jobContext, $"Failed: {ex.GetType().Name}"); + context.Error("The debugger failed to start or no debugger client connected in time."); + throw; + } + } + + initSucceeded = true; return steps; } catch (OperationCanceledException ex) when (jobContext.CancellationToken.IsCancellationRequested) @@ -501,12 +539,36 @@ public async Task> InitializeJob(IExecutionContext jobContext, Pipel } finally { + // If InitializeJob failed after the debugger was started, + // tear down the transport here since FinalizeJob won't run. + if (!initSucceeded && _dapDebugger != null) + { + try + { + await _dapDebugger.StopAsync(); + } + catch (Exception ex) + { + Trace.Warning($"DAP debugger cleanup during failed init: {ex.Message}"); + } + _dapDebugger = null; + } + context.Debug("Finishing: Set up job"); context.Complete(); } } } + private static void AddDebuggerConnectionTelemetry(IExecutionContext jobContext, string result) + { + jobContext.Global.JobTelemetry.Add(new JobTelemetry + { + Type = JobTelemetryType.General, + Message = $"DebuggerConnectionResult: {result}" + }); + } + private string GetWorkflowReference(IDictionary variables) { var reference = ""; @@ -782,6 +844,34 @@ public async Task FinalizeJob(IExecutionContext jobContext, Pipelines.AgentJobRe } finally { + // Pause for debugger inspection, then tear down the DAP session. + // OnJobCompletedAsync pauses first, then sends terminated/exited + // events and stops the transport. + if (_dapDebugger != null) + { + context.Output("Job completed — pausing for debugger inspection. Press continue to finish."); + try + { + await _dapDebugger.OnJobCompletedAsync(); + } + catch (Exception ex) + { + Trace.Warning($"DAP debugger completion error: {ex.Message}"); + } + finally + { + try + { + await _dapDebugger.StopAsync(); + } + catch (Exception ex) + { + Trace.Warning($"DAP debugger stop error: {ex.Message}"); + } + } + _dapDebugger = null; + } + context.Debug("Finishing: Complete job"); context.Complete(); } diff --git a/src/Runner.Worker/JobRunner.cs b/src/Runner.Worker/JobRunner.cs index 2ccad0c0ce2..8308b434220 100644 --- a/src/Runner.Worker/JobRunner.cs +++ b/src/Runner.Worker/JobRunner.cs @@ -13,7 +13,6 @@ using GitHub.Runner.Common; using GitHub.Runner.Common.Util; using GitHub.Runner.Sdk; -using GitHub.Runner.Worker.Dap; using GitHub.Services.Common; using GitHub.Services.WebApi; using Sdk.RSWebApi.Contracts; @@ -29,7 +28,6 @@ public interface IJobRunner : IRunnerService public sealed class JobRunner : RunnerService, IJobRunner { - private const string DebuggerConnectionTelemetryPrefix = "DebuggerConnectionResult"; private IJobServerQueue _jobServerQueue; private RunnerSettings _runnerSettings; private ITempDirectoryManager _tempDirectoryManager; @@ -114,7 +112,6 @@ public async Task RunAsync(AgentJobRequestMessage message, Cancellat IExecutionContext jobContext = null; CancellationTokenRegistration? runnerShutdownRegistration = null; - IDapDebugger dapDebugger = null; try { // Create the job execution context. @@ -181,25 +178,6 @@ public async Task RunAsync(AgentJobRequestMessage message, Cancellat _tempDirectoryManager = HostContext.GetService(); _tempDirectoryManager.InitializeTempDirectory(jobContext); - // Setup the debugger - if (jobContext.Global.Debugger?.Enabled == true) - { - Trace.Info("Debugger enabled for this job run"); - - try - { - dapDebugger = HostContext.GetService(); - await dapDebugger.StartAsync(jobContext); - } - catch (Exception ex) - { - Trace.Error($"Failed to start DAP debugger: {ex.Message}"); - AddDebuggerConnectionTelemetry(jobContext, $"Failed: {ex.Message}"); - jobContext.Error("Failed to start debugger."); - return await CompleteJobAsync(server, jobContext, message, TaskResult.Failed); - } - } - // Get the job extension. Trace.Info("Getting job extension."); @@ -242,33 +220,6 @@ public async Task RunAsync(AgentJobRequestMessage message, Cancellat await Task.WhenAny(_jobServerQueue.JobRecordUpdated.Task, Task.Delay(1000)); } - // Wait for DAP debugger client connection and handshake after "Set up job" - // so the job page shows the setup step before we block on the debugger - if (dapDebugger != null) - { - try - { - await dapDebugger.WaitUntilReadyAsync(); - AddDebuggerConnectionTelemetry(jobContext, "Connected"); - } - catch (OperationCanceledException) when (jobRequestCancellationToken.IsCancellationRequested) - { - Trace.Info("Job was cancelled before debugger client connected."); - AddDebuggerConnectionTelemetry(jobContext, "Canceled"); - jobContext.Error("Job was cancelled before debugger client connected."); - return await CompleteJobAsync(server, jobContext, message, TaskResult.Canceled); - } - catch (Exception ex) - { - Trace.Error($"DAP debugger failed to become ready: {ex.Message}"); - AddDebuggerConnectionTelemetry(jobContext, $"Failed: {ex.Message}"); - - // If debugging was requested but the debugger is not available, fail the job - jobContext.Error("The debugger failed to start or no debugger client connected in time."); - return await CompleteJobAsync(server, jobContext, message, TaskResult.Failed); - } - } - // Run all job steps Trace.Info("Run all job steps."); var stepsRunner = HostContext.GetService(); @@ -309,11 +260,6 @@ public async Task RunAsync(AgentJobRequestMessage message, Cancellat runnerShutdownRegistration = null; } - if (dapDebugger != null) - { - await dapDebugger.OnJobCompletedAsync(); - } - await ShutdownQueue(throwOnFailure: false); } } @@ -495,15 +441,6 @@ private async Task CompleteJobAsync(IJobServer jobServer, IExecution throw new AggregateException(exceptions); } - private static void AddDebuggerConnectionTelemetry(IExecutionContext jobContext, string result) - { - jobContext.Global.JobTelemetry.Add(new JobTelemetry - { - Type = JobTelemetryType.General, - Message = $"{DebuggerConnectionTelemetryPrefix}: {result}" - }); - } - private void MaskTelemetrySecrets(List jobTelemetry) { foreach (var telemetryItem in jobTelemetry) diff --git a/src/Test/L0/Worker/DapDebuggerL0.cs b/src/Test/L0/Worker/DapDebuggerL0.cs index 9454b1a3a67..f1a29306ff1 100644 --- a/src/Test/L0/Worker/DapDebuggerL0.cs +++ b/src/Test/L0/Worker/DapDebuggerL0.cs @@ -744,14 +744,32 @@ public async Task OnJobCompletedSendsTerminatedAndExitedEvents() await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5)); await waitTask; - // Complete the job — events are sent via OnJobCompletedAsync - await _debugger.OnJobCompletedAsync(); + // Complete the job — OnJobCompletedAsync pauses when stepping, + // so run it in the background and send continue to unblock. + var completedTask = _debugger.OnJobCompletedAsync(); + + // Read the stopped event from the pause + var stoppedMsg = await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5)); + Assert.Contains("\"event\":\"stopped\"", stoppedMsg); + + // Send continue to unblock the pause + await SendRequestAsync(stream, new Request + { + Seq = 2, + Type = "request", + Command = "continue" + }); + + await completedTask; - var msg1 = await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5)); - var msg2 = await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5)); + // Read remaining messages — continue response + continued event + terminated + exited + var allMessages = new System.Text.StringBuilder(); + for (int i = 0; i < 4; i++) + { + allMessages.Append(await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5))); + } - // Both events should arrive (order may vary) - var combined = msg1 + msg2; + var combined = allMessages.ToString(); Assert.Contains("\"event\":\"terminated\"", combined); Assert.Contains("\"event\":\"exited\"", combined); } @@ -809,5 +827,45 @@ public void ResolveTunnelConnectTimeoutIgnoresZeroValue() }); } } + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] + public async Task WaitForCommandAsyncUnblocksOnCancellationDuringWait() + { + using (CreateTestContext()) + { + var port = GetFreePort(); + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(10)); + var jobContext = CreateJobContextWithTunnel(cts.Token, port); + await _debugger.StartAsync(jobContext.Object); + + var waitTask = _debugger.WaitUntilReadyAsync(); + using var client = await ConnectClientAsync(port); + var stream = client.GetStream(); + await SendRequestAsync(stream, new Request + { + Seq = 1, + Type = "request", + Command = "configurationDone" + }); + + await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5)); + await waitTask; + + // Start OnJobCompletedAsync — it will pause because _pauseOnNextStep is true + var completedTask = _debugger.OnJobCompletedAsync(); + + // Read the stopped event + var stoppedMsg = await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5)); + Assert.Contains("\"event\":\"stopped\"", stoppedMsg); + + // Cancel the job while waiting — should unblock the pause + cts.Cancel(); + + // OnJobCompletedAsync should complete without hanging + var finished = await Task.WhenAny(completedTask, Task.Delay(TimeSpan.FromSeconds(5))); + Assert.Equal(completedTask, finished); + } + } } } diff --git a/src/Test/L0/Worker/JobExtensionL0.cs b/src/Test/L0/Worker/JobExtensionL0.cs index 40c495a8183..a286e62db6a 100644 --- a/src/Test/L0/Worker/JobExtensionL0.cs +++ b/src/Test/L0/Worker/JobExtensionL0.cs @@ -760,5 +760,171 @@ public async Task SnapshotPreflightChecks_BothChecks_Enabled_AllConditionsMet_Su Environment.SetEnvironmentVariable("RUNNER_ENVIRONMENT", null); Environment.SetEnvironmentVariable("GITHUB_ACTIONS_IMAGE_GEN_ENABLED", null); } + + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] + public async Task DebuggerStartedInSetupJobWhenEnabled() + { + using (TestHostContext hc = CreateTestContext()) + { + var jobExtension = new JobExtension(); + jobExtension.Initialize(hc); + + // Enable debugger on the message + _message.EnableDebugger = true; + _message.DebuggerTunnel = new Pipelines.DebuggerTunnelInfo + { + TunnelId = "test-tunnel", + ClusterId = "test-cluster", + HostToken = "test-token", + Port = 9229 + }; + + // Re-initialize the execution context so it picks up debugger config + _jobEc = new Runner.Worker.ExecutionContext(); + _jobEc.Initialize(hc); + _jobEc.InitializeJob(_message, _tokenSource.Token); + + // Set up mock debugger + var mockDebugger = new Mock(); + mockDebugger.Setup(x => x.StartAsync(It.IsAny())).Returns(Task.CompletedTask); + mockDebugger.Setup(x => x.WaitUntilReadyAsync()).Returns(Task.CompletedTask); + hc.SetSingleton(mockDebugger.Object); + + _actionManager.Setup(x => x.PrepareActionsAsync(It.IsAny(), It.IsAny>(), It.IsAny())) + .Returns(Task.FromResult(new PrepareResult(new List(), new Dictionary()))); + + List result = await jobExtension.InitializeJob(_jobEc, _message); + + // Verify DAP debugger was started and waited on + mockDebugger.Verify(x => x.StartAsync(It.IsAny()), Times.Once); + mockDebugger.Verify(x => x.WaitUntilReadyAsync(), Times.Once); + + // Verify steps are still returned correctly + Assert.Equal(5, result.Count); + } + } + + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] + public async Task DebuggerNotStartedInSetupJobWhenDisabled() + { + using (TestHostContext hc = CreateTestContext()) + { + var jobExtension = new JobExtension(); + jobExtension.Initialize(hc); + + // Debugger NOT enabled on the message — should not be started + + // Set up mock debugger (should NOT be called) + var mockDebugger = new Mock(); + hc.SetSingleton(mockDebugger.Object); + + _actionManager.Setup(x => x.PrepareActionsAsync(It.IsAny(), It.IsAny>(), It.IsAny())) + .Returns(Task.FromResult(new PrepareResult(new List(), new Dictionary()))); + + List result = await jobExtension.InitializeJob(_jobEc, _message); + + // Verify DAP debugger was NOT started during setup job + mockDebugger.Verify(x => x.StartAsync(It.IsAny()), Times.Never); + mockDebugger.Verify(x => x.WaitUntilReadyAsync(), Times.Never); + + Assert.Equal(5, result.Count); + } + } + + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] + public async Task DebuggerCleanedUpInFinalizeJob() + { + using (TestHostContext hc = CreateTestContext()) + { + var jobExtension = new JobExtension(); + jobExtension.Initialize(hc); + + // Enable debugger on the message + _message.EnableDebugger = true; + _message.DebuggerTunnel = new Pipelines.DebuggerTunnelInfo + { + TunnelId = "test-tunnel", + ClusterId = "test-cluster", + HostToken = "test-token", + Port = 9229 + }; + + // Re-initialize the execution context so it picks up debugger config + _jobEc = new Runner.Worker.ExecutionContext(); + _jobEc.Initialize(hc); + _jobEc.InitializeJob(_message, _tokenSource.Token); + + // Set up mock debugger + var mockDebugger = new Mock(); + mockDebugger.Setup(x => x.StartAsync(It.IsAny())).Returns(Task.CompletedTask); + mockDebugger.Setup(x => x.WaitUntilReadyAsync()).Returns(Task.CompletedTask); + mockDebugger.Setup(x => x.OnJobCompletedAsync()).Returns(Task.CompletedTask); + hc.SetSingleton(mockDebugger.Object); + + _actionManager.Setup(x => x.PrepareActionsAsync(It.IsAny(), It.IsAny>(), It.IsAny())) + .Returns(Task.FromResult(new PrepareResult(new List(), new Dictionary()))); + + // Run InitializeJob to start the debugger + await jobExtension.InitializeJob(_jobEc, _message); + + // Run FinalizeJob — should pause (inside OnJobCompletedAsync) then clean up + await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow); + + // Verify OnJobCompletedAsync was called (it handles pause + cleanup) + mockDebugger.Verify(x => x.OnJobCompletedAsync(), Times.Once); + } + } + + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] + public async Task FinalizeJobHandlesDebuggerCleanupException() + { + using (TestHostContext hc = CreateTestContext()) + { + var jobExtension = new JobExtension(); + jobExtension.Initialize(hc); + + // Enable debugger on the message + _message.EnableDebugger = true; + _message.DebuggerTunnel = new Pipelines.DebuggerTunnelInfo + { + TunnelId = "test-tunnel", + ClusterId = "test-cluster", + HostToken = "test-token", + Port = 9229 + }; + + // Re-initialize the execution context so it picks up debugger config + _jobEc = new Runner.Worker.ExecutionContext(); + _jobEc.Initialize(hc); + _jobEc.InitializeJob(_message, _tokenSource.Token); + + // Set up mock debugger — OnJobCompletedAsync throws + var mockDebugger = new Mock(); + mockDebugger.Setup(x => x.StartAsync(It.IsAny())).Returns(Task.CompletedTask); + mockDebugger.Setup(x => x.WaitUntilReadyAsync()).Returns(Task.CompletedTask); + mockDebugger.Setup(x => x.OnJobCompletedAsync()).ThrowsAsync(new InvalidOperationException("tunnel disposed")); + mockDebugger.Setup(x => x.StopAsync()).Returns(Task.CompletedTask); + hc.SetSingleton(mockDebugger.Object); + + _actionManager.Setup(x => x.PrepareActionsAsync(It.IsAny(), It.IsAny>(), It.IsAny())) + .Returns(Task.FromResult(new PrepareResult(new List(), new Dictionary()))); + + await jobExtension.InitializeJob(_jobEc, _message); + + // FinalizeJob should not throw even when OnJobCompletedAsync throws + await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow); + + mockDebugger.Verify(x => x.OnJobCompletedAsync(), Times.Once); + mockDebugger.Verify(x => x.StopAsync(), Times.Once); + } + } } }