Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions src/Runner.Worker/Dap/DapDebugger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,26 @@ public async Task OnJobCompletedAsync()
{
if (_state != DapSessionState.NotStarted)
{
// Pause so the user can inspect final job state before we tear down,
// but only if the user was stepping through (not if they hit continue).
if (IsActive && _pauseOnNextStep)
{
try
{
if (_jobContext != null)
{
Trace.Info("Job completed — pausing for inspection");
SendStoppedEvent("completed", "Job completed — inspect variables before the session ends.");

await WaitForCommandAsync(_jobContext.CancellationToken);
}
}
catch (Exception ex)
{
Trace.Warning($"DAP job-completed pause error: {ex.Message}");
}
}

try
{
OnJobCompleted();
Expand All @@ -252,8 +272,6 @@ public async Task OnJobCompletedAsync()
Trace.Warning($"DAP OnJobCompleted error: {ex.Message}");
}
}

await StopAsync();
}

public async Task StopAsync()
Expand Down Expand Up @@ -1302,6 +1320,13 @@ private async Task WaitForCommandAsync(CancellationToken cancellationToken)
_commandTcs = new TaskCompletionSource<DapCommand>(TaskCreationOptions.RunContinuationsAsynchronously);
}

// If cancellation already fired before we created the new TCS,
// the registration callback targeted the old one. Unblock now.
if (cancellationToken.IsCancellationRequested)
{
_commandTcs.TrySetResult(DapCommand.Disconnect);
}

Trace.Info("Waiting for debugger command...");

var command = await _commandTcs.Task;
Expand Down
1 change: 1 addition & 0 deletions src/Runner.Worker/Dap/IDapDebugger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ public interface IDapDebugger : IRunnerService
Task OnStepStartingAsync(IStep step);
void OnStepCompleted(IStep step);
Task OnJobCompletedAsync();
Task StopAsync();
}
}
90 changes: 90 additions & 0 deletions src/Runner.Worker/JobExtension.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using GitHub.Runner.Common;
using GitHub.Runner.Common.Util;
using GitHub.Runner.Sdk;
using GitHub.Runner.Worker.Dap;
using GitHub.Services.Common;
using Newtonsoft.Json;
using Pipelines = GitHub.DistributedTask.Pipelines;
Expand Down Expand Up @@ -50,6 +51,7 @@ public sealed class JobExtension : RunnerService, IJobExtension
private Task _diskSpaceCheckTask = null;
private CancellationTokenSource _serviceConnectivityCheckToken = new();
private Task _serviceConnectivityCheckTask = null;
private IDapDebugger _dapDebugger;

// Download all required actions.
// Make sure all condition inputs are valid.
Expand All @@ -67,6 +69,7 @@ public async Task<List<IStep>> InitializeJob(IExecutionContext jobContext, Pipel

List<IStep> preJobSteps = new();
List<IStep> jobSteps = new();
var initSucceeded = false;
using (var register = jobContext.CancellationToken.Register(() => { context.CancelToken(); }))
{
try
Expand Down Expand Up @@ -481,6 +484,41 @@ public async Task<List<IStep>> InitializeJob(IExecutionContext jobContext, Pipel
Trace.Info($"Start checking service connectivity in background.");
_serviceConnectivityCheckTask = CheckServiceConnectivityAsync(context, _serviceConnectivityCheckToken.Token);

// Start the DAP debugger and wait for a client connection inside
// "Set up job" so the step stays in-progress while we wait.
if (jobContext.Global.Debugger?.Enabled == true)
{
Trace.Info("Debugger enabled — starting inside Set up job");
context.Output("Starting debugger…");

try
{
_dapDebugger = HostContext.GetService<IDapDebugger>();
await _dapDebugger.StartAsync(jobContext);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we pass in the step level context instead of the jobContext?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The debugger needs jobs cancellation, Global.Debugger and variable provider from the job context, plus I think it needs a context that outlives the setup step? This isn't new btw, it just moved into the step.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the Global object is shared across all context (step + job).
you were using jobcontext since the code in jobrunner.cs doesn't have a step context.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But also we hook up to jobcontext cancelation in the debugger, if we used the step context wouldn't that be completed when Setup job completes?


context.Output("Waiting for debugger client to connect…");

await _dapDebugger.WaitUntilReadyAsync();
context.Output("Debugger connected.");
AddDebuggerConnectionTelemetry(jobContext, "Connected");
}
catch (OperationCanceledException) when (jobContext.CancellationToken.IsCancellationRequested)
{
Trace.Info("Job was cancelled before debugger client connected.");
AddDebuggerConnectionTelemetry(jobContext, "Canceled");
context.Error("Job was cancelled before debugger client connected.");
throw;
}
catch (Exception ex)
{
Trace.Error($"DAP debugger failed: {ex.Message}");
AddDebuggerConnectionTelemetry(jobContext, $"Failed: {ex.GetType().Name}");
context.Error("The debugger failed to start or no debugger client connected in time.");
throw;
}
}

initSucceeded = true;
return steps;
}
catch (OperationCanceledException ex) when (jobContext.CancellationToken.IsCancellationRequested)
Expand All @@ -501,12 +539,36 @@ public async Task<List<IStep>> InitializeJob(IExecutionContext jobContext, Pipel
}
finally
{
// If InitializeJob failed after the debugger was started,
// tear down the transport here since FinalizeJob won't run.
if (!initSucceeded && _dapDebugger != null)
{
try
{
await _dapDebugger.StopAsync();
}
catch (Exception ex)
{
Trace.Warning($"DAP debugger cleanup during failed init: {ex.Message}");
}
_dapDebugger = null;
}

context.Debug("Finishing: Set up job");
context.Complete();
}
}
}

private static void AddDebuggerConnectionTelemetry(IExecutionContext jobContext, string result)
{
jobContext.Global.JobTelemetry.Add(new JobTelemetry
{
Type = JobTelemetryType.General,
Message = $"DebuggerConnectionResult: {result}"
});
}

private string GetWorkflowReference(IDictionary<string, VariableValue> variables)
{
var reference = "";
Expand Down Expand Up @@ -782,6 +844,34 @@ public async Task FinalizeJob(IExecutionContext jobContext, Pipelines.AgentJobRe
}
finally
{
// Pause for debugger inspection, then tear down the DAP session.
// OnJobCompletedAsync pauses first, then sends terminated/exited
// events and stops the transport.
if (_dapDebugger != null)
Comment thread
rentziass marked this conversation as resolved.
{
context.Output("Job completed — pausing for debugger inspection. Press continue to finish.");
try
{
await _dapDebugger.OnJobCompletedAsync();
}
catch (Exception ex)
{
Trace.Warning($"DAP debugger completion error: {ex.Message}");
}
finally
{
try
{
await _dapDebugger.StopAsync();
}
catch (Exception ex)
{
Trace.Warning($"DAP debugger stop error: {ex.Message}");
}
}
_dapDebugger = null;
}

context.Debug("Finishing: Complete job");
context.Complete();
}
Expand Down
63 changes: 0 additions & 63 deletions src/Runner.Worker/JobRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
using GitHub.Runner.Common;
using GitHub.Runner.Common.Util;
using GitHub.Runner.Sdk;
using GitHub.Runner.Worker.Dap;
using GitHub.Services.Common;
using GitHub.Services.WebApi;
using Sdk.RSWebApi.Contracts;
Expand All @@ -29,7 +28,6 @@ public interface IJobRunner : IRunnerService

public sealed class JobRunner : RunnerService, IJobRunner
{
private const string DebuggerConnectionTelemetryPrefix = "DebuggerConnectionResult";
private IJobServerQueue _jobServerQueue;
private RunnerSettings _runnerSettings;
private ITempDirectoryManager _tempDirectoryManager;
Expand Down Expand Up @@ -114,7 +112,6 @@ public async Task<TaskResult> RunAsync(AgentJobRequestMessage message, Cancellat

IExecutionContext jobContext = null;
CancellationTokenRegistration? runnerShutdownRegistration = null;
IDapDebugger dapDebugger = null;
try
{
// Create the job execution context.
Expand Down Expand Up @@ -181,25 +178,6 @@ public async Task<TaskResult> RunAsync(AgentJobRequestMessage message, Cancellat
_tempDirectoryManager = HostContext.GetService<ITempDirectoryManager>();
_tempDirectoryManager.InitializeTempDirectory(jobContext);

// Setup the debugger
if (jobContext.Global.Debugger?.Enabled == true)
{
Trace.Info("Debugger enabled for this job run");

try
{
dapDebugger = HostContext.GetService<IDapDebugger>();
await dapDebugger.StartAsync(jobContext);
}
catch (Exception ex)
{
Trace.Error($"Failed to start DAP debugger: {ex.Message}");
AddDebuggerConnectionTelemetry(jobContext, $"Failed: {ex.Message}");
jobContext.Error("Failed to start debugger.");
return await CompleteJobAsync(server, jobContext, message, TaskResult.Failed);
}
}


// Get the job extension.
Trace.Info("Getting job extension.");
Expand Down Expand Up @@ -242,33 +220,6 @@ public async Task<TaskResult> RunAsync(AgentJobRequestMessage message, Cancellat
await Task.WhenAny(_jobServerQueue.JobRecordUpdated.Task, Task.Delay(1000));
}

// Wait for DAP debugger client connection and handshake after "Set up job"
// so the job page shows the setup step before we block on the debugger
if (dapDebugger != null)
{
try
{
await dapDebugger.WaitUntilReadyAsync();
AddDebuggerConnectionTelemetry(jobContext, "Connected");
}
catch (OperationCanceledException) when (jobRequestCancellationToken.IsCancellationRequested)
{
Trace.Info("Job was cancelled before debugger client connected.");
AddDebuggerConnectionTelemetry(jobContext, "Canceled");
jobContext.Error("Job was cancelled before debugger client connected.");
return await CompleteJobAsync(server, jobContext, message, TaskResult.Canceled);
}
catch (Exception ex)
{
Trace.Error($"DAP debugger failed to become ready: {ex.Message}");
AddDebuggerConnectionTelemetry(jobContext, $"Failed: {ex.Message}");

// If debugging was requested but the debugger is not available, fail the job
jobContext.Error("The debugger failed to start or no debugger client connected in time.");
return await CompleteJobAsync(server, jobContext, message, TaskResult.Failed);
}
}

// Run all job steps
Trace.Info("Run all job steps.");
var stepsRunner = HostContext.GetService<IStepsRunner>();
Expand Down Expand Up @@ -309,11 +260,6 @@ public async Task<TaskResult> RunAsync(AgentJobRequestMessage message, Cancellat
runnerShutdownRegistration = null;
}

if (dapDebugger != null)
{
await dapDebugger.OnJobCompletedAsync();
}

await ShutdownQueue(throwOnFailure: false);
}
}
Expand Down Expand Up @@ -495,15 +441,6 @@ private async Task<TaskResult> CompleteJobAsync(IJobServer jobServer, IExecution
throw new AggregateException(exceptions);
}

private static void AddDebuggerConnectionTelemetry(IExecutionContext jobContext, string result)
{
jobContext.Global.JobTelemetry.Add(new JobTelemetry
{
Type = JobTelemetryType.General,
Message = $"{DebuggerConnectionTelemetryPrefix}: {result}"
});
}

private void MaskTelemetrySecrets(List<JobTelemetry> jobTelemetry)
{
foreach (var telemetryItem in jobTelemetry)
Expand Down
70 changes: 64 additions & 6 deletions src/Test/L0/Worker/DapDebuggerL0.cs
Original file line number Diff line number Diff line change
Expand Up @@ -744,14 +744,32 @@ public async Task OnJobCompletedSendsTerminatedAndExitedEvents()
await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5));
await waitTask;

// Complete the job — events are sent via OnJobCompletedAsync
await _debugger.OnJobCompletedAsync();
// Complete the job — OnJobCompletedAsync pauses when stepping,
// so run it in the background and send continue to unblock.
var completedTask = _debugger.OnJobCompletedAsync();

// Read the stopped event from the pause
var stoppedMsg = await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5));
Assert.Contains("\"event\":\"stopped\"", stoppedMsg);

// Send continue to unblock the pause
await SendRequestAsync(stream, new Request
{
Seq = 2,
Type = "request",
Command = "continue"
});

await completedTask;

var msg1 = await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5));
var msg2 = await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5));
// Read remaining messages — continue response + continued event + terminated + exited
var allMessages = new System.Text.StringBuilder();
for (int i = 0; i < 4; i++)
{
allMessages.Append(await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5)));
}

// Both events should arrive (order may vary)
var combined = msg1 + msg2;
var combined = allMessages.ToString();
Assert.Contains("\"event\":\"terminated\"", combined);
Assert.Contains("\"event\":\"exited\"", combined);
}
Expand Down Expand Up @@ -809,5 +827,45 @@ public void ResolveTunnelConnectTimeoutIgnoresZeroValue()
});
}
}
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public async Task WaitForCommandAsyncUnblocksOnCancellationDuringWait()
{
using (CreateTestContext())
{
var port = GetFreePort();
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(10));
var jobContext = CreateJobContextWithTunnel(cts.Token, port);
await _debugger.StartAsync(jobContext.Object);

var waitTask = _debugger.WaitUntilReadyAsync();
using var client = await ConnectClientAsync(port);
var stream = client.GetStream();
await SendRequestAsync(stream, new Request
{
Seq = 1,
Type = "request",
Command = "configurationDone"
});

await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5));
await waitTask;

// Start OnJobCompletedAsync — it will pause because _pauseOnNextStep is true
var completedTask = _debugger.OnJobCompletedAsync();

// Read the stopped event
var stoppedMsg = await ReadDapMessageAsync(stream, TimeSpan.FromSeconds(5));
Assert.Contains("\"event\":\"stopped\"", stoppedMsg);

// Cancel the job while waiting — should unblock the pause
cts.Cancel();

// OnJobCompletedAsync should complete without hanging
var finished = await Task.WhenAny(completedTask, Task.Delay(TimeSpan.FromSeconds(5)));
Assert.Equal(completedTask, finished);
}
}
}
}
Loading
Loading