Skip to content

Commit 4e7eba7

Browse files
NickK21DAC098
authored and committed
Harden Python pytest error handling
1 parent f4380c4 commit 4e7eba7

File tree

1 file changed

+142
-32
lines changed

1 file changed

+142
-32
lines changed

executor.js

Lines changed: 142 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -238,61 +238,141 @@ ${testCode}
238238
}
239239

240240
/**
241-
* Parses pytest output to extract test results.
242-
* @param {string} output - The output from pytest.
241+
* Parses pytest output to extract structured test results.
242+
* @param {string} stdout - The stdout from pytest.
243+
* @param {string} [stderr=''] - The stderr from pytest.
244+
* @param {number|null} [exitCode=null] - The pytest process exit code.
243245
* @returns {object} - The test summary.
244246
*/
245-
function parsePytestOutput(output, stdout = '', stderr = '') {
246-
let total_tests = 0;
247-
let passed_tests = 0;
248-
let failed_tests = 0;
249-
let failures = [];
/**
 * Joins a process's stdout and stderr into one raw-output string.
 *
 * When both streams have content they are separated by a single newline;
 * when only one has content it is returned as-is, so no stray leading or
 * trailing newline is introduced.
 *
 * @param {string} [stdout=''] - Captured standard output.
 * @param {string} [stderr=''] - Captured standard error.
 * @returns {string} The combined output, or '' when both streams are empty.
 */
function buildRawOutput(stdout = '', stderr = '') {
  if (stdout && stderr) {
    return `${stdout}\n${stderr}`;
  }
  // Falls through for empty strings as well as null/undefined inputs.
  return stdout || stderr || '';
}
250253

251-
const match = output.match(/(\d+) passed, (\d+) failed/);
252-
if (match) {
253-
passed_tests = parseInt(match[1]);
254-
failed_tests = parseInt(match[2]);
255-
total_tests = passed_tests + failed_tests;
256-
} else {
257-
const singlePassMatch = output.match(/(\d+) passed/);
258-
if (singlePassMatch) {
259-
passed_tests = parseInt(singlePassMatch[1]);
260-
total_tests = passed_tests;
261-
}
262-
const singleFailMatch = output.match(/(\d+) failed/);
263-
if (singleFailMatch) {
264-
failed_tests = parseInt(singleFailMatch[1]);
265-
total_tests += failed_tests;
/**
 * Extracts the final pytest summary line from captured output.
 *
 * The last line wrapped in `=` rulers that carries a timing (or the
 * "no tests ran" marker) is preferred, e.g.
 * `===== 1 failed, 2 passed in 0.12s =====`. If no such line exists, the
 * undecorated form printed in quiet mode (`1 failed, 2 passed in 0.12s`) is
 * accepted as a fallback. Both the `0.12s` and `0.12 seconds` timing
 * spellings are recognised.
 *
 * @param {string} output - Text to scan (typically pytest's stdout).
 * @returns {string} The summary text without the `=` rulers, or '' when no
 *   summary line is found or `output` is not a string.
 */
function extractPytestSummary(output) {
  if (typeof output !== 'string' || output === '') {
    return '';
  }

  const timingPattern = /\bin [\d.]+ ?s(?:econds)?\b/;
  const noTestsPattern = /\bno tests ran\b/;
  // Undecorated summary: must start with a count list or "no tests ran" so
  // that arbitrary program output such as "done in 3s" is not mistaken for it.
  const quietPattern = /^(?:\d+ [a-z]+(?:, \d+ [a-z]+)*|no tests ran) in [\d.]+ ?s(?:econds)?\b/;

  const lines = output
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);

  // Search from the end: the summary is the last matching line of the run.
  const fromEnd = [...lines].reverse();

  const decorated = fromEnd.find((line) => (
    /^=+/.test(line) &&
    /=+$/.test(line) &&
    (timingPattern.test(line) || noTestsPattern.test(line))
  ));
  if (decorated) {
    return decorated.replace(/^=+\s*/, '').replace(/\s*=+$/, '');
  }

  return fromEnd.find((line) => quietPattern.test(line)) ?? '';
}
272+
/**
 * Reads the count that precedes a given label in a pytest summary line.
 *
 * @param {string} summary - Summary text such as `1 failed, 2 passed in 0.1s`.
 * @param {string} labelPattern - Regular-expression source for the label
 *   (e.g. `'passed'` or `'error(?:s)?'`). It is inserted into a regex as-is,
 *   so it must come from trusted code, not from user input.
 * @returns {number} The parsed count, or 0 when the label is absent or
 *   `summary` is not a string.
 */
function extractPytestCount(summary, labelPattern) {
  if (typeof summary !== 'string') {
    return 0;
  }

  // The non-capturing group keeps alternations inside labelPattern
  // (e.g. "errors|error") from swallowing the "(\d+) " prefix.
  const match = summary.match(new RegExp(`(\\d+) (?:${labelPattern})\\b`));
  return match ? Number.parseInt(match[1], 10) : 0;
}
277+
/**
 * Returns the text following `prefix` on the first matching status line of
 * pytest's output, e.g. for `FAILED test_a.py::test_x - assert 1 == 2` with
 * prefix `'FAILED'` it returns `test_a.py::test_x - assert 1 == 2`.
 *
 * Lines after the "short test summary info" header are searched first, so
 * that unrelated earlier lines which happen to start with the same word
 * (for example captured log records such as `ERROR    root:...`) do not win.
 * If that header is absent, or has no matching line, the whole output is
 * searched.
 *
 * @param {string} output - Text to scan (typically pytest's stdout).
 * @param {string} prefix - Status word to look for, such as 'FAILED' or 'ERROR'.
 * @returns {string} The trimmed remainder of the matching line, or '' when
 *   there is no match or `output` is not a string.
 */
function extractPytestShortSummaryTarget(output, prefix) {
  if (typeof output !== 'string' || output === '') {
    return '';
  }

  const marker = `${prefix} `;
  const hasMarker = (entry) => entry.startsWith(marker);
  const lines = output.split(/\r?\n/).map((entry) => entry.trim());

  const headerIndex = lines.findIndex((entry) => /^=+ short test summary info =+$/.test(entry));
  const summaryLines = headerIndex === -1 ? [] : lines.slice(headerIndex + 1);

  const line = summaryLines.find(hasMarker) ?? lines.find(hasMarker);
  return line ? line.slice(marker.length).trim() : '';
}
286+
/**
 * Picks the most specific error message it can find in pytest's output.
 *
 * stdout and stderr are combined, then the patterns below are tried in
 * priority order; the first pattern that matches anywhere in the text wins:
 *   1. an `E   SomeError: message` traceback line,
 *   2. an unprefixed `SomeError: message` line,
 *   3. pytest's `ImportError while importing test module ...` line,
 *   4. any `E   ...` traceback line.
 *
 * @param {string} output - The stdout text from pytest.
 * @param {string} [stderr=''] - The stderr text from pytest.
 * @returns {string} The matched message, or a generic fallback message when
 *   nothing matches.
 */
function extractPytestErrorMessage(output, stderr = '') {
  const combined = buildRawOutput(output, stderr);

  // `[\w.]*` before Error/Exception accepts dotted and digit-bearing class
  // names (json.decoder.JSONDecodeError, HTTP404Error) as well as the bare
  // built-in names `Exception` and `Error`.
  const patterns = [
    /^E\s+([\w.]*(?:Error|Exception): .+)$/m,
    /^([\w.]*(?:Error|Exception): .+)$/m,
    /^(ImportError while importing test module .+)$/m,
    /^E\s+(.+)$/m,
  ];

  for (const pattern of patterns) {
    const match = combined.match(pattern);
    if (match) {
      return match[1].trim();
    }
  }

  return 'Pytest error during collection or execution';
}
305+
/**
 * Parses pytest output to extract structured test results.
 *
 * Counts are read from pytest's final summary line in stdout. Individual
 * assertion failures are parsed from the FAILURES section; if pytest reports
 * failures that cannot be parsed, a single generic failure entry is added.
 * Collection/execution errors and the "no tests collected" case each add
 * their own entry to `failure_details` and set `runtime_error`.
 *
 * @param {string} [stdout=''] - The stdout from pytest.
 * @param {string} [stderr=''] - The stderr from pytest.
 * @param {number|null} [exitCode=null] - The pytest process exit code
 *   (5 is treated as "no tests collected").
 * @returns {{
 *   tests_run: number,
 *   passed: number,
 *   failed: number,
 *   errors: number,
 *   no_tests_collected: boolean,
 *   exit_code: (number|null),
 *   failure_details: Array<{test_case: string, expected: string, received: string, error_message: string, rawout: string}>,
 *   runtime_error: string
 * }} The test summary.
 */
function parsePytestOutput(stdout = '', stderr = '', exitCode = null) {
  // Default parameters do not cover an explicit null; normalise for .split().
  const out = typeof stdout === 'string' ? stdout : '';

  const summary = extractPytestSummary(out);
  const rawout = buildRawOutput(stdout, stderr);
  const passed_tests = extractPytestCount(summary, 'passed');
  const failed_tests = extractPytestCount(summary, 'failed');
  const errors = extractPytestCount(summary, 'error(?:s)?');
  const no_tests_collected = exitCode === 5 || /\bno tests ran\b/.test(summary);
  const failures = [];

  // Text between the "FAILURES" ruler and the next "=====" ruler.
  const failureBlocks = out.split(/={10,} FAILURES ={10,}/)[1]?.split(/={10,}/)[0] || '';

  // Groups: 1 = test name from the "____ name ____" header,
  //         2 = source assertion from the "> assert ..." line,
  //         3 = evaluated assertion from the "E assert ..." line,
  //         4 = left-hand value from an optional "E + where X = ..." line.
  // Group 3 takes the rest of its line; a lazy `.*?` here would always match
  // the empty string because everything after it is optional.
  // The `(?!\n_{5,})` guard stops the scan at the next test header so that a
  // non-assert failure cannot borrow the assertion of the following test.
  const matches = [...failureBlocks.matchAll(
    /_{5,}\s*(.*?)\s*_{5,}(?:(?!\n_{5,})[\s\S])*?>\s*assert\s+(.*?)\s*?\nE\s+assert\s+([^\n]*)(?:\nE\s+\+\s+where\s+(.*?)\s+=)?/g
  )];

  matches.forEach((match, index) => {
    const test_case = match[1]?.trim() || `Test ${index + 1}`;
    const assertionLine = match[2]?.trim();
    const failedExpr = match[3]?.trim() ?? '';
    const evaluated = match[4]?.trim() || '';

    failures.push({
      test_case,
      expected: failedExpr.split('==')[1]?.trim() || '',
      received: evaluated || failedExpr.split('==')[0]?.trim(),
      error_message: `Assertion failed: ${assertionLine}`,
      rawout,
    });
  });

  // pytest counted failures but none matched the assertion pattern
  // (e.g. a test that raised instead of asserting): report one generic entry.
  if (failed_tests > 0 && failures.length === 0) {
    failures.push({
      test_case: extractPytestShortSummaryTarget(out, 'FAILED') || 'pytest assertion failure',
      expected: '',
      received: '',
      error_message: 'Pytest reported one or more failed assertions',
      rawout,
    });
  }

  let runtime_error = '';

  if (errors > 0) {
    runtime_error = extractPytestErrorMessage(stdout, stderr);
    failures.push({
      test_case: extractPytestShortSummaryTarget(out, 'ERROR') || 'pytest collection/execution',
      expected: '',
      received: '',
      error_message: runtime_error,
      rawout,
    });
  } else if (no_tests_collected) {
    runtime_error = 'Pytest did not collect any tests';
    failures.push({
      test_case: 'pytest collection',
      expected: 'at least 1 collected test',
      received: '0 collected tests',
      error_message: runtime_error,
      rawout,
    });
  }

  return {
    // Errors are reported separately and are not counted as run tests.
    tests_run: passed_tests + failed_tests,
    passed: passed_tests,
    failed: failed_tests,
    errors,
    no_tests_collected,
    exit_code: exitCode,
    failure_details: failures,
    runtime_error,
  };
}
298378

@@ -355,6 +435,9 @@ async function executeCode(language, code, stdin, expectedOutput, runTests = fal
355435
tests_run: 0,
356436
passed: 0,
357437
failed: 0,
438+
errors: 0,
439+
no_tests_collected: false,
440+
exit_code: null,
358441
failure_details: [],
359442
compilation_error: '',
360443
runtime_error: '',
@@ -432,9 +515,11 @@ async function executeCode(language, code, stdin, expectedOutput, runTests = fal
432515
} catch (executionError) {
433516
console.error('Test execution failed:', executionError);
434517
response.state = 'failed';
518+
response.exit_code = executionError.exitCode ?? null;
435519
output = {
436520
stdout: executionError.stdout || '',
437521
stderr: executionError.stderr || '',
522+
exitCode: executionError.exitCode ?? null,
438523
};
439524
response.runtime_error = executionError.message;
440525
}
@@ -454,16 +539,38 @@ async function executeCode(language, code, stdin, expectedOutput, runTests = fal
454539
} else {
455540
response.state = 'runtime_error';
456541
}
542+
response.exit_code = executionError.exitCode ?? null;
457543
response.runtime_error = executionError.message;
458544
return response;
459545
}
460546
}
461547

548+
response.exit_code = output.exitCode ?? response.exit_code;
549+
462550
if (runTests && testCode) {
463551
if (language.toLowerCase() === 'python') {
464-
const testResults = parsePytestOutput(output.stdout, output.stdout, output.stderr);
552+
const testResults = parsePytestOutput(output.stdout, output.stderr, output.exitCode ?? null);
553+
const keepGenericRuntimeError = output.exitCode == null || (
554+
output.exitCode !== 0 &&
555+
output.exitCode !== 1 &&
556+
output.exitCode !== 5 &&
557+
testResults.errors === 0
558+
);
559+
const runtime_error = testResults.runtime_error || (keepGenericRuntimeError ? response.runtime_error : '');
560+
const hasUnexpectedPytestExecutionError = Boolean(runtime_error) && (
561+
testResults.failed === 0 &&
562+
testResults.errors === 0 &&
563+
!testResults.no_tests_collected
564+
);
565+
465566
response = { ...response, ...testResults };
466-
response.state = testResults.failed === 0 ? 'passed' : 'failed';
567+
response.runtime_error = runtime_error;
568+
response.state = (
569+
testResults.failed === 0 &&
570+
testResults.errors === 0 &&
571+
!testResults.no_tests_collected &&
572+
!hasUnexpectedPytestExecutionError
573+
) ? 'passed' : 'failed';
467574
return response;
468575
}
469576

@@ -497,6 +604,7 @@ async function executeCode(language, code, stdin, expectedOutput, runTests = fal
497604
response.tests_run = 1;
498605
response.passed = output.stdout.trim() === expectedOutput.trim() ? 1 : 0;
499606
response.failed = response.passed === 0 ? 1 : 0;
607+
response.exit_code = output.exitCode ?? response.exit_code;
500608
response.state = response.passed === 1 ? 'passed' : 'failed';
501609

502610
if (response.failed) {
@@ -558,7 +666,7 @@ function compileCode(command, args, cwd) {
558666
* @param {string[]} args - The command arguments.
559667
* @param {string} [stdin=''] - Input for the process.
560668
* @param {number} [timeout=3000] - Timeout in milliseconds.
561-
* @returns {Promise<string>} - The program's stdout.
669+
* @returns {Promise<{stdout: string, stderr: string, exitCode: number}>} - The program output.
562670
*/
563671
function runProgram(command, args, stdin = '', timeout = 3000, workingDir = null) {
564672
return new Promise((resolve, reject) => {
@@ -617,17 +725,19 @@ function runProgram(command, args, stdin = '', timeout = 3000, workingDir = null
617725
const err = new Error(`Execution terminated: ${reason}`);
618726
err.stdout = stdout;
619727
err.stderr = stderr;
728+
err.exitCode = null;
620729
return reject(err);
621730
}
622731

623732
if (code !== 0) {
624733
const err = new Error(`Execution failed with code ${code}`);
625734
err.stdout = stdout;
626735
err.stderr = stderr;
736+
err.exitCode = code;
627737
return reject(err);
628738
}
629739

630-
resolve({ stdout, stderr });
740+
resolve({ stdout, stderr, exitCode: code });
631741
});
632742

633743
proc.on('error', err => {

0 commit comments

Comments
 (0)