From 00aa9770e382fa82b97a49feaa78346a8e399fdf Mon Sep 17 00:00:00 2001 From: Zippo00 Date: Mon, 13 Apr 2026 12:59:24 +0800 Subject: [PATCH 1/5] changed default reports dir to avise-reports --- avise/cli.py | 2 +- avise/engine.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/avise/cli.py b/avise/cli.py index 4c65dc7..2820e3e 100644 --- a/avise/cli.py +++ b/avise/cli.py @@ -37,7 +37,7 @@ ) logger = logging.getLogger(__name__) -DEFAULT_REPORTS_DIR = "reports" +DEFAULT_REPORTS_DIR = "avise-reports" DEFAULT_SET_CONFIGS = { "red_queen": "configs/SET/languagemodel/multi_turn/red_queen.json", diff --git a/avise/engine.py b/avise/engine.py index a5083c0..6e1fcdd 100644 --- a/avise/engine.py +++ b/avise/engine.py @@ -28,7 +28,7 @@ ) logger = logging.getLogger(__name__) -DEFAULT_REPORTS_DIR = "reports" +DEFAULT_REPORTS_DIR = "avise-reports" # On Windows, ensure triton-windows package is installed if os.name == "nt": From 51330689ba30e839455f0aa63d95fd77ac104987 Mon Sep 17 00:00:00 2001 From: Zippo00 Date: Mon, 13 Apr 2026 13:00:23 +0800 Subject: [PATCH 2/5] added avise-reports/ to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index bf96aa3..ec8ed6c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .vscode/ reports/ +avise-reports/ test.py #models From f4e8cbaa9d82c7625b0fa89037da156dc6000316 Mon Sep 17 00:00:00 2001 From: Zippo00 Date: Mon, 13 Apr 2026 13:34:05 +0800 Subject: [PATCH 3/5] fixed crash when calling EvaluationLanguageModel.del_model() method more than once consecutively --- avise/models/evaluation_lm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/avise/models/evaluation_lm.py b/avise/models/evaluation_lm.py index 53f8458..fe95838 100644 --- a/avise/models/evaluation_lm.py +++ b/avise/models/evaluation_lm.py @@ -35,6 +35,8 @@ def __init__( ): logger.info("Loading Evaluation Language Model...") torch.cuda.empty_cache() + self.model = None + self.tokenizer = None if use_device is None: use_device = "auto" # Check for CUDA @@ -196,6 +198,8 @@ def del_model(self): del self.model del self.tokenizer torch.cuda.empty_cache() + self.model = None + self.tokenizer = None def _model_download( self, From 73f7023da0ef29824c1579cb4698b95f5a9ce589 Mon Sep 17 00:00:00 2001 From: Zippo00 Date: Mon, 13 Apr 2026 13:35:06 +0800 Subject: [PATCH 4/5] fixed crash when calling EvaluationLanguageModel.del_model() method more than once consecutively --- avise/reportgen/summarizers/ai_summarizer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/avise/reportgen/summarizers/ai_summarizer.py b/avise/reportgen/summarizers/ai_summarizer.py index 45c326c..4f3cd19 100644 --- a/avise/reportgen/summarizers/ai_summarizer.py +++ b/avise/reportgen/summarizers/ai_summarizer.py @@ -43,6 +43,11 @@ def __init__( max_new_tokens=max_new_tokens, ) + def del_model(self): + """Delete the model from GPU memory.""" + if self.model: + self.model.del_model() + def generate_summary( self, results: List[Dict[str, Any]], From 6bd00f531f79d33b87deff1368707ce3916d41b8 Mon Sep 17 00:00:00 2001 From: Zippo00 Date: Mon, 13 Apr 2026 13:35:53 +0800 Subject: [PATCH 5/5] added CI card to generated html report; added target model/system information to generated html report --- avise/reportgen/reporters/html_reporter.py | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/avise/reportgen/reporters/html_reporter.py b/avise/reportgen/reporters/html_reporter.py index 6f307f8..097ac82 100644 --- a/avise/reportgen/reporters/html_reporter.py +++ b/avise/reportgen/reporters/html_reporter.py @@ -159,6 +159,13 @@ def _markdown_to_html(self, text: str) -> str: def _get_html_header(self, report_data: ReportData) -> str: """Generate HTML head and opening body.""" config = report_data.configuration + target_info = "" + if "target_model" in config: + target_info = f"Target: {config['target_model']} |" + elif "target_system" in config: + target_info = f"Target: {config['target_system']} |" + elif "target" in config: + target_info = f"Target: {config['target']} |" return f""" @@ -196,12 +203,53 @@ def _get_html_header(self, report_data: ReportData) -> str: border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); text-align: center; + position: relative; }} .card .number {{ font-size: 36px; font-weight: bold; }} .card .label {{ color: #666; font-size: 14px; }} .card.passed .number {{ color: {self.STATUS_COLORS["passed"]}; }} .card.failed .number {{ color: {self.STATUS_COLORS["failed"]}; }} .card.error .number {{ color: {self.STATUS_COLORS["error"]}; }} + .card .tooltip-trigger {{ + position: absolute; + top: 8px; + right: 10px; + width: 16px; + height: 16px; + background: #ccc; + color: white; + border-radius: 50%; + font-size: 11px; + font-weight: bold; + line-height: 16px; + text-align: center; + cursor: default; + }} + + .card .tooltip-trigger .tooltip-text {{ + visibility: hidden; + opacity: 0; + background: #333; + color: white; + font-size: 12px; + font-weight: normal; + text-align: left; + padding: 6px 10px; + border-radius: 6px; + width: 160px; + position: absolute; + top: 22px; /* appears just below the ? */ + right: 0; + z-index: 10; + transition: opacity 0.2s ease; + pointer-events: none; + white-space: normal; + }} + + .card .tooltip-trigger:hover .tooltip-text {{ + visibility: visible; + opacity: 1; + }} .category {{ background: white; border-radius: 10px; @@ -324,6 +372,7 @@ def _get_html_header(self, report_data: ReportData) -> str:

AVISE Security Report

Security Evaluation Test: {report_data.set_name} | + {target_info} Generated: {report_data.timestamp} | Duration: {report_data.execution_time_seconds}s {" | ELM Evaluation: Yes" if config.get("elm_evaluation_used") else ""} @@ -352,6 +401,13 @@ def _get_summary_section(self, report_data: ReportData) -> str:
{summary["error"]}
Inconclusive
+
+ ? + 95% confidence interval for the true pass rate, calculated using the Wilson score method. Accounts for sample size — wider when fewer tests are run. + +
[{summary["ci_lower_bound"]:.2f}, {summary["ci_upper_bound"]:.2f}]
+
Pass Rate 95% Confidence Interval
+
"""