Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.vscode/
reports/
avise-reports/
test.py

#models
Expand Down
2 changes: 1 addition & 1 deletion avise/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
)
logger = logging.getLogger(__name__)

DEFAULT_REPORTS_DIR = "reports"
DEFAULT_REPORTS_DIR = "avise-reports"

DEFAULT_SET_CONFIGS = {
"red_queen": "configs/SET/languagemodel/multi_turn/red_queen.json",
Expand Down
2 changes: 1 addition & 1 deletion avise/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
)
logger = logging.getLogger(__name__)

DEFAULT_REPORTS_DIR = "reports"
DEFAULT_REPORTS_DIR = "avise-reports"

# On Windows, ensure triton-windows package is installed
if os.name == "nt":
Expand Down
4 changes: 4 additions & 0 deletions avise/models/evaluation_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def __init__(
):
logger.info("Loading Evaluation Language Model...")
torch.cuda.empty_cache()
self.model = None
self.tokenizer = None
if use_device is None:
use_device = "auto"
# Check for CUDA
Expand Down Expand Up @@ -196,6 +198,8 @@ def del_model(self):
del self.model
del self.tokenizer
torch.cuda.empty_cache()
self.model = None
self.tokenizer = None

def _model_download(
self,
Expand Down
56 changes: 56 additions & 0 deletions avise/reportgen/reporters/html_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ def _markdown_to_html(self, text: str) -> str:
def _get_html_header(self, report_data: ReportData) -> str:
"""Generate HTML head and opening body."""
config = report_data.configuration
target_info = ""
if "target_model" in config:
target_info = f"Target: {config['target_model']} |"
elif "target_system" in config:
target_info = f"Target: {config['target_system']} |"
elif "target" in config:
target_info = f"Target: {config['target']} |"

return f"""<!DOCTYPE html>
<html lang="en">
Expand Down Expand Up @@ -196,12 +203,53 @@ def _get_html_header(self, report_data: ReportData) -> str:
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
text-align: center;
position: relative;
}}
.card .number {{ font-size: 36px; font-weight: bold; }}
.card .label {{ color: #666; font-size: 14px; }}
.card.passed .number {{ color: {self.STATUS_COLORS["passed"]}; }}
.card.failed .number {{ color: {self.STATUS_COLORS["failed"]}; }}
.card.error .number {{ color: {self.STATUS_COLORS["error"]}; }}
.card .tooltip-trigger {{
position: absolute;
top: 8px;
right: 10px;
width: 16px;
height: 16px;
background: #ccc;
color: white;
border-radius: 50%;
font-size: 11px;
font-weight: bold;
line-height: 16px;
text-align: center;
cursor: default;
}}

.card .tooltip-trigger .tooltip-text {{
visibility: hidden;
opacity: 0;
background: #333;
color: white;
font-size: 12px;
font-weight: normal;
text-align: left;
padding: 6px 10px;
border-radius: 6px;
width: 160px;
position: absolute;
top: 22px; /* appears just below the ? */
right: 0;
z-index: 10;
transition: opacity 0.2s ease;
pointer-events: none;
white-space: normal;
}}

.card .tooltip-trigger:hover .tooltip-text {{
visibility: visible;
opacity: 1;
}}
.category {{
background: white;
border-radius: 10px;
Expand Down Expand Up @@ -324,6 +372,7 @@ def _get_html_header(self, report_data: ReportData) -> str:
<h1>AVISE Security Report</h1>
<div class="meta">
Security Evaluation Test: {report_data.set_name} |
{target_info}
Generated: {report_data.timestamp} |
Duration: {report_data.execution_time_seconds}s
{" | ELM Evaluation: Yes" if config.get("elm_evaluation_used") else ""}
Expand Down Expand Up @@ -352,6 +401,13 @@ def _get_summary_section(self, report_data: ReportData) -> str:
<div class="number">{summary["error"]}</div>
<div class="label">Inconclusive</div>
</div>
<div class="card">
<span class="tooltip-trigger">?
<span class="tooltip-text">95% confidence interval for the true pass rate, calculated using the Wilson score method. Accounts for sample size — wider when fewer tests are run.</span>
</span>
<div class="number" style="font-size: 28px">[{summary["ci_lower_bound"]:.2f}, {summary["ci_upper_bound"]:.2f}]</div>
<div class="label">Pass Rate 95% Confidence Interval</div>
</div>
</div>
"""

Expand Down
5 changes: 5 additions & 0 deletions avise/reportgen/summarizers/ai_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def __init__(
max_new_tokens=max_new_tokens,
)

def del_model(self):
    """Delete the model from GPU memory.

    Delegates to the ``del_model()`` method of the object held in
    ``self.model``. The call is a no-op when ``self.model`` is ``None``
    or was never assigned, so it is safe to invoke repeatedly or on an
    instance whose model failed to load.
    """
    # Explicit None check instead of truthiness: a model wrapper that
    # defines __len__/__bool__ must not be skipped just because it
    # happens to evaluate as falsy.
    model = getattr(self, "model", None)
    if model is not None:
        model.del_model()

def generate_summary(
self,
results: List[Dict[str, Any]],
Expand Down
Loading