diff --git a/avise/models/evaluation_lm.py b/avise/models/evaluation_lm.py
index 7524447..53f8458 100644
--- a/avise/models/evaluation_lm.py
+++ b/avise/models/evaluation_lm.py
@@ -191,10 +191,12 @@ def _mistral_text_generation(self, messages: list) -> str:
 
     def del_model(self):
         """Delete the model from GPU memory."""
-        self.model.cpu()
-        del self.model
-        del self.tokenizer
-        torch.cuda.empty_cache()
+        # No-op if the model was never loaded or was already deleted
+        if getattr(self, "model", None) is not None:
+            self.model.cpu()
+            del self.model
+            del self.tokenizer
+            torch.cuda.empty_cache()
 
     def _model_download(
         self,
diff --git a/avise/pipelines/languagemodel/pipeline.py b/avise/pipelines/languagemodel/pipeline.py
index 969519d..5ebfba6 100644
--- a/avise/pipelines/languagemodel/pipeline.py
+++ b/avise/pipelines/languagemodel/pipeline.py
@@ -161,7 +161,10 @@ def run(
             report_format: Desired output format
             connector_config_path: Path to model configuration (for report metadata)
             generate_ai_summary: Whether to generate AI summary
-            runs: How many times to the SET is ran
+            runs: How many times to run the SET
+
+        Returns:
+            ReportData: The final report with all the SET data
 
         Requirements:
             Return the final report
@@ -172,28 +175,35 @@ def run(
         self.set_config_path = set_config_path
         self.target_model_name = connector.model
 
-        # Initialize
-        sets = self.initialize(set_config_path)
+        try:
+            # Initialize
+            sets = self.initialize(set_config_path)
 
-        results = []
-        # Loop to allow multiple SET runs
-        for run in range(runs):
-            logger.info(f"Starting SET run {run}/{runs}.")
+            results = []
+            # Loop to allow multiple SET runs
+            for run in range(runs):
+                logger.info(f"Starting SET run {run + 1}/{runs}.")
 
-            # Execute
-            execution_data = self.execute(connector, sets)
+                # Execute
+                execution_data = self.execute(connector, sets)
 
-            # Evaluate
-            results += self.evaluate(execution_data)
+                # Evaluate
+                results += self.evaluate(execution_data)
 
-            logger.info(f"SET run {run}/{runs} finished.")
+                logger.info(f"SET run {run + 1}/{runs} finished.")
 
-        # Report
-        report_data = self.report(
-            results, output_path, report_format, generate_ai_summary
-        )
+            # Report
+            report_data = self.report(
+                results, output_path, report_format, generate_ai_summary
+            )
+
+            return report_data
 
-        return report_data
+        finally:
+            # Release the evaluation model even if a step above raised
+            if getattr(self, "evaluation_model", None) is not None:
+                self.evaluation_model.del_model()
+                self.evaluation_model = None
 
     def generate_ai_summary(
         self,
@@ -210,29 +220,27 @@
             results: List of EvaluationResult from evaluate()
             summary_stats: Summary statistics from calculate_passrates()
             subcategory_runs: Optional dict of subcategory -> number of runs
-
-        Returns:
-            Dict with ai_summary or None if generation fails
         """
         try:
            from avise.reportgen.summarizers.ai_summarizer import AISummarizer
 
-            model_to_use = None
-            if hasattr(self, "evaluation_model") and self.evaluation_model is not None:
+            model_to_use = getattr(self, "evaluation_model", None)
+            if model_to_use is not None:
                 logger.info("Reusing existing evaluation model for AI summary")
-                model_to_use = self.evaluation_model
             else:
                 logger.info(
-                    "Creating new model for AI summary (CPU mode due to memory constraints)"
+                    "Creating new model for AI summary (no existing evaluation model)"
                 )
-                model_to_use = None
 
             summarizer = AISummarizer(reuse_model=model_to_use)
             results_dict = [r.to_dict() for r in results]
             ai_summary = summarizer.generate_summary(
                 results_dict, summary_stats, subcategory_runs
             )
-
+            # Free the model only if this summarizer created it; a reused
+            # evaluation model is released by run()'s finally block
+            if model_to_use is None:
+                summarizer.model.del_model()
             return {
                 "issue_summary": ai_summary.issue_summary,
                 "recommended_remediations": ai_summary.recommended_remediations,
diff --git a/avise/reportgen/reporters/html_reporter.py b/avise/reportgen/reporters/html_reporter.py
index 9caa274..6f307f8 100644
--- a/avise/reportgen/reporters/html_reporter.py
+++ b/avise/reportgen/reporters/html_reporter.py
@@ -26,7 +26,7 @@ def write(self, report_data: ReportData, output_path: Path) -> None:
             output_path: Path to the output file / directory
         """
         html = self._generate_html(report_data)
-        with open(output_path, "w") as f:
+        with open(output_path, "w", encoding="utf-8") as f:
             f.write(html)
 
     def _generate_html(self, report_data: ReportData) -> str:
diff --git a/avise/reportgen/summarizers/ai_summarizer.py b/avise/reportgen/summarizers/ai_summarizer.py
index 9c0980c..45c326c 100644
--- a/avise/reportgen/summarizers/ai_summarizer.py
+++ b/avise/reportgen/summarizers/ai_summarizer.py
@@ -35,16 +35,13 @@ def __init__(
         from avise.models.evaluation_lm import EvaluationLanguageModel
 
         if reuse_model is not None:
-            logger.info("Reusing existing evaluation model for AI summary")
             self.model = reuse_model
-            self._owns_model = False
         else:
             logger.info("Loading AI summarizer model...")
             self.model = EvaluationLanguageModel(
                 model_name=evaluation_model_name,
                 max_new_tokens=max_new_tokens,
             )
-            self._owns_model = True
 
     def generate_summary(
         self,
@@ -292,15 +289,6 @@ def _format_results_for_prompt(
 
         return "\n".join(lines)
 
-    def cleanup(self):
-        """Clean up the model from memory."""
-        if self.model and self._owns_model:
-            logger.info("Cleaning up AI summarizer model...")
-            self.model.del_model()
-            self.model = None
-        elif self.model:
-            logger.info("Skipping cleanup - model is shared with evaluation")
-
 
 def format_json_ai_summary(ai_summary: AISummary) -> Dict[str, Any]:
     """Format AI summary for JSON report output.