-
-
Notifications
You must be signed in to change notification settings - Fork 0
Fixops acquisition review #235
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,190 @@ | ||
| """AI Bill of Materials (AI-BOM) Generator. | ||
|
|
||
| This module provides the core logic for generating AI-BOMs in CycloneDX format (v1.5+). | ||
| It tracks model provenance, training data lineage, model cards, and usage context. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import datetime | ||
| import hashlib | ||
| import json | ||
| import logging | ||
| import uuid | ||
| from dataclasses import dataclass, field | ||
| from typing import Any, Dict, List, Optional, Union | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
@dataclass
class ModelCard:
    """Standardized Model Card metadata (ISO/IEC 42001 & NIST AI RMF)."""

    model_id: str  # stable unique identifier for the model
    name: str  # human-readable model name (also used in the BOM purl)
    version: str  # model version string
    author: str  # person or team responsible for the model
    description: str  # short free-text summary
    license: str  # license identifier; emitted as CycloneDX {"license": {"id": ...}}
    framework: str  # pytorch, tensorflow, sklearn, etc.
    model_type: str  # llm, classification, regression, etc.
    tags: List[str] = field(default_factory=list)  # arbitrary labels for grouping/search
    intended_use: List[str] = field(default_factory=list)  # approved use cases
    limitations: List[str] = field(default_factory=list)  # known failure modes / out-of-scope uses
    ethical_considerations: List[str] = field(default_factory=list)  # ethics notes
    inputs: List[Dict[str, str]] = field(default_factory=list)  # input signature descriptors
    outputs: List[Dict[str, str]] = field(default_factory=list)  # output signature descriptors
|
|
||
|
|
||
@dataclass
class TrainingData:
    """Metadata about training datasets."""

    name: str  # dataset name
    url: Optional[str] = None  # source location; emitted as an externalReference when set
    description: Optional[str] = None  # free-text summary; BOM falls back to "Training dataset"
    provenance: str = "unknown"  # public, proprietary, synthetic
    size: str = "unknown"  # free-form size description (e.g. rows, bytes)
    license: str = "unknown"  # dataset license id; "unknown" suppresses the license entry in the BOM
    sensitive_data: bool = False  # dataset contains sensitive records
    pii_present: bool = False  # dataset contains personally identifiable information
    bias_analysis: Optional[str] = None  # summary or link to any bias analysis performed
    hash_alg: str = "sha256"  # digest algorithm name (upper-cased when written to the BOM)
    hash_value: Optional[str] = None  # content digest; hash entry is omitted when None
|
|
||
|
|
||
@dataclass
class AIBOM:
    """AI Bill of Materials container."""

    # CycloneDX envelope fields; defaults target spec v1.5.
    bom_format: str = "CycloneDX"
    spec_version: str = "1.5"
    serial_number: str = field(default_factory=lambda: f"urn:uuid:{uuid.uuid4()}")
    version: int = 1
    metadata: Dict[str, Any] = field(default_factory=dict)
    components: List[Dict[str, Any]] = field(default_factory=list)
    services: List[Dict[str, Any]] = field(default_factory=list)
    dependencies: List[Dict[str, Any]] = field(default_factory=list)

    def to_json(self) -> str:
        """Render this BOM as a pretty-printed CycloneDX JSON document."""
        document: Dict[str, Any] = {
            "bomFormat": self.bom_format,
            "specVersion": self.spec_version,
            "serialNumber": self.serial_number,
            "version": self.version,
            "metadata": self.metadata,
            "components": self.components,
            "services": self.services,
            "dependencies": self.dependencies,
        }
        return json.dumps(document, indent=2)
|
|
||
|
|
||
class AIBOMGenerator:
    """Generates AI-BOMs from model and training data metadata."""

    def __init__(self, organization: str = "FixOps"):
        """Initialize the generator.

        Args:
            organization: Group name used in BOM metadata and purl construction.
        """
        self.organization = organization
        # Captured once so every BOM produced by this instance shares a
        # consistent, timezone-aware (UTC) creation timestamp.
        self.timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()

    def create_bom(
        self,
        model_card: ModelCard,
        training_data: Optional[List[TrainingData]] = None,
        dependencies: Optional[List[Dict[str, str]]] = None,
    ) -> AIBOM:
        """Create a full AI-BOM.

        Args:
            model_card: Metadata of the model the BOM describes.
            training_data: Optional datasets used to train the model.
            dependencies: Optional library dicts with "name"/"version" keys.

        Returns:
            A populated :class:`AIBOM` (services/dependencies left empty).
        """
        metadata = self._build_metadata(model_card)
        components = self._build_components(model_card, training_data or [], dependencies or [])

        return AIBOM(
            metadata=metadata,
            components=components,
        )

    def _build_metadata(self, model: ModelCard) -> Dict[str, Any]:
        """Build the CycloneDX ``metadata`` section for the root ML component."""
        return {
            "timestamp": self.timestamp,
            "component": {
                "type": "machine-learning-model",
                "name": model.name,
                "version": model.version,
                "group": self.organization,
                "description": model.description,
                "author": model.author,
                "licenses": [{"license": {"id": model.license}}],
                "purl": f"pkg:ml/{self.organization}/{model.name}@{model.version}",
            },
            "tools": [
                {
                    "vendor": "FixOps",
                    "name": "AI-BOM Generator",
                    "version": "1.0.0"
                }
            ],
            "properties": [
                {"name": "fixops:model_type", "value": model.model_type},
                {"name": "fixops:framework", "value": model.framework},
                {"name": "fixops:intended_use", "value": ", ".join(model.intended_use)},
                {"name": "fixops:limitations", "value": ", ".join(model.limitations)},
            ]
        }

    def _build_components(
        self,
        model: ModelCard,
        datasets: List[TrainingData],
        libs: List[Dict[str, str]]
    ) -> List[Dict[str, Any]]:
        """Build BOM components (datasets, libraries)."""
        components: List[Dict[str, Any]] = []

        # Add Training Data Components
        for data in datasets:
            comp: Dict[str, Any] = {
                "type": "data",
                "name": data.name,
                "description": data.description or "Training dataset",
                "scope": "required",
                "properties": [
                    {"name": "fixops:data:provenance", "value": data.provenance},
                    {"name": "fixops:data:size", "value": data.size},
                    {"name": "fixops:data:sensitive", "value": str(data.sensitive_data).lower()},
                    {"name": "fixops:data:pii", "value": str(data.pii_present).lower()},
                ]
            }
            if data.url:
                comp["externalReferences"] = [
                    {"type": "source-distribution", "url": data.url}
                ]
            if data.hash_value:
                comp["hashes"] = [
                    {"alg": data.hash_alg.upper(), "content": data.hash_value}
                ]
            if data.license != "unknown":
                comp["licenses"] = [{"license": {"id": data.license}}]

            components.append(comp)

        # Add Library Dependencies (e.g., torch, tensorflow)
        for lib in libs:
            # Resolve name/version once with defaults so the purl stays
            # well-formed (pkg:pypi/unknown@unknown) even when keys are
            # missing, instead of the malformed pkg:pypi/None@None.
            lib_name = lib.get("name", "unknown")
            lib_version = lib.get("version", "unknown")
            comp = {
                "type": "library",
                "name": lib_name,
                "version": lib_version,
                "purl": f"pkg:pypi/{lib_name}@{lib_version}"
            }
            components.append(comp)

        return components

    def generate_hash(self, content: Union[str, bytes]) -> str:
        """Return the hex SHA-256 digest of *content* (str is UTF-8 encoded)."""
        if isinstance(content, str):
            content = content.encode("utf-8")
        return hashlib.sha256(content).hexdigest()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,171 @@ | ||
| """AI Model Governance & System Cards. | ||
|
|
||
| This module implements "System Cards" and Model Governance artifacts aligned with | ||
| ISO/IEC 42001 (AI Management Systems) and NIST AI RMF. | ||
|
|
||
| It generates human-readable and machine-parseable reports detailing: | ||
| 1. Intended Purpose & Limitations | ||
| 2. Fairness & Bias Checks | ||
| 3. Safety & Performance Metrics | ||
| 4. Human Oversight Controls | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import datetime | ||
| import json | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P3: Unused import: Prompt for AI agents |
||
| import logging | ||
| from dataclasses import dataclass, field | ||
| from typing import Any, Dict, List, Optional | ||
|
|
||
| from core.ai_bom import ModelCard | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
@dataclass
class FairnessCheck:
    """Fairness assessment results."""

    metric_name: str  # name of the fairness metric evaluated
    group_attribute: str  # e.g., "gender", "age"
    parity_difference: float  # measured disparity between groups
    threshold: float  # acceptable limit for parity_difference (informational)
    passed: bool  # caller-supplied verdict; not recomputed from threshold here
|
|
||
|
|
||
@dataclass
class SafetyEval:
    """Safety evaluation results."""

    test_name: str  # e.g., "hallucination_rate", "jailbreak_resistance"
    score: float  # measured score for the test
    threshold: float  # pass/fail cutoff the score was judged against
    passed: bool  # caller-supplied verdict; not recomputed from threshold here
    details: str  # free-text notes about the evaluation
|
|
||
|
|
||
@dataclass
class SystemCard:
    """System Card for a deployed AI system (Model + Context)."""

    system_id: str  # unique identifier for the deployed system
    system_name: str  # human-readable system name
    model_card: ModelCard  # metadata of the underlying model

    # ISO 42001 Controls
    human_oversight_measures: List[str] = field(default_factory=list)
    data_governance_policy: str = "Standard Enterprise Policy"

    # Validation Results
    fairness_checks: List[FairnessCheck] = field(default_factory=list)
    safety_evals: List[SafetyEval] = field(default_factory=list)

    # Operational Metrics
    # Timezone-aware UTC timestamp, consistent with AIBOMGenerator; the
    # previous naive datetime.now() produced ambiguous local-time stamps.
    deployment_date: str = field(
        default_factory=lambda: datetime.datetime.now(datetime.timezone.utc).isoformat()
    )
    status: str = "active"  # active, deprecated, testing

    def to_dict(self) -> Dict[str, Any]:
        """Return a machine-parseable summary of this System Card."""
        return {
            "system_id": self.system_id,
            "system_name": self.system_name,
            "model_metadata": {
                "name": self.model_card.name,
                "version": self.model_card.version,
                "type": self.model_card.model_type
            },
            "governance": {
                "human_oversight": self.human_oversight_measures,
                "data_policy": self.data_governance_policy,
                "iso_42001_alignment": True
            },
            "validation": {
                "fairness": [
                    {
                        "metric": f.metric_name,
                        "group": f.group_attribute,
                        "diff": f.parity_difference,
                        "passed": f.passed
                    }
                    for f in self.fairness_checks
                ],
                "safety": [
                    {
                        "test": s.test_name,
                        "score": s.score,
                        "passed": s.passed,
                        "details": s.details
                    }
                    for s in self.safety_evals
                ]
            },
            "status": self.status,
            "generated_at": self.deployment_date
        }

    def generate_markdown(self) -> str:
        """Generate a human-readable System Card report."""
        md = [
            f"# System Card: {self.system_name}",
            f"**ID**: {self.system_id} | **Status**: {self.status.upper()}",
            "---",
            "## 1. Model Overview",
            f"- **Model**: {self.model_card.name} (v{self.model_card.version})",
            f"- **Type**: {self.model_card.model_type}",
            f"- **Description**: {self.model_card.description}",
            "",
            "## 2. Intended Use & Limitations",
            "**Intended Use**:",
            *[f"- {use}" for use in self.model_card.intended_use],
            "",
            "**Limitations**:",
            *[f"- {limit}" for limit in self.model_card.limitations],
            "",
            "## 3. Governance & Oversight (ISO 42001)",
            "**Human Oversight Measures**:",
            *[f"- {measure}" for measure in self.human_oversight_measures],
            "",
            "## 4. Safety & Fairness Validation",
        ]

        # Render result tables only when there is evidence to show.
        if self.fairness_checks:
            md.append("### Fairness Checks")
            md.append("| Metric | Group | Parity Diff | Status |")
            md.append("|---|---|---|---|")
            for f in self.fairness_checks:
                status = "✅ PASS" if f.passed else "❌ FAIL"
                md.append(f"| {f.metric_name} | {f.group_attribute} | {f.parity_difference:.3f} | {status} |")
            md.append("")

        if self.safety_evals:
            md.append("### Safety Evaluations")
            md.append("| Test | Score | Threshold | Status |")
            md.append("|---|---|---|---|")
            for s in self.safety_evals:
                status = "✅ PASS" if s.passed else "❌ FAIL"
                md.append(f"| {s.test_name} | {s.score:.2f} | {s.threshold:.2f} | {status} |")

        return "\n".join(md)
|
|
||
|
|
||
class GovernanceEngine:
    """Orchestrates the creation and validation of System Cards."""

    def __init__(self, organization: str = "FixOps"):
        # Recorded for downstream attribution/reporting.
        self.organization = organization

    def create_system_card(
        self,
        system_name: str,
        model_card: ModelCard,
        safety_results: List[SafetyEval],
        fairness_results: List[FairnessCheck],
        oversight_measures: List[str]
    ) -> SystemCard:
        """Assemble a SystemCard from a model card plus validation evidence.

        The system_id is derived from the model name (lower-cased, spaces
        replaced by hyphens) and the model version.
        """
        slug = model_card.name.lower().replace(" ", "-")
        return SystemCard(
            system_id=f"sys-{slug}-{model_card.version}",
            system_name=system_name,
            model_card=model_card,
            safety_evals=safety_results,
            fairness_checks=fairness_results,
            human_oversight_measures=oversight_measures,
        )
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P2: Inconsistent use of `.get()` defaults will produce a malformed purl when a library dict is missing keys. The `name` and `version` fields use defaults (`"unknown"`), but the purl construction does not, resulting in `pkg:pypi/None@None` instead of `pkg:pypi/unknown@unknown`.