Add computation-based metric support to the GenAI evals types and converters.

Touches four files: the create-evaluation-run replay test, the UnifiedMetric
to/from Vertex converters in evals.py, the public exports in
types/__init__.py, and the type definitions in types/common.py.

diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
index d130bac190..b31c2b2d62 100644
--- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
+++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -46,6 +46,23 @@
         )
     ),
 )
+EXACT_MATCH_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric(
+    metric="exact_match",
+    metric_config=types.UnifiedMetric(
+        computation_based_metric_spec=types.ComputationBasedMetricSpec(
+            type=types.ComputationBasedMetricType.EXACT_MATCH,
+        )
+    ),
+)
+BLEU_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric(
+    metric="exact_match_2",
+    metric_config=types.UnifiedMetric(
+        computation_based_metric_spec=types.ComputationBasedMetricSpec(
+            type=types.ComputationBasedMetricType.BLEU,
+            parameters={"use_effective_order": True},
+        )
+    ),
+)
 
 
 def test_create_eval_run_data_source_evaluation_set(client):
@@ -74,6 +91,8 @@ def test_create_eval_run_data_source_evaluation_set(client):
             UNIVERSAL_AR_METRIC,
             types.RubricMetric.FINAL_RESPONSE_QUALITY,
             LLM_METRIC,
+            EXACT_MATCH_COMPUTATION_BASED_METRIC,
+            BLEU_COMPUTATION_BASED_METRIC,
         ],
         agent_info=types.evals.AgentInfo(
             agent_resource_name="project/123/locations/us-central1/reasoningEngines/456",
@@ -94,7 +113,13 @@ def test_create_eval_run_data_source_evaluation_set(client):
         output_config=genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
         ),
-        metrics=[UNIVERSAL_AR_METRIC, FINAL_RESPONSE_QUALITY_METRIC, LLM_METRIC],
+        metrics=[
+            UNIVERSAL_AR_METRIC,
+            FINAL_RESPONSE_QUALITY_METRIC,
+            LLM_METRIC,
+            EXACT_MATCH_COMPUTATION_BASED_METRIC,
+            BLEU_COMPUTATION_BASED_METRIC,
+        ],
     )
     assert evaluation_run.inference_configs[
         "agent-1"
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index d968b4e7c3..48085900db 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -577,6 +577,13 @@ def _UnifiedMetric_from_vertex(
             getv(from_object, ["predefinedMetricSpec"]),
         )
 
+    if getv(from_object, ["computationBasedMetricSpec"]) is not None:
+        setv(
+            to_object,
+            ["computation_based_metric_spec"],
+            getv(from_object, ["computationBasedMetricSpec"]),
+        )
+
     return to_object
 
 
@@ -621,6 +628,13 @@ def _UnifiedMetric_to_vertex(
             getv(from_object, ["predefined_metric_spec"]),
         )
 
+    if getv(from_object, ["computation_based_metric_spec"]) is not None:
+        setv(
+            to_object,
+            ["computationBasedMetricSpec"],
+            getv(from_object, ["computation_based_metric_spec"]),
+        )
+
     return to_object
 
 
diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py
index 939f04301a..8f08686b6a 100644
--- a/vertexai/_genai/types/__init__.py
+++ b/vertexai/_genai/types/__init__.py
@@ -174,6 +174,10 @@
 from .common import CometResult
 from .common import CometResultDict
 from .common import CometResultOrDict
+from .common import ComputationBasedMetricSpec
+from .common import ComputationBasedMetricSpecDict
+from .common import ComputationBasedMetricSpecOrDict
+from .common import ComputationBasedMetricType
 from .common import ContainerSpec
 from .common import ContainerSpecDict
 from .common import ContainerSpecOrDict
@@ -1067,6 +1071,9 @@
     "CustomCodeExecutionSpec",
     "CustomCodeExecutionSpecDict",
     "CustomCodeExecutionSpecOrDict",
+    "ComputationBasedMetricSpec",
+    "ComputationBasedMetricSpecDict",
+    "ComputationBasedMetricSpecOrDict",
     "UnifiedMetric",
     "UnifiedMetricDict",
     "UnifiedMetricOrDict",
@@ -1915,6 +1922,7 @@
     "EvaluationItemType",
     "SamplingMethod",
     "RubricContentType",
+    "ComputationBasedMetricType",
     "EvaluationRunState",
     "OptimizeTarget",
     "GenerateMemoriesResponseGeneratedMemoryAction",
diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py
index 7c48303729..e375ee7fc6 100644
--- a/vertexai/_genai/types/common.py
+++ b/vertexai/_genai/types/common.py
@@ -311,6 +311,21 @@ class RubricContentType(_common.CaseInSensitiveEnum):
     """Generate rubrics in a unit test format."""
 
 
+class ComputationBasedMetricType(_common.CaseInSensitiveEnum):
+    """Represents the type of the computation based metric."""
+
+    COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED = (
+        "COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED"
+    )
+    """Computation based metric type is unspecified."""
+    EXACT_MATCH = "EXACT_MATCH"
+    """Exact match metric."""
+    BLEU = "BLEU"
+    """BLEU metric."""
+    ROUGE = "ROUGE"
+    """ROUGE metric."""
+
+
 class EvaluationRunState(_common.CaseInSensitiveEnum):
     """Represents the state of an evaluation run."""
 
@@ -943,6 +958,33 @@ def evaluate(instance: dict[str, Any]) -> float:
 ]
 
 
+class ComputationBasedMetricSpec(_common.BaseModel):
+    """Specification for a computation based metric."""
+
+    type: Optional[ComputationBasedMetricType] = Field(
+        default=None, description="""The type of the computation based metric."""
+    )
+    parameters: Optional[dict[str, Any]] = Field(
+        default=None,
+        description="""A map of parameters for the metric, e.g. {"rouge_type": "rougeL"}.""",
+    )
+
+
+class ComputationBasedMetricSpecDict(TypedDict, total=False):
+    """Specification for a computation based metric."""
+
+    type: Optional[ComputationBasedMetricType]
+    """The type of the computation based metric."""
+
+    parameters: Optional[dict[str, Any]]
+    """A map of parameters for the metric, e.g. {"rouge_type": "rougeL"}."""
+
+
+ComputationBasedMetricSpecOrDict = Union[
+    ComputationBasedMetricSpec, ComputationBasedMetricSpecDict
+]
+
+
 class UnifiedMetric(_common.BaseModel):
     """The unified metric used for evaluation."""
 
@@ -964,6 +1006,9 @@ class UnifiedMetric(_common.BaseModel):
     predefined_metric_spec: Optional[PredefinedMetricSpec] = Field(
         default=None, description="""The spec for a pre-defined metric."""
     )
+    computation_based_metric_spec: Optional[ComputationBasedMetricSpec] = Field(
+        default=None, description="""The spec for a computation based metric."""
+    )
 
 
 class UnifiedMetricDict(TypedDict, total=False):
@@ -987,6 +1032,9 @@ class UnifiedMetricDict(TypedDict, total=False):
     predefined_metric_spec: Optional[PredefinedMetricSpecDict]
     """The spec for a pre-defined metric."""
 
+    computation_based_metric_spec: Optional[ComputationBasedMetricSpecDict]
+    """The spec for a computation based metric."""
+
 
 UnifiedMetricOrDict = Union[UnifiedMetric, UnifiedMetricDict]
 