diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py index cd97ab042c..d10523f563 100644 --- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py @@ -18,6 +18,7 @@ from vertexai import types from google.genai import types as genai_types import pytest +import pandas as pd GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output" GENERAL_QUALITY_METRIC = types.EvaluationRunMetric( @@ -63,28 +64,46 @@ ) ), ) - +TOOL = genai_types.Tool( + function_declarations=[ + genai_types.FunctionDeclaration( + name="get_weather", + description="Get weather in a location", + parameters={ + "type": "object", + "properties": {"location": {"type": "string"}}, + }, + ) + ] +) +AGENT_INFO = types.evals.AgentInfo( + agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456", + name="agent-1", + instruction="agent-1 instruction", + tool_declarations=[TOOL], +) +DEFAULT_PROMPT_TEMPLATE = "{prompt}" +INPUT_DF_WITH_CONTEXT_AND_HISTORY = pd.DataFrame( + { + "prompt": ["prompt1", "prompt2"], + "reference": ["reference1", "reference2"], + "response": ["response1", "response2"], + "context": ["context1", "context2"], + "conversation_history": ["history1", "history2"], + } +) +CANDIDATE_NAME = "candidate_1" +MODEL_NAME = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" +EVAL_SET_NAME = "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" def test_create_eval_run_data_source_evaluation_set(client): """Tests that create_evaluation_run() creates a correctly structured EvaluationRun.""" client._api_client._http_options.api_version = "v1beta1" - tool = genai_types.Tool( - function_declarations=[ - genai_types.FunctionDeclaration( - name="get_weather", - description="Get weather in a location", - parameters={ - "type": "object", - "properties": 
{"location": {"type": "string"}}, - }, - ) - ] - ) evaluation_run = client.evals.create_evaluation_run( name="test4", display_name="test4", dataset=types.EvaluationRunDataSource( - evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" + evaluation_set=EVAL_SET_NAME ), dest=GCS_DEST, metrics=[ @@ -94,21 +113,14 @@ def test_create_eval_run_data_source_evaluation_set(client): EXACT_MATCH_COMPUTATION_BASED_METRIC, BLEU_COMPUTATION_BASED_METRIC, ], - agent_info=types.evals.AgentInfo( - agent_resource_name="project/123/locations/us-central1/reasoningEngines/456", - name="agent-1", - instruction="agent-1 instruction", - tool_declarations=[tool], - ), + agent_info=AGENT_INFO, labels={"label1": "value1"}, ) assert isinstance(evaluation_run, types.EvaluationRun) assert evaluation_run.display_name == "test4" assert evaluation_run.state == types.EvaluationRunState.PENDING assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) - assert evaluation_run.data_source.evaluation_set == ( - "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" - ) + assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME assert evaluation_run.evaluation_config == types.EvaluationRunConfig( output_config=genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) @@ -122,13 +134,13 @@ def test_create_eval_run_data_source_evaluation_set(client): ], ) assert evaluation_run.inference_configs[ - "agent-1" + AGENT_INFO.name ] == types.EvaluationRunInferenceConfig( agent_config=types.EvaluationRunAgentConfig( developer_instruction=genai_types.Content( parts=[genai_types.Part(text="agent-1 instruction")] ), - tools=[tool], + tools=[TOOL], ) ) assert evaluation_run.labels == { @@ -190,13 +202,16 @@ def test_create_eval_run_with_inference_configs(client): """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs.""" 
client._api_client._http_options.api_version = "v1beta1" inference_config = types.EvaluationRunInferenceConfig( - model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" + model=MODEL_NAME, + prompt_template=types.EvaluationRunPromptTemplate( + prompt_template="test prompt template" + ), ) evaluation_run = client.evals.create_evaluation_run( name="test_inference_config", display_name="test_inference_config", dataset=types.EvaluationRunDataSource( - evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" + evaluation_set=EVAL_SET_NAME ), dest=GCS_DEST, metrics=[GENERAL_QUALITY_METRIC], @@ -207,9 +222,7 @@ def test_create_eval_run_with_inference_configs(client): assert evaluation_run.display_name == "test_inference_config" assert evaluation_run.state == types.EvaluationRunState.PENDING assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) - assert evaluation_run.data_source.evaluation_set == ( - "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" - ) + assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME assert evaluation_run.evaluation_config == types.EvaluationRunConfig( output_config=genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) @@ -223,9 +236,11 @@ def test_create_eval_run_with_inference_configs(client): assert evaluation_run.error is None -# Test fails in replay mode because of UUID generation mismatch. +# Dataframe tests fail in replay mode because of UUID generation mismatch. # def test_create_eval_run_data_source_evaluation_dataset(client): -# """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset.""" +# """Tests that create_evaluation_run() creates a correctly structured +# EvaluationRun with EvaluationDataset. 
+# """ # input_df = pd.DataFrame( # { # "prompt": ["prompt1", "prompt2"], @@ -275,7 +290,7 @@ def test_create_eval_run_with_inference_configs(client): # name="test6", # display_name="test6", # dataset=types.EvaluationDataset( -# candidate_name="candidate_1", +# candidate_name=CANDIDATE_NAME, # eval_dataset_df=input_df, # ), # dest=GCS_DEST, @@ -319,6 +334,196 @@ def test_create_eval_run_with_inference_configs(client): # assert evaluation_run.error is None +# def test_create_eval_run_data_source_evaluation_dataset_with_inference_configs_and_prompt_template_data( +# client, +# ): +# """Tests that create_evaluation_run() creates a correctly structured +# EvaluationRun with EvaluationDataset and inference_configs. +# Prompt template data is inferred from the dataset and a default prompt +# template should be used. +# """ +# evaluation_run = client.evals.create_evaluation_run( +# name="test9", +# display_name="test9", +# dataset=types.EvaluationDataset( +# candidate_name=CANDIDATE_NAME, +# eval_dataset_df=INPUT_DF_WITH_CONTEXT_AND_HISTORY, +# ), +# dest=GCS_DEST, +# metrics=[GENERAL_QUALITY_METRIC], +# inference_configs={ +# CANDIDATE_NAME: types.EvaluationRunInferenceConfig( +# model=MODEL_NAME, +# ) +# }, +# ) +# assert isinstance(evaluation_run, types.EvaluationRun) +# assert evaluation_run.display_name == "test9" +# assert evaluation_run.state == types.EvaluationRunState.PENDING +# assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) +# assert evaluation_run.inference_configs[ +# CANDIDATE_NAME +# ] == types.EvaluationRunInferenceConfig( +# model=MODEL_NAME, +# prompt_template=types.EvaluationRunPromptTemplate( +# prompt_template=DEFAULT_PROMPT_TEMPLATE +# ), +# ) +# # Check evaluation set +# assert evaluation_run.data_source.evaluation_set +# eval_set = client.evals.get_evaluation_set( +# name=evaluation_run.data_source.evaluation_set +# ) +# assert len(eval_set.evaluation_items) == 2 +# # Check evaluation items +# for i, eval_item_name in 
enumerate(eval_set.evaluation_items): +# eval_item = client.evals.get_evaluation_item(name=eval_item_name) +# assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "prompt" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["prompt"] +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "context" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["context"] +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "conversation_history" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=( +# INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i][ +# "conversation_history" +# ] +# ) +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.candidate_responses[0].text +# == INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["response"] +# ) +# assert evaluation_run.error is None + + +# def test_create_eval_run_data_source_evaluation_dataset_with_agent_info_and_prompt_template_data( +# client, +# ): +# """Tests that create_evaluation_run() creates a correctly structured +# EvaluationRun with EvaluationDataset and agent_info. +# Prompt template data is inferred from the dataset and a default prompt +# template should be used. 
+# """ +# evaluation_run = client.evals.create_evaluation_run( +# name="test9", +# display_name="test9", +# dataset=types.EvaluationDataset( +# candidate_name=CANDIDATE_NAME, +# eval_dataset_df=INPUT_DF_WITH_CONTEXT_AND_HISTORY, +# ), +# dest=GCS_DEST, +# metrics=[GENERAL_QUALITY_METRIC], +# agent_info=AGENT_INFO, +# ) +# assert isinstance(evaluation_run, types.EvaluationRun) +# assert evaluation_run.display_name == "test9" +# assert evaluation_run.state == types.EvaluationRunState.PENDING +# assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) +# assert evaluation_run.inference_configs[ +# AGENT_INFO.name +# ] == types.EvaluationRunInferenceConfig( +# agent_config=types.EvaluationRunAgentConfig( +# developer_instruction=genai_types.Content( +# parts=[genai_types.Part(text=AGENT_INFO.instruction)] +# ), +# tools=[TOOL], +# ), +# prompt_template=types.EvaluationRunPromptTemplate( +# prompt_template=DEFAULT_PROMPT_TEMPLATE +# ), +# ) +# # Check evaluation set +# assert evaluation_run.data_source.evaluation_set +# eval_set = client.evals.get_evaluation_set( +# name=evaluation_run.data_source.evaluation_set +# ) +# assert len(eval_set.evaluation_items) == 2 +# # Check evaluation items +# for i, eval_item_name in enumerate(eval_set.evaluation_items): +# eval_item = client.evals.get_evaluation_item(name=eval_item_name) +# assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "prompt" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["prompt"] +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "context" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["context"] +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# 
eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "conversation_history" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=( +# INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i][ +# "conversation_history" +# ] +# ) +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.candidate_responses[0].text +# == INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["response"] +# ) +# assert evaluation_run.error is None + pytest_plugins = ("pytest_asyncio",) @@ -371,13 +576,16 @@ async def test_create_eval_run_async_with_inference_configs(client): """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously.""" client._api_client._http_options.api_version = "v1beta1" inference_config = types.EvaluationRunInferenceConfig( - model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" + model=MODEL_NAME, + prompt_template=types.EvaluationRunPromptTemplate( + prompt_template="Test the {prompt}" + ), ) evaluation_run = await client.aio.evals.create_evaluation_run( name="test_inference_config_async", display_name="test_inference_config_async", dataset=types.EvaluationRunDataSource( - evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" + evaluation_set=EVAL_SET_NAME ), dest=GCS_DEST, metrics=[GENERAL_QUALITY_METRIC], @@ -388,9 +596,7 @@ async def test_create_eval_run_async_with_inference_configs(client): assert evaluation_run.display_name == "test_inference_config_async" assert evaluation_run.state == types.EvaluationRunState.PENDING assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) - assert evaluation_run.data_source.evaluation_set == ( - "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" - ) + assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME assert evaluation_run.evaluation_config == types.EvaluationRunConfig( 
output_config=genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index 0bc28994ed..d50b8c968a 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -277,13 +277,51 @@ def _resolve_dataset( return dataset +def _get_default_prompt_template( + api_client: BaseApiClient, + inference_config: types.EvaluationRunInferenceConfigOrDict, + dataset: types.EvaluationRunDataSource, +) -> Any: + """Resolves prompt template data for the evaluation run.""" + if isinstance(inference_config, dict): + if inference_config.get("prompt_template"): + return inference_config["prompt_template"] + elif inference_config.prompt_template: + return inference_config.prompt_template + + try: + evals_module = evals.Evals(api_client_=api_client) + eval_set = evals_module.get_evaluation_set(name=dataset.evaluation_set) + if eval_set and eval_set.evaluation_items: + eval_item = evals_module.get_evaluation_item( + name=eval_set.evaluation_items[0] + ) + if ( + eval_item + and eval_item.evaluation_request + and eval_item.evaluation_request.prompt + and eval_item.evaluation_request.prompt.prompt_template_data + ): + if ( + "prompt" + in eval_item.evaluation_request.prompt.prompt_template_data.values + ): + return "{prompt}" # Default prompt template + except Exception as e: + logger.warning("Failed to get prompt template from evaluation set: %s", e) + return None + + def _resolve_inference_configs( + api_client: BaseApiClient, + dataset: types.EvaluationRunDataSource, inference_configs: Optional[ dict[str, types.EvaluationRunInferenceConfigOrDict] ] = None, agent_info_pydantic: Optional[types.evals.AgentInfo] = None, ) -> Optional[dict[str, types.EvaluationRunInferenceConfigOrDict]]: """Resolves inference configs for the evaluation run.""" + # Resolve agent config if agent_info_pydantic and agent_info_pydantic.name: inference_configs = {} 
inference_configs[agent_info_pydantic.name] = ( @@ -296,6 +334,34 @@ def _resolve_inference_configs( ) ) ) + # Resolve prompt template data + if inference_configs: + for inference_config in inference_configs.values(): + prompt_template_val = ( + inference_config.get("prompt_template") + if isinstance(inference_config, dict) + else inference_config.prompt_template + ) + if not prompt_template_val: + default_prompt_template = _get_default_prompt_template( + api_client, inference_config, dataset + ) + if default_prompt_template: + prompt_template_to_set = default_prompt_template + if not isinstance( + default_prompt_template, types.EvaluationRunPromptTemplate + ): + prompt_template_to_set = types.EvaluationRunPromptTemplate( + prompt_template=default_prompt_template + ) + if isinstance(inference_config, dict): + inference_config[ + "prompt_template" + ] = prompt_template_to_set.model_dump(exclude_none=True) + else: + inference_config.prompt_template = ( + prompt_template_to_set.model_dump(exclude_none=True) + ) return inference_configs @@ -324,8 +390,7 @@ def _get_candidate_name( and dataset.candidate_name != agent_info_pydantic.name ): logger.warning( - "Evaluation dataset candidate_name and agent_info.name are different." - " Please make sure this is intended." + "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended." ) elif dataset.candidate_name is None and agent_info_pydantic: return agent_info_pydantic.name @@ -1944,6 +2009,21 @@ def _object_to_dict(obj: Any) -> Union[dict[str, Any], Any]: return result +def _get_content(row: dict[str, Any], column: str): + if isinstance(row[column], str): + return genai_types.Content( + parts=[genai_types.Part(text=row[column])], + role=_evals_constant.USER_AUTHOR, + ) + elif isinstance(row[column], genai_types.Content): + return row[column] + else: + raise ValueError( + f"{column} must be a string or a Content object. " + f"Got {type(row[column])}." 
+            ) + + def _create_evaluation_set_from_dataframe( api_client: BaseApiClient, gcs_dest_prefix: str, @@ -1962,13 +2042,28 @@ for event in row[_evals_constant.INTERMEDIATE_EVENTS]: if CONTENT in event: intermediate_events.append(event[CONTENT]) +        if _evals_constant.CONTEXT in row or _evals_constant.HISTORY in row: +            values = {} +            if _evals_constant.CONTEXT in row: +                values[_evals_constant.CONTEXT] = _get_content( +                    row, _evals_constant.CONTEXT +                ) +            if _evals_constant.HISTORY in row: +                values[_evals_constant.HISTORY] = _get_content( +                    row, _evals_constant.HISTORY +                ) +            if _evals_constant.PROMPT in row: +                values[_evals_constant.PROMPT] = _get_content( +                    row, _evals_constant.PROMPT +                ) +            prompt = types.EvaluationPrompt( +                prompt_template_data=types.PromptTemplateData(values=values) +            ) +        else: +            prompt = types.EvaluationPrompt(text=row[_evals_constant.PROMPT]) if _evals_constant.PROMPT in row else None eval_item_requests.append( types.EvaluationItemRequest( - prompt=( - types.EvaluationPrompt(text=row[_evals_constant.PROMPT]) - if _evals_constant.PROMPT in row - else None - ), + prompt=prompt if prompt else None, golden_response=( types.CandidateResponse(text=row[_evals_constant.REFERENCE]) if _evals_constant.REFERENCE in row diff --git a/vertexai/_genai/_evals_constant.py b/vertexai/_genai/_evals_constant.py index 6fc27d94e0..847140dc5c 100644 --- a/vertexai/_genai/_evals_constant.py +++ b/vertexai/_genai/_evals_constant.py @@ -53,6 +53,7 @@ CONTENT = "content" PARTS = "parts" USER_AUTHOR = "user" +HISTORY = "conversation_history" COMMON_DATASET_COLUMNS = frozenset( { @@ -61,5 +62,6 @@ REFERENCE, SESSION_INPUT, CONTEXT, + HISTORY, } ) diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index dba63496fb..73922955cd 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -253,6 +253,9 @@ def _EvaluationRunConfig_from_vertex( if getv(from_object, ["autoraterConfig"]) is not None: setv(to_object,
["autorater_config"], getv(from_object, ["autoraterConfig"])) + if getv(from_object, ["promptTemplate"]) is not None: + setv(to_object, ["prompt_template"], getv(from_object, ["promptTemplate"])) + return to_object @@ -277,6 +280,9 @@ def _EvaluationRunConfig_to_vertex( if getv(from_object, ["autorater_config"]) is not None: setv(to_object, ["autoraterConfig"], getv(from_object, ["autorater_config"])) + if getv(from_object, ["prompt_template"]) is not None: + setv(to_object, ["promptTemplate"], getv(from_object, ["prompt_template"])) + return to_object @@ -1643,7 +1649,7 @@ def create_evaluation_run( output_config=output_config, metrics=resolved_metrics ) resolved_inference_configs = _evals_common._resolve_inference_configs( - inference_configs, agent_info_pydantic + self._api_client, resolved_dataset, inference_configs, agent_info_pydantic ) resolved_labels = _evals_common._add_evaluation_run_labels( labels, agent_info_pydantic @@ -2478,7 +2484,7 @@ async def create_evaluation_run( output_config=output_config, metrics=resolved_metrics ) resolved_inference_configs = _evals_common._resolve_inference_configs( - inference_configs, agent_info_pydantic + self._api_client, resolved_dataset, inference_configs, agent_info_pydantic ) resolved_labels = _evals_common._add_evaluation_run_labels( labels, agent_info_pydantic diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py index e1d4e73d23..d1026d84d7 100644 --- a/vertexai/_genai/types/__init__.py +++ b/vertexai/_genai/types/__init__.py @@ -386,6 +386,9 @@ from .common import EvaluationRunMetricDict from .common import EvaluationRunMetricOrDict from .common import EvaluationRunOrDict +from .common import EvaluationRunPromptTemplate +from .common import EvaluationRunPromptTemplateDict +from .common import EvaluationRunPromptTemplateOrDict from .common import EvaluationRunResults from .common import EvaluationRunResultsDict from .common import EvaluationRunResultsOrDict @@ -1285,6 +1288,9 @@ 
"EvaluationRunMetric", "EvaluationRunMetricDict", "EvaluationRunMetricOrDict", + "EvaluationRunPromptTemplate", + "EvaluationRunPromptTemplateDict", + "EvaluationRunPromptTemplateOrDict", "EvaluationRunConfig", "EvaluationRunConfigDict", "EvaluationRunConfigOrDict", diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index 0903178153..18d3576a3f 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -2287,6 +2287,38 @@ class EvaluationRunMetricDict(TypedDict, total=False): EvaluationRunMetricOrDict = Union[EvaluationRunMetric, EvaluationRunMetricDict] +class EvaluationRunPromptTemplate(_common.BaseModel): + """Prompt template used for inference.""" + + prompt_template: Optional[str] = Field( + default=None, + description="""Inline prompt template. Template variables should be in the format + "{var_name}".""", + ) + gcs_uri: Optional[str] = Field( + default=None, + description="""Prompt template stored in Cloud Storage. Format: + "gs://my-bucket/file-name.txt".""", + ) + + +class EvaluationRunPromptTemplateDict(TypedDict, total=False): + """Prompt template used for inference.""" + + prompt_template: Optional[str] + """Inline prompt template. Template variables should be in the format + "{var_name}".""" + + gcs_uri: Optional[str] + """Prompt template stored in Cloud Storage. 
Format: + "gs://my-bucket/file-name.txt".""" + + +EvaluationRunPromptTemplateOrDict = Union[ + EvaluationRunPromptTemplate, EvaluationRunPromptTemplateDict +] + + class EvaluationRunConfig(_common.BaseModel): """The evaluation configuration used for the evaluation run.""" @@ -2300,6 +2332,9 @@ class EvaluationRunConfig(_common.BaseModel): autorater_config: Optional[genai_types.AutoraterConfig] = Field( default=None, description="""The autorater config for the evaluation run.""" ) + prompt_template: Optional[EvaluationRunPromptTemplate] = Field( + default=None, description="""The prompt template used for inference.""" + ) class EvaluationRunConfigDict(TypedDict, total=False): @@ -2314,6 +2349,9 @@ class EvaluationRunConfigDict(TypedDict, total=False): autorater_config: Optional[genai_types.AutoraterConfigDict] """The autorater config for the evaluation run.""" + prompt_template: Optional[EvaluationRunPromptTemplateDict] + """The prompt template used for inference.""" + EvaluationRunConfigOrDict = Union[EvaluationRunConfig, EvaluationRunConfigDict] @@ -3038,6 +3076,9 @@ class EvaluationRunInferenceConfig(_common.BaseModel): Contains configuration for a user simulator that uses an LLM to generate messages on behalf of the user.""", ) + prompt_template: Optional[EvaluationRunPromptTemplate] = Field( + default=None, description="""The prompt template used for inference.""" + ) class EvaluationRunInferenceConfigDict(TypedDict, total=False): @@ -3057,6 +3098,9 @@ class EvaluationRunInferenceConfigDict(TypedDict, total=False): Contains configuration for a user simulator that uses an LLM to generate messages on behalf of the user.""" + prompt_template: Optional[EvaluationRunPromptTemplateDict] + """The prompt template used for inference.""" + EvaluationRunInferenceConfigOrDict = Union[ EvaluationRunInferenceConfig, EvaluationRunInferenceConfigDict