diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py index cd97ab042c..d10523f563 100644 --- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py @@ -18,6 +18,7 @@ from vertexai import types from google.genai import types as genai_types import pytest +import pandas as pd GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output" GENERAL_QUALITY_METRIC = types.EvaluationRunMetric( @@ -63,28 +64,46 @@ ) ), ) - +TOOL = genai_types.Tool( + function_declarations=[ + genai_types.FunctionDeclaration( + name="get_weather", + description="Get weather in a location", + parameters={ + "type": "object", + "properties": {"location": {"type": "string"}}, + }, + ) + ] +) +AGENT_INFO = types.evals.AgentInfo( + agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456", + name="agent-1", + instruction="agent-1 instruction", + tool_declarations=[TOOL], +) +DEFAULT_PROMPT_TEMPLATE = "{prompt}" +INPUT_DF_WITH_CONTEXT_AND_HISTORY = pd.DataFrame( + { + "prompt": ["prompt1", "prompt2"], + "reference": ["reference1", "reference2"], + "response": ["response1", "response2"], + "context": ["context1", "context2"], + "conversation_history": ["history1", "history2"], + } +) +CANDIDATE_NAME = "candidate_1" +MODEL_NAME = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" +EVAL_SET_NAME = "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" def test_create_eval_run_data_source_evaluation_set(client): """Tests that create_evaluation_run() creates a correctly structured EvaluationRun.""" client._api_client._http_options.api_version = "v1beta1" - tool = genai_types.Tool( - function_declarations=[ - genai_types.FunctionDeclaration( - name="get_weather", - description="Get weather in a location", - parameters={ - "type": "object", - "properties": 
{"location": {"type": "string"}}, - }, - ) - ] - ) evaluation_run = client.evals.create_evaluation_run( name="test4", display_name="test4", dataset=types.EvaluationRunDataSource( - evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" + evaluation_set=EVAL_SET_NAME ), dest=GCS_DEST, metrics=[ @@ -94,21 +113,14 @@ def test_create_eval_run_data_source_evaluation_set(client): EXACT_MATCH_COMPUTATION_BASED_METRIC, BLEU_COMPUTATION_BASED_METRIC, ], - agent_info=types.evals.AgentInfo( - agent_resource_name="project/123/locations/us-central1/reasoningEngines/456", - name="agent-1", - instruction="agent-1 instruction", - tool_declarations=[tool], - ), + agent_info=AGENT_INFO, labels={"label1": "value1"}, ) assert isinstance(evaluation_run, types.EvaluationRun) assert evaluation_run.display_name == "test4" assert evaluation_run.state == types.EvaluationRunState.PENDING assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) - assert evaluation_run.data_source.evaluation_set == ( - "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" - ) + assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME assert evaluation_run.evaluation_config == types.EvaluationRunConfig( output_config=genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) @@ -122,13 +134,13 @@ def test_create_eval_run_data_source_evaluation_set(client): ], ) assert evaluation_run.inference_configs[ - "agent-1" + AGENT_INFO.name ] == types.EvaluationRunInferenceConfig( agent_config=types.EvaluationRunAgentConfig( developer_instruction=genai_types.Content( parts=[genai_types.Part(text="agent-1 instruction")] ), - tools=[tool], + tools=[TOOL], ) ) assert evaluation_run.labels == { @@ -190,13 +202,16 @@ def test_create_eval_run_with_inference_configs(client): """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs.""" 
client._api_client._http_options.api_version = "v1beta1" inference_config = types.EvaluationRunInferenceConfig( - model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" + model=MODEL_NAME, + prompt_template=types.EvaluationRunPromptTemplate( + prompt_template="test prompt template" + ), ) evaluation_run = client.evals.create_evaluation_run( name="test_inference_config", display_name="test_inference_config", dataset=types.EvaluationRunDataSource( - evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" + evaluation_set=EVAL_SET_NAME ), dest=GCS_DEST, metrics=[GENERAL_QUALITY_METRIC], @@ -207,9 +222,7 @@ def test_create_eval_run_with_inference_configs(client): assert evaluation_run.display_name == "test_inference_config" assert evaluation_run.state == types.EvaluationRunState.PENDING assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) - assert evaluation_run.data_source.evaluation_set == ( - "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" - ) + assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME assert evaluation_run.evaluation_config == types.EvaluationRunConfig( output_config=genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) @@ -223,9 +236,11 @@ def test_create_eval_run_with_inference_configs(client): assert evaluation_run.error is None -# Test fails in replay mode because of UUID generation mismatch. +# Dataframe tests fail in replay mode because of UUID generation mismatch. # def test_create_eval_run_data_source_evaluation_dataset(client): -# """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset.""" +# """Tests that create_evaluation_run() creates a correctly structured +# EvaluationRun with EvaluationDataset. 
+# """ # input_df = pd.DataFrame( # { # "prompt": ["prompt1", "prompt2"], @@ -275,7 +290,7 @@ def test_create_eval_run_with_inference_configs(client): # name="test6", # display_name="test6", # dataset=types.EvaluationDataset( -# candidate_name="candidate_1", +# candidate_name=CANDIDATE_NAME, # eval_dataset_df=input_df, # ), # dest=GCS_DEST, @@ -319,6 +334,196 @@ def test_create_eval_run_with_inference_configs(client): # assert evaluation_run.error is None +# def test_create_eval_run_data_source_evaluation_dataset_with_inference_configs_and_prompt_template_data( +# client, +# ): +# """Tests that create_evaluation_run() creates a correctly structured +# EvaluationRun with EvaluationDataset and inference_configs. +# Prompt template data is inferred from the dataset and a default prompt +# template should be used. +# """ +# evaluation_run = client.evals.create_evaluation_run( +# name="test9", +# display_name="test9", +# dataset=types.EvaluationDataset( +# candidate_name=CANDIDATE_NAME, +# eval_dataset_df=INPUT_DF_WITH_CONTEXT_AND_HISTORY, +# ), +# dest=GCS_DEST, +# metrics=[GENERAL_QUALITY_METRIC], +# inference_configs={ +# CANDIDATE_NAME: types.EvaluationRunInferenceConfig( +# model=MODEL_NAME, +# ) +# }, +# ) +# assert isinstance(evaluation_run, types.EvaluationRun) +# assert evaluation_run.display_name == "test9" +# assert evaluation_run.state == types.EvaluationRunState.PENDING +# assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) +# assert evaluation_run.inference_configs[ +# CANDIDATE_NAME +# ] == types.EvaluationRunInferenceConfig( +# model=MODEL_NAME, +# prompt_template=types.EvaluationRunPromptTemplate( +# prompt_template=DEFAULT_PROMPT_TEMPLATE +# ), +# ) +# # Check evaluation set +# assert evaluation_run.data_source.evaluation_set +# eval_set = client.evals.get_evaluation_set( +# name=evaluation_run.data_source.evaluation_set +# ) +# assert len(eval_set.evaluation_items) == 2 +# # Check evaluation items +# for i, eval_item_name in 
enumerate(eval_set.evaluation_items): +# eval_item = client.evals.get_evaluation_item(name=eval_item_name) +# assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "prompt" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["prompt"] +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "context" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["context"] +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "conversation_history" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=( +# INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i][ +# "conversation_history" +# ] +# ) +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.candidate_responses[0].text +# == INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["response"] +# ) +# assert evaluation_run.error is None + + +# def test_create_eval_run_data_source_evaluation_dataset_with_agent_info_and_prompt_template_data( +# client, +# ): +# """Tests that create_evaluation_run() creates a correctly structured +# EvaluationRun with EvaluationDataset and agent_info. +# Prompt template data is inferred from the dataset and a default prompt +# template should be used. 
+# """ +# evaluation_run = client.evals.create_evaluation_run( +# name="test9", +# display_name="test9", +# dataset=types.EvaluationDataset( +# candidate_name=CANDIDATE_NAME, +# eval_dataset_df=INPUT_DF_WITH_CONTEXT_AND_HISTORY, +# ), +# dest=GCS_DEST, +# metrics=[GENERAL_QUALITY_METRIC], +# agent_info=AGENT_INFO, +# ) +# assert isinstance(evaluation_run, types.EvaluationRun) +# assert evaluation_run.display_name == "test9" +# assert evaluation_run.state == types.EvaluationRunState.PENDING +# assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) +# assert evaluation_run.inference_configs[ +# AGENT_INFO.name +# ] == types.EvaluationRunInferenceConfig( +# agent_config=types.EvaluationRunAgentConfig( +# developer_instruction=genai_types.Content( +# parts=[genai_types.Part(text=AGENT_INFO.instruction)] +# ), +# tools=[TOOL], +# ), +# prompt_template=types.EvaluationRunPromptTemplate( +# prompt_template=DEFAULT_PROMPT_TEMPLATE +# ), +# ) +# # Check evaluation set +# assert evaluation_run.data_source.evaluation_set +# eval_set = client.evals.get_evaluation_set( +# name=evaluation_run.data_source.evaluation_set +# ) +# assert len(eval_set.evaluation_items) == 2 +# # Check evaluation items +# for i, eval_item_name in enumerate(eval_set.evaluation_items): +# eval_item = client.evals.get_evaluation_item(name=eval_item_name) +# assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "prompt" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["prompt"] +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "context" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["context"] +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# 
eval_item.evaluation_request.prompt.prompt_template_data.values[ +# "conversation_history" +# ] +# == genai_types.Content( +# parts=[ +# genai_types.Part( +# text=( +# INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i][ +# "conversation_history" +# ] +# ) +# ) +# ], +# role="user", +# ) +# ) +# assert ( +# eval_item.evaluation_request.candidate_responses[0].text +# == INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["response"] +# ) +# assert evaluation_run.error is None + pytest_plugins = ("pytest_asyncio",) @@ -371,13 +576,16 @@ async def test_create_eval_run_async_with_inference_configs(client): """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously.""" client._api_client._http_options.api_version = "v1beta1" inference_config = types.EvaluationRunInferenceConfig( - model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" + model=MODEL_NAME, + prompt_template=types.EvaluationRunPromptTemplate( + prompt_template="Test the {prompt}" + ), ) evaluation_run = await client.aio.evals.create_evaluation_run( name="test_inference_config_async", display_name="test_inference_config_async", dataset=types.EvaluationRunDataSource( - evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" + evaluation_set=EVAL_SET_NAME ), dest=GCS_DEST, metrics=[GENERAL_QUALITY_METRIC], @@ -388,9 +596,7 @@ async def test_create_eval_run_async_with_inference_configs(client): assert evaluation_run.display_name == "test_inference_config_async" assert evaluation_run.state == types.EvaluationRunState.PENDING assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) - assert evaluation_run.data_source.evaluation_set == ( - "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" - ) + assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME assert evaluation_run.evaluation_config == types.EvaluationRunConfig( 
output_config=genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index 0bc28994ed..d50b8c968a 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -277,13 +277,51 @@ def _resolve_dataset( return dataset +def _get_default_prompt_template( + api_client: BaseApiClient, + inference_config: types.EvaluationRunInferenceConfigOrDict, + dataset: types.EvaluationRunDataSource, +) -> Any: + """Resolves prompt template data for the evaluation run.""" + if isinstance(inference_config, dict): + if inference_config.get("prompt_template"): + return inference_config["prompt_template"] + elif inference_config.prompt_template: + return inference_config.prompt_template + + try: + evals_module = evals.Evals(api_client_=api_client) + eval_set = evals_module.get_evaluation_set(name=dataset.evaluation_set) + if eval_set and eval_set.evaluation_items: + eval_item = evals_module.get_evaluation_item( + name=eval_set.evaluation_items[0] + ) + if ( + eval_item + and eval_item.evaluation_request + and eval_item.evaluation_request.prompt + and eval_item.evaluation_request.prompt.prompt_template_data + ): + if ( + "prompt" + in eval_item.evaluation_request.prompt.prompt_template_data.values + ): + return "{prompt}" # Default prompt template + except Exception as e: + logger.warning("Failed to get prompt template from evaluation set: %s", e) + return None + + def _resolve_inference_configs( + api_client: BaseApiClient, + dataset: types.EvaluationRunDataSource, inference_configs: Optional[ dict[str, types.EvaluationRunInferenceConfigOrDict] ] = None, agent_info_pydantic: Optional[types.evals.AgentInfo] = None, ) -> Optional[dict[str, types.EvaluationRunInferenceConfigOrDict]]: """Resolves inference configs for the evaluation run.""" + # Resolve agent config if agent_info_pydantic and agent_info_pydantic.name: inference_configs = {} 
inference_configs[agent_info_pydantic.name] = ( @@ -296,6 +334,34 @@ def _resolve_inference_configs( ) ) ) + # Resolve prompt template data + if inference_configs: + for inference_config in inference_configs.values(): + prompt_template_val = ( + inference_config.get("prompt_template") + if isinstance(inference_config, dict) + else inference_config.prompt_template + ) + if not prompt_template_val: + default_prompt_template = _get_default_prompt_template( + api_client, inference_config, dataset + ) + if default_prompt_template: + prompt_template_to_set = default_prompt_template + if not isinstance( + default_prompt_template, types.EvaluationRunPromptTemplate + ): + prompt_template_to_set = types.EvaluationRunPromptTemplate( + prompt_template=default_prompt_template + ) + if isinstance(inference_config, dict): + inference_config[ + "prompt_template" + ] = prompt_template_to_set.model_dump(exclude_none=True) + else: + inference_config.prompt_template = ( + prompt_template_to_set.model_dump(exclude_none=True) + ) return inference_configs @@ -324,8 +390,7 @@ def _get_candidate_name( and dataset.candidate_name != agent_info_pydantic.name ): logger.warning( - "Evaluation dataset candidate_name and agent_info.name are different." - " Please make sure this is intended." + "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended." ) elif dataset.candidate_name is None and agent_info_pydantic: return agent_info_pydantic.name @@ -1944,6 +2009,21 @@ def _object_to_dict(obj: Any) -> Union[dict[str, Any], Any]: return result +def _get_content(row: dict[str, Any], column: str): + if isinstance(row[column], str): + return genai_types.Content( + parts=[genai_types.Part(text=row[column])], + role=_evals_constant.USER_AUTHOR, + ) + elif isinstance(row[column], genai_types.Content): + return row[column] + else: + raise ValueError( + f"{column} must be a string or a Content object. " + f"Got {type(row[column])}." 
+            ) + + def _create_evaluation_set_from_dataframe( api_client: BaseApiClient, gcs_dest_prefix: str, @@ -1962,13 +2042,28 @@ for event in row[_evals_constant.INTERMEDIATE_EVENTS]: if CONTENT in event: intermediate_events.append(event[CONTENT]) +        if _evals_constant.CONTEXT in row or _evals_constant.HISTORY in row: +            values = {} +            if _evals_constant.CONTEXT in row: +                values[_evals_constant.CONTEXT] = _get_content( +                    row, _evals_constant.CONTEXT +                ) +            if _evals_constant.HISTORY in row: +                values[_evals_constant.HISTORY] = _get_content( +                    row, _evals_constant.HISTORY +                ) +            if _evals_constant.PROMPT in row: +                values[_evals_constant.PROMPT] = _get_content( +                    row, _evals_constant.PROMPT +                ) +            prompt = types.EvaluationPrompt( +                prompt_template_data=types.PromptTemplateData(values=values) +            ) +        else: +            prompt = types.EvaluationPrompt(text=row[_evals_constant.PROMPT]) if _evals_constant.PROMPT in row else None eval_item_requests.append( types.EvaluationItemRequest( - prompt=( - types.EvaluationPrompt(text=row[_evals_constant.PROMPT]) - if _evals_constant.PROMPT in row - else None - ), + prompt=prompt if prompt else None, golden_response=( types.CandidateResponse(text=row[_evals_constant.REFERENCE]) if _evals_constant.REFERENCE in row diff --git a/vertexai/_genai/_evals_constant.py b/vertexai/_genai/_evals_constant.py index 6fc27d94e0..847140dc5c 100644 --- a/vertexai/_genai/_evals_constant.py +++ b/vertexai/_genai/_evals_constant.py @@ -53,6 +53,7 @@ CONTENT = "content" PARTS = "parts" USER_AUTHOR = "user" +HISTORY = "conversation_history" COMMON_DATASET_COLUMNS = frozenset( { @@ -61,5 +62,6 @@ REFERENCE, SESSION_INPUT, CONTEXT, + HISTORY, } ) diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index dba63496fb..73922955cd 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -253,6 +253,9 @@ def _EvaluationRunConfig_from_vertex( if getv(from_object, ["autoraterConfig"]) is not None: setv(to_object,
["autorater_config"], getv(from_object, ["autoraterConfig"])) + if getv(from_object, ["promptTemplate"]) is not None: + setv(to_object, ["prompt_template"], getv(from_object, ["promptTemplate"])) + return to_object @@ -277,6 +280,9 @@ def _EvaluationRunConfig_to_vertex( if getv(from_object, ["autorater_config"]) is not None: setv(to_object, ["autoraterConfig"], getv(from_object, ["autorater_config"])) + if getv(from_object, ["prompt_template"]) is not None: + setv(to_object, ["promptTemplate"], getv(from_object, ["prompt_template"])) + return to_object @@ -1643,7 +1649,7 @@ def create_evaluation_run( output_config=output_config, metrics=resolved_metrics ) resolved_inference_configs = _evals_common._resolve_inference_configs( - inference_configs, agent_info_pydantic + self._api_client, resolved_dataset, inference_configs, agent_info_pydantic ) resolved_labels = _evals_common._add_evaluation_run_labels( labels, agent_info_pydantic @@ -2478,7 +2484,7 @@ async def create_evaluation_run( output_config=output_config, metrics=resolved_metrics ) resolved_inference_configs = _evals_common._resolve_inference_configs( - inference_configs, agent_info_pydantic + self._api_client, resolved_dataset, inference_configs, agent_info_pydantic ) resolved_labels = _evals_common._add_evaluation_run_labels( labels, agent_info_pydantic diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py index e1d4e73d23..d1026d84d7 100644 --- a/vertexai/_genai/types/__init__.py +++ b/vertexai/_genai/types/__init__.py @@ -386,6 +386,9 @@ from .common import EvaluationRunMetricDict from .common import EvaluationRunMetricOrDict from .common import EvaluationRunOrDict +from .common import EvaluationRunPromptTemplate +from .common import EvaluationRunPromptTemplateDict +from .common import EvaluationRunPromptTemplateOrDict from .common import EvaluationRunResults from .common import EvaluationRunResultsDict from .common import EvaluationRunResultsOrDict @@ -1285,6 +1288,9 @@ 
"EvaluationRunMetric", "EvaluationRunMetricDict", "EvaluationRunMetricOrDict", + "EvaluationRunPromptTemplate", + "EvaluationRunPromptTemplateDict", + "EvaluationRunPromptTemplateOrDict", "EvaluationRunConfig", "EvaluationRunConfigDict", "EvaluationRunConfigOrDict", diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index 0903178153..18d3576a3f 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -2287,6 +2287,38 @@ class EvaluationRunMetricDict(TypedDict, total=False): EvaluationRunMetricOrDict = Union[EvaluationRunMetric, EvaluationRunMetricDict] +class EvaluationRunPromptTemplate(_common.BaseModel): + """Prompt template used for inference.""" + + prompt_template: Optional[str] = Field( + default=None, + description="""Inline prompt template. Template variables should be in the format + "{var_name}".""", + ) + gcs_uri: Optional[str] = Field( + default=None, + description="""Prompt template stored in Cloud Storage. Format: + "gs://my-bucket/file-name.txt".""", + ) + + +class EvaluationRunPromptTemplateDict(TypedDict, total=False): + """Prompt template used for inference.""" + + prompt_template: Optional[str] + """Inline prompt template. Template variables should be in the format + "{var_name}".""" + + gcs_uri: Optional[str] + """Prompt template stored in Cloud Storage. 
Format: + "gs://my-bucket/file-name.txt".""" + + +EvaluationRunPromptTemplateOrDict = Union[ + EvaluationRunPromptTemplate, EvaluationRunPromptTemplateDict +] + + class EvaluationRunConfig(_common.BaseModel): """The evaluation configuration used for the evaluation run.""" @@ -2300,6 +2332,9 @@ class EvaluationRunConfig(_common.BaseModel): autorater_config: Optional[genai_types.AutoraterConfig] = Field( default=None, description="""The autorater config for the evaluation run.""" ) + prompt_template: Optional[EvaluationRunPromptTemplate] = Field( + default=None, description="""The prompt template used for inference.""" + ) class EvaluationRunConfigDict(TypedDict, total=False): @@ -2314,6 +2349,9 @@ class EvaluationRunConfigDict(TypedDict, total=False): autorater_config: Optional[genai_types.AutoraterConfigDict] """The autorater config for the evaluation run.""" + prompt_template: Optional[EvaluationRunPromptTemplateDict] + """The prompt template used for inference.""" + EvaluationRunConfigOrDict = Union[EvaluationRunConfig, EvaluationRunConfigDict] @@ -3038,6 +3076,9 @@ class EvaluationRunInferenceConfig(_common.BaseModel): Contains configuration for a user simulator that uses an LLM to generate messages on behalf of the user.""", ) + prompt_template: Optional[EvaluationRunPromptTemplate] = Field( + default=None, description="""The prompt template used for inference.""" + ) class EvaluationRunInferenceConfigDict(TypedDict, total=False): @@ -3057,6 +3098,9 @@ class EvaluationRunInferenceConfigDict(TypedDict, total=False): Contains configuration for a user simulator that uses an LLM to generate messages on behalf of the user.""" + prompt_template: Optional[EvaluationRunPromptTemplateDict] + """The prompt template used for inference.""" + EvaluationRunInferenceConfigOrDict = Union[ EvaluationRunInferenceConfig, EvaluationRunInferenceConfigDict