import inspect
import logging
from contextlib import contextmanager
from dataclasses import asdict
from typing import TYPE_CHECKING, Optional, Union
from mlflow.entities.model_registry import Prompt
from mlflow.exceptions import MlflowException
from mlflow.genai.evaluation.utils import (
_convert_eval_set_to_df,
)
from mlflow.genai.optimize.optimizers import _BaseOptimizer, _DSPyMIPROv2Optimizer
from mlflow.genai.optimize.types import (
OBJECTIVE_FN,
LLMParams,
OptimizerConfig,
PromptOptimizationResult,
)
from mlflow.genai.scorers import Scorer
from mlflow.tracking._model_registry.fluent import load_prompt
from mlflow.tracking.fluent import log_params, log_table, start_run
from mlflow.utils.annotations import experimental

if TYPE_CHECKING:
    import pandas as pd

    from mlflow.genai.evaluation.utils import EvaluationDatasetTypes
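
# Maps supported algorithm names to their optimizer implementations.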
_ALGORITHMS = {"DSPy/MIPROv2": _DSPyMIPROv2Optimizer}
_logger = logging.getLogger(__name__)


@experimental
def optimize_prompt(
*,
target_llm_params: LLMParams,
prompt: Union[str, Prompt],
train_data: "EvaluationDatasetTypes",
scorers: list[Scorer],
objective: Optional[OBJECTIVE_FN] = None,
eval_data: Optional["EvaluationDatasetTypes"] = None,
optimizer_config: Optional[OptimizerConfig] = None,
) -> PromptOptimizationResult:
"""
    Optimize an LLM prompt using the given dataset and evaluation metrics.
    The optimized prompt template is automatically registered as a new version of the
    original prompt and included in the result.

    Currently, this API only supports DSPy's MIPROv2 optimizer.

    Args:
        target_llm_params: Parameters for the LLM that the prompt is optimized for.
The model name must be specified in the format `<provider>/<model>`.
prompt: The URI or Prompt object of the MLflow prompt to optimize.
The optimized prompt is registered as a new version of the prompt.
        train_data: Training dataset used for optimization.
            The data must be one of the following formats:

            * An EvaluationDataset entity
            * Pandas DataFrame
            * Spark DataFrame
            * List of dictionaries

            The dataset must include the following columns:

            - inputs: A column containing single inputs in dict format.
              Each input should contain keys matching the variables in the prompt template.
            - expectations: A column containing a dictionary of ground truths for
              individual output fields.
        scorers: List of scorers that evaluate the inputs, outputs and expectations.
            Note: the `trace` input is not supported for optimization; scorers may only
            use inputs, outputs and expectations. Also, pass the `objective` argument
            when using scorers with string or :class:`~mlflow.entities.Feedback` type outputs.
        objective: A callable that computes the overall performance metric from individual
            assessments. It takes a dict mapping assessment names to assessment scores and
            returns a float value (greater is better). See the second example below.
eval_data: Evaluation dataset with the same format as train_data. If not provided,
train_data will be automatically split into training and evaluation sets.
optimizer_config: Configuration parameters for the optimizer.

    Returns:
        PromptOptimizationResult: The optimization result including the optimized prompt.

    Example:

    .. code-block:: python

        import os
        import mlflow
        from typing import Any
        from mlflow.genai.scorers import scorer
        from mlflow.genai.optimize import OptimizerConfig, LLMParams

        os.environ["OPENAI_API_KEY"] = "YOUR_API_KEY"


        @scorer
        def exact_match(expectations: dict[str, Any], outputs: dict[str, Any]) -> bool:
            return expectations == outputs


        prompt = mlflow.register_prompt(
            name="qa",
            template="Answer the following question: {{question}}",
        )

        result = mlflow.genai.optimize_prompt(
            target_llm_params=LLMParams(model_name="openai/gpt-4.1-nano"),
            train_data=[
                {"inputs": {"question": f"{i}+1"}, "expectations": {"answer": f"{i + 1}"}}
                for i in range(100)
            ],
            scorers=[exact_match],
            prompt=prompt.uri,
            optimizer_config=OptimizerConfig(num_instruction_candidates=5),
        )

        print(result.prompt.template)
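
    Example with a custom objective (a minimal sketch that reuses the
    ``exact_match`` scorer, prompt, and training data from the example above;
    the helper name ``combined_score`` is illustrative only):

    .. code-block:: python

        def combined_score(scores: dict[str, Any]) -> float:
            # Map per-assessment scores (here only "exact_match") to a single
            # float where greater is better.
            return float(scores["exact_match"])


        result = mlflow.genai.optimize_prompt(
            target_llm_params=LLMParams(model_name="openai/gpt-4.1-nano"),
            train_data=[
                {"inputs": {"question": f"{i}+1"}, "expectations": {"answer": f"{i + 1}"}}
                for i in range(100)
            ],
            scorers=[exact_match],
            objective=combined_score,
            prompt=prompt.uri,
        )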
"""
if optimizer_config is None:
optimizer_config = OptimizerConfig()
    optimizer = _select_optimizer(optimizer_config)
_validate_scorers(scorers)
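    # Normalize the training (and optional evaluation) data into pandas DataFrames.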
train_data = _convert_eval_set_to_df(train_data)
if eval_data is not None:
eval_data = _convert_eval_set_to_df(eval_data)
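    # Accept either a prompt URI string or a Prompt entity; load the prompt if a URI is given.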
if isinstance(prompt, str):
prompt: Prompt = load_prompt(prompt)
with _maybe_start_autolog(optimizer_config, train_data, eval_data, prompt, target_llm_params):
        optimized_prompt = optimizer.optimize(
prompt=prompt,
target_llm_params=target_llm_params,
train_data=train_data,
scorers=scorers,
objective=objective,
eval_data=eval_data,
)
return PromptOptimizationResult(prompt=optimized_prompt)


def _select_optimizer(optimizer_config: OptimizerConfig) -> _BaseOptimizer:
if optimizer_config.algorithm not in _ALGORITHMS:
        raise ValueError(
            f"Algorithm {optimizer_config.algorithm} is not supported. "
            f"Supported algorithms are: {list(_ALGORITHMS)}."
        )
return _ALGORITHMS[optimizer_config.algorithm](optimizer_config)


def _validate_scorers(scorers: list[Scorer]) -> None:
for scorer in scorers:
if not isinstance(scorer, Scorer):
raise MlflowException.invalid_parameter_value(
f"Scorer {scorer} is not a valid scorer. Please use the @scorer decorator "
"to convert a function into a scorer or inherit from the Scorer class"
)
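        # Optimization scorers may only use inputs, outputs, and expectations;
        # reject any scorer whose signature requires the full trace.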
signature = inspect.signature(scorer)
if "trace" in signature.parameters:
raise MlflowException.invalid_parameter_value(
f"Trace input is found in Scorer {scorer}. "
"Scorers for optimization can only take inputs, outputs or expectations."
)


@contextmanager
def _maybe_start_autolog(
optimizer_config: OptimizerConfig,
train_data: "pd.DataFrame",
eval_data: Optional["pd.DataFrame"],
prompt: Prompt,
target_llm_params: LLMParams,
):
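    # When autologging is enabled, create an MLflow run and log the datasets and
    # parameters so optimization progress can be tracked; otherwise run the
    # optimization without any extra logging.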
if optimizer_config.autolog:
with start_run() as run:
_logger.info(
f"Run `{run.info.run_id}` is created for autologging prompt optimization. "
"Watch the run to track the optimization progress."
)
log_table(train_data, "train_data.json")
if eval_data is not None:
log_table(eval_data, "eval_data.json")
params = {
"prompt_uri": prompt.uri,
**{f"target_llm_params.{k}": v for k, v in asdict(target_llm_params).items()},
**{f"optimizer_config.{k}": v for k, v in asdict(optimizer_config).items()},
}
log_params(params)
yield
else:
yield