from typing import Any, Optional

from mlflow.entities.assessment import (
    Assessment,
    AssessmentError,
    Expectation,
    Feedback,
    FeedbackValueType,
)
from mlflow.entities.assessment_source import AssessmentSource
from mlflow.exceptions import MlflowException
from mlflow.tracing.client import TracingClient
from mlflow.utils.annotations import experimental
@experimental
def log_expectation(
    trace_id: str,
    name: str,
    source: AssessmentSource,
    value: Any,
    metadata: Optional[dict[str, Any]] = None,
    span_id: Optional[str] = None,
) -> Assessment:
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Logs an expectation (e.g. ground truth label) to a Trace.
Args:
trace_id: The ID of the trace.
name: The name of the expectation assessment e.g., "expected_answer
source: The source of the expectation assessment. Must be an instance of
:py:class:`~mlflow.entities.AssessmentSource`.
value: The value of the expectation. It can be any JSON-serializable value.
metadata: Additional metadata for the expectation.
span_id: The ID of the span associated with the expectation, if it needs be
associated with a specific span in the trace.
Returns:
:py:class:`~mlflow.entities.Assessment`: The created expectation assessment.
    Example:

        The following code annotates a trace with human-provided ground truth.

        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            # Specify the annotator information as a source.
            source = AssessmentSource(
                source_type=AssessmentSourceType.HUMAN,
                source_id="john@example.com",
            )

            mlflow.log_expectation(
                trace_id="1234",
                name="expected_answer",
                value=42,
                source=source,
            )
        The expectation value can be any JSON-serializable value. For example, you may
        record the full LLM message as the expectation value.

        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            mlflow.log_expectation(
                trace_id="1234",
                name="expected_message",
                # Full LLM message including expected tool calls
                value={
                    "role": "assistant",
                    "content": "The answer is 42.",
                    "tool_calls": [
                        {
                            "id": "1234",
                            "type": "function",
                            "function": {"name": "add", "arguments": "40 + 2"},
                        }
                    ],
                },
                source=AssessmentSource(
                    source_type=AssessmentSourceType.HUMAN,
                    source_id="john@example.com",
                ),
            )
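
        To attach the expectation to a particular span instead of the whole trace,
        pass its ID via the ``span_id`` parameter. The following is a minimal
        sketch, assuming the trace "1234" exists and its first span is the one to
        annotate.

        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            # Look up the span to annotate from the trace.
            trace = mlflow.get_trace("1234")
            span = trace.data.spans[0]

            mlflow.log_expectation(
                trace_id="1234",
                name="expected_answer",
                value=42,
                source=AssessmentSource(
                    source_type=AssessmentSourceType.HUMAN,
                    source_id="john@example.com",
                ),
                span_id=span.span_id,
            )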
"""
    if value is None:
        raise MlflowException.invalid_parameter_value("Expectation value cannot be None.")

    if not isinstance(source, AssessmentSource):
        raise MlflowException.invalid_parameter_value(
            f"`source` must be an instance of `AssessmentSource`. Got {type(source)} instead."
        )

    return TracingClient().log_assessment(
        trace_id=trace_id,
        name=name,
        source=source,
        # `value` is validated above, so the expectation is always constructed.
        expectation=Expectation(value),
        metadata=metadata,
        span_id=span_id,
    )
@experimental
def update_expectation(
    trace_id: str,
    assessment_id: str,
    name: Optional[str] = None,
    value: Any = None,
    metadata: Optional[dict[str, Any]] = None,
) -> Assessment:
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Updates an existing expectation (ground truth) in a Trace.
Args:
trace_id: The ID of the trace.
assessment_id: The ID of the expectation assessment to update.
name: The updated name of the expectation. Specify only when updating the name.
value: The updated value of the expectation. Specify only when updating the value.
metadata: Additional metadata for the expectation. Specify only when updating the metadata.
Returns:
:py:class:`~mlflow.entities.Assessment`: The updated feedback assessment.
Example:
The following code updates an existing expectation with a new value.
To update other fields, provide the corresponding parameters.
        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            # Create an expectation with value 42.
            assessment = mlflow.log_expectation(
                trace_id="1234",
                name="expected_answer",
                value=42,
                # Original annotator
                source=AssessmentSource(
                    source_type=AssessmentSourceType.HUMAN,
                    source_id="bob@example.com",
                ),
            )

            # Update the expectation with a new value 43.
            mlflow.update_expectation(
                trace_id="1234", assessment_id=assessment.assessment_id, value=43
            )
    """
    return TracingClient().update_assessment(
        assessment_id=assessment_id,
        trace_id=trace_id,
        name=name,
        expectation=Expectation(value) if value is not None else None,
        metadata=metadata,
    )
@experimental
def delete_expectation(trace_id: str, assessment_id: str):
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Deletes an expectation associated with a trace.
Args:
trace_id: The ID of the trace.
assessment_id: The ID of the expectation assessment to delete.
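
    Example:

        A minimal sketch of removing a previously logged expectation; the trace ID
        and the assessment returned by :py:func:`mlflow.log_expectation` are
        placeholders for your own values.

        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            assessment = mlflow.log_expectation(
                trace_id="1234",
                name="expected_answer",
                value=42,
                source=AssessmentSource(
                    source_type=AssessmentSourceType.HUMAN,
                    source_id="john@example.com",
                ),
            )

            # Remove the expectation from the trace.
            mlflow.delete_expectation(
                trace_id="1234", assessment_id=assessment.assessment_id
            )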
"""
return TracingClient().delete_assessment(trace_id=trace_id, assessment_id=assessment_id)
@experimental
def log_feedback(
    trace_id: str,
    name: str,
    source: AssessmentSource,
    value: Optional[FeedbackValueType] = None,
    error: Optional[AssessmentError] = None,
    rationale: Optional[str] = None,
    metadata: Optional[dict[str, Any]] = None,
    span_id: Optional[str] = None,
) -> Assessment:
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Logs feedback to a Trace.
Args:
trace_id: The ID of the trace.
name: The name of the feedback assessment e.g., "faithfulness"
source: The source of the feedback assessment. Must be an instance of
:py:class:`~mlflow.entities.AssessmentSource`.
value: The value of the feedback. Must be one of the following types:
- float
- int
- str
- bool
- list of values of the same types as above
- dict with string keys and values of the same types as above
error: An error object representing any issues encountered while computing the
feedback, e.g., a timeout error from an LLM judge. Either this or `value`
must be provided.
rationale: The rationale / justification for the feedback.
metadata: Additional metadata for the feedback.
span_id: The ID of the span associated with the feedback, if it needs be
associated with a specific span in the trace.
Returns:
:py:class:`~mlflow.entities.Assessment`: The created feedback assessment.
    Example:

        The following code annotates a trace with feedback provided by an
        LLM-as-a-Judge.

        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            source = AssessmentSource(
                source_type=AssessmentSourceType.LLM_JUDGE,
                source_id="faithfulness-judge",
            )

            mlflow.log_feedback(
                trace_id="1234",
                name="faithfulness",
                source=source,
                value=0.9,
                rationale="The model is faithful to the input.",
                metadata={"model": "gpt-4o-mini"},
            )
        You can also log error information encountered during the feedback generation
        process. To do so, provide an instance of
        :py:class:`~mlflow.entities.AssessmentError` to the `error` parameter, and
        leave the `value` parameter as `None`.

        .. code-block:: python

            import mlflow
            from mlflow.entities import (
                AssessmentError,
                AssessmentSource,
                AssessmentSourceType,
            )

            source = AssessmentSource(
                source_type=AssessmentSourceType.LLM_JUDGE,
                source_id="faithfulness-judge",
            )

            error = AssessmentError(
                error_code="RATE_LIMIT_EXCEEDED",
                error_message="Rate limit for the judge exceeded.",
            )

            mlflow.log_feedback(
                trace_id="1234",
                name="faithfulness",
                source=source,
                error=error,
            )
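
        Feedback values are not limited to scalars. As a sketch, a dict with string
        keys can record multiple facets in one assessment; the facet names below
        ("accuracy", "concise") are illustrative, not part of the API.

        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            mlflow.log_feedback(
                trace_id="1234",
                name="quality",
                source=AssessmentSource(
                    source_type=AssessmentSourceType.HUMAN,
                    source_id="alice@example.com",
                ),
                # Dict values must map string keys to float/int/str/bool values
                # (or lists of them), per the accepted feedback value types.
                value={"accuracy": 0.8, "concise": True},
            )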
"""
if value is None and error is None:
raise MlflowException.invalid_parameter_value("Either `value` or `error` must be provided.")
if not isinstance(source, AssessmentSource):
raise MlflowException.invalid_parameter_value(
f"`source` must be an instance of `AssessmentSource`. Got {type(source)} instead."
)
return TracingClient().log_assessment(
trace_id=trace_id,
name=name,
source=source,
feedback=Feedback(value, error),
rationale=rationale,
metadata=metadata,
span_id=span_id,
)
@experimental
def update_feedback(
    trace_id: str,
    assessment_id: str,
    name: Optional[str] = None,
    value: Optional[FeedbackValueType] = None,
    rationale: Optional[str] = None,
    metadata: Optional[dict[str, Any]] = None,
) -> Assessment:
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Updates an existing feedback in a Trace.
Args:
trace_id: The ID of the trace.
assessment_id: The ID of the feedback assessment to update.
name: The updated name of the feedback. Specify only when updating the name.
value: The updated value of the feedback. Specify only when updating the value.
rationale: The updated rationale of the feedback. Specify only when updating the rationale.
metadata: Additional metadata for the feedback. Specify only when updating the metadata.
Returns:
:py:class:`~mlflow.entities.Assessment`: The updated feedback assessment.
Example:
The following code updates an existing feedback with a new value.
To update other fields, provide the corresponding parameters.
        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            # Create feedback with value 0.9.
            assessment = mlflow.log_feedback(
                trace_id="1234",
                name="faithfulness",
                value=0.9,
                source=AssessmentSource(
                    source_type=AssessmentSourceType.LLM_JUDGE,
                    source_id="gpt-4o-mini",
                ),
            )

            # Update the feedback with a new value 0.95.
            mlflow.update_feedback(
                trace_id="1234",
                assessment_id=assessment.assessment_id,
                value=0.95,
            )
    """
    return TracingClient().update_assessment(
        trace_id=trace_id,
        assessment_id=assessment_id,
        name=name,
        feedback=Feedback(value) if value is not None else None,
        rationale=rationale,
        metadata=metadata,
    )
@experimental
def delete_feedback(trace_id: str, assessment_id: str):
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Deletes feedback associated with a trace.
Args:
trace_id: The ID of the trace.
assessment_id: The ID of the feedback assessment to delete.
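
    Example:

        A minimal sketch of removing previously logged feedback; the trace ID and
        the assessment returned by :py:func:`mlflow.log_feedback` are placeholders
        for your own values.

        .. code-block:: python

            import mlflow
            from mlflow.entities import AssessmentSource, AssessmentSourceType

            assessment = mlflow.log_feedback(
                trace_id="1234",
                name="faithfulness",
                value=0.9,
                source=AssessmentSource(
                    source_type=AssessmentSourceType.LLM_JUDGE,
                    source_id="faithfulness-judge",
                ),
            )

            # Remove the feedback from the trace.
            mlflow.delete_feedback(
                trace_id="1234", assessment_id=assessment.assessment_id
            )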
"""
return TracingClient().delete_assessment(trace_id=trace_id, assessment_id=assessment_id)