Source code for mlflow.genai.scheduled_scorers

from dataclasses import dataclass
from typing import Optional

from mlflow.genai.scorers.base import Scorer
from mlflow.utils.annotations import experimental

_ERROR_MSG = (
    "The `databricks-agents` package is required to use `mlflow.genai.scheduled_scorers`. "
    "Please install it with `pip install databricks-agents`."
)


[docs]@experimental @dataclass() class ScorerScheduleConfig: """ A scheduled scorer configuration for automated monitoring of generative AI applications. Scheduled scorers are used to automatically evaluate traces logged to MLflow experiments by production applications. They are part of [Databricks Lakehouse Monitoring for GenAI](https://docs.databricks.com/aws/en/generative-ai/agent-evaluation/monitoring), which helps track quality metrics like groundedness, safety, and guideline adherence alongside operational metrics like volume, latency, and cost. When configured, scheduled scorers run automatically in the background to evaluate a sample of traces based on the specified sampling rate and filter criteria. The Assessments are displayed in the Traces tab of the MLflow experiment and can be used to identify quality issues in production. Args: scorer: The scorer function to run on sampled traces. Must be either a built-in scorer (e.g., Safety, Correctness) or a function decorated with @scorer. Subclasses of Scorer are not supported. scheduled_scorer_name: The name for this scheduled scorer configuration within the experiment. This name must be unique among all scheduled scorers in the same experiment. We recommend using the scorer's name (e.g., scorer.name) for consistency. sample_rate: The fraction of traces to evaluate, between 0.0 and 1.0. For example, 0.1 means 10% of traces will be randomly selected for evaluation. filter_string: An optional MLflow search_traces compatible filter string to apply before sampling traces. Only traces matching this filter will be considered for evaluation. Uses the same syntax as mlflow.search_traces(). Example: .. code-block:: python from mlflow.genai.scorers import Safety, scorer from mlflow.genai.scheduled_scorers import ScorerScheduleConfig # Using a built-in scorer safety_config = ScorerScheduleConfig( scorer=Safety(), scheduled_scorer_name="production_safety", sample_rate=0.2, # Evaluate 20% of traces filter_string="trace.status = 'OK'", ) # Using a custom scorer @scorer def response_length(outputs): return len(str(outputs)) > 100 length_config = ScorerScheduleConfig( scorer=response_length, scheduled_scorer_name="adequate_length", sample_rate=0.1, # Evaluate 10% of traces filter_string="trace.status = 'OK'", ) Note: Scheduled scorers are executed automatically by Databricks and do not need to be manually triggered. The Assessments appear in the Traces tab of the MLflow experiment. Only traces logged directly to the experiment are monitored; traces logged to individual runs within the experiment are not evaluated. .. warning:: This API is in Beta and may change or be removed in a future release without warning. """ scorer: Scorer scheduled_scorer_name: str sample_rate: float filter_string: Optional[str] = None
# Scheduled Scorer CRUD operations
[docs]@experimental def add_scheduled_scorer( # clint: disable=missing-docstring-param # noqa: D417 *, scheduled_scorer_name: str, scorer: Scorer, sample_rate: float, filter_string: Optional[str] = None, experiment_id: Optional[str] = None, **kwargs, ) -> ScorerScheduleConfig: """ Add a scheduled scorer to automatically monitor traces in an MLflow experiment. This function configures a scorer function to run automatically on traces logged to the specified experiment. The scorer will evaluate a sample of traces based on the sampling rate and any filter criteria. Assessments are displayed in the Traces tab of the MLflow experiment. Args: scheduled_scorer_name: The name for this scheduled scorer within the experiment. We recommend using the scorer's name (e.g., scorer.name) for consistency. scorer: The scorer function to execute on sampled traces. Must be either a built-in scorer or a function decorated with @scorer. Subclasses of Scorer are not supported. sample_rate: The fraction of traces to evaluate, between 0.0 and 1.0. For example, 0.3 means 30% of traces will be randomly selected for evaluation. filter_string: An optional MLflow search_traces compatible filter string. Only traces matching this filter will be considered for evaluation. If None, all traces in the experiment are eligible for sampling. experiment_id: The ID of the MLflow experiment to monitor. If None, uses the currently active experiment. Returns: A ScorerScheduleConfig object representing the configured scheduled scorer. Example: .. code-block:: python import mlflow from mlflow.genai.scorers import Safety, Correctness from mlflow.genai.scheduled_scorers import add_scheduled_scorer # Set up your experiment experiment = mlflow.set_experiment("my_genai_app_monitoring") # Add a safety scorer to monitor 50% of traces safety_scorer = add_scheduled_scorer( scheduled_scorer_name="safety_monitor", scorer=Safety(), sample_rate=0.5, filter_string="trace.status = 'OK'", ) # Add a correctness scorer with different sampling correctness_scorer = add_scheduled_scorer( scheduled_scorer_name="correctness_monitor", scorer=Correctness(), sample_rate=0.2, # More expensive, so lower sample rate experiment_id=experiment.experiment_id, # Explicitly specify experiment ) Note: Once added, the scheduled scorer will begin evaluating new traces automatically. There may be a delay between when traces are logged and when they are evaluated. Only traces logged directly to the experiment are monitored; traces logged to individual runs within the experiment are not evaluated. .. warning:: This API is in Beta and may change or be removed in a future release without warning. """ try: from databricks.agents.scorers import add_scheduled_scorer except ImportError as e: raise ImportError(_ERROR_MSG) from e return add_scheduled_scorer( experiment_id, scheduled_scorer_name, scorer, sample_rate, filter_string, **kwargs )
[docs]@experimental def update_scheduled_scorer( # clint: disable=missing-docstring-param # noqa: D417 *, scheduled_scorer_name: str, scorer: Optional[Scorer] = None, sample_rate: Optional[float] = None, filter_string: Optional[str] = None, experiment_id: Optional[str] = None, **kwargs, ) -> ScorerScheduleConfig: """ Update an existing scheduled scorer configuration. This function modifies the configuration of an existing scheduled scorer, allowing you to change the scorer function, sampling rate, or filter criteria. Only the provided parameters will be updated; omitted parameters will retain their current values. The scorer will continue to run automatically with the new configuration. Args: scheduled_scorer_name: The name of the existing scheduled scorer to update. Must match the name used when the scorer was originally added. We recommend using the scorer's name (e.g., scorer.name) for consistency. scorer: The new scorer function to execute on sampled traces. Must be either a built-in scorer or a function decorated with @scorer. If None, the current scorer function will be retained. sample_rate: The new fraction of traces to evaluate, between 0.0 and 1.0. If None, the current sample rate will be retained. filter_string: The new MLflow search_traces compatible filter string. If None, the current filter string will be retained. Pass an empty string to remove the filter entirely. experiment_id: The ID of the MLflow experiment containing the scheduled scorer. If None, uses the currently active experiment. Returns: A ScorerScheduleConfig object representing the updated scheduled scorer configuration. Example: .. code-block:: python from mlflow.genai.scorers import Safety from mlflow.genai.scheduled_scorers import update_scheduled_scorer # Update an existing safety scorer to increase sampling rate updated_scorer = update_scheduled_scorer( scheduled_scorer_name="safety_monitor", sample_rate=0.8, # Increased from 0.5 to 0.8 ) .. warning:: This API is in Beta and may change or be removed in a future release without warning. """ try: from databricks.agents.scorers import update_scheduled_scorer except ImportError as e: raise ImportError(_ERROR_MSG) from e return update_scheduled_scorer( experiment_id, scheduled_scorer_name, scorer, sample_rate, filter_string, **kwargs )
[docs]@experimental def delete_scheduled_scorer( # clint: disable=missing-docstring-param # noqa: D417 *, scheduled_scorer_name: str, experiment_id: Optional[str] = None, **kwargs, ) -> None: """ Delete a scheduled scorer from an MLflow experiment. This function removes a scheduled scorer configuration, stopping automatic evaluation of traces. Existing Assessments will remain in the Traces tab of the MLflow experiment, but no new evaluations will be performed. Args: scheduled_scorer_name: The name of the scheduled scorer to delete. Must match the name used when the scorer was originally added. experiment_id: The ID of the MLflow experiment containing the scheduled scorer. If None, uses the currently active experiment. Example: .. code-block:: python from mlflow.genai.scheduled_scorers import delete_scheduled_scorer # Remove a scheduled scorer that's no longer needed delete_scheduled_scorer(scheduled_scorer_name="safety_monitor") # To delete all scheduled scorers at once, use set_scheduled_scorers # with an empty list instead: from mlflow.genai.scheduled_scorers import set_scheduled_scorers set_scheduled_scorers( scheduled_scorers=[] # Empty list removes all scorers ) Note: Deletion is immediate and cannot be undone. If you need the same scorer configuration later, you will need to add it again using add_scheduled_scorer. .. warning:: This API is in Beta and may change or be removed in a future release without warning. """ try: from databricks.agents.scorers import delete_scheduled_scorer except ImportError as e: raise ImportError(_ERROR_MSG) from e return delete_scheduled_scorer(experiment_id, scheduled_scorer_name, **kwargs)
[docs]@experimental def get_scheduled_scorer( # clint: disable=missing-docstring-param # noqa: D417 *, scheduled_scorer_name: str, experiment_id: Optional[str] = None, **kwargs, ) -> ScorerScheduleConfig: """ Retrieve the configuration of a specific scheduled scorer. This function returns the current configuration of a scheduled scorer, including its scorer function, sampling rate, and filter criteria. Args: scheduled_scorer_name: The name of the scheduled scorer to retrieve. experiment_id: The ID of the MLflow experiment containing the scheduled scorer. If None, uses the currently active experiment. Returns: A ScorerScheduleConfig object containing the current configuration of the specified scheduled scorer. Example: .. code-block:: python from mlflow.genai.scheduled_scorers import get_scheduled_scorer # Get the current configuration of a scheduled scorer scorer_config = get_scheduled_scorer(scheduled_scorer_name="safety_monitor") print(f"Sample rate: {scorer_config.sample_rate}") print(f"Filter: {scorer_config.filter_string}") print(f"Scorer: {scorer_config.scorer.name}") .. warning:: This API is in Beta and may change or be removed in a future release without warning. """ try: from databricks.agents.scorers import get_scheduled_scorer except ImportError as e: raise ImportError(_ERROR_MSG) from e return get_scheduled_scorer(experiment_id, scheduled_scorer_name, **kwargs)
[docs]@experimental def list_scheduled_scorers( # clint: disable=missing-docstring-param # noqa: D417 *, experiment_id: Optional[str] = None, **kwargs ) -> list[ScorerScheduleConfig]: """ List all scheduled scorers for an experiment. This function returns all scheduled scorers configured for the specified experiment, or for the current active experiment if no experiment ID is provided. Args: experiment_id: The ID of the MLflow experiment to list scheduled scorers for. If None, uses the currently active experiment. Returns: A list of ScheduledScorerConfig objects representing all scheduled scorers configured for the specified experiment. Example: .. code-block:: python import mlflow from mlflow.genai.scheduled_scorers import list_scheduled_scorers # List scorers for a specific experiment scorers = list_scheduled_scorers(experiment_id="12345") for scorer in scorers: print(f"Scorer: {scorer.scheduled_scorer_name}") print(f"Sample rate: {scorer.sample_rate}") print(f"Filter: {scorer.filter_string}") # List scorers for the current active experiment mlflow.set_experiment("my_genai_app_monitoring") current_scorers = list_scheduled_scorers() print(f"Found {len(current_scorers)} scheduled scorers") .. warning:: This API is in Beta and may change or be removed in a future release without warning. """ try: from databricks.agents.scorers import list_scheduled_scorers except ImportError as e: raise ImportError(_ERROR_MSG) from e return list_scheduled_scorers(experiment_id, **kwargs)
[docs]@experimental def set_scheduled_scorers( # clint: disable=missing-docstring-param # noqa: D417 *, scheduled_scorers: list[ScorerScheduleConfig], experiment_id: Optional[str] = None, **kwargs, ) -> None: """ Replace all scheduled scorers for an experiment with the provided list. This function removes all existing scheduled scorers for the specified experiment and replaces them with the new list. This is useful for batch configuration updates or when you want to ensure only specific scorers are active. Args: scheduled_scorers: A list of ScheduledScorerConfig objects to set as the complete set of scheduled scorers for the experiment. Any existing scheduled scorers not in this list will be removed. experiment_id: The ID of the MLflow experiment to configure. If None, uses the currently active experiment. Example: .. code-block:: python from mlflow.genai.scorers import Safety, Correctness, RelevanceToQuery from mlflow.genai.scheduled_scorers import ScorerScheduleConfig, set_scheduled_scorers # Define a complete monitoring configuration monitoring_config = [ ScorerScheduleConfig( scorer=Safety(), scheduled_scorer_name="safety_check", sample_rate=1.0, # Check all traces for safety ), ScorerScheduleConfig( scorer=Correctness(), scheduled_scorer_name="correctness_check", sample_rate=0.2, # Sample 20% for correctness (more expensive) filter_string="trace.status = 'OK'", ), ScorerScheduleConfig( scorer=RelevanceToQuery(), scheduled_scorer_name="relevance_check", sample_rate=0.5, # Sample 50% for relevance ), ] # Apply this configuration, replacing any existing scorers set_scheduled_scorers(scheduled_scorers=monitoring_config) Warning: This function will remove all existing scheduled scorers for the experiment that are not included in the provided list. Use add_scheduled_scorer() if you want to add scorers without affecting existing ones. Note: Existing Assessments will remain in the Traces tab of the MLflow experiment. .. warning:: This API is in Beta and may change or be removed in a future release without warning. """ try: from databricks.agents.scorers import set_scheduled_scorers except ImportError as e: raise ImportError(_ERROR_MSG) from e return set_scheduled_scorers(experiment_id, scheduled_scorers, **kwargs)