U
    hG                     @  s   d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ eeZG dd deeZG dd deZG dd dZG dd deeZ G dd deeZ!G dd deeZ"dS )z3Interfaces to be implemented by general evaluators.    )annotationsN)ABCabstractmethod)Enum)AnyOptionalSequenceTupleUnion)warn)AgentAction)BaseLanguageModel)run_in_executor)Chainc                   @  s`   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdS )EvaluatorTypezThe types of the evaluators.ZqaZcot_qaZ
context_qaZpairwise_stringZscore_stringZlabeled_pairwise_stringZlabeled_score_stringZ
trajectoryZcriteriaZlabeled_criteriaZstring_distanceZexact_matchZregex_matchZpairwise_string_distanceZembedding_distanceZpairwise_embedding_distanceZjson_validityZjson_equalityZjson_edit_distanceZjson_schema_validationN)__name__
__module____qualname____doc__ZQAZCOT_QAZ
CONTEXT_QAZPAIRWISE_STRINGZSCORE_STRINGZLABELED_PAIRWISE_STRINGZLABELED_SCORE_STRINGZAGENT_TRAJECTORYZCRITERIAZLABELED_CRITERIAZSTRING_DISTANCEZEXACT_MATCHZREGEX_MATCHZPAIRWISE_STRING_DISTANCEZEMBEDDING_DISTANCEZPAIRWISE_EMBEDDING_DISTANCEZJSON_VALIDITYZJSON_EQUALITYZJSON_EDIT_DISTANCEZJSON_SCHEMA_VALIDATION r   r   ?/tmp/pip-unpacked-wheel-bo69hh5q/langchain/evaluation/schema.pyr      s,   r   c                   @  s*   e Zd ZdZeeddd dddZdS )LLMEvalChainz,A base class for evaluators that use an LLM.r   r   )llmkwargsreturnc                 K  s   dS )z#Create a new evaluator from an LLM.Nr   )clsr   r   r   r   r   from_llmM   s    zLLMEvalChain.from_llmN)r   r   r   r   classmethodr   r   r   r   r   r   r   J   s   r   c                   @  sl   e Zd ZdZeddddZeddddZeddd	d
ZeddddZdddddddZ	dS )_EvalArgsMixinz(Mixin for checking evaluation arguments.boolr   c                 C  s   dS z2Whether this evaluator requires a reference label.Fr   selfr   r   r   requires_referenceV   s    z!_EvalArgsMixin.requires_referencec                 C  s   dS )0Whether this evaluator requires an input string.Fr   r"   r   r   r   requires_input[   s    z_EvalArgsMixin.requires_inputstrc                 C  s   d| j j dS )z&Warning to show when input is ignored.zIgnoring input in , as it is not expected.	__class__r   r"   r   r   r   _skip_input_warning`   s    z"_EvalArgsMixin._skip_input_warningc                 C  s   d| j j dS )z*Warning to show when reference is ignored.zIgnoring reference in r(   r)   r"   r   r   r   _skip_reference_warninge   s    z&_EvalArgsMixin._skip_reference_warningNOptional[str]None)	referenceinputr   c                 C  sx   | j r"|dkr"t| jj dn|dk	r:| j s:t| j | jr\|dkr\t| jj dn|dk	rt| jstt| j dS )a  Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.z requires a reference string.)r&   
ValueErrorr*   r   r   r+   r$   r,   )r#   r/   r0   r   r   r   _check_evaluation_argsl   s    
z%_EvalArgsMixin._check_evaluation_args)NN)
r   r   r   r   propertyr$   r&   r+   r,   r2   r   r   r   r   r   S   s     r   c                   @  s   e Zd ZdZeddddZeddddZed	d	d
ddddddddZd	d	d
ddddddddZ	d	d	d
ddddddddZ
d	d	d
ddddddddZd	S )StringEvaluatorzcGrade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.r'   r    c                 C  s   | j jS )zThe name of the evaluation.r)   r"   r   r   r   evaluation_name   s    zStringEvaluator.evaluation_namer   c                 C  s   dS r!   r   r"   r   r   r   r$      s    z"StringEvaluator.requires_referenceNr/   r0   zUnion[str, Any]zOptional[Union[str, Any]]r   dict)
predictionr/   r0   r   r   c                K  s   dS )a:  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr   r#   r8   r/   r0   r   r   r   r   _evaluate_strings   s    	z!StringEvaluator._evaluate_stringsc                  s"   t d| jf|||d|I dH S )aI  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr8   r/   r0   )r   r:   r9   r   r   r   _aevaluate_strings   s    z"StringEvaluator._aevaluate_stringsr-   c                K  s&   | j ||d | jf |||d|S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r6   r;   )r2   r:   r9   r   r   r   evaluate_strings   s      z StringEvaluator.evaluate_stringsc                  s,   | j ||d | jf |||d|I dH S )a	  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r6   r;   N)r2   r<   r9   r   r   r   aevaluate_strings   s      z!StringEvaluator.aevaluate_strings)r   r   r   r   r3   r5   r$   r   r:   r<   r=   r>   r   r   r   r   r4      s$   #r4   c                	   @  s   e Zd ZdZeddddddddddd	d
ZddddddddddddZddddddddddddZddddddddddddZdS )PairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nr6   r'   r-   r   r7   )r8   prediction_br/   r0   r   r   c                K  s   dS )1  Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr   r#   r8   r@   r/   r0   r   r   r   r   _evaluate_string_pairs   s    
z.PairwiseStringEvaluator._evaluate_string_pairsc                  s$   t d| jf||||d|I dH S )@  Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr8   r@   r/   r0   )r   rC   rB   r   r   r   _aevaluate_string_pairs  s    z/PairwiseStringEvaluator._aevaluate_string_pairsc                K  s(   | j ||d | jf ||||d|S )rA   r6   rE   )r2   rC   rB   r   r   r   evaluate_string_pairs/  s    z-PairwiseStringEvaluator.evaluate_string_pairsc                  s.   | j ||d | jf ||||d|I dH S )rD   r6   rE   N)r2   rF   rB   r   r   r   aevaluate_string_pairsL  s    z.PairwiseStringEvaluator.aevaluate_string_pairs)	r   r   r   r   r   rC   rF   rG   rH   r   r   r   r   r?      s   #"r?   c                	   @  s   e Zd ZdZeddddZedddd	dd
dddddZdddd	dd
dddddZdddd	dd
dddddZ	dddd	dd
dddddZ
dS )AgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.r   r    c                 C  s   dS )r%   Tr   r"   r   r   r   r&   m  s    z'AgentTrajectoryEvaluator.requires_inputN)r/   r'   z!Sequence[Tuple[AgentAction, str]]r-   r   r7   )r8   agent_trajectoryr0   r/   r   r   c                K  s   dS )  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr   r#   r8   rJ   r0   r/   r   r   r   r   _evaluate_agent_trajectoryr  s    
z3AgentTrajectoryEvaluator._evaluate_agent_trajectoryc                  s$   t d| jf||||d|I dH S )  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)r8   rJ   r/   r0   )r   rM   rL   r   r   r   _aevaluate_agent_trajectory  s    z4AgentTrajectoryEvaluator._aevaluate_agent_trajectoryc                K  s(   | j ||d | jf ||||d|S )rK   r6   r8   r0   rJ   r/   )r2   rM   rL   r   r   r   evaluate_agent_trajectory  s    z2AgentTrajectoryEvaluator.evaluate_agent_trajectoryc                  s.   | j ||d | jf ||||d|I dH S )rN   r6   rP   N)r2   rO   rL   r   r   r   aevaluate_agent_trajectory  s    z3AgentTrajectoryEvaluator.aevaluate_agent_trajectory)r   r   r   r   r3   r&   r   rM   rO   rQ   rR   r   r   r   r   rI   j  s   %$rI   )#r   
__future__r   loggingabcr   r   enumr   typingr   r   r   r	   r
   warningsr   Zlangchain_core.agentsr   Zlangchain_core.language_modelsr   Zlangchain_core.runnables.configr   Zlangchain.chains.baser   	getLoggerr   loggerr'   r   r   r   r4   r?   rI   r   r   r   r   <module>   s"   
6	1tr