"""Configuration for run evaluators."""

from typing import Any, Callable, Dict, List, Optional, Sequence, Union

from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langsmith import RunEvaluator
from langsmith.evaluation.evaluator import EvaluationResult, EvaluationResults
from langsmith.schemas import Example, Run

from langchain.evaluation.criteria.eval_chain import CRITERIA_TYPE
from langchain.evaluation.embedding_distance.base import (
    EmbeddingDistance as EmbeddingDistanceEnum,
)
from langchain.evaluation.schema import EvaluatorType, StringEvaluator
from langchain.evaluation.string_distance.base import (
    StringDistance as StringDistanceEnum,
)

RUN_EVALUATOR_LIKE = Callable[
    [Run, Optional[Example]], Union[EvaluationResult, EvaluationResults, dict]
]
BATCH_EVALUATOR_LIKE = Callable[
    [Sequence[Run], Optional[Sequence[Example]]],
    Union[EvaluationResult, EvaluationResults, dict],
]


class EvalConfig(BaseModel):
    """Configuration for a given run evaluator.

    Parameters
    ----------
    evaluator_type : EvaluatorType
        The type of evaluator to use.

    Methods
    -------
    get_kwargs()
        Get the keyword arguments for the evaluator configuration.

    """

    evaluator_type: EvaluatorType

    def get_kwargs(self) -> Dict[str, Any]:
        """Get the keyword arguments for the load_evaluator call.

        Returns
        -------
        Dict[str, Any]
            The keyword arguments for the load_evaluator call.

        """
        kwargs = {}
        for field, val in self:
            # Skip the evaluator_type discriminator and any unset options.
            if field == "evaluator_type":
                continue
            elif val is None:
                continue
            kwargs[field] = val
        return kwargs


class SingleKeyEvalConfig(EvalConfig):
    """Configuration for a run evaluator that only requires a single key."""

    reference_key: Optional[str] = None
    """The key in the dataset run to use as the reference string.
    If not provided, it will be inferred automatically."""
    prediction_key: Optional[str] = None
    """The key from the traced run's outputs dictionary to use to
    represent the prediction. If not provided, it will be inferred
    automatically."""
    input_key: Optional[str] = None
    """The key from the traced run's inputs dictionary to use to represent the
    input. If not provided, it will be inferred automatically."""

    def get_kwargs(self) -> Dict[str, Any]:
        kwargs = super().get_kwargs()
        # These keys configure the evaluation harness itself rather than the
        # underlying evaluator, so they are not forwarded to load_evaluator.
        for key in ["reference_key", "prediction_key", "input_key"]:
            kwargs.pop(key, None)
        return kwargs


CUSTOM_EVALUATOR_TYPE = Union[RUN_EVALUATOR_LIKE, RunEvaluator, StringEvaluator]
SINGLE_EVAL_CONFIG_TYPE = Union[EvaluatorType, str, EvalConfig]


class RunEvalConfig(BaseModel):
    """Configuration for a run evaluation.

    Parameters
    ----------
    evaluators : List[Union[EvaluatorType, EvalConfig, RunEvaluator, Callable]]
        Configurations for which evaluators to apply to the dataset run.
        Each can be the string of an :class:`EvaluatorType <langchain.evaluation.schema.EvaluatorType>`, such
        as EvaluatorType.QA, the evaluator type string ("qa"), or a configuration for a
        given evaluator (e.g., :class:`RunEvalConfig.QA <langchain.smith.evaluation.config.RunEvalConfig.QA>`).

    custom_evaluators : Optional[List[Union[RunEvaluator, StringEvaluator]]]
        Custom evaluators to apply to the dataset run.

    reference_key : Optional[str]
        The key in the dataset run to use as the reference string.
        If not provided, it will be inferred automatically.

    prediction_key : Optional[str]
        The key from the traced run's outputs dictionary to use to
        represent the prediction. If not provided, it will be inferred
        automatically.

    input_key : Optional[str]
        The key from the traced run's inputs dictionary to use to represent the
        input. If not provided, it will be inferred automatically.

    eval_llm : Optional[BaseLanguageModel]
        The language model to pass to any evaluators that use a language model.
    """

    evaluators: List[Union[SINGLE_EVAL_CONFIG_TYPE, CUSTOM_EVALUATOR_TYPE]] = Field(
        default_factory=list
    )
    custom_evaluators: Optional[List[CUSTOM_EVALUATOR_TYPE]] = None
    batch_evaluators: Optional[List[BATCH_EVALUATOR_LIKE]] = None
    """Evaluators that run at the aggregate level, receiving the full
    sequence of runs (and examples) rather than a single run."""
    reference_key: Optional[str] = None
    prediction_key: Optional[str] = None
    input_key: Optional[str] = None
    eval_llm: Optional[BaseLanguageModel] = None

    class Config:
        arbitrary_types_allowed = True

    class Criteria(SingleKeyEvalConfig):
        """Configuration for a reference-free criteria evaluator.

        Parameters
        ----------
        criteria : Optional[CRITERIA_TYPE]
            The criteria to evaluate.
        llm : Optional[BaseLanguageModel]
            The language model to use for the evaluation chain.
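
        Examples
        --------
        An illustrative sketch; the criterion name and description are
        placeholders, not built-ins:

        >>> criteria = {"relevance": "Is the submission relevant to the input?"}
        >>> config = RunEvalConfig.Criteria(criteria)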

        Ncriteriallmr   r;   r   r   c                    s   t  jf d|i| d S Nr;   r-   __init__r   r;   r   r0   r   r!   r@      s    zRunEvalConfig.Criteria.__init__)N)r#   r$   r%   r&   r;   r   r   r'   r<   r
   r   ZCRITERIAr   r   r@   r2   r   r   r0   r!   Criteria   s   
  rB   c                       s`   e Zd ZU dZdZee ed< dZee	 ed< e
jZe
ed< d	ee edd fddZ  ZS )
zRunEvalConfig.LabeledCriteriaa,  Configuration for a labeled (with references) criteria evaluator.

        Parameters
        ----------
        criteria : Optional[CRITERIA_TYPE]
            The criteria to evaluate.
        llm : Optional[BaseLanguageModel]
            The language model to use for the evaluation chain.
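
        Examples
        --------
        An illustrative sketch using the built-in "correctness" criterion:

        >>> config = RunEvalConfig.LabeledCriteria("correctness")
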
        """

        criteria: Optional[CRITERIA_TYPE] = None
        llm: Optional[BaseLanguageModel] = None
        evaluator_type: EvaluatorType = EvaluatorType.LABELED_CRITERIA

        def __init__(
            self, criteria: Optional[CRITERIA_TYPE] = None, **kwargs: Any
        ) -> None:
            super().__init__(criteria=criteria, **kwargs)

    class EmbeddingDistance(SingleKeyEvalConfig):
        """Configuration for an embedding distance evaluator.

        Parameters
        ----------
        embeddings : Optional[Embeddings]
            The embeddings to use for computing the distance.

        distance_metric : Optional[EmbeddingDistanceEnum]
            The distance metric to use for computing the distance.
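
        Examples
        --------
        An illustrative sketch using the module-level alias
        ``EmbeddingDistanceEnum``:

        >>> config = RunEvalConfig.EmbeddingDistance(
        ...     distance_metric=EmbeddingDistanceEnum.COSINE,
        ... )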

        """

        evaluator_type: EvaluatorType = EvaluatorType.EMBEDDING_DISTANCE
        embeddings: Optional[Embeddings] = None
        distance_metric: Optional[EmbeddingDistanceEnum] = None

        class Config:
            arbitrary_types_allowed = True

    class StringDistance(SingleKeyEvalConfig):
        """Configuration for a string distance evaluator.

        Parameters
        ----------
        distance : Optional[StringDistanceEnum]
            The string distance metric to use.
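
        Examples
        --------
        An illustrative sketch using the module-level alias
        ``StringDistanceEnum``:

        >>> config = RunEvalConfig.StringDistance(
        ...     distance=StringDistanceEnum.LEVENSHTEIN,
        ... )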

        """

        evaluator_type: EvaluatorType = EvaluatorType.STRING_DISTANCE
        distance: Optional[StringDistanceEnum] = None
        normalize_score: bool = True
        """Whether to normalize the distance to a value between 0 and 1."""

    class QA(SingleKeyEvalConfig):
        """Configuration for a QA evaluator.

        Parameters
        ----------
        prompt : Optional[BasePromptTemplate]
            The prompt template to use for the evaluation chain.
        llm : Optional[BaseLanguageModel]
            The language model to use for the evaluation chain.
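
        Examples
        --------
        An illustrative sketch; all parameters are optional:

        >>> config = RunEvalConfig.QA()
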
        """

        evaluator_type: EvaluatorType = EvaluatorType.QA
        llm: Optional[BaseLanguageModel] = None
        prompt: Optional[BasePromptTemplate] = None

    class ContextQA(SingleKeyEvalConfig):
        """Configuration for a context-based QA evaluator.

        Parameters
        ----------
        prompt : Optional[BasePromptTemplate]
            The prompt template to use for the evaluation chain.
        llm : Optional[BaseLanguageModel]
            The language model to use for the evaluation chain.
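
        Examples
        --------
        An illustrative sketch:

        >>> config = RunEvalConfig.ContextQA()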

        """

        evaluator_type: EvaluatorType = EvaluatorType.CONTEXT_QA
        llm: Optional[BaseLanguageModel] = None
        prompt: Optional[BasePromptTemplate] = None

    class CoTQA(SingleKeyEvalConfig):
        """Configuration for a chain-of-thought ("CoT") QA evaluator.

        Parameters
        ----------
        prompt : Optional[BasePromptTemplate]
            The prompt template to use for the evaluation chain.
        llm : Optional[BaseLanguageModel]
            The language model to use for the evaluation chain.

        """

        evaluator_type: EvaluatorType = EvaluatorType.COT_QA
        llm: Optional[BaseLanguageModel] = None
        prompt: Optional[BasePromptTemplate] = None

    class JsonValidity(SingleKeyEvalConfig):
        """Configuration for a JSON validity evaluator."""

        evaluator_type: EvaluatorType = EvaluatorType.JSON_VALIDITY

    class JsonEqualityEvaluator(EvalConfig):
        """Configuration for a JSON equality evaluator."""

        evaluator_type: EvaluatorType = EvaluatorType.JSON_EQUALITY

    class ExactMatch(SingleKeyEvalConfig):
        """Configuration for an exact match string evaluator.

        Parameters
        ----------
        ignore_case : bool
            Whether to ignore case when comparing strings.
        ignore_punctuation : bool
            Whether to ignore punctuation when comparing strings.
        ignore_numbers : bool
            Whether to ignore numbers when comparing strings.
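
        Examples
        --------
        An illustrative sketch:

        >>> config = RunEvalConfig.ExactMatch(ignore_case=True)
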
        """

        evaluator_type: EvaluatorType = EvaluatorType.EXACT_MATCH
        ignore_case: bool = False
        ignore_punctuation: bool = False
        ignore_numbers: bool = False

    class RegexMatch(SingleKeyEvalConfig):
        """Configuration for a regex match string evaluator.

        Parameters
        ----------
        flags : int
            The flags to pass to the regex. Example: re.IGNORECASE.
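
        Examples
        --------
        An illustrative sketch:

        >>> import re
        >>> config = RunEvalConfig.RegexMatch(flags=re.IGNORECASE)
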
        """

        evaluator_type: EvaluatorType = EvaluatorType.REGEX_MATCH
        flags: int = 0

    class ScoreString(SingleKeyEvalConfig):
        """Configuration for a score string evaluator.

        This is like the criteria evaluator, but it is configured by
        default to return a score on a scale from 1 to 10.

        It is recommended to normalize these scores
        by setting `normalize_by` to 10.

        Parameters
        ----------
        criteria : Optional[CRITERIA_TYPE]
            The criteria to evaluate.
        llm : Optional[BaseLanguageModel]
            The language model to use for the evaluation chain.
        normalize_by : Optional[float]
            If you want to normalize the score, the denominator to use.
            If not provided, the score will be between 1 and 10 (by default).
        prompt : Optional[BasePromptTemplate]
            The prompt template to use for the evaluation chain.
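
        Examples
        --------
        An illustrative sketch that normalizes the 1-10 score to 0-1:

        >>> config = RunEvalConfig.ScoreString(normalize_by=10.0)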

        """

        evaluator_type: EvaluatorType = EvaluatorType.SCORE_STRING
        criteria: Optional[CRITERIA_TYPE] = None
        llm: Optional[BaseLanguageModel] = None
        normalize_by: Optional[float] = None
        prompt: Optional[BasePromptTemplate] = None

        def __init__(
            self,
            criteria: Optional[CRITERIA_TYPE] = None,
            normalize_by: Optional[float] = None,
            **kwargs: Any,
        ) -> None:
            super().__init__(criteria=criteria, normalize_by=normalize_by, **kwargs)

    class LabeledScoreString(ScoreString):
        """Same as ``ScoreString``, but the evaluator also receives the
        reference label."""

        evaluator_type: EvaluatorType = EvaluatorType.LABELED_SCORE_STRING