
    	h                    "   d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ 	 dd
lmZmZmZmZ n# e$ r dd
lmZmZmZmZ Y nw xY wddlZddl m!Z! ddl"m#Z#m$Z$m%Z%m&Z&  ej'        e(          Z) G d de          Z* G d ded          Z+ G d de          Z, G d ded          Z- G d d          Z.ee,e-e/f         Z0 G d de          Z1ee1e/f         Z2 G d de.          Z3d4dZ4d Z5d5d#Z6 G d$ d%          Z7d6d(Z8d7d+Z9d8d-Z:d9d1Z;eee
ej&                 e
ej%                 gee,e-f         f         ee<ej&                 e<ej%                 gee,e-f         f         f         Z=d:d3Z>dS );z?This module contains the evaluator classes for evaluating runs.    )annotationsN)abstractmethod)	AwaitableSequence)AnyCallableLiteralOptionalUnioncast)	TypedDictrun_helpers)schemas)	BaseModelFieldValidationError	validator)wraps)
SCORE_TYPE
VALUE_TYPEExampleRunc                  *    e Zd ZU dZded<   	 ded<   dS )Categoryz$A category for categorical feedback.Optional[Union[float, int]]valuestrlabelN__name__
__module____qualname____doc____annotations__     k/var/www/html/web-builder-api.evdpl.com/venv/lib/python3.11/site-packages/langsmith/evaluation/evaluator.pyr   r   /   s0         ..&&&&CJJJ&&r'   r   c                  B    e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   d	S )
FeedbackConfigziConfiguration to define a type of feedback.

    Applied on on the first creation of a feedback_key.
    z0Literal['continuous', 'categorical', 'freeform']typer   minmaxz%Optional[list[Union[Category, dict]]]
categoriesNr    r&   r'   r(   r*   r*   8   sW          
 ;:::$$$$;$$$$A555555r'   r*   F)totalc                  
   e Zd ZU dZded<   	 dZded<   	 dZded<   	 dZd	ed
<   	 dZded<   	  e	e
          Zded<   	 dZded<   	 dZded<   	 dZded<   	 dZded<   	  G d d          Z edd          d             ZdS )EvaluationResultzEvaluation result.r   keyNr   scorer   r   zOptional[str]commentzOptional[dict]
correction)default_factorydictevaluator_infoz%Optional[Union[FeedbackConfig, dict]]feedback_configOptional[Union[uuid.UUID, str]]source_run_idtarget_run_idextrac                      e Zd ZdZdZdS )EvaluationResult.ConfigzPydantic model configuration.FN)r!   r"   r#   r$   allow_extrar&   r'   r(   Configr?   b   s        ++r'   rA   T)prec                    d|vs|d         9t          |t          t          f          rt                              d|            |S )z$Check that the value is not numeric.r3   NzJNumeric values should be provided in the 'score' field, not 'value'. Got: )
isinstanceintfloatloggerwarning)clsvvaluess      r(   check_value_non_numericz(EvaluationResult.check_value_non_numericg   s]    
 &  F7O$;!c5\** !! !  
 r'   )r!   r"   r#   r$   r%   r3   r   r4   r5   r   r7   r8   r9   r;   r<   r=   rA   r   rL   r&   r'   r(   r1   r1   G   s@        HHH@E0E8!G!!!!2!%J%%%%: 5666N66665=AOAAAA;59M9999659M9999 !E    )       
 YwD!!!  "!  r'   r1   c                      e Zd ZU dZded<   dS )EvaluationResultszqBatch evaluation results.

    This makes it easy for your evaluator to return multiple
    metrics at once.
    zlist[EvaluationResult]resultsNr    r&   r'   r(   rN   rN   v   s*           $###!!r'   rN   c                  >    e Zd ZdZe	 	 ddd            Z	 	 dddZdS )RunEvaluatorzEvaluator interface class.Nrunr   exampleOptional[Example]evaluator_run_idOptional[uuid.UUID]return*Union[EvaluationResult, EvaluationResults]c                    dS )zEvaluate an example.Nr&   )selfrR   rS   rU   s       r(   evaluate_runzRunEvaluator.evaluate_run   s      r'   c                    K   t          j                     fd}t          j                                        d|           d{V S )z#Evaluate an example asynchronously.c                     t          j        di  5                                cd d d            S # 1 swxY w Y   d S )Nr&   )rhtracing_contextr[   )current_contextrU   rS   rR   rZ   s   r(   _run_with_contextz5RunEvaluator.aevaluate_run.<locals>._run_with_context   s    #66o66 I I((g7GHHI I I I I I I I I I I I I I I I I Is   7;;N)r^   get_tracing_contextasyncioget_running_looprun_in_executor)rZ   rR   rS   rU   ra   r`   s   ```` @r(   aevaluate_runzRunEvaluator.aevaluate_run   s       022	I 	I 	I 	I 	I 	I 	I 	I 	I -//??FWXXXXXXXXXr'   NNrR   r   rS   rT   rU   rV   rW   rX   )r!   r"   r#   r$   r   r[   rf   r&   r'   r(   rQ   rQ      sm        $$ &*04	# # # # ^# &*04	Y Y Y Y Y Y Yr'   rQ   c                  J    e Zd ZU dZded<   	 ded<   	 dZded<   	 dZd	ed
<   dS )ComparisonEvaluationResultzFeedback scores for the results of comparative evaluations.

    These are generated by functions that compare two or more runs,
    returning a ranking or other feedback.
    r   r2   z'dict[Union[uuid.UUID, str], SCORE_TYPE]scoresNr:   r;   z6Optional[Union[str, dict[Union[uuid.UUID, str], str]]]r4   )r!   r"   r#   r$   r%   r;   r4   r&   r'   r(   rj   rj      sb           HHH@3333459M99996FJGJJJJ: :r'   rj   c                       e Zd ZdZ	 d$d%dZ	 d&d'dZd(dZd)dZed*d            Z		 	 d+d,dZ
	 	 d+d- fd Z	 d$d.d!Zd/d#Z xZS )0DynamicRunEvaluatora  A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`.

    This class is designed to be used with the `@run_evaluator` decorator, allowing
    functions that take a `Run` and an optional `Example` as arguments, and return
    an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`.

    Attributes:
        func (Callable): The function that is wrapped by this evaluator.
    NfuncXCallable[[Run, Optional[Example]], Union[_RUNNABLE_OUTPUT, Awaitable[_RUNNABLE_OUTPUT]]]afuncIOptional[Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]]c                   t          |          \  }|rt          |          \  }dfd} t          |          |            ddlm} |2|                    ||          | _        t          |d	d
          | _        t          j	        |          rE|t          d          |                    ||          | _        t          |d	d
          | _        dS |                    t          t          t          t          t                   gt           f         |          |          | _        t          |d	d
          | _        dS )zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns a dict or `ComparisonEvaluationResult`.
        inputsr7   rW   c                |    | S  |                      d          |                      d                    \  }}}|S )NrR   rS   getrs   _traced_inputsprepare_inputss      r(   process_inputsz4DynamicRunEvaluator.__init__.<locals>.process_inputs   sK    %$2N

5!!6::i#8#8% %!Q= ! r'   r   r   Nr{   r!   rm   Func was provided as a coroutine function, but afunc was also provided. If providing both, func should be a regular function to avoid ambiguity.rs   r7   rW   r7   )_normalize_evaluator_funcr   	langsmithr   ensure_traceablerp   getattr_nameinspectiscoroutinefunction	TypeErrorr   r   r   r
   r   _RUNNABLE_OUTPUTrn   rZ   rn   rp   r{   r   rz   s        @r(   __init__zDynamicRunEvaluator.__init__   s   ( ";4!@!@~ 	G&?&F&F#UN	! 	! 	! 	! 	! 	! 	dD))))))$55n 6  DJ !
4IJJDJ&t,, 	J 3  
 %55^ 6  DJ !z3HIIDJJJ#44XsHW$568HHI4PP- 5  DI !z3HIIDJJJr'   FresultUnion[EvaluationResult, dict]r;   	uuid.UUIDallow_no_keyboolrW   r1   c                \   t          t                    rj        s|_        S 	 st          d           dvr|r
| j        d<   t          fddD                       rt          d           t          di d|iS # t          $ r}t          d           |d }~ww xY w)	NziExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got empty result: r2   c              3      K   | ]}|vV  	d S Nr&   ).0kr   s     r(   	<genexpr>z@DynamicRunEvaluator._coerce_evaluation_result.<locals>.<genexpr>  s'      JJq1F?JJJJJJr'   )r3   r   r4   zrExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score' or categorical 'value'; got r;   z[Expected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got r&   )rD   r1   r;   
ValueErrorr   allr   )rZ   r   r;   r   es    `   r(   _coerce_evaluation_resultz-DynamicRunEvaluator._coerce_evaluation_result   s6    f.// 	' 5'4$M	  OFLO O   F""|" $
uJJJJ,IJJJJJ  XOUX X   $QQ&P&PQQQ 	 	 	=4:= =  	s   A B	 	
B+B&&B+rO   Union[dict, EvaluationResults]rX   c                     d|v r8|                                 } fd|d         D             |d<   t          di |S                      t          t          |          d          S )NrO   c                >    g | ]}                     |           S ))r;   )r   )r   rrZ   r;   s     r(   
<listcomp>zBDynamicRunEvaluator._coerce_evaluation_results.<locals>.<listcomp>  s<        ..q.NN  r'   T)r;   r   r&   )copyrN   r   r   r7   )rZ   rO   r;   cps   ` ` r(   _coerce_evaluation_resultsz.DynamicRunEvaluator._coerce_evaluation_results  s    
 B     +  ByM %**r***--w}4 . 
 
 	
r'   MUnion[EvaluationResult, EvaluationResults, dict, str, int, bool, float, list]c                    t          |t                    r|j        s||_        |S t          |          }|                     ||          S r   )rD   r1   r;   _format_evaluator_resultr   )rZ   r   r;   s      r(   _format_resultz"DynamicRunEvaluator._format_result)  sR     f.// 	' 5'4$M)&11..v}EEEr'   c                "    t          | d          S zCheck if the evaluator function is asynchronous.

        Returns:
            bool: True if the evaluator function is asynchronous, False otherwise.
        rp   hasattrrZ   s    r(   is_asynczDynamicRunEvaluator.is_async7       tW%%%r'   rR   r   rS   rT   rU   rV   c                   t          | d          s_t          j                    }|                                rt	          d          |                    |                     ||                    S |t          j                    }d|j	        i}t          |dd          rt          |j                  |d<   |                     ||||d          }|                     ||          S )	a  Evaluate a run using the wrapped function.

        This method directly invokes the wrapped function with the provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        rn   tCannot call `evaluate_run` on an async run evaluator from within an running event loop. Use `aevaluate_run` instead.Nr<   
session_id
experimentrun_idmetadatalangsmith_extra)r   rc   get_event_loop
is_runningRuntimeErrorrun_until_completerf   uuiduuid4idr   r   r   rn   r   )rZ   rR   rS   rU   running_loopr   r   s          r(   r[   z DynamicRunEvaluator.evaluate_run@  s    " tV$$ 	Y"133L&&(( Y"R  
 $66t7I7I#w7W7WXXX##z||$3SV#<3d++ 	9%(%8%8H\"'7XNN  
 

 ""6+;<<<r'   c                r  K   t          | d          s(t                                          ||           d{V S |t          j                    }d|j        i}t          |dd          rt          |j                  |d<   | 	                    ||||d           d{V }| 
                    ||          S )a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        rp   Nr<   r   r   r   r   )r   superrf   r   r   r   r   r   r   rp   r   )rZ   rR   rS   rU   r   r   	__class__s         r(   rf   z!DynamicRunEvaluator.aevaluate_runf  s      & tW%% 	=..sG<<<<<<<<<##z||$3SV#<3d++ 	9%(%8%8H\"zz'7XNN " 
 
 
 
 
 
 
 

 ""6+;<<<r'   c                .    |                      ||          S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        )r[   )rZ   rR   rS   s      r(   __call__zDynamicRunEvaluator.__call__  s       g...r'   r   c                    d| j          dS ))Represent the DynamicRunEvaluator object.z<DynamicRunEvaluator >r   r   s    r(   __repr__zDynamicRunEvaluator.__repr__  s    4tz4444r'   r   )rn   ro   rp   rq   )F)r   r   r;   r   r   r   rW   r1   )rO   r   r;   r   rW   rX   )r   r   r;   r   rW   rX   rW   r   rg   rh   )rR   r   rS   rT   rU   rV   )rR   r   rS   rT   rW   rX   rW   r   )r!   r"   r#   r$   r   r   r   r   propertyr   r[   rf   r   r   __classcell__)r   s   @r(   rm   rm      s7        , 8J 8J 8J 8J 8J| #	    <
 
 
 
"F F F F & & & X& &*04	$= $= $= $= $=R &*04	= = = = = = =D 6:/ / / / /"5 5 5 5 5 5 5 5r'   rm   rn   ro   c                     t          |           S )zmCreate a run evaluator from a function.

    Decorator that transforms a function into a `RunEvaluator`.
    )rm   rn   s    r(   run_evaluatorr     s     t$$$r'   i'  objr   c                ~    t          |           }t          |          t          k    r|d t          dz
           dz   }|S )N   z...))reprlen_MAXSIZE)r   ss     r(   _maxsize_reprr     s:    S		A
1vvn1n&Hr'   c                      e Zd ZdZ	 dddZedd
            Z	 dddZ	 dddZ	 dddZ	d dZ
ed!d            Zd"dZdS )#DynamicComparisonRunEvaluatorz4Compare predictions (as traces) from 2 or more runs.Nrn   fCallable[[Sequence[Run], Optional[Example]], Union[_COMPARISON_OUTPUT, Awaitable[_COMPARISON_OUTPUT]]]rp   UOptional[Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]c                   t          |          \  }|rt          |          \  }dfd} t          |          |            ddlm} |2|                    ||          | _        t          |d	d
          | _        t          j	        |          rE|t          d          |                    ||          | _        t          |d	d
          | _        dS |                    t          t          t          t                   t          t                    gt"          f         |          |          | _        t          |d	d
          | _        dS )zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns an `EvaluationResult` or `EvaluationResults`.
        rs   r7   rW   c                |    | S  |                      d          |                      d                    \  }}}|S )NrunsrS   ru   rw   s      r(   r{   z>DynamicComparisonRunEvaluator.__init__.<locals>.process_inputs  sK    %$2N

6""FJJy$9$9% %!Q= ! r'   r   r   Nr|   r!   rm   r}   r~   )$_normalize_comparison_evaluator_funcr   r   r   r   rp   r   r   r   r   r   r   r   r   r   r
   r   _COMPARISON_OUTPUTrn   r   s        @r(   r   z&DynamicComparisonRunEvaluator.__init__  s   ( "Fd!K!K~ 	R&J5&Q&Q#UN	! 	! 	! 	! 	! 	! 	dD))))))$55n 6  DJ !
4IJJDJ&t,, 	J 3  
 %55^ 6  DJ !z3HIIDJJJ#44!#(9:*,    . 5 	 	DI !z3HIIDJJJr'   rW   r   c                "    t          | d          S r   r   r   s    r(   r   z&DynamicComparisonRunEvaluator.is_async  r   r'   r   Sequence[Run]rS   rT   rj   c                   t          | d          s_t          j                    }|                                rt	          d          |                    |                     ||                    S t          j                    }| 	                    |          }| 
                    ||||d          }|                     |||          S )zCompare runs to score preferences.

        Args:
            runs: A list of runs to compare.
            example: An optional example to be used in the evaluation.

        rn   r   r   tagsr   )r   rc   r   r   r   r   acompare_runsr   r   	_get_tagsrn   _format_results)rZ   r   rS   r   r;   r   r   s          r(   compare_runsz*DynamicComparisonRunEvaluator.compare_runs  s     tV$$ 
	"133L&&(( "R  
 $66&&tW55   
~~d##'4dCC  
 

 ##FM4@@@r'   c                  K   t          | d          s|                     ||          S t          j                    }|                     |          }|                     ||||d           d{V }|                     |||          S )a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            runs (Run): The runs to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        rp   r   r   N)r   r   r   r   r   rp   r   )rZ   r   rS   r;   r   r   s         r(   r   z+DynamicComparisonRunEvaluator.acompare_runs  s        tW%% 	4$$T7333
~~d##zz'4dCC " 
 
 
 
 
 
 
 

 ##FM4@@@r'   c                .    |                      ||          S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        )r   )rZ   r   rS   s      r(   r   z&DynamicComparisonRunEvaluator.__call__:  s       w///r'   r   c                    d| j          dS )r   z<DynamicComparisonRunEvaluator r   r   r   s    r(   r   z&DynamicComparisonRunEvaluator.__repr__K  s    >>>>>r'   	list[str]c                    g }| D ]g}|                     dt          |j                  z              t          |dd          r*|                     dt          |j                  z              h|S )zExtract tags from runs.zrun:r   Nzexperiment:)appendr   r   r   r   )r   r   rR   s      r(   r   z'DynamicComparisonRunEvaluator._get_tagsO  su      	A 	ACKKSV,---sL$// AMC,?,??@@@r'   r   -Union[dict, list, ComparisonEvaluationResult]r;   r   c                   t          |t                    r|j        s||_        |S t          |t                    r$d t	          ||          D             | j        |d}n8t          |t                    rd|vr
| j        |d<   nd|}t          |          	 t          di d|i|S # t          $ r}t          d|           |d }~ww xY w)Nc                $    i | ]\  }}|j         |S r&   )r   )r   rR   r3   s      r(   
<dictcomp>zADynamicComparisonRunEvaluator._format_results.<locals>.<dictcomp>f  s     MMMZS%365MMMr'   )rk   r2   r;   r2   zXExpected 'dict', 'list' or 'ComparisonEvaluationResult' result object. Received: result=r;   zExpected a dictionary with a 'key' and dictionary of scores mappingrun IDs to numeric scores, or ComparisonEvaluationResult object, got r&   )	rD   rj   r;   listzipr   r7   r   r   )rZ   r   r;   r   msgr   s         r(   r   z-DynamicComparisonRunEvaluator._format_resultsZ  s:    f899 	"' 5'4$M%% 	"MM3tV;L;LMMMz!. FF
 %% 	"F"" $
u/%+/ /  S//!		-  "M<V<    	 	 	!! !  		s   B( (
C
2CC
r   )rn   r   rp   r   r   )r   r   rS   rT   rW   rj   r   )r   r   rW   r   )r   r   r;   r   r   r   rW   rj   )r!   r"   r#   r$   r   r   r   r   r   r   r   staticmethodr   r   r&   r'   r(   r   r     s       >> >J >J >J >J >J@ & & & X& AEA A A A A@ AEA A A A A: AE0 0 0 0 0"? ? ? ?    \" " " " " "r'   r   r   rW   c                     t          |           S )z.Create a comaprison evaluator from a function.)r   r   s    r(   comparison_evaluatorr     s     )...r'   r   tuple[Union[Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]], Optional[Callable[..., dict]]]c                    dt          j                   d j                                        D             }d j                                        D             |r;t	          fd|D                       s4t          fd|D                       dk    rd d}t          |          t	          fd	|D                       r|d
dgk    r d fS t          j                   r=dfdd fd}t           d          rt           d          n|j
        |_
        |fS dfdd fd}t           d          rt           d          n|j
        |_
        |fS )N)rR   rS   rs   outputsreference_outputsattachmentsc                8    g | ]\  }}|j         |j        k    |S r&   kindVAR_KEYWORDr   pnameps      r(   r   z-_normalize_evaluator_func.<locals>.<listcomp>  *    XXX(%!-@W@W@W@W@Wr'   c                H    g | ]\  }}|j         t          j        j        u| S r&   defaultr   	Parameteremptyr  s      r(   r   z-_normalize_evaluator_func.<locals>.<listcomp>  9       E19G-333 	333r'   c              3  (   K   | ]}|v p|v V  d S r   r&   r   r  args_with_defaultssupported_argss     r(   r   z,_normalize_evaluator_func.<locals>.<genexpr>  D       
 
GLE^#Bu0B'B
 
 
 
 
 
r'   c                    g | ]}|v|	S r&   r&   r   ar  s     r(   r   z-_normalize_evaluator_func.<locals>.<listcomp>  $    DDDq1C(C(C(C(C(Cr'      UInvalid evaluator function. Must have at least one argument. Supported arguments are . Please see https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluatorsc              3  (   K   | ]}|v p|v V  d S r   r&   r  s     r(   r   z,_normalize_evaluator_func.<locals>.<genexpr>  D        CH>5,>#>     r'   rR   rS   r   rT   rW   tuple[list, dict, dict]c                ~   | ||r|j         ni | j        pi |r	|j        pi ni |r	|j        pi ni d}i }g }i }j                                        D ]i\  }}||v r`|j        |j        |j        fv r|                    ||                    n||         ||<   |dv rt          ||                   n||         ||<   j|||fS N)rR   rS   rs   r   r   r   )rR   rS   
rs   r   r   
parametersitemsr   POSITIONAL_OR_KEYWORDPOSITIONAL_ONLYr   r   	rR   rS   arg_mapkwargsargsry   
param_nameparamsigs	           r(   _prepare_inputsz2_normalize_evaluator_func.<locals>._prepare_inputs  "    &07?gnnR"{0b@G#O7#6#<"RBI)Q)>Br   "),)=)=)?)?  %J!W,, :!7!1*   !KK
(;<<<<181DF:.  *-??? *'**=>>>!(!4 &j1
 V]22r'   r   c                D   K    | |          \  }}} |i | d {V S r   r&   rR   rS   r$  r#  rx   r(  rn   s        r(   awrapperz+_normalize_evaluator_func.<locals>.awrapper  sH       %4OC$A$A!vq!T426222222222r'   r!   c                ~   | ||r|j         ni | j        pi |r	|j        pi ni |r	|j        pi ni d}i }g }i }j                                        D ]i\  }}||v r`|j        |j        |j        fv r|                    ||                    n||         ||<   |dv rt          ||                   n||         ||<   j|||fS r  r  r!  s	           r(   r(  z2_normalize_evaluator_func.<locals>._prepare_inputs  r)  r'   c                4     | |          \  }}} |i |S r   r&   r+  s        r(   wrapperz*_normalize_evaluator_func.<locals>.wrapper  s0    $3OC$A$A!vqtT,V,,,r'   )rR   r   rS   rT   rW   r  )rR   r   rS   rT   rW   r   r   	signaturer  r  r   r   r   r   r   r   r!   	rn   all_argsr   r,  r/  r(  r  r'  r  s	   `    @@@@r(   r   r     s   N 
D
!
!CXXcn&:&:&<&<XXXH ,,..  
  o. 
 
 
 
 
PX
 
 
 
 
o. DDDDHDDDEEJJG1?G G G 	 oo      LT     `.	 
 
 Tz&t,, X	.3 3 3 3 3 3>3 3 3 3 3 3 3 4,,'j)))& 
 o..3 3 3 3 3 3>- - - - - - - 4,,&j)))% 
 _--r'   tuple[Union[Callable[[Sequence[Run], Optional[Example]], _COMPARISON_OUTPUT], Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]], Optional[Callable[..., dict]]]c                    dt          j                   d j                                        D             }d j                                        D             |r;t	          fd|D                       s4t          fd|D                       dk    rd d}t          |          t	          fd	|D                       r|d
dgk    r d fS t          j                   r=dfdd fd}t           d          rt           d          n|j
        |_
        |fS dfdd fd}t           d          rt           d          n|j
        |_
        |fS )Nr   rS   rs   r   r   c                8    g | ]\  }}|j         |j        k    |S r&   r   r  s      r(   r   z8_normalize_comparison_evaluator_func.<locals>.<listcomp>  r  r'   c                H    g | ]\  }}|j         t          j        j        u| S r&   r  r  s      r(   r   z8_normalize_comparison_evaluator_func.<locals>.<listcomp>  r
  r'   c              3  (   K   | ]}|v p|v V  d S r   r&   r  s     r(   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>%  r  r'   c                    g | ]}|v|	S r&   r&   r  s     r(   r   z8_normalize_comparison_evaluator_func.<locals>.<listcomp>(  r  r'   r  r  r  c              3  (   K   | ]}|v p|v V  d S r   r&   r  s     r(   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>3  r  r'   r   rS   r   rT   rW   r  c                l   | ||r|j         ni d | D             |r	|j        pi ni d}i }g }i }j                                        D ]i\  }}||v r`|j        |j        |j        fv r|                    ||                    n||         ||<   |dv rt          ||                   n||         ||<   j|||fS )Nc                     g | ]}|j         pi S r&   r   r   rR   s     r(   r   zQ_normalize_comparison_evaluator_func.<locals>._prepare_inputs.<locals>.<listcomp>D      BBBc 1rBBBr'   r6  r   rS   	rs   r   r  r  r   r  r   r   r   	r   rS   r"  r#  r$  ry   r%  r&  r'  s	           r(   r(  z=_normalize_comparison_evaluator_func.<locals>._prepare_inputs=      !&07?gnnRBBTBBBBI)Q)>Br   "),)=)=)?)?  %J!W,, :!7!1*   !KK
(;<<<<181DF:.  *-@@@ *'**=>>>!(!4 &j1
 V]22r'   r   c                D   K    | |          \  }}} |i | d {V S r   r&   r   rS   r$  r#  rx   r(  rn   s        r(   r,  z6_normalize_comparison_evaluator_func.<locals>.awrapper[  sH       %4OD'$B$B!vq!T426222222222r'   r!   c                l   | ||r|j         ni d | D             |r	|j        pi ni d}i }g }i }j                                        D ]i\  }}||v r`|j        |j        |j        fv r|                    ||                    n||         ||<   |dv rt          ||                   n||         ||<   j|||fS )Nc                     g | ]}|j         pi S r&   r>  r?  s     r(   r   zQ_normalize_comparison_evaluator_func.<locals>._prepare_inputs.<locals>.<listcomp>q  r@  r'   r6  rA  rB  rC  s	           r(   r(  z=_normalize_comparison_evaluator_func.<locals>._prepare_inputsj  rD  r'   c                4     | |          \  }}} |i |S r   r&   rF  s        r(   r/  z5_normalize_comparison_evaluator_func.<locals>.wrapper  s2     %4OD'$B$B!vqtT,V,,,r'   )r   r   rS   rT   rW   r  )r   r   rS   rT   rW   r   r0  r2  s	   `    @@@@r(   r   r     s    SN

D
!
!CXXcn&:&:&<&<XXXH ,,..  
  o, 
 
 
 
 
PX
 
 
 
 
o, DDDDHDDDEEJJG1?G G G 	 oo      LT     `,	 
 
 Tz&t,, X	,3 3 3 3 3 3<3 3 3 3 3 3 3 4,,'j)))& 
 _,,3 3 3 3 3 3<- - - - - - - 4,,&j)))% 
 O++r'   r   ;Union[EvaluationResults, dict, str, int, bool, float, list]Union[EvaluationResults, dict]c                   t          | t          t          t          f          rd| i} n| st	          d|            t          | t
                    r1t          d | D                       st	          d|  d          d| i} nBt          | t                    rd| i} n(t          | t                    rnt	          d|            | S )	Nr3   zdExpected a non-empty dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got c              3  @   K   | ]}t          |t                    V  d S r   )rD   r7   )r   xs     r(   r   z+_format_evaluator_result.<locals>.<genexpr>  s,      771:a&&777777r'   z8Expected a list of dicts or EvaluationResults. Received .rO   r   zZExpected a dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got )	rD   r   rF   rE   r   r   r   r   r7   )r   s    r(   r   r     s    &4,-- 
6" 
D;AD D
 
 	
 
FD	!	! 
7777777 	T6TTT   V$	FC	 	  
6"	FD	!	! 
/&,/ /
 
 	
 Mr'   SUMMARY_EVALUATOR_Tc                0    dt          j                   d j                                        D             }d j                                        D             |r;t	          fd|D                       s?t          fd|D                       dk    r d d}|r	|d	| dz  }t          |          t	          fd
|D                       r|ddgk    r S d fd}t           d          rt           d          n|j	        |_	        |S )Nr   examplesrs   r   r   c                    g | ]\  }}|S r&   r&   r  s      r(   r   z0_normalize_summary_evaluator.<locals>.<listcomp>  s    ===(%===r'   c                H    g | ]\  }}|j         t          j        j        u| S r&   r  r  s      r(   r   z0_normalize_summary_evaluator.<locals>.<listcomp>  r
  r'   c              3  (   K   | ]}|v p|v V  d S r   r&   r  s     r(   r   z/_normalize_summary_evaluator.<locals>.<genexpr>  r  r'   c                    g | ]}|v|	S r&   r&   r  s     r(   r   z0_normalize_summary_evaluator.<locals>.<listcomp>  r  r'   r  r  rO  z Received arguments c              3      K   | ]}|v V  	d S r   r&   )r   r  r  s     r(   r   z/_normalize_summary_evaluator.<locals>.<genexpr>  s(      ??U.(??????r'   r   rS  Sequence[schemas.Run]Sequence[schemas.Example]rW   rX   c                r   | |d |D             d | D             d |D             d}i }g }	j                                         D ]E\  }}||v r<|j        |j        |j        fv r|                    ||                    :||         ||<   F |i |}t          |t                    r|S t          |          S )Nc                    g | ]	}|j         
S r&   )rs   r   rS   s     r(   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>  s    BBBg7>BBBr'   c                     g | ]}|j         pi S r&   r>  r?  s     r(   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>  s    >>>#CK-2>>>r'   c                     g | ]}|j         pi S r&   r>  r]  s     r(   r   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>  s    %T%T%Tgo&;%T%T%Tr'   rR  )	r  r  r   r  r   r   rD   r1   r   )
r   rS  r"  r#  r$  r%  r&  r   rn   r'  s
           r(   r/  z-_normalize_summary_evaluator.<locals>.wrapper  s    $BBBBB>>>>>%T%T8%T%T%T G FD%(^%9%9%;%; 	A 	A!
E((z3-&   GJ$78888-4Z-@z*T4*6**F&"233 +F333r'   r!   )r   rY  rS  rZ  rW   rX   )
r   r1  r  r  r   r   r   r   r   r!   )rn   r3  r   r/  r  r'  r  s   `   @@@r(   _normalize_summary_evaluatorr`    s   SN

D
!
!C==cn&:&:&<&<===H ,,..  
  5 
 
 
 
 
PX
 
 
 
 
5 DDDDHDDDEEJJC1?C C C 	  	65(5555Coo ????h????? &8P D D 	4 	4 	4 	4 	4 	4 	4: *1z)B)BXGD*%%%HX 	 r'   )rn   ro   )r   r   )rn   r   rW   r   )rn   r   rW   r   )rn   r   rW   r4  )r   rJ  rW   rK  )rn   r   rW   rP  )?r$   
__future__r   rc   r   r   abcr   collections.abcr   r   typingr   r   r	   r
   r   r   typing_extensionsr   r   r   r^   r   pydantic.v1r   r   r   r   ImportErrorpydanticlogging	functoolsr   langsmith.schemasr   r   r   r   	getLoggerr!   rG   r   r*   r1   rN   rQ   r7   r   rj   r   rm   r   r   r   r   r   r   r   r   r   rP  r`  r&   r'   r(   <module>rm     s   E E " " " " " "          / / / / / / / /                ( ' ' ' ' ' ' ' ' ' ' '                                           B B B B B B B B B B B B		8	$	$' ' ' ' 'y ' ' '6 6 6 6 6Ye 6 6 6 6, , , , ,y , , ,^" " " " "	 " " " "Y Y Y Y Y Y Y Y8 )+<dBC : : : : : : : :$ 5t;< e5 e5 e5 e5 e5, e5 e5 e5P	% 	% 	% 	%    I I I I I I I IX/ / / /G. G. G. G.T@, @, @, @,F   8 	'+	 9: 112	4 	gk	D12 112	4		 > > > > > >s   A A'&A'