U
    hB                     @   s   d Z ddlmZ ddlmZmZmZmZ ddlZ	ddl
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ edddZG dd deeZG dd deZG dd deeZG dd deeZ dS )z@A chain for comparing the output of two models using embeddings.    )Enum)AnyDictListOptionalN)AsyncCallbackManagerForChainRunCallbackManagerForChainRun	Callbacks)
Embeddings)Field)pre_init)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc                  C   sZ   zddl m}  W nB tk
rR   zddlm}  W n tk
rL   tdY nX Y nX |  S )zaCreate an Embeddings object.
    Returns:
        Embeddings: The created Embeddings object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.)langchain_openair   ImportError%langchain_community.embeddings.openair    r   P/tmp/pip-unpacked-wheel-bo69hh5q/langchain/evaluation/embedding_distance/base.py_embedding_factory   s    r   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    ZcosineZ	euclideanZ	manhattanZ	chebyshevZhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr   r   r   r   r   *   s   
r   c                   @   s>  e Zd ZU dZeedZeed< ee	j
dZe	ed< eeeef eeef dddZG d	d
 d
Zeee dddZeedddZe	edddZeejejejdddZeejejejdddZeejejejdddZeejejejdddZeejejejdddZ eje!dd d!Z"d"S )#_EmbeddingDistanceChainMixina0  Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
                                            for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metric)valuesr   c                 C   s   | d}g }zddlm} || W n tk
r<   Y nX zddlm} || W n tk
rl   Y nX |sztdt|t|rzddl}W n tk
r   tdY nX |S )zValidate that the TikTok library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        r)   r   r   r   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr   r   appendr   r   
isinstancetupletiktoken)clsr,   r)   Ztypes_r   r1   r   r   r   _validate_tiktoken_installedH   s0    


z9_EmbeddingDistanceChainMixin._validate_tiktoken_installedc                   @   s   e Zd ZU dZeed< dS )z#_EmbeddingDistanceChainMixin.ConfigTarbitrary_types_allowedN)r   r   r    r4   bool__annotations__r   r   r   r   Configt   s   
r7   r   c                 C   s   dgS )zgReturn the output keys of the chain.

        Returns:
            List[str]: The output keys.
        scorer   selfr   r   r   output_keysw   s    z(_EmbeddingDistanceChainMixin.output_keys)resultr   c                 C   s$   d|d i}t |kr |t  |t < |S )Nr8   r   )r:   r<   parsedr   r   r   _prepare_output   s    z,_EmbeddingDistanceChainMixin._prepare_output)metricr   c              
   C   sN   t j| jt j| jt j| jt j| jt j	| j
i}||kr<|| S td| dS )zGet the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        zInvalid metric: N)r   r"   _cosine_distancer#   _euclidean_distancer$   _manhattan_distancer%   _chebyshev_distancer&   _hamming_distance
ValueError)r:   r?   Zmetricsr   r   r   _get_metric   s    
     z(_EmbeddingDistanceChainMixin._get_metric)abr   c                 C   s<   zddl m} W n tk
r,   tdY nX d|| | S )zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )cosine_similarityzThe cosine_similarity function is required to compute cosine distance. Please install the langchain-community package using `pip install langchain-community`.g      ?)Zlangchain_community.utils.mathrI   r   )rG   rH   rI   r   r   r   r@      s    
z-_EmbeddingDistanceChainMixin._cosine_distancec                 C   s   t j| | S )zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        )npZlinalgZnormrG   rH   r   r   r   rA      s    z0_EmbeddingDistanceChainMixin._euclidean_distancec                 C   s   t t | | S )zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        )rJ   sumabsrK   r   r   r   rB      s    z0_EmbeddingDistanceChainMixin._manhattan_distancec                 C   s   t t | | S )zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        )rJ   maxrM   rK   r   r   r   rC      s    z0_EmbeddingDistanceChainMixin._chebyshev_distancec                 C   s   t | |kS )zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        )rJ   ZmeanrK   r   r   r   rD      s    z._EmbeddingDistanceChainMixin._hamming_distance)vectorsr   c                 C   s6   |  | j}||d dd|d dd }|S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        r      )rF   r+   Zreshapeitem)r:   rO   r?   r8   r   r   r   _compute_score   s    	&z+_EmbeddingDistanceChainMixin._compute_scoreN)#r   r   r    r!   r   r   r)   r
   r6   r   r"   r+   r   r   strr   r3   r7   propertyr   r;   dictr>   rF   staticmethodrJ   Zndarrayr@   ZfloatingrA   rB   rC   rD   floatrS   r   r   r   r   r'   <   s*   
"+r'   c                
   @   s  e Zd ZdZeedddZeedddZee	e dddZ
deeef ee eeef d
ddZdeeef ee eeef d
ddZd	d	d	d	ddeee eee	e  eeeef  eeedddZd	d	d	d	ddeee eee	e  eeeef  eeedddZd	S )EmbeddingDistanceEvalChaina"  Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                 C   s   dS )zReturn whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        Tr   r9   r   r   r   requires_reference   s    z-EmbeddingDistanceEvalChain.requires_referencec                 C   s   d| j j dS )NZ
embedding_	_distancer+   valuer9   r   r   r   evaluation_name  s    z*EmbeddingDistanceEvalChain.evaluation_namec                 C   s   ddgS )eReturn the input keys of the chain.

        Returns:
            List[str]: The input keys.
        
prediction	referencer   r9   r   r   r   
input_keys
  s    z%EmbeddingDistanceEvalChain.input_keysNinputsrun_managerr   c                 C   s0   t | j|d |d g}| |}d|iS )a0  Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r`   ra   r8   rJ   arrayr)   Zembed_documentsrS   r:   rd   re   rO   r8   r   r   r   _call  s
    
z EmbeddingDistanceEvalChain._callc                    s:   | j |d |d gI dH }t|}| |}d|iS )a:  Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r`   ra   Nr8   r)   Zaembed_documentsrJ   rg   rS   r:   rd   re   ZembeddedrO   r8   r   r   r   _acall(  s    


z!EmbeddingDistanceEvalChain._acallF)ra   	callbackstagsmetadatainclude_run_info)r`   ra   rm   rn   ro   rp   kwargsr   c          	      K   s"   | ||d||||d}|  |S )a  Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required)
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r`   ra   rd   rm   rn   ro   rp   r>   	r:   r`   ra   rm   rn   ro   rp   rq   r<   r   r   r   _evaluate_strings>  s    z,EmbeddingDistanceEvalChain._evaluate_stringsc          	         s*   | j ||d||||dI dH }| |S )a  Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        rr   rs   NZacallr>   ru   r   r   r   _aevaluate_strings`  s    z-EmbeddingDistanceEvalChain._aevaluate_strings)N)N)r   r   r    r!   rU   r5   rZ   rT   r^   r   rb   r   r   r   r   ri   r   rl   r	   rV   rv   rx   r   r   r   r   rY      sb   
 

 


&
rY   c                
   @   s   e Zd ZdZeee dddZeedddZde	ee
f ee e	ee
f dd	d
Zde	ee
f ee e	ee
f dddZdddddeeeeee  ee	ee
f  ee
edddZdddddeeeeee  ee	ee
f  ee
edddZdS )"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 C   s   ddgS )r_   r`   prediction_br   r9   r   r   r   rb     s    z-PairwiseEmbeddingDistanceEvalChain.input_keysc                 C   s   d| j j dS )NZpairwise_embedding_r[   r\   r9   r   r   r   r^     s    z2PairwiseEmbeddingDistanceEvalChain.evaluation_nameNrc   c                 C   s0   t | j|d |d g}| |}d|iS )a  Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r`   rz   r8   rf   rh   r   r   r   ri     s    
z(PairwiseEmbeddingDistanceEvalChain._callc                    s:   | j |d |d gI dH }t|}| |}d|iS )a/  Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r`   rz   Nr8   rj   rk   r   r   r   rl     s    


z)PairwiseEmbeddingDistanceEvalChain._acallF)rm   rn   ro   rp   )r`   rz   rm   rn   ro   rp   rq   r   c          	      K   s"   | ||d||||d}|  |S )a  Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r`   rz   rs   rt   	r:   r`   rz   rm   rn   ro   rp   rq   r<   r   r   r   _evaluate_string_pairs  s    z9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairsc          	         s*   | j ||d||||dI dH }| |S )a  Asynchronously evaluate the embedding distance

        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to traces
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r{   rs   Nrw   r|   r   r   r   _aevaluate_string_pairs  s    z:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairs)N)N)r   r   r    r!   rU   r   rT   rb   r^   r   r   r   r   ri   r   rl   r	   r5   rV   r}   r~   r   r   r   r   ry     sZ   	 

 


(
ry   )!r!   enumr   typingr   r   r   r   ZnumpyrJ   Z langchain_core.callbacks.managerr   r   r	   Zlangchain_core.embeddingsr
   Zlangchain_core.pydantic_v1r   Zlangchain_core.utilsr   Zlangchain.chains.baser   Zlangchain.evaluation.schemar   r   Zlangchain.schemar   r   rT   r   r'   rY   ry   r   r   r   r   <module>   s(    7 
 