U
    hBB                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlmZm	Z	 d dl
mZ d dlmZmZmZ dZdZdZd	Zd
ZdZdZeddddG dd deeZG dd deeZG dd deeZG dd deeZdS )    N)AnyDictListOptional)
deprecatedwarn_deprecated)
Embeddings)	BaseModelField	SecretStrz'sentence-transformers/all-mpnet-base-v2zhkunlp/instructor-largezBAAI/bge-large-enz&Represent the document for retrieval: z<Represent the question for retrieving supporting documents: z9Represent this question for searching relevant passages: u9   为这个句子生成表示以用于检索相关文章：z0.2.21.0z+langchain_huggingface.HuggingFaceEmbeddings)sinceremovalZalternative_importc                       s   e Zd ZU dZeed< eZeed< dZ	e
e ed< eedZeeef ed< eedZeeef ed< d	Zeed
< d	Zeed< ed fddZG dd dZee eee  dddZeee dddZ  ZS )HuggingFaceEmbeddingsai  HuggingFace sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceEmbeddings

            model_name = "sentence-transformers/all-mpnet-base-v2"
            model_kwargs = {'device': 'cpu'}
            encode_kwargs = {'normalize_embeddings': False}
            hf = HuggingFaceEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    client
model_nameNcache_folderdefault_factorymodel_kwargsencode_kwargsFmulti_processshow_progresskwargsc              
      s   t  jf | d|kr`d}d}t||d| jj dd| d d| d	 d| jj d
 d zddl}W n, tk
r } ztd|W 5 d}~X Y nX |j| jfd| j	i| j
| _dS )$Initialize the sentence_transformer.r   0.2.160.4.0Default values for .model_name were deprecated in LangChain  and will be removed in %. Explicitly pass a model_name to the constructor instead.r   r   messager   NzrCould not import sentence_transformers python package. Please install it with `pip install sentence-transformers`.r   )super__init__r   	__class____name__sentence_transformersImportErrorSentenceTransformerr   r   r   r   selfr   r   r   r+   excr)    N/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/embeddings/huggingface.pyr(   C   s<    

	zHuggingFaceEmbeddings.__init__c                   @   s   e Zd ZdZdS )zHuggingFaceEmbeddings.ConfigforbidNr*   
__module____qualname__extrar2   r2   r2   r3   Config`   s   r9   textsreturnc                 C   sj   ddl }ttdd |}| jrF| j }| j||}|j| n| jj	|fd| j
i| j}| S )Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        r   Nc                 S   s   |  ddS )N
r"   )replace)xr2   r2   r3   <lambda>n       z7HuggingFaceEmbeddings.embed_documents.<locals>.<lambda>show_progress_bar)r+   listmapr   r   Zstart_multi_process_poolZencode_multi_processr-   Zstop_multi_process_poolencoder   r   tolist)r/   r;   r+   pool
embeddingsr2   r2   r3   embed_documentsc   s    	
z%HuggingFaceEmbeddings.embed_documentstextr<   c                 C   s   |  |gd S Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        r   rJ   r/   rL   r2   r2   r3   embed_queryz   s    	z!HuggingFaceEmbeddings.embed_query)r*   r6   r7   __doc__r   __annotations__DEFAULT_MODEL_NAMEr   strr   r   r
   dictr   r   r   r   boolr   r(   r9   r   floatrJ   rQ   __classcell__r2   r2   r1   r3   r      s   
r   c                       s   e Zd ZU dZeed< eZeed< dZ	e
e ed< eedZeeef ed< eedZeeef ed< eZeed	< eZeed
< dZeed< ed fddZG dd dZee eee  dddZeee dddZ  ZS )HuggingFaceInstructEmbeddingsa  Wrapper around sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers``
    and ``InstructorEmbedding`` python packages installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceInstructEmbeddings

            model_name = "hkunlp/instructor-large"
            model_kwargs = {'device': 'cpu'}
            encode_kwargs = {'normalize_embeddings': True}
            hf = HuggingFaceInstructEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    r   r   Nr   r   r   r   embed_instructionquery_instructionFr   r   c              
      s   t  jf | d|kr`d}d}t||d| jj dd| d d| d	 d| jj d
 d z,ddlm} || jfd| ji| j	| _
W n, tk
r } ztd|W 5 d}~X Y nX d| jkrtdddd| jj d | jrtd | jd| _dS )r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r   )
INSTRUCTORr   z/Dependencies for InstructorEmbedding not found.NrC   0.2.5r   "encode_kwargs['show_progress_bar']the show_progress method on r   r   namealternativeuBoth encode_kwargs['show_progress_bar'] and show_progress are set;encode_kwargs['show_progress_bar'] takes precedence)r'   r(   r   r)   r*   ZInstructorEmbeddingr]   r   r   r   r   r,   r   r   warningswarnpop)r/   r   r   r   r]   er1   r2   r3   r(      sL    

	
z&HuggingFaceInstructEmbeddings.__init__c                   @   s   e Zd ZdZdS )z$HuggingFaceInstructEmbeddings.Configr4   Nr5   r2   r2   r2   r3   r9      s   r9   r:   c                    s6    fdd|D } j j|fd ji j}| S )zCompute doc embeddings using a HuggingFace instruct model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        c                    s   g | ]} j |gqS r2   )r[   ).0rL   r/   r2   r3   
<listcomp>   s     zAHuggingFaceInstructEmbeddings.embed_documents.<locals>.<listcomp>rC   r   rF   r   r   rG   )r/   r;   Zinstruction_pairsrI   r2   rj   r3   rJ      s    	z-HuggingFaceInstructEmbeddings.embed_documentsrK   c                 C   s4   | j |g}| jj|gfd| ji| jd }| S )zCompute query embeddings using a HuggingFace instruct model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        rC   r   )r\   r   rF   r   r   rG   )r/   rL   Zinstruction_pair	embeddingr2   r2   r3   rQ      s    	
z)HuggingFaceInstructEmbeddings.embed_query)r*   r6   r7   rR   r   rS   DEFAULT_INSTRUCT_MODELr   rU   r   r   r
   rV   r   r   r   DEFAULT_EMBED_INSTRUCTIONr[   DEFAULT_QUERY_INSTRUCTIONr\   r   rW   r(   r9   r   rX   rJ   rQ   rY   r2   r2   r1   r3   rZ      s   
'rZ   c                       s   e Zd ZU dZeed< eZeed< dZ	e
e ed< eedZeeef ed< eedZeeef ed< eZeed	< d
Zeed< dZeed< ed fddZG dd dZee eee  dddZeee dddZ  ZS )HuggingFaceBgeEmbeddingsaE  HuggingFace sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers`` python package installed.
    To use Nomic, make sure the version of ``sentence_transformers`` >= 2.3.0.

    Bge Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceBgeEmbeddings

            model_name = "BAAI/bge-large-en-v1.5"
            model_kwargs = {'device': 'cpu'}
            encode_kwargs = {'normalize_embeddings': True}
            hf = HuggingFaceBgeEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
     Nomic Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceBgeEmbeddings

            model_name = "nomic-ai/nomic-embed-text-v1"
            model_kwargs = {
                'device': 'cpu',
                'trust_remote_code':True
                }
            encode_kwargs = {'normalize_embeddings': True}
            hf = HuggingFaceBgeEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs,
                query_instruction = "search_query:",
                embed_instruction = "search_document:"
            )
    r   r   Nr   r   r   r   r\    r[   Fr   r   c              
      s  t  jf | d|kr`d}d}t||d| jj dd| d d| d	 d| jj d
 d zddl}W n, tk
r } ztd|W 5 d}~X Y nX |j| jfd| j	i| j
| _d| jkrt| _d| jkrtdddd| jj d | jrtd | jd| _dS )r   r   r^   r   r   r   r    r!   r"   r#   r$   r%   r   NzrCould not import sentence_transformers python package. Please install it with `pip install sentence_transformers`.r   z-zhrC   r   r_   r`   ra   rd   )r'   r(   r   r)   r*   r+   r,   r-   r   r   r   r    DEFAULT_QUERY_BGE_INSTRUCTION_ZHr\   r   r   re   rf   rg   r.   r1   r2   r3   r(   1  sX    

	
z!HuggingFaceBgeEmbeddings.__init__c                   @   s   e Zd ZdZdS )zHuggingFaceBgeEmbeddings.Configr4   Nr5   r2   r2   r2   r3   r9   _  s   r9   r:   c                    s6    fdd|D } j j|fd ji j}| S )r=   c                    s   g | ]} j |d d qS )r>   r"   )r[   r?   )ri   trj   r2   r3   rk   k  s     z<HuggingFaceBgeEmbeddings.embed_documents.<locals>.<listcomp>rC   rl   )r/   r;   rI   r2   rj   r3   rJ   b  s    	z(HuggingFaceBgeEmbeddings.embed_documentsrK   c                 C   s6   | dd}| jj| j| fd| ji| j}| S )rN   r>   r"   rC   )r?   r   rF   r\   r   r   rG   )r/   rL   rm   r2   r2   r3   rQ   q  s    	z$HuggingFaceBgeEmbeddings.embed_query)r*   r6   r7   rR   r   rS   DEFAULT_BGE_MODELr   rU   r   r   r
   rV   r   r   r    DEFAULT_QUERY_BGE_INSTRUCTION_ENr\   r[   r   rW   r(   r9   r   rX   rJ   rQ   rY   r2   r2   r1   r3   rq      s   
&.rq   c                   @   s   e Zd ZU dZeed< dZeed< dZe	e ed< i Z
eeef ed< eedd	d
ZeedddZeedddZee eee  dddZeee dddZdS )!HuggingFaceInferenceAPIEmbeddingszkEmbed texts using the HuggingFace API.

    Requires a HuggingFace Inference API key and a model name.
    api_keyz&sentence-transformers/all-MiniLM-L6-v2r   Napi_urladditional_headers)r<   c                 C   s   | j p
| jS )N)ry   _default_api_urlrj   r2   r2   r3   _api_url  s    z*HuggingFaceInferenceAPIEmbeddings._api_urlc                 C   s   d| j  S )NzAhttps://api-inference.huggingface.co/pipeline/feature-extraction/)r   rj   r2   r2   r3   r{     s    
z2HuggingFaceInferenceAPIEmbeddings._default_api_urlc                 C   s   dd| j   i| jS )NAuthorizationzBearer )rx   Zget_secret_valuerz   rj   r2   r2   r3   _headers  s
     z*HuggingFaceInferenceAPIEmbeddings._headersr:   c                 C   s(   t j| j| j|ddddd}| S )a  Get the embeddings for a list of texts.

        Args:
            texts (Documents): A list of texts to get embeddings for.

        Returns:
            Embedded texts as List[List[float]], where each inner List[float]
                corresponds to a single input text.

        Example:
            .. code-block:: python

                from langchain_community.embeddings import (
                    HuggingFaceInferenceAPIEmbeddings,
                )

                hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
                    api_key="your_api_key",
                    model_name="sentence-transformers/all-MiniLM-l6-v2"
                )
                texts = ["Hello, world!", "How are you?"]
                hf_embeddings.embed_documents(texts)
        T)Zwait_for_modelZ	use_cache)inputsoptions)headersjson)requestspostr|   r~   r   )r/   r;   responser2   r2   r3   rJ     s    z1HuggingFaceInferenceAPIEmbeddings.embed_documentsrK   c                 C   s   |  |gd S rM   rO   rP   r2   r2   r3   rQ     s    	z-HuggingFaceInferenceAPIEmbeddings.embed_query)r*   r6   r7   rR   r   rS   r   rU   ry   r   rz   r   propertyr|   r{   rV   r~   r   rX   rJ   rQ   r2   r2   r2   r3   rw     s   
"rw   )re   typingr   r   r   r   r   Zlangchain_core._apir   r   Zlangchain_core.embeddingsr   Zlangchain_core.pydantic_v1r	   r
   r   rT   rn   ru   ro   rp   rv   rs   r   rZ   rq   rw   r2   r2   r2   r3   <module>   s0   ks 