from __future__ import annotations

import logging
import os
import warnings
from typing import (
    Any, Callable, Dict, List, Literal, Mapping, Optional, Sequence, Set,
    Tuple, Union, cast,
)

import numpy as np
from langchain_core._api.deprecation import deprecated
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
from langchain_core.utils import (
    get_from_dict_or_env, get_pydantic_field_names, pre_init,
)
from tenacity import (
    AsyncRetrying, before_sleep_log, retry, retry_if_exception_type,
    stop_after_attempt, wait_exponential,
)

from langchain_community.utils.openai import is_openai_v1

logger = logging.getLogger(__name__)


def _create_retry_decorator(embeddings: OpenAIEmbeddings) -> Callable[[Any], Any]:
    # Retry the transient error types raised by the pre-1.0 openai client,
    # waiting exponentially between retry_min_seconds and retry_max_seconds.
    import openai

    return retry(
        reraise=True,
        stop=stop_after_attempt(embeddings.max_retries),
        wait=wait_exponential(
            multiplier=1,
            min=embeddings.retry_min_seconds,
            max=embeddings.retry_max_seconds,
        ),
        retry=(
            retry_if_exception_type(openai.error.Timeout)
            | retry_if_exception_type(openai.error.APIError)
            | retry_if_exception_type(openai.error.APIConnectionError)
            | retry_if_exception_type(openai.error.RateLimitError)
            | retry_if_exception_type(openai.error.ServiceUnavailableError)
        ),
        before_sleep=before_sleep_log(logger, logging.WARNING),
    )


def _async_retry_decorator(embeddings: OpenAIEmbeddings) -> Any:
    # Same retry policy as above, applied to async callables via AsyncRetrying.
    import openai

    async_retrying = AsyncRetrying(
        reraise=True,
        stop=stop_after_attempt(embeddings.max_retries),
        wait=wait_exponential(
            multiplier=1,
            min=embeddings.retry_min_seconds,
            max=embeddings.retry_max_seconds,
        ),
        retry=(
            retry_if_exception_type(openai.error.Timeout)
            | retry_if_exception_type(openai.error.APIError)
            | retry_if_exception_type(openai.error.APIConnectionError)
            | retry_if_exception_type(openai.error.RateLimitError)
            | retry_if_exception_type(openai.error.ServiceUnavailableError)
        ),
        before_sleep=before_sleep_log(logger, logging.WARNING),
    )

    def wrap(func: Callable) -> Callable:
        async def wrapped_f(*args: Any, **kwargs: Any) -> Any:
            async for _ in async_retrying:
                return await func(*args, **kwargs)
            raise AssertionError("this is unreachable")

        return wrapped_f

    return wrap


def _check_response(response: dict, skip_empty: bool = False) -> dict:
    if any(len(d["embedding"]) == 1 for d in response["data"]) and not skip_empty:
        import openai

        raise openai.error.APIError("OpenAI API returned an empty embedding")
    return response


def embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any:
    """Use tenacity to retry the embedding call."""
    if is_openai_v1():
        return embeddings.client.create(**kwargs)
    retry_decorator = _create_retry_decorator(embeddings)

    @retry_decorator
    def _embed_with_retry(**kwargs: Any) -> Any:
        response = embeddings.client.create(**kwargs)
        return _check_response(response, skip_empty=embeddings.skip_empty)

    return _embed_with_retry(**kwargs)


async def async_embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any:
    """Use tenacity to retry the embedding call."""
    if is_openai_v1():
        return await embeddings.async_client.create(**kwargs)

    @_async_retry_decorator(embeddings)
    async def _async_embed_with_retry(**kwargs: Any) -> Any:
        response = await embeddings.client.acreate(**kwargs)
        return _check_response(response, skip_empty=embeddings.skip_empty)

    return await _async_embed_with_retry(**kwargs)
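
# The retry helpers above all apply the same policy: stop after ``max_retries``
# attempts, back off exponentially between ``retry_min_seconds`` and
# ``retry_max_seconds``, and log a warning before each sleep. A standalone
# tenacity equivalent looks like the sketch below (illustrative only; the
# numeric values are examples and ``flaky_call`` is a placeholder, not part of
# this module):
#
#     from tenacity import retry, stop_after_attempt, wait_exponential
#
#     @retry(reraise=True, stop=stop_after_attempt(6),
#            wait=wait_exponential(multiplier=1, min=4, max=20))
#     def flaky_call() -> dict:
#         ...  # an HTTP call that may raise transient errors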


@deprecated(
    since="0.0.9",
    removal="1.0",
    alternative_import="langchain_openai.OpenAIEmbeddings",
)
class OpenAIEmbeddings(BaseModel, Embeddings):
    """OpenAI embedding models.

    To use, you should have the ``openai`` python package installed, and the
    environment variable ``OPENAI_API_KEY`` set with your API key or pass it
    as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import OpenAIEmbeddings
            openai = OpenAIEmbeddings(openai_api_key="my-api-key")

    In order to use the library with Microsoft Azure endpoints, you need to set
    the OPENAI_API_TYPE, OPENAI_API_BASE, OPENAI_API_KEY and OPENAI_API_VERSION.
    The OPENAI_API_TYPE must be set to 'azure' and the others correspond to
    the properties of your endpoint.
    In addition, the deployment name must be passed as the model parameter.

    Example:
        .. code-block:: python

            import os

            os.environ["OPENAI_API_TYPE"] = "azure"
            os.environ["OPENAI_API_BASE"] = "https://<your-endpoint>.openai.azure.com/"
            os.environ["OPENAI_API_KEY"] = "your AzureOpenAI key"
            os.environ["OPENAI_API_VERSION"] = "2023-05-15"
            os.environ["OPENAI_PROXY"] = "http://your-corporate-proxy:8080"

            from langchain_community.embeddings.openai import OpenAIEmbeddings
            embeddings = OpenAIEmbeddings(
                deployment="your-embeddings-deployment-name",
                model="your-embeddings-model-name",
                openai_api_base="https://your-endpoint.openai.azure.com/",
                openai_api_type="azure",
            )
            text = "This is a test query."
            query_result = embeddings.embed_query(text)

    """

    client: Any = Field(default=None, exclude=True)  #: :meta private:
    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
    model: str = "text-embedding-ada-002"
    # Azure deployment name; defaults to the model name.
    deployment: Optional[str] = model
    openai_api_version: Optional[str] = Field(default=None, alias="api_version")
    """Automatically inferred from env var ``OPENAI_API_VERSION`` if not provided."""
    openai_api_base: Optional[str] = Field(default=None, alias="base_url")
    """Base URL path for API requests; leave blank if not using a proxy or
    service emulator."""
    openai_api_type: Optional[str] = None
    openai_proxy: Optional[str] = None
    embedding_ctx_length: int = 8191
    """The maximum number of tokens to embed at once."""
    openai_api_key: Optional[str] = Field(default=None, alias="api_key")
    """Automatically inferred from env var ``OPENAI_API_KEY`` if not provided."""
    openai_organization: Optional[str] = Field(default=None, alias="organization")
    """Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided."""
    allowed_special: Union[Literal["all"], Set[str]] = set()
    disallowed_special: Union[Literal["all"], Set[str], Sequence[str]] = "all"
    chunk_size: int = 1000
    """Maximum number of texts to embed in each batch."""
    max_retries: int = 2
    """Maximum number of retries to make when generating."""
    request_timeout: Optional[Union[float, Tuple[float, float], Any]] = Field(
        default=None, alias="timeout"
    )
    """Timeout for requests to the OpenAI embeddings API."""
    headers: Any = None
    tiktoken_enabled: bool = True
    """Set to False to tokenize with a HuggingFace tokenizer instead of tiktoken."""
    tiktoken_model_name: Optional[str] = None
    """The model name to pass to tiktoken when using this class."""
    show_progress_bar: bool = False
    """Whether to show a progress bar when embedding."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for the ``create`` call that are not
    explicitly specified."""
    skip_empty: bool = False
    """Whether to skip empty embeddings instead of raising an error."""
    default_headers: Union[Mapping[str, str], None] = None
    default_query: Union[Mapping[str, object], None] = None
    retry_min_seconds: int = 4
    """Minimum number of seconds to wait between retries."""
    retry_max_seconds: int = 20
    """Maximum number of seconds to wait between retries."""
    http_client: Union[Any, None] = None
    """Optional ``httpx.Client`` passed through to the openai client."""

    class Config:
        """Configuration for this pydantic object."""

        extra = "forbid"
        allow_population_by_field_name = True

    @root_validator(pre=True)
    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Build extra kwargs from additional params that were passed in."""
        all_required_field_names = get_pydantic_field_names(cls)
        extra = values.get("model_kwargs", {})
        for field_name in list(values):
            if field_name in extra:
                raise ValueError(f"Found {field_name} supplied twice.")
            if field_name not in all_required_field_names:
                warnings.warn(
                    f"""WARNING! {field_name} is not default parameter.
                    {field_name} was transferred to model_kwargs.
                    Please confirm that {field_name} is what you intended."""
                )
                extra[field_name] = values.pop(field_name)

        invalid_model_kwargs = all_required_field_names.intersection(extra.keys())
        if invalid_model_kwargs:
            raise ValueError(
                f"Parameters {invalid_model_kwargs} should be specified explicitly. "
                f"Instead they were passed in as part of `model_kwargs` parameter."
            )

        values["model_kwargs"] = extra
        return values

    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""
        values["openai_api_key"] = get_from_dict_or_env(
            values, "openai_api_key", "OPENAI_API_KEY"
        )
        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
            "OPENAI_API_BASE"
        )
        values["openai_api_type"] = get_from_dict_or_env(
            values, "openai_api_type", "OPENAI_API_TYPE", default=""
        )
        values["openai_proxy"] = get_from_dict_or_env(
            values, "openai_proxy", "OPENAI_PROXY", default=""
        )
        if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
            default_api_version = "2023-05-15"
            # Azure OpenAI embedding endpoints accept at most 16 inputs per request.
            values["chunk_size"] = min(values["chunk_size"], 16)
        else:
            default_api_version = ""
        values["openai_api_version"] = get_from_dict_or_env(
            values,
            "openai_api_version",
            "OPENAI_API_VERSION",
            default=default_api_version,
        )
        values["openai_organization"] = (
            values["openai_organization"]
            or os.getenv("OPENAI_ORG_ID")
            or os.getenv("OPENAI_ORGANIZATION")
        )
        try:
            import openai
        except ImportError:
            raise ImportError(
                "Could not import openai python package. "
                "Please install it with `pip install openai`."
            )

        if is_openai_v1():
            if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
                warnings.warn(
                    "If you have openai>=1.0.0 installed and are using Azure, "
                    "please use the `AzureOpenAIEmbeddings` class."
                )
            client_params = {
                "api_key": values["openai_api_key"],
                "organization": values["openai_organization"],
                "base_url": values["openai_api_base"],
                "timeout": values["request_timeout"],
                "max_retries": values["max_retries"],
                "default_headers": values["default_headers"],
                "default_query": values["default_query"],
                "http_client": values["http_client"],
            }
            if not values.get("client"):
                values["client"] = openai.OpenAI(**client_params).embeddings
            if not values.get("async_client"):
                values["async_client"] = openai.AsyncOpenAI(**client_params).embeddings
        elif not values.get("client"):
            values["client"] = openai.Embedding
        return values

    @property
    def _invocation_params(self) -> Dict[str, Any]:
        if is_openai_v1():
            openai_args: Dict = {"model": self.model, **self.model_kwargs}
        else:
            openai_args = {
                "model": self.model,
                "request_timeout": self.request_timeout,
                "headers": self.headers,
                "api_key": self.openai_api_key,
                "organization": self.openai_organization,
                "api_base": self.openai_api_base,
                "api_type": self.openai_api_type,
                "api_version": self.openai_api_version,
                **self.model_kwargs,
            }
            if self.openai_api_type in ("azure", "azure_ad", "azuread"):
                openai_args["engine"] = self.deployment
            if self.openai_proxy:
                try:
                    import openai
                except ImportError:
                    raise ImportError(
                        "Could not import openai python package. "
                        "Please install it with `pip install openai`."
                    )
                openai.proxy = {"http": self.openai_proxy, "https": self.openai_proxy}
        return openai_args

    def _get_len_safe_embeddings(
        self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None
    ) -> List[List[float]]:
        """
        Generate length-safe embeddings for a list of texts.

        This method handles tokenization and embedding generation, respecting the
        set embedding context length and chunk size. It supports both tiktoken
        and HuggingFace tokenizers, depending on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings, one for each input text.
        """
        tokens = []
        indices = []
        model_name = self.tiktoken_model_name or self.model
        _chunk_size = chunk_size or self.chunk_size

        if not self.tiktoken_enabled:
            try:
                from transformers import AutoTokenizer
            except ImportError:
                raise ValueError(
                    "Could not import transformers python package. "
                    "This is needed for OpenAIEmbeddings without `tiktoken`. "
                    "Please install it with `pip install transformers`."
                )
            tokenizer = AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path=model_name
            )
            for i, text in enumerate(texts):
                # Tokenize with HuggingFace and split into context-sized chunks.
                tokenized = tokenizer.encode(text, add_special_tokens=False)
                for j in range(0, len(tokenized), self.embedding_ctx_length):
                    token_chunk = tokenized[j : j + self.embedding_ctx_length]
                    # Convert token IDs back to a string for the API call.
                    chunk_text = tokenizer.decode(token_chunk)
                    tokens.append(chunk_text)
                    indices.append(i)
        else:
            try:
                import tiktoken
            except ImportError:
                raise ImportError(
                    "Could not import tiktoken python package. "
                    "This is needed for OpenAIEmbeddings. "
                    "Please install it with `pip install tiktoken`."
                )
            try:
                encoding = tiktoken.encoding_for_model(model_name)
            except KeyError:
                logger.warning("Warning: model not found. Using cl100k_base encoding.")
                encoding = tiktoken.get_encoding("cl100k_base")
            for i, text in enumerate(texts):
                if self.model.endswith("001"):
                    # Replace newlines, which can hurt performance for the
                    # first-generation embedding models.
                    text = text.replace("\n", " ")
                token = encoding.encode(
                    text=text,
                    allowed_special=self.allowed_special,
                    disallowed_special=self.disallowed_special,
                )
                for j in range(0, len(token), self.embedding_ctx_length):
                    tokens.append(token[j : j + self.embedding_ctx_length])
                    indices.append(i)

        if self.show_progress_bar:
            try:
                from tqdm.auto import tqdm

                _iter = tqdm(range(0, len(tokens), _chunk_size))
            except ImportError:
                _iter = range(0, len(tokens), _chunk_size)
        else:
            _iter = range(0, len(tokens), _chunk_size)

        batched_embeddings: List[List[float]] = []
        for i in _iter:
            response = embed_with_retry(
                self, input=tokens[i : i + _chunk_size], **self._invocation_params
            )
            if not isinstance(response, dict):
                response = response.dict()
            batched_embeddings.extend(r["embedding"] for r in response["data"])

        results: List[List[List[float]]] = [[] for _ in range(len(texts))]
        num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
        for i in range(len(indices)):
            if self.skip_empty and len(batched_embeddings[i]) == 1:
                continue
            results[indices[i]].append(batched_embeddings[i])
            num_tokens_in_batch[indices[i]].append(len(tokens[i]))

        embeddings: List[List[float]] = [[] for _ in range(len(texts))]
        for i in range(len(texts)):
            _result = results[i]
            if len(_result) == 0:
                # Empty input: fall back to embedding the empty string.
                average_embedded = embed_with_retry(
                    self, input="", **self._invocation_params
                )
                if not isinstance(average_embedded, dict):
                    average_embedded = average_embedded.dict()
                average = average_embedded["data"][0]["embedding"]
            else:
                # Token-count weighted average over the chunk embeddings.
                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
            embeddings[i] = (average / np.linalg.norm(average)).tolist()

        return embeddings
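
    # How the loops above recombine per-chunk results (an illustrative sketch,
    # not executed here): a text split into chunks of 8191, 8191 and 2000 tokens
    # yields chunk embeddings e1, e2, e3, and the final document vector is their
    # token-count weighted average normalised to unit length:
    #
    #     avg = np.average([e1, e2, e3], axis=0, weights=[8191, 8191, 2000])
    #     doc_vector = (avg / np.linalg.norm(avg)).tolist()
    #
    # The async variant below repeats the same tokenize/batch/recombine steps
    # with the asynchronous client.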

    async def _aget_len_safe_embeddings(
        self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None
    ) -> List[List[float]]:
        """
        Asynchronously generate length-safe embeddings for a list of texts.

        This method handles tokenization and asynchronous embedding generation,
        respecting the set embedding context length and chunk size. It supports
        both `tiktoken` and HuggingFace tokenizers, depending on the
        tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings, one for each input text.
        """
        tokens = []
        indices = []
        model_name = self.tiktoken_model_name or self.model

        if not self.tiktoken_enabled:
            try:
                from transformers import AutoTokenizer
            except ImportError:
                raise ValueError(
                    "Could not import transformers python package. "
                    "This is needed for OpenAIEmbeddings without `tiktoken`. "
                    "Please install it with `pip install transformers`."
                )
            tokenizer = AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path=model_name
            )
            for i, text in enumerate(texts):
                tokenized = tokenizer.encode(text, add_special_tokens=False)
                for j in range(0, len(tokenized), self.embedding_ctx_length):
                    token_chunk = tokenized[j : j + self.embedding_ctx_length]
                    chunk_text = tokenizer.decode(token_chunk)
                    tokens.append(chunk_text)
                    indices.append(i)
        else:
            try:
                import tiktoken
            except ImportError:
                raise ImportError(
                    "Could not import tiktoken python package. "
                    "This is needed for OpenAIEmbeddings. "
                    "Please install it with `pip install tiktoken`."
                )
            try:
                encoding = tiktoken.encoding_for_model(model_name)
            except KeyError:
                logger.warning("Warning: model not found. Using cl100k_base encoding.")
                encoding = tiktoken.get_encoding("cl100k_base")
            for i, text in enumerate(texts):
                if self.model.endswith("001"):
                    # Replace newlines, which can hurt performance for the
                    # first-generation embedding models.
                    text = text.replace("\n", " ")
                token = encoding.encode(
                    text=text,
                    allowed_special=self.allowed_special,
                    disallowed_special=self.disallowed_special,
                )
                for j in range(0, len(token), self.embedding_ctx_length):
                    tokens.append(token[j : j + self.embedding_ctx_length])
                    indices.append(i)

        batched_embeddings: List[List[float]] = []
        _chunk_size = chunk_size or self.chunk_size
        for i in range(0, len(tokens), _chunk_size):
            response = await async_embed_with_retry(
                self, input=tokens[i : i + _chunk_size], **self._invocation_params
            )
            if not isinstance(response, dict):
                response = response.dict()
            batched_embeddings.extend(r["embedding"] for r in response["data"])

        results: List[List[List[float]]] = [[] for _ in range(len(texts))]
        num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
        for i in range(len(indices)):
            results[indices[i]].append(batched_embeddings[i])
            num_tokens_in_batch[indices[i]].append(len(tokens[i]))

        embeddings: List[List[float]] = [[] for _ in range(len(texts))]
        for i in range(len(texts)):
            _result = results[i]
            if len(_result) == 0:
                average_embedded = await async_embed_with_retry(
                    self, input="", **self._invocation_params
                )
                if not isinstance(average_embedded, dict):
                    average_embedded = average_embedded.dict()
                average = average_embedded["data"][0]["embedding"]
            else:
                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
            embeddings[i] = (average / np.linalg.norm(average)).tolist()

        return embeddings

    def embed_documents(
        self, texts: List[str], chunk_size: Optional[int] = 0
    ) -> List[List[float]]:
        """Call out to OpenAI's embedding endpoint for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk
                size specified by the class.

        Returns:
            List of embeddings, one for each text.
        """
        # Texts may exceed the model's maximum context, so the length-safe
        # embedding path is always used.
        engine = cast(str, self.deployment)
        return self._get_len_safe_embeddings(texts, engine=engine)

    async def aembed_documents(
        self, texts: List[str], chunk_size: Optional[int] = 0
    ) -> List[List[float]]:
        """Call out to OpenAI's embedding endpoint async for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk
                size specified by the class.

        Returns:
            List of embeddings, one for each text.
        """
        engine = cast(str, self.deployment)
        return await self._aget_len_safe_embeddings(texts, engine=engine)

    def embed_query(self, text: str) -> List[float]:
        """Call out to OpenAI's embedding endpoint for embedding query text.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        """
        return self.embed_documents([text])[0]

    async def aembed_query(self, text: str) -> List[float]:
        """Call out to OpenAI's embedding endpoint async for embedding query text.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        """
        embeddings = await self.aembed_documents([text])
        return embeddings[0]