U
    h0                     @   sf   d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZ G dd de
eZdS )    N)Dict	GeneratorListOptional)
Embeddings)	BaseModel)get_from_dict_or_envpre_initc                   @   s   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< dZ
eed< i Zeed< d	Zeed
< eeedddZedddZeedddZee eedddZdee ee eee  dddZeee dddZdS )SambaStudioEmbeddingsa3  SambaNova embedding models.

    To use, you should have the environment variables
    ``SAMBASTUDIO_EMBEDDINGS_BASE_URL``, ``SAMBASTUDIO_EMBEDDINGS_BASE_URI``
    ``SAMBASTUDIO_EMBEDDINGS_PROJECT_ID``, ``SAMBASTUDIO_EMBEDDINGS_ENDPOINT_ID``,
    ``SAMBASTUDIO_EMBEDDINGS_API_KEY``
    set with your personal sambastudio variable or pass it as a named parameter
    to the constructor.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import SambaStudioEmbeddings

            embeddings = SambaStudioEmbeddings(sambastudio_embeddings_base_url=base_url,
                                          sambastudio_embeddings_base_uri=base_uri,
                                          sambastudio_embeddings_project_id=project_id,
                                          sambastudio_embeddings_endpoint_id=endpoint_id,
                                          sambastudio_embeddings_api_key=api_key,
                                          batch_size=32)
            (or)

            embeddings = SambaStudioEmbeddings(batch_size=32)

            (or)

            # CoE example
            embeddings = SambaStudioEmbeddings(
                batch_size=1,
                model_kwargs={
                    'select_expert':'e5-mistral-7b-instruct'
                }
            )
     sambastudio_embeddings_base_urlsambastudio_embeddings_base_uri!sambastudio_embeddings_project_id"sambastudio_embeddings_endpoint_idsambastudio_embeddings_api_keymodel_kwargs    
batch_size)valuesreturnc                 C   sX   t |dd|d< t |dddd|d< t |dd|d< t |d	d
|d	< t |dd|d< |S )z?Validate that api key and python package exists in environment.r   ZSAMBASTUDIO_EMBEDDINGS_BASE_URLr   ZSAMBASTUDIO_EMBEDDINGS_BASE_URIapi/predict/generic)defaultr   Z!SAMBASTUDIO_EMBEDDINGS_PROJECT_IDr   Z"SAMBASTUDIO_EMBEDDINGS_ENDPOINT_IDr   ZSAMBASTUDIO_EMBEDDINGS_API_KEY)r   )clsr    r   L/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/embeddings/sambanova.pyvalidate_environmentC   s6      
  z*SambaStudioEmbeddings.validate_environment)r   c                 C   s4   d| j kr| j}ndd | j D }t|}|S )z
        Get the tuning parameters to use when calling the model

        Returns:
            The tuning parameters as a JSON string.
        api/v2/predict/genericc                 S   s&   i | ]\}}|t |jt|d qS ))typevalue)r   __name__str).0kvr   r   r   
<dictcomp>h   s    z<SambaStudioEmbeddings._get_tuning_params.<locals>.<dictcomp>)r   r   itemsjsondumps)selfZtuning_params_dictZtuning_paramsr   r   r   _get_tuning_params^   s    

z(SambaStudioEmbeddings._get_tuning_params)pathr   c                 C   s   | j  d| j d| S )z
        Return the full API URL for a given path.

        :param str path: the sub-path
        :returns: the full API URL for the sub-path
        :rtype: str
        /)r   r   )r(   r*   r   r   r   _get_full_urlo   s    z#SambaStudioEmbeddings._get_full_url)textsr   r   c                 c   s,   t dt||D ]}||||  V  qdS )af  Generator for creating batches in the embed documents method
        Args:
            texts (List[str]): list of strings to embed
            batch_size (int, optional): batch size to be used for the embedding model.
            Will depend on the RDU endpoint used.
        Yields:
            List[str]: list (batch) of strings of size batch size
        r   N)rangelen)r(   r-   r   ir   r   r   _iterate_over_batchesy   s    	z+SambaStudioEmbeddings._iterate_over_batchesNc              	   C   sz  |dkr| j }t }| | j d| j }t|  }g }d| j	kr| 
||D ]}||d}|j|d| ji|d}	|	jdkrtd|	j d	|	j z|	 d
 }
||
 W qV tk
r   td|	 Y qVX qVnd| j	kr| 
||D ]}dd t|D }||d}|j|d| ji|d}	|	jdkrRtd|	j d	|	j z$dd |	 d D }
||
 W q tk
r   td|	 Y qX qnd| j	krd| 
||D ]}||d}|j|d| ji|d}	|	jdkr td|	j d	|	j z8|dr |	 d d }
n|	 d }
||
 W n$ tk
r\   td|	 Y nX qntd| j	 d|S )a<  Returns a list of embeddings for the given sentences.
        Args:
            texts (`List[str]`): List of texts to encode
            batch_size (`int`): Batch size for the encoding

        Returns:
            `List[np.ndarray]` or `List[tensor]`: List of embeddings
            for the given sentences
        Nr+   api/predict/nlpinputsparamskeyheadersr&      1Sambanova /complete call failed with status code .
 Details: data%'data' not found in endpoint responser   c                 S   s    g | ]\}}d | |dqS )itemidr   r   )r!   r0   r>   r   r   r   
<listcomp>   s    z9SambaStudioEmbeddings.embed_documents.<locals>.<listcomp>r%   r5   c                 S   s   g | ]}|d  qS )r   r   )r!   r>   r   r   r   rA      s     r%   &'items' not found in endpoint responser   Z	instancesr5   select_expertpredictionsr   ,'predictions' not found in endpoint responsehandling of endpoint uri:  not implemented)r   requestsSessionr,   r   r   r&   loadsr)   r   r1   postr   status_codeRuntimeErrortextextendKeyError	enumerateget
ValueError)r(   r-   r   http_sessionurlr5   Z
embeddingsbatchr<   response	embeddingr%   r   r   r   embed_documents   s    




z%SambaStudioEmbeddings.embed_documents)rP   r   c                 C   s  t  }| | j d| j }t|  }d| jkr|g|d}|j	|d| j
i|d}|jdkr|td|j d|j z| d	 d
 }W n" tk
r   td| Y nX nZd| jkrPd|dg|d}|j	|d| j
i|d}|jdkrtd|j d|j z| d d
 d }W n$ tk
rL   td| Y nX nd| jkr |g|d}|j	|d| j
i|d}|jdkrtd|j d|j z6|dr| d d
 d
 }n| d d
 }W n$ tk
r   td| Y nX ntd| j d|S )a  Returns a list of embeddings for the given sentences.
        Args:
            sentences (`List[str]`): List of sentences to encode

        Returns:
            `List[np.ndarray]` or `List[tensor]`: List of embeddings
            for the given sentences
        r+   r2   r3   r6   r7   r9   r:   r;   r<   r   r=   r   Zitem0r?   rB   r%   r   rC   r   rD   rE   rF   rG   rH   rI   )rJ   rK   r,   r   r   r&   rL   r)   r   rM   r   rN   rO   rP   rR   rT   rU   )r(   rP   rV   rW   r5   r<   rY   rZ   r   r   r   embed_query   s    	

z!SambaStudioEmbeddings.embed_query)N)r   
__module____qualname____doc__r   r    __annotations__r   r   r   r   r   dictr   intr	   r   r   r)   r,   r   r   r1   r   floatr[   r\   r   r   r   r   r
   
   s(   
#
  
dr
   )r&   typingr   r   r   r   rJ   Zlangchain_core.embeddingsr   Zlangchain_core.pydantic_v1r   Zlangchain_core.utilsr   r	   r
   r   r   r   r   <module>   s   