from __future__ import annotations

import json
from io import StringIO
from typing import Any, Dict, Iterator, List, Optional

import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import get_pydantic_field_names


class Llamafile(LLM):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will need to first:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode:

        `./path/to/model.llamafile --server --nobrowser`
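
    To check that the server is reachable, you can post to the same
    endpoint this class uses (a minimal sketch, assuming the default port):

        `curl -X POST http://localhost:8080/completion -H "Content-Type: application/json" -d '{"prompt": "Say hi.", "n_predict": 8}'`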

    Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile()
            llm.invoke("Tell me a joke.")
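
    To stream tokens as they are generated instead of waiting for the full
    completion (a minimal sketch, assuming the same server is running;
    setting `streaming=True` also makes `invoke` collect its output from the
    streaming endpoint):

        .. code-block:: python

            llm = Llamafile(streaming=True)
            for chunk in llm.stream("Tell me a joke."):
                print(chunk, end="", flush=True)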
    zhttp://localhost:8080strbase_urlNzOptional[int]request_timeoutFbool	streamingintseedg?floattemperature(   top_kgffffff?top_pg?min_p	n_predictr   n_keepg      ?tfs_z	typical_pg?repeat_penalty@   repeat_last_nTpenalize_nlg        presence_penaltyfrequency_penaltymirostatg      @mirostat_taug?mirostat_etac                   @  s   e Zd ZdZdS )zLlamafile.ConfigZforbidN)__name__
__module____qualname__extra r-   r-   F/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/llms/llamafile.pyConfigs   s   r/   )returnc                 C  s   dS )NZ	llamafiler-   )selfr-   r-   r.   	_llm_typev   s    zLlamafile._llm_typez	List[str]c                   s6   ddddddddd	d
dg  fddt | jD }|S )Nr   cacheZcallback_manager	callbacksmetadatanamer   r   tagsverboseZcustom_get_token_idsc                   s   g | ]}| kr|qS r-   r-   ).0kZignore_keysr-   r.   
<listcomp>   s     z/Llamafile._param_fieldnames.<locals>.<listcomp>)r   	__class__)r1   attrsr-   r;   r.   _param_fieldnamesz   s     
zLlamafile._param_fieldnameszDict[str, Any]c                 C  s"   i }| j D ]}t| |||< q
|S )N)r?   getattr)r1   params	fieldnamer-   r-   r.   _default_params   s    
zLlamafile._default_paramszOptional[List[str]]r   )stopkwargsr0   c                 K  sV   | j }| D ]\}}||kr|||< q|d k	rDt|dkrD||d< | jrRd|d< |S )Nr   rD   Tstream)rC   itemslenr   )r1   rD   rE   rA   r:   vr-   r-   r.   _get_parameters   s    
zLlamafile._get_parametersz"Optional[CallbackManagerForLLMRun])promptrD   run_managerrE   r0   c              	   K  s   | j rNt 8}| j|f||d|D ]}||j q&| }W 5 Q R X |S | jf d|i|}d|i|}	z&tj| j	 dddi|	d| j
d}
W n. tjjk
r   tjd	| j	 d
Y nX |
  d|
_|
 d }|S dS )a  Request prompt completion from the llamafile server and return the
        output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            The string generated by the model.

        """
        if self.streaming:
            # Stream the completion and accumulate it into a single string.
            with StringIO() as buff:
                for chunk in self._stream(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                ):
                    buff.write(chunk.text)

                text = buff.getvalue()

            return text
        else:
            params = self._get_parameters(stop=stop, **kwargs)
            payload = {"prompt": prompt, **params}

            try:
                response = requests.post(
                    url=f"{self.base_url}/completion",
                    headers={"Content-Type": "application/json"},
                    json=payload,
                    stream=False,
                    timeout=self.request_timeout,
                )
            except requests.exceptions.ConnectionError:
                raise requests.exceptions.ConnectionError(
                    "Could not connect to Llamafile server. Please make sure "
                    f"that a server is running at {self.base_url}."
                )

            response.raise_for_status()
            response.encoding = "utf-8"

            text = response.json()["content"]

            return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yields results objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompts to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            Dictionary-like objects each containing a token

        Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile(
                temperature = 0.0
            )
            for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                    stop=["'","
"]):
                result = chunk["choices"][0]
                print(result["text"], end='', flush=True)

        rD   rF   TrK   rM   rN   rO   rP   rU   rV   utf8)decode_unicode)rZ   )tokenN)rJ   r\   r]   r   r   r^   r_   ra   
iter_lines_get_chunk_contentr   Zon_llm_new_tokenrZ   )r1   rK   rD   rL   rE   rA   rc   rd   Z	raw_chunkrW   rb   r-   r-   r.   rX      s2    &
 
	


zLlamafile._stream)rb   r0   c                 C  s.   | dr&|d}t|}|d S |S dS )zWhen streaming is turned on, llamafile server returns lines like:

        'data: {"content":" They","multimodal":true,"slot_id":0,"stop":false}'

        Here, we convert this to a dict and return the value of the 'content'
        field
        zdata:zdata: rW   N)
startswithlstriprS   loads)r1   rb   cleaneddatar-   r-   r.   rj   /  s
    	


zLlamafile._get_chunk_content)N)NN)NN)"r)   r*   r+   __doc__r   __annotations__r   r   r   r   r   r   r   r   r   r   r   r    r"   r#   r$   r%   r&   r'   r(   r/   propertyr2   r?   rC   rJ   re   rX   rj   r-   r-   r-   r.   r      sJ   
   ?  Fr   )
__future__r   rS   ior   typingr   r   r   r   r   r\   Z langchain_core.callbacks.managerr	   Z#langchain_core.language_models.llmsr
   Zlangchain_core.outputsr   Zlangchain_core.utilsr   r   r-   r-   r-   r.   <module>   s   