U
    h[$                     @   s   d dl mZ d dlmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ G dd	 d	eeZG d
d deZG dd de
ZdS )    )Enum)AnyIteratorListOptional)CallbackManagerForLLMRun)LLM)GenerationChunk)	BaseModel)enforce_stop_tokensc                   @   s   e Zd ZdZdZdZdS )Devicez,The device to use for inference, cuda or cpucudacpuN)__name__
__module____qualname____doc__r   r    r   r   J/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/llms/titan_takeoff.pyr      s   r   c                   @   sj   e Zd ZU dZG dd dZeed< ejZ	eed< dZ
eed< dZee ed	< d
Zeed< dZeed< dS )ReaderConfigzAConfiguration for the reader to be deployed in Titan Takeoff API.c                   @   s   e Zd ZdZdS )zReaderConfig.Configr   N)r   r   r   Zprotected_namespacesr   r   r   r   Config   s   r   Z
model_namedeviceZprimaryconsumer_groupNtensor_paralleli   max_seq_length   max_batch_size)r   r   r   r   r   str__annotations__r   r   r   r   r   r   intr   r   r   r   r   r   r      s   
r   c                       s   e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< d
Zeed< ddddg feeeeee d fddZeedddZdeeee  ee eedddZdeeee  ee eee dddZ  ZS )TitanTakeoffa  Titan Takeoff API LLMs.

    Titan Takeoff is a wrapper to interface with Takeoff Inference API for
    generative text to text language models.

    You can use this wrapper to send requests to a generative language model
    and to deploy readers with Takeoff.

    Examples:
        This is an example how to deploy a generative language model and send
        requests.

        .. code-block:: python
            # Import the TitanTakeoff class from community package
            import time
            from langchain_community.llms import TitanTakeoff

            # Specify the embedding reader you'd like to deploy
            reader_1 = {
                "model_name": "TheBloke/Llama-2-7b-Chat-AWQ",
                "device": "cuda",
                "tensor_parallel": 1,
                "consumer_group": "llama"
            }

            # For every reader you pass into models arg Takeoff will spin
            # up a reader according to the specs you provide. If you don't
            # specify the arg no models are spun up and it assumes you have
            # already done this separately.
            llm = TitanTakeoff(models=[reader_1])

            # Wait for the reader to be deployed, time needed depends on the
            # model size and your internet speed
            time.sleep(60)

            # Returns the query, ie a List[float], sent to `llama` consumer group
            # where we just spun up the Llama 7B model
            print(embed.invoke(
                "Where can I see football?", consumer_group="llama"
            ))

            # You can also send generation parameters to the model, any of the
            # following can be passed in as kwargs:
            # https://docs.titanml.co/docs/next/apis/Takeoff%20inference_REST_API/generate#request
            # for instance:
            print(embed.invoke(
                "Where can I see football?", consumer_group="llama", max_new_tokens=100
            ))
    zhttp://localhostbase_urli  porti  	mgmt_portF	streamingNclient)r!   r"   r#   r$   modelsc                    sr   t  j||||d zddlm} W n tk
r@   tdY nX || j| j| jd| _|D ]}| j	| q\dS )a  Initialize the Titan Takeoff language wrapper.

        Args:
            base_url (str, optional): The base URL where the Takeoff
                Inference Server is listening. Defaults to `http://localhost`.
            port (int, optional): What port is Takeoff Inference API
                listening on. Defaults to 3000.
            mgmt_port (int, optional): What port is Takeoff Management API
                listening on. Defaults to 3001.
            streaming (bool, optional): Whether you want to by default use the
                generate_stream endpoint over generate to stream responses.
                Defaults to False. In reality, this is not significantly different
                as the streamed response is buffered and returned similar to the
                non-streamed response, but the run manager is applied per token
                generated.
            models (List[ReaderConfig], optional): Any readers you'd like to
                spin up on. Defaults to [].

        Raises:
            ImportError: If you haven't installed takeoff-client, you will
            get an ImportError. To remedy run `pip install 'takeoff-client==0.4.0'`
        )r!   r"   r#   r$   r   )TakeoffClientzjtakeoff-client is required for TitanTakeoff. Please install it with `pip install 'takeoff-client>=0.4.0'`.)r"   r#   N)
super__init__Ztakeoff_clientr'   ImportErrorr!   r"   r#   r%   Zcreate_reader)selfr!   r"   r#   r$   r&   r'   model	__class__r   r   r)   n   s&       
  zTitanTakeoff.__init__)returnc                 C   s   dS )zReturn type of llm.Ztitan_takeoffr   )r+   r   r   r   	_llm_type   s    zTitanTakeoff._llm_type)promptstoprun_managerkwargsr/   c           	      K   s\   | j r.d}| j|||dD ]}||j7 }q|S | jj|f|}|d }|dk	rXt||}|S )a  Call out to Titan Takeoff (Pro) generate endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"

                # Use of model(prompt), ie `__call__` was deprecated in LangChain 0.1.7,
                # use model.invoke(prompt) instead.
                response = model.invoke(prompt)

         )r1   r2   r3   textN)r$   _streamr6   r%   generater   )	r+   r1   r2   r3   r4   Ztext_outputchunkresponser6   r   r   r   _call   s    

zTitanTakeoff._callc                 k   s   | j j|f|}d}|D ]|}||j7 }d|kr|dr<d}t|dddkrj|dd\}}	|d}|rt|d}
d}|
V  |r|j|
j	d q|rt|
ddd}
|
V  |r|j|
j	d d	S )
a  Call out to Titan Takeoff (Pro) stream endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Yields:
            A dictionary like object containing a string token.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"
                response = model.stream(prompt)

                # OR

                model = TitanTakeoff(streaming=True)

                response = model.invoke(prompt)

        r5   zdata:      
)r6   )tokenz</s>N)r%   Zgenerate_streamdata
startswithlensplitrstripr	   Zon_llm_new_tokenr6   replace)r+   r1   r2   r3   r4   r:   bufferr6   content_r9   r   r   r   r7      s*     



zTitanTakeoff._stream)NN)NN)r   r   r   r   r!   r   r   r"   r   r#   r$   boolr%   r   r   r   r)   propertyr0   r   r   r;   r   r	   r7   __classcell__r   r   r-   r   r    ,   sL   
2.  
1  
r    N)enumr   typingr   r   r   r   Zlangchain_core.callbacksr   Z#langchain_core.language_models.llmsr   Zlangchain_core.outputsr	   Zlangchain_core.pydantic_v1r
   Zlangchain_community.llms.utilsr   r   r   r   r    r   r   r   r   <module>   s   