U
    h&                     @  st   d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ G dd	 d	eZdS )
    )annotationsN)Path)AnyDictIterableListOptional)CallbackManagerForRetrieverRunDocument)BaseRetrieverc                   @  s   e Zd ZU dZded< ded< ded< dZded	< G d
d dZed(ddddd dddZedddddd dddZ	ddddddZ
d)ddddd d!Zed"dd#dd$dd d%d&d'ZdS )*TFIDFRetrieverz`TF-IDF` retriever.

    Largely based on
    https://github.com/asvskartheek/Text-Retrieval/blob/master/TF-IDF%20Search%20Engine%20(SKLEARN).ipynb
    r   
vectorizerzList[Document]docstfidf_array   intkc                   @  s   e Zd ZdZdS )zTFIDFRetriever.ConfigTN)__name__
__module____qualname__Zarbitrary_types_allowed r   r   H/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/retrievers/tfidf.pyConfig   s   r   NzIterable[str]zOptional[Iterable[dict]]zOptional[Dict[str, Any]])texts	metadatastfidf_paramskwargsreturnc           	      K  s   zddl m} W n tk
r,   tdY nX |p4i }|f |}||}|pZdd |D }dd t||D }| f |||d|S )	Nr   )TfidfVectorizerzNCould not import scikit-learn, please install with `pip install scikit-learn`.c                 s  s   | ]
}i V  qd S Nr   ).0_r   r   r   	<genexpr>2   s     z,TFIDFRetriever.from_texts.<locals>.<genexpr>c                 S  s   g | ]\}}t ||d qS )Zpage_contentmetadatar
   )r!   tmr   r   r   
<listcomp>3   s     z-TFIDFRetriever.from_texts.<locals>.<listcomp>r   r   r   )Zsklearn.feature_extraction.textr   ImportErrorZfit_transformzip)	clsr   r   r   r   r   r   r   r   r   r   r   
from_texts   s    


zTFIDFRetriever.from_texts)r   zIterable[Document])	documentsr   r   r   c                K  s.   t dd |D  \}}| jf |||d|S )Nc                 s  s   | ]}|j |jfV  qd S r    r$   )r!   dr   r   r   r#   >   s     z0TFIDFRetriever.from_documents.<locals>.<genexpr>)r   r   r   )r+   r-   )r,   r.   r   r   r   r   r   r   r   from_documents6   s      zTFIDFRetriever.from_documentsstrr	   )queryrun_managerr   c                  s\   ddl m}  j|g}| j|d} fdd|  j d  d d d D }|S )Nr   )cosine_similarity)c                   s   g | ]} j | qS r   )r   )r!   iselfr   r   r(   N   s     z:TFIDFRetriever._get_relevant_documents.<locals>.<listcomp>r5   )Zsklearn.metrics.pairwiser4   r   Z	transformr   ZreshapeZargsortr   )r8   r2   r3   r4   Z	query_vecresultsZreturn_docsr   r7   r   _get_relevant_documentsC   s    ,z&TFIDFRetriever._get_relevant_documentstfidf_vectorizerNone)folder_path	file_namer   c              	   C  s   zdd l }W n tk
r(   tdY nX t|}|jddd || j|| d  t|| d d}t| j| j	f| W 5 Q R X d S )Nr   BCould not import joblib, please install with `pip install joblib`.T)exist_okparents.joblib.pklwb)
joblibr*   r   mkdirdumpr   openpickler   r   )r8   r=   r>   rE   pathfr   r   r   
save_localQ   s    
zTFIDFRetriever.save_localF)allow_dangerous_deserializationr>   bool)r=   rM   r>   r   c          
   	   C  s   zddl }W n tk
r(   tdY nX |s6tdt|}||| d }t|| d d}t|\}}	W 5 Q R X | |||	dS )	a  Load the retriever from local storage.

        Args:
            folder_path: Folder path to load from.
            allow_dangerous_deserialization: Whether to allow dangerous deserialization.
                Defaults to False.
                The deserialization relies on .joblib and .pkl files, which can be
                modified to deliver a malicious payload that results in execution of
                arbitrary code on your machine. You will need to set this to `True` to
                use deserialization. If you do this, make sure you trust the source of
                the file.
            file_name: File name to load from. Defaults to "tfidf_vectorizer".

        Returns:
            TFIDFRetriever: Loaded retriever.
        r   Nr?   a  The de-serialization of this retriever is based on .joblib and .pkl files.Such files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to load this retriever. If you do this, make sure you trust the source of the file, and you are responsible for validating the file came from a trusted source.rB   rC   rbr)   )rE   r*   
ValueErrorr   loadrH   rI   )
r,   r=   rM   r>   rE   rJ   r   rK   r   r   r   r   r   
load_localg   s    
zTFIDFRetriever.load_local)NN)r;   )r   r   r   __doc____annotations__r   r   classmethodr-   r0   r:   rL   rR   r   r   r   r   r      s(   
   r   )
__future__r   rI   pathlibr   typingr   r   r   r   r   Zlangchain_core.callbacksr	   Zlangchain_core.documentsr   Zlangchain_core.retrieversr   r   r   r   r   r   <module>   s   