U
    h(                     @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZm Z  ddl!m"Z"m#Z# edZ$ededZ%ee$ e
e$ge%f ee$ dddZ&G dd deZ'dS )zo
Ensemble retriever that ensemble the results of
multiple retrievers by using weighted  Reciprocal Rank Fusion
    N)defaultdict)Hashable)chain)	AnyCallableDictIterableIteratorListOptionalTypeVarcast)#AsyncCallbackManagerForRetrieverRunCallbackManagerForRetrieverRun)Document)root_validator)BaseRetrieverRetrieverLike)RunnableConfig)ensure_configpatch_config)ConfigurableFieldSpecget_unique_config_specsTH)bound)iterablekeyreturnc                 c   s4   t  }| D ]$}|| }|kr
|| |V  q
dS )a  Yield unique elements of an iterable based on a key function.

    Args:
        iterable: The iterable to filter.
        key: A function that returns a hashable key for each element.

    Yields:
        Unique elements of the iterable based on the key function.
    N)setadd)r   r   seenek r$   A/tmp/pip-unpacked-wheel-bo69hh5q/langchain/retrievers/ensemble.pyunique_by_key(   s
    

r&   c                   @   sN  e Zd ZU dZee ed< ee ed< dZe	ed< dZ
ee ed< eee dd	d
Zeddeeef eeef dddZd#eee eee dddZd$eee eee dddZeeee dddZeeee dddZddeeee ee dddZddeeee ee dddZeee  ee d d!d"ZdS )%EnsembleRetrieverae  Retriever that ensembles the multiple retrievers.

    It uses a rank fusion.

    Args:
        retrievers: A list of retrievers to ensemble.
        weights: A list of weights corresponding to the retrievers. Defaults to equal
            weighting for all retrievers.
        c: A constant added to the rank, controlling the balance between the importance
            of high-ranked items and the consideration given to lower-ranked items.
            Default is 60.
        id_key: The key in the document's metadata used to determine unique documents.
            If not specified, page_content is used.
    
retrieversweights<   cNid_key)r   c                 C   s   t dd | jD S )z+List configurable fields for this runnable.c                 s   s   | ]}|j D ]
}|V  qqd S N)config_specs).0	retrieverspecr$   r$   r%   	<genexpr>Q   s      z1EnsembleRetriever.config_specs.<locals>.<genexpr>)r   r(   selfr$   r$   r%   r.   N   s    zEnsembleRetriever.config_specsT)pre)valuesr   c                 C   s,   | ds(t|d }d| g| |d< |S )Nr)   r(      )getlen)clsr6   Zn_retrieversr$   r$   r%   set_weightsU   s    
zEnsembleRetriever.set_weights)inputconfigkwargsr   c           	   
   K   s   ddl m} t|}|j|dd |dd|dg | j|di | jd}|jd |fd	|d
pj|  i|}z| j	|||d}W n0 t
k
r } z|| |W 5 d }~X Y nX |j|f| |S d S )Nr   )CallbackManager	callbacksverboseFtagsmetadatarA   Zinheritable_tagsZ
local_tagsZinheritable_metadataZlocal_metadatanamerun_namerun_managerr=   )langchain_core.callbacksr?   r   	configurer8   rB   rC   on_retriever_startget_namerank_fusion	Exceptionon_retriever_erroron_retriever_end)	r4   r<   r=   r>   r?   callback_managerrH   resultr"   r$   r$   r%   invoke\   s<    


	
zEnsembleRetriever.invokec           	   
      s   ddl m} t|}|j|dd |dd|dg | j|di | jd}|jd |fd	|d
pj|  i|I d H }z| j	|||dI d H }W n6 t
k
r } z||I d H  |W 5 d }~X Y nX |j|f|I d H  |S d S )Nr   )AsyncCallbackManagerr@   rA   FrB   rC   rD   rE   rF   rG   )rI   rT   r   rJ   r8   rB   rC   rK   rL   arank_fusionrN   rO   rP   )	r4   r<   r=   r>   rT   rQ   rH   rR   r"   r$   r$   r%   ainvoke}   sD    


	  
zEnsembleRetriever.ainvoke)queryrH   r   c                C   s   |  ||}|S )z
        Get the relevant documents for a given query.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        )rM   r4   rW   rH   fused_documentsr$   r$   r%   _get_relevant_documents   s    z)EnsembleRetriever._get_relevant_documentsc                   s   |  ||I dH }|S )z
        Asynchronously get the relevant documents for a given query.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        N)rU   rX   r$   r$   r%   _aget_relevant_documents   s    z*EnsembleRetriever._aget_relevant_documents)r=   )rW   rH   r=   r   c                   sR    fddt | jD }tt|D ]}dd || D ||< q(| |}|S )z
        Retrieve the results of the retrievers and use rank_fusion_func to get
        the final result.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        c                    s6   g | ].\}}| t jd |d  ddqS Z
retriever_r7   )tag)r@   )rS   r   	get_childr/   ir0   r=   rW   rH   r$   r%   
<listcomp>   s    z1EnsembleRetriever.rank_fusion.<locals>.<listcomp>c                 S   s*   g | ]"}t |tr"ttt|d n|qS )page_content)
isinstancestrr   r   r/   docr$   r$   r%   rb      s   )	enumerater(   ranger9   weighted_reciprocal_rankr4   rW   rH   r=   Zretriever_docsr`   rY   r$   ra   r%   rM      s    
zEnsembleRetriever.rank_fusionc                   s^   t j fddt| jD  I dH }tt|D ]}dd || D ||< q4| |}|S )z
        Asynchronously retrieve the results of the retrievers
        and use rank_fusion_func to get the final result.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        c                    s6   g | ].\}}| t jd |d  ddqS r\   )rV   r   r^   r_   ra   r$   r%   rb     s    z2EnsembleRetriever.arank_fusion.<locals>.<listcomp>Nc                 S   s$   g | ]}t |tst|d n|qS rc   )re   r   rg   r$   r$   r%   rb     s   )asyncioZgatherri   r(   rj   r9   rk   rl   r$   ra   r%   rU      s    

zEnsembleRetriever.arank_fusion)	doc_listsr   c                    s   t |t jkrtdtt t|jD ]P\}}t|ddD ]:\}} jdkr\|jn
|j	j   ||j
  7  < qBq.t|}tt|fddd fddd	}|S )
a  
        Perform weighted Reciprocal Rank Fusion on multiple rank lists.
        You can find more details about RRF here:
        https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf

        Args:
            doc_lists: A list of rank lists, where each rank list contains unique items.

        Returns:
            list: The final aggregated list of items sorted by their weighted RRF
                    scores in descending order.
        z<Number of rank lists must be equal to the number of weights.r7   )startNc                    s    j d kr| jS | j j  S r-   r,   rd   rC   rh   r3   r$   r%   <lambda>B  s    z<EnsembleRetriever.weighted_reciprocal_rank.<locals>.<lambda>Tc                    s     j d kr| jn
| jj   S r-   rp   rq   Z	rrf_scorer4   r$   r%   rr   G  s   )reverser   )r9   r)   
ValueErrorr   floatzipri   r,   rd   rC   r+   r   from_iterablesortedr&   )r4   rn   Zdoc_listZweightZrankrh   Zall_docsZsorted_docsr$   rs   r%   rk     s0    



z*EnsembleRetriever.weighted_reciprocal_rank)N)N)__name__
__module____qualname____doc__r
   r   __annotations__rv   r+   intr,   r   rf   propertyr   r.   r   r   r   r;   r   r   rS   rV   r   rZ   r   r[   rM   rU   rk   r$   r$   r$   r%   r'   9   s`   
"   "   %.,
r'   )(r}   rm   collectionsr   collections.abcr   	itertoolsr   typingr   r   r   r   r	   r
   r   r   r   rI   r   r   Zlangchain_core.documentsr   Zlangchain_core.pydantic_v1r   Zlangchain_core.retrieversr   r   Zlangchain_core.runnablesr   Zlangchain_core.runnables.configr   r   Zlangchain_core.runnables.utilsr   r   r   r   r&   r'   r$   r$   r$   r%   <module>   s    ,$