U
    h.                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ eeZG dd deZdS )    N)AnyDictIteratorList)Document)	BaseModelroot_validatorc                   @   s(  e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< d	Zeed
< dZe
ed< dZe
ed< dZe
ed< dZeed< eddeedddZeedddZeee dddZeee dddZeedd d!Zeee dd"d#Zeee dd$d%Zeeed&d'd(Zeeed)d*d+Zd,S )-PubMedAPIWrappera(  
    Wrapper around PubMed API.

    This wrapper will use the PubMed API to conduct searches and fetch
    document summaries. By default, it will return the document summaries
    of the top-k results of an input search.

    Parameters:
        top_k_results: number of the top-scored document used for the PubMed tool
        MAX_QUERY_LENGTH: maximum length of the query.
          Default is 300 characters.
        doc_content_chars_max: maximum length of the document content.
          Content will be truncated if it exceeds this length.
          Default is 2000 characters.
        max_retry: maximum number of retries for a request. Default is 5.
        sleep_time: time to wait between retries.
          Default is 0.2 seconds.
        email: email address to be used for the PubMed API.
    parsez;https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?base_url_esearchz:https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?base_url_efetch   	max_retryg?
sleep_time   top_k_resultsi,  MAX_QUERY_LENGTHi  doc_content_chars_maxzyour_email@example.comemailT)pre)valuesreturnc                 C   s8   zddl }|j|d< W n tk
r2   tdY nX |S )z7Validate that the python package exists in environment.r   Nr
   zZCould not import xmltodict python package. Please install it with `pip install xmltodict`.)	xmltodictr
   ImportError)clsr   r    r   H/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/utilities/pubmed.pyvalidate_environment3   s    
z%PubMedAPIWrapper.validate_environment)queryr   c              
   C   sr   z<dd |  |d| j D }|r8d|d| j ndW S  tk
rl } zd|  W Y S d}~X Y nX dS )z
        Run PubMed search and get the article meta information.
        See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
        It uses only the most informative fields of article meta information.
        c                 S   s8   g | ]0}d |d  d|d  d|d  d|d  qS )zPublished: 	Publishedz
Title: Titlez
Copyright Information: Copyright Informationz
Summary::
Summaryr   ).0resultr   r   r   
<listcomp>J   s   z(PubMedAPIWrapper.run.<locals>.<listcomp>Nz

zNo good PubMed Result was foundzPubMed exception: )loadr   joinr   	Exception)selfr   Zdocsexr   r   r   runA   s    zPubMedAPIWrapper.runc                 c   s~   | j d ttj|h d| j d }tj|}| 	d}t
|}|d d }|d d D ]}| ||V  qfdS )	z
        Search PubMed for documents matching the query.
        Return an iterator of dictionaries containing the document metadata.
        zdb=pubmed&term=z&retmode=json&retmax=z&usehistory=yutf-8ZesearchresultwebenvZidlistN)r   strurllibr
   quoter   requesturlopenreaddecodejsonloadsretrieve_article)r)   r   urlr$   textZ	json_textr-   uidr   r   r   	lazy_load[   s    
zPubMedAPIWrapper.lazy_loadc                 C   s   t | |S )z
        Search PubMed for documents matching the query.
        Return a list of dictionaries containing the document metadata.
        )listr;   r)   r   r   r   r   r&   o   s    zPubMedAPIWrapper.load)docr   c                 C   s   | d}t||dS )Nr"   )Zpage_contentmetadata)popr   )r)   r>   summaryr   r   r   _dict2documentv   s    
zPubMedAPIWrapper._dict2documentc                 c   s"   | j |dD ]}| |V  qd S N)r   )r;   rB   )r)   r   dr   r   r   lazy_load_docsz   s    zPubMedAPIWrapper.lazy_load_docsc                 C   s   t | j|dS rC   )r<   rE   r=   r   r   r   	load_docs~   s    zPubMedAPIWrapper.load_docs)r:   r-   r   c           	   
   C   s   | j d | d | }d}ztj|}W qW q tjjk
r } zT|jdkr|| jk rtd| j	dd t
| j	 |  j	d9  _	|d	7 }n|W 5 d }~X Y qX q| d
}| |}| ||S )Nzdb=pubmed&retmode=xml&id=z&webenv=r   i  zToo Many Requests, waiting for z.2fz seconds...      r,   )r   r/   r1   r2   error	HTTPErrorcoder   printr   timesleepr3   r4   r
   _parse_article)	r)   r:   r-   r8   retryr$   eZxml_text	text_dictr   r   r   r7      s4    

z!PubMedAPIWrapper.retrieve_article)r:   rR   r   c           	      C   s   z|d d d d }W n$ t k
r<   |d d d }Y nX |di dg }d	d
 |D }|rnd|n2t|tr||n$t|trddd | D nd}|di }d|dd|dd|ddg}||dd||di dd|dS )NZPubmedArticleSetZPubmedArticleZMedlineCitationZArticleZPubmedBookArticleZBookDocumentZAbstractZAbstractTextc                 S   s2   g | ]*}d |krd|kr|d  d|d   qS )z#textz@Labelz: r   )r#   txtr   r   r   r%      s    z3PubMedAPIWrapper._parse_article.<locals>.<listcomp>
c                 s   s   | ]}t |V  qd S )N)r.   )r#   valuer   r   r   	<genexpr>   s     z2PubMedAPIWrapper._parse_article.<locals>.<genexpr>zNo abstract availableZArticleDate-ZYear ZMonthZDayZArticleTitleZCopyrightInformation)r:   r    r   r!   r"   )KeyErrorgetr'   
isinstancer.   dictr   )	r)   r:   rR   arZabstract_textZ	summariesrA   Za_dZpub_dater   r   r   rO      s>     
 zPubMedAPIWrapper._parse_articleN) __name__
__module____qualname____doc__r   __annotations__r   r.   r   r   intr   floatr   r   r   r   r   r   r   r+   r   r\   r;   r   r&   r   rB   rE   rF   r7   rO   r   r   r   r   r	      s*   

 r	   )r5   loggingrM   urllib.errorr/   urllib.parseurllib.requesttypingr   r   r   r   Zlangchain_core.documentsr   Zlangchain_core.pydantic_v1r   r   	getLoggerr^   loggerr	   r   r   r   r   <module>   s   
