U
    h#                     @   s~   d dl Z d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZ d dlmZ d dlmZ e eZdZG dd	 d	eZdS )
    N)BytesIO)ListOptionalSequence)ElementTree)Document)
BaseLoader@   c                	   @   s   e Zd ZdZdddeeee edddZd ee	e  ee	e  ee eeee	e
 d
ddZeee	e ddddZee eee	e
 dddZeeeee
 dddZeedddZeedddZeeedddZdS )!
QuipLoaderz_Load `Quip` pages.

    Port of https://github.com/quip/quip-api/tree/master/samples/baqup
    <   F)allow_dangerous_xml_parsing)api_urlaccess_tokenrequest_timeoutr   c                C   sN   zddl m} W n tk
r,   tdY nX ||||d| _|sJtddS )a  
        Args:
            api_url: https://platform.quip.com
            access_token: token of access quip API. Please refer:
                https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs
            request_timeout: timeout of request, default 60s.
            allow_dangerous_xml_parsing: Allow dangerous XML parsing, defaults to False
        r   )
QuipClientz?`quip_api` package not found, please run `pip install quip_api`)r   base_urlr   ac  The quip client uses the built-in XML parser which may causesecurity issues when parsing XML data in some cases. Please see https://docs.python.org/3/library/xml.html#xml-vulnerabilities For more information, set `allow_dangerous_xml_parsing` as True if you are sure that your distribution of the standard library is not vulnerable to XML vulnerabilities.N)quip_api.quipr   ImportErrorquip_client
ValueError)selfr   r   r   r   r    r   M/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/document_loaders/quip.py__init__   s    
  zQuipLoader.__init__N  )
folder_ids
thread_idsmax_docsinclude_all_foldersinclude_commentsinclude_imagesreturnc           	      C   s   |s|s|st d|pg }|r8|D ]}| |d| q$|rz| j }d|kr`| |d d| d|krz| |d d| tt|d| }| |||S )aA  
        Args:
            :param folder_ids: List of specific folder IDs to load, defaults to None
            :param thread_ids: List of specific thread IDs to load, defaults to None
            :param max_docs: Maximum number of docs to retrieve in total, defaults 1000
            :param include_all_folders: Include all folders that your access_token
                   can access, but doesn't include your private folder
            :param include_comments: Include comments, defaults to False
            :param include_images: Include images, defaults to False
        z_Must specify at least one among `folder_ids`, `thread_ids` or set `include_all`_folders as Truer   Zgroup_folder_idsZshared_folder_idsN)r   get_thread_ids_by_folder_idr   Zget_authenticated_userlistsetprocess_threads)	r   r   r   r   r   r   r    	folder_iduserr   r   r   load=   s0    
    zQuipLoader.load)r&   depthr   r!   c           
      C   sL  ddl m}m} z| j|}W n |k
r } zP|jdkrZtd| d| d|  ntd| d| d|j  W Y d	S d	}~X Y nF |k
r } z(td| d| d
|j  W Y d	S d	}~X Y nX |d dd| }t	d| d|  |d D ]@}	d|	kr,| 
|	d |d | nd|	kr||	d  qd	S )z4Get thread ids by folder id and update in thread_idsr   )	HTTPError	QuipErrori  zdepth z!, Skipped over restricted folder z, z, Skipped over folder z due to unknown error Nz due to HTTP error foldertitlez	Folder %sz, Processing folder childrenr&      	thread_id)r   r*   r+   r   Z
get_foldercodeloggingwarninggetinfor"   append)
r   r&   r)   r   r*   r+   r,   er-   childr   r   r   r"   j   s8    

  
z&QuipLoader.get_thread_ids_by_folder_id)r   r    include_messagesr!   c                 C   s2   g }|D ]$}|  |||}|dk	r|| q|S )z2Process a list of thread into a list of documents.N)process_threadr6   )r   r   r    r9   Zdocsr0   docr   r   r   r%      s    zQuipLoader.process_threads)r0   r    r9   r!   c                 C   s"  | j |}|d d }|d d }|d d }|d d }t|}td| d| d| d	|  d
|krz| j |d
 }	W nJ tjj	j
k
r }
 z&td| d| d|
  W Y d S d }
~
X Y nX ||||d}d}|r| |	}|r
|d | | }t|d
 | |dS d S )Nthreadidr-   linkZupdated_useczprocessing thread z title z link z update_ts htmlzError parsing thread  z, skipping, )r-   	update_tsr=   source z/n)Zpage_contentmetadata)r   Z
get_threadr
   _sanitize_titleloggerr5   Zparse_document_htmlxmletreecElementTree
ParseErrorerrorprocess_thread_imagesprocess_thread_messagesr   )r   r0   r    r9   r<   r-   r>   rA   sanitized_titletreer7   rD   textr   r   r   r:      s<    



zQuipLoader.process_thread)rO   r!   c                 C   s   d}zddl m} ddlm} W n tk
r<   tdY nX |dD ]}|d}|rH|dsfqH|d	\}}}}	| j	||	}
z(|
t|
 }|d
 || }W qH tk
r } ztd|  |W 5 d }~X Y qHX qH|S )NrC   r   )Image)pytesseractzg`Pillow or pytesseract` package not found, please run `pip install Pillow` or `pip install pytesseract`imgsrcz/blob/
z!failed to convert image to text, )ZPILrQ   rR   r   iterr4   
startswithsplitr   Zget_blobopenr   readZimage_to_stringOSErrorrF   rK   )r   rO   rP   rQ   rR   rS   rT   _r0   Zblob_idZblob_responseimager7   r   r   r   rL      s*    

z QuipLoader.process_thread_images)r0   r!   c                 C   s^   d }g }| j j||dd}|| |r>|d d d }qq>q|  dd |D }d|S )	Nd   )max_created_useccountZcreated_usecr/   c                 S   s   g | ]}|d  qS )rP   r   ).0messager   r   r   
<listcomp>   s     z6QuipLoader.process_thread_messages.<locals>.<listcomp>rV   )r   Zget_messagesextendreversejoin)r   r0   r`   messageschunkZtextsr   r   r   rM      s      
z"QuipLoader.process_thread_messages)r-   r!   c                 C   s8   t dd| }t dd|}t|tkr4|d t }|S )Nz\sr@   z(?u)[^- \w.]rC   )resublen_MAXIMUM_TITLE_LENGTH)r-   rN   r   r   r   rE      s
    zQuipLoader._sanitize_title)r   )NNr   FFF)__name__
__module____qualname____doc__strr   intboolr   r   r   r(   r"   r   r%   r:   r   rL   rM   staticmethodrE   r   r   r   r   r
      sX   	 (      

.  &    ,r
   )r2   rk   xml.etree.cElementTreerG   ior   typingr   r   r   xml.etree.ElementTreer   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   	getLoggerro   rF   rn   r
   r   r   r   r   <module>   s   
