U
    h                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
 dZed Zed Zed Ze je jd	 e eZG d
d de
ZdS )    N)AnyDictListOptional)Document)
BaseLoaderzhttps://api.notion.com/v1z/databases/{database_id}/queryz/pages/{page_id}z/blocks/{block_id}/children)levelc                	   @   s   e Zd ZdZdddeeee eeeef  ddddZ	e
e dd	d
Zddifeeef e
eeef  dddZeeef edddZdeeedddZdi fddeeeeef eeeef  edddZdS )NotionDBLoaderaY  Load from `Notion DB`.

    Reads content from pages within a Notion Database.
    Args:
        integration_token (str): Notion integration token.
        database_id (str): Notion database id.
        request_timeout_sec (int): Timeout for Notion requests in seconds.
            Defaults to 10.
        filter_object (Dict[str, Any]): Filter object used to limit returned
            entries based on specified criteria.
            E.g.: {
                "timestamp": "last_edited_time",
                "last_edited_time": {
                    "on_or_after": "2024-02-07"
                }
            } -> will only return entries that were last edited
                on or after 2024-02-07
            Notion docs: https://developers.notion.com/reference/post-database-query-filter
            Defaults to None, which will return ALL entries.
    
   N)filter_object)integration_tokendatabase_idrequest_timeout_secr   returnc                C   sL   |st d|st d|| _|| _d| j ddd| _|| _|pDi | _dS )zInitialize with parameters.z"integration_token must be providedzdatabase_id must be providedzBearer zapplication/jsonz
2022-06-28)AuthorizationzContent-TypezNotion-VersionN)
ValueErrortokenr   headersr   r   )selfr   r   r   r    r   Q/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/document_loaders/notiondb.py__init__)   s    	zNotionDBLoader.__init__)r   c                    s      }t fdd|D S )zqLoad documents from the Notion database.
        Returns:
            List[Document]: List of documents.
        c                 3   s   | ]}  |V  qd S )N)	load_page).0page_summaryr   r   r   	<genexpr>G   s     z&NotionDBLoader.load.<locals>.<genexpr>)_retrieve_page_summarieslist)r   Zpage_summariesr   r   r   loadA   s    zNotionDBLoader.loadZ	page_sized   )
query_dictr   c                 C   sR   g }| j tj| jdd|| jd}||d |ds>qN|d|d< q|S )zi
        Get all the pages from a Notion database
        OR filter based on specified criteria.
        )r   POST)methodr!   r   resultsZhas_morenext_cursorZstart_cursor)_requestDATABASE_URLformatr   r   extendget)r   r!   Zpagesdatar   r   r   r   I   s    
z'NotionDBLoader._retrieve_page_summaries)r   r   c           
      C   s^  |d }i }|d   D ](\}}|d }|dkrR|d rJ|d d d nd}n|dkrz|d rr|d d d nd}n|d	kr|d	 rd
d |d	 D ng }n|dkr|d }n~|dkr|d r|d d  d|d d  nd}nH|dkr|d r|d d nd}n |dkrjg }|d r6|d D ]0}|d}	|	sZtd|  ||	 q6n|dkr|d r|d nd}n|dkr|d r|d nd}n|dkr|d r|d nd}nf|dkr|d }nR|dkr|d }n>|dkr|d }n*|dkr2|d r,|d d nd}nd}||| < q||d< t| ||dS )z\Read a page.

        Args:
            page_summary: Page summary from Notion API.
        idZ
propertiestype	rich_textr   Z
plain_textNtitleZmulti_selectc                 S   s   g | ]}|d  qS )namer   )r   itemr   r   r   
<listcomp>}   s     z,NotionDBLoader.load_page.<locals>.<listcomp>urlZ	unique_idprefix-numberstatusr0   Zpeoplez-Missing 'name' in 'people' property for page dateZlast_edited_timeZcreated_timeZcheckboxemailselect)Zpage_contentmetadata)itemsr*   loggerwarningappendlowerr   _load_blocks)
r   r   Zpage_idr;   Z	prop_nameZ	prop_dataZ	prop_typevaluer1   r0   r   r   r   r   c   sr     













zNotionDBLoader.load_pager   )block_idnum_tabsr   c                 C   s   g }|}|r|  tj|d}|d D ]}||d  }d|kr@q&g }|d D ]&}	d|	krL|d| |	d d   qL|d r| j|d	 |d
 d}
||
 |d| q&|d}qd|S )zRead a block and its children.)rC   r$   r-   r.   text	contentZhas_childrenr,      )rD   
r%   )r&   	BLOCK_URLr(   r?   rA   joinr*   )r   rC   rD   Zresult_lines_arrZcur_block_idr+   resultZ
result_objZcur_result_text_arrr.   Zchildren_textr   r   r   rA      s.     
zNotionDBLoader._load_blocksGET)r3   r#   r!   r   r   c                C   s<   |  }|r||d< tj||| j|| jd}|  | S )Nfilter)r   jsontimeout)copyrequestsrequestr   r   raise_for_statusrO   )r   r3   r#   r!   r   Zjson_payloadresr   r   r   r&      s    zNotionDBLoader._request)r
   )r   )__name__
__module____qualname____doc__strr   intr   r   r   r   r   r   r   r   rA   r&   r   r   r   r   r	      s<    	
N%
r	   )loggingtypingr   r   r   r   rR   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   ZNOTION_BASE_URLr'   ZPAGE_URLrJ   basicConfigWARNING	getLoggerrV   r=   r	   r   r   r   r   <module>   s   
