U
    hn                     @   s   d Z ddlmZ ddlmZmZmZmZ ddlZddl	m
Z
 ddlmZmZmZmZmZ ddlmZ G dd	 d	eZG d
d deeZdS )z!Loads data from OneNote Notebooks    )Path)DictIteratorListOptionalN)Document)	BaseModelBaseSettingsFieldFilePath	SecretStr)
BaseLoaderc                   @   sD   e Zd ZU edddZeed< edddZeed< G dd dZ	d	S )
_OneNoteGraphSettings.ZMS_GRAPH_CLIENT_ID)env	client_idZMS_GRAPH_CLIENT_SECRETclient_secretc                   @   s   e Zd ZdZdZdZdS )z_OneNoteGraphSettings.ConfigFz.env N)__name__
__module____qualname__Zcase_sentiveZenv_fileZ
env_prefix r   r   P/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/document_loaders/onenote.pyConfig   s   r   N)
r   r   r   r
   r   str__annotations__r   r   r   r   r   r   r   r      s   
r   c                   @   s"  e Zd ZU dZeedZeed< dZe	ed< dZ
eed< dZeed	< d
Zeed< e d d Zeed< dZee ed< dZee ed< dZee ed< dZeee  ed< ee dddZeedddZeeeef dddZeee dddZddddZ eedd d!Z!dS )"OneNoteLoaderz"Load pages from OneNote notebooks.)default_factorysettingsFauth_with_tokenr   access_tokenz+https://graph.microsoft.com/v1.0/me/onenoteonenote_api_base_urlz,https://login.microsoftonline.com/consumers/authority_urlz.credentialszonenote_graph_token.txt
token_pathNnotebook_namesection_name
page_title
object_ids)returnc                 c   sF  |    zddlm} W n tk
r4   tdY nX | jdk	r| jD ]R}| |}||d}d}|j}|rx|jdd}|jd	dd
}t|d|idV  qFn| j	}|dkrBt
j|| jdd}	|	  |	 }
|
d D ]P}|d }| |}||d}d}|j}|r|jd	dd
}t|d|idV  qd|
kr<|
d }qd}qdS )z
        Get pages from OneNote notebooks.

        Returns:
            A list of Documents with attributes:
                - page_content
                - metadata
                    - title
        r   )BeautifulSoupzJbeautifulsoup4 package not found, please install it with `pip install bs4`Nzhtml.parserr   T)strip
)	separatorr)   title)page_contentmetadata
   headerstimeoutvalueidz@odata.nextLink)_authZbs4r(   ImportErrorr&   _get_page_contentr,   Zget_textr   _urlrequestsget_headersraise_for_statusjson)selfr(   Z	object_idZpage_content_htmlZsoupr%   Z	title_tagr-   request_urlresponseZpagespagepage_idr   r   r   	lazy_load5   sP    





 


 


zOneNoteLoader.lazy_load)rB   r'   c                 C   s2   | j d| d }tj|| jdd}|  |jS )z!Get page content from OneNote APIz/pages/z/contentr/   r0   )r    r9   r:   r;   r<   text)r>   rB   r?   r@   r   r   r   r7   n   s    zOneNoteLoader._get_page_contentc                 C   s   dd| j  iS )z*Return headers for requests to OneNote APIAuthorizationzBearer )r   r>   r   r   r   r;   u   s     
zOneNoteLoader._headersc                 C   s   dgS )zReturn required scopes.z
Notes.Readr   rF   r   r   r   _scopes|   s    zOneNoteLoader._scopesc           	   
   C   st  | j dkrdS | jr:| jd}| | _ W 5 Q R X n6zddlm} W n, tk
rv } ztd|W 5 d}~X Y nX || jj	| jj
 | jd}|| j}td t| td	}|d
d dd }|j|| jd}|d | _ z"| jj s
| jjjdd W n> tk
rJ } ztd| jj dd |W 5 d}~X Y nX | jd}|| j  W 5 Q R X dS )z%Authenticate with Microsoft Graph APIr   Nrr   )ConfidentialClientApplicationzAMSAL package not found, please install it with `pip install msal`)r   Zclient_credential	authorityz(Visit the following url to give consent:z"Paste the authenticated url here:
zcode=   &)codeZscopesr   T)parentszCould not create the folder  zto store the access token.w)r   r   r"   openreadZmsalrI   r6   r   r   r   Zget_secret_valuer!   Zget_authorization_request_urlrG   printinputsplitZ#acquire_token_by_authorization_codeparentexistsmkdir	Exceptionwrite)	r>   Z
token_filerI   eZclient_instanceZauthorization_request_urlZauthorization_urlZauthorization_codeZaccess_token_jsonr   r   r   r5      sV    

 
zOneNoteLoader._authc                 C   s  g }g }g }| d | jdk	rJ| dd| jdd d  | d | jdk	r~| dd| jdd d  | d	 | jdk	r| d
d| jdd d  t|dkr| dd|  t|dkr| dd|  d|}|dkrd| }| j d| S )z8Create URL for getting page ids from the OneNoteApi API.z
$select=idNz"parentNotebook/displayName%20eq%20'rO   z%20ZparentNotebookz!parentSection/displayName%20eq%20ZparentSectionztitle%20eq%20r   z$expand=,z$filter=z	%20and%20rL   r   ?z/pages)appendr#   replacer$   r%   lenjoinr    )r>   Zquery_params_listZfilter_listZexpand_listZquery_paramsr   r   r   r8      s<    







zOneNoteLoader._url)"r   r   r   __doc__r
   r   r   r   r   boolr   r   r    r!   r   homer"   r   r#   r   r$   r%   r&   r   r   r   rC   r7   propertyr   r;   rG   r5   r8   r   r   r   r   r      s(   
9/r   )rc   pathlibr   typingr   r   r   r   r9   Zlangchain_core.documentsr   Zlangchain_core.pydantic_v1r   r	   r
   r   r   Z)langchain_community.document_loaders.baser   r   r   r   r   r   r   <module>   s   
