U
    ™ÀÂhÑ%  ã                   @  sž   d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ G dd„ deeƒZdS )z7Loader that loads data from Sharepoint Document Libraryé    )ÚannotationsN)ÚPath)ÚAnyÚIteratorÚListÚOptionalÚSequence)Ú
BaseLoader)ÚDocument)ÚField)ÚO365BaseLoaderÚ	_FileType)Ú
get_parserc                   @  sÖ   e Zd ZU dZedƒZded< dZded< dZded	< dZ	ded
< dZ
ded< e ¡ d d Zded< dZded< eddœdd„ƒZeddœdd„ƒZddœdd„Zdddœdd „Zd!dœd"d#„Zdd$dœd%d&„ZdS )'ÚSharePointLoaderzLoad  from `SharePoint`..ÚstrÚdocument_library_idNzOptional[str]Úfolder_pathzOptional[List[str]]Ú
object_idsÚ	folder_idFzOptional[bool]Ú	load_authz.credentialszo365_token.txtr   Ú
token_pathÚload_extended_metadatazSequence[_FileType])Úreturnc                 C  s   t jt jt jfS )zfReturn supported file types.
        Returns:
            A sequence of supported file types.
        )r   ZDOCZDOCXZPDF©Úself© r   úS/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/document_loaders/sharepoint.pyÚ_file_types'   s    zSharePointLoader._file_typesz	List[str]c                 C  s   ddgS )zcReturn required scopes.
        Returns:
            List[str]: A list of required scopes.
        Z
sharepointÚbasicr   r   r   r   r   Ú_scopes/   s    zSharePointLoader._scopeszIterator[Document]c                 c  s˜  zddl m}m} W n tk
r0   tdƒ‚Y nX |  ¡  ¡  | j¡}t||ƒsbt	d| j› dƒ‚t
dƒ}| jr6| | j¡}t||ƒsšt	d| j› dƒ‚|  |¡D ]}t|j d¡ƒ}| jd	krÌ|  |¡}| jd	krð|  |¡}	|	 d
|ji¡ | |¡D ]8}
| jd	kr||
jd< | jd	kr,|
j |	¡ |
V  qúq¤| jr| | j¡}t||ƒsht	d| j› dƒ‚|  |¡D ]˜}t|j d¡ƒ}| jd	krœ|  |¡}| jd	krÂ|  |¡}	|	 d
|ji¡ | |¡D ]:}
| jd	kræ||
jd< | jd	krþ|
j |	¡ |
V  qÌqr| jr¬|  || j¡D ]ˆ}t|j d¡ƒ}| jd	krL|  |¡}| jd	krb|  |¡}	| |¡D ]:}
| jd	kr†||
jd< | jd	krž|
j |	¡ |
V  qlq"| js”| js”| js”| ¡ }t||ƒsàt	dƒ‚|  |¡D ]¨}t|j d¡ƒ}| jd	kr|  |¡}| jd	kr*|  |¡}	| |¡D ]Z}|j |j¡ | jd	kr\||jd< | jd	kr†|j |	¡ |j d
|ji¡ |V  q4qêdS )z¦
        Load documents lazily. Use this when working at a large scale.
        Yields:
            Document: A document object representing the parsed blob.
        r   )ÚDriveÚFolderzAO365 package not found, please install it with `pip install o365`zThere isn't a Drive with id Ú.ÚdefaultzThere isn't a folder with path ÚidTZsource_full_urlÚauthorized_identitieszUnable to fetch root folderN)Z
O365.driver    r!   ÚImportErrorZ_authZstorageZ	get_driver   Ú
isinstanceÚ
ValueErrorr   r   Zget_item_by_pathZ_load_from_folderr   ÚmetadataÚgetr   r%   r   Úget_extended_metadataÚupdateZweb_urlZ
lazy_parser   Zget_itemr   Z_load_from_object_idsZget_root_folder)r   r    r!   ÚdriveZblob_parserZtarget_folderZblobÚfile_idZauth_identitiesZextended_metadataZparsed_blobZ	blob_partr   r   r   Ú	lazy_load7   sš    ÿ

















ÿzSharePointLoader.lazy_loadr   )r.   r   c                 C  sº   |   ¡ }| d¡}d| j› d|› d}dd|› i}tjd||d}| ¡ }g }| d	¡D ]Z}	|	 d
¡rZ|	 d
¡ d¡p–|	 d
¡ d¡p–|	 d
¡ d¡}
|
rZ|
 d¡}|rZ| |¡ qZ|S )a  
        Retrieve the access identities (user/group emails) for a given file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            List: A list of group names (email addresses) that have
                  access to the file.
        Úaccess_tokenú(https://graph.microsoft.com/v1.0/drives/ú/items/z/permissionsÚAuthorizationúBearer ÚGET©ÚheadersÚvalueZgrantedToV2ZsiteUserÚuserÚgroupÚemail)Ú_fetch_access_tokenr*   r   ÚrequestsÚrequestÚjsonÚappend)r   r.   Údatar0   Úurlr7   ÚresponseZaccess_listZgroup_namesZaccess_dataZ	site_datar;   r   r   r   r%   Œ   s(    	
ÿ
ÿý
z&SharePointLoader.authorized_identitiesr   c              	   C  s0   t | jdd}| ¡ }W 5 Q R X t |¡}|S )z|
        Fetch the access token from the token file.
        Returns:
            The access token as a dictionary.
        zutf-8)Úencoding)Úopenr   Úreadr?   Úloads)r   ÚfÚsrA   r   r   r   r<   ®   s    
z$SharePointLoader._fetch_access_tokenÚdictc           	      C  s¦   |   ¡ }| d¡}d| j› d|› d}dd|› i}tjd||d}| ¡ }| d	d
¡| di ¡ di ¡ dd¡| di ¡ dd¡ d¡d d | dd¡ dœ}|S )a  
        Retrieve extended metadata for a file in SharePoint.
        As of today, following fields are supported in the extended metadata:
        - size: size of the source file.
        - owner: display name of the owner of the source file.
        - full_path: pretty human readable path of the source file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            dict: A dictionary containing the extended metadata of the file,
                  including size, owner, and full path.
        r0   r1   r2   z,?$select=size,createdBy,parentReference,namer3   r4   r5   r6   Úsizer   Z	createdByr9   ZdisplayNameÚ ZparentReferenceÚpathú:éÿÿÿÿú/Úname)rK   ÚownerÚ	full_path)r<   r*   r   r=   r>   r?   Úsplit)	r   r.   rA   r0   rB   r7   rC   r)   Zstaged_metadatar   r   r   r+   ¹   s:    
ÿ
 ÿ þ ÿþþý
üûz&SharePointLoader.get_extended_metadata)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   Ú__annotations__r   r   r   r   r   Úhomer   r   Úpropertyr   r   r/   r%   r<   r+   r   r   r   r   r      s    
U"r   )rX   Ú
__future__r   r?   Úpathlibr   Útypingr   r   r   r   r   r=   Zlangchain_core.document_loadersr	   Zlangchain_core.documentsr
   Zlangchain_core.pydantic_v1r   Z.langchain_community.document_loaders.base_o365r   r   Z5langchain_community.document_loaders.parsers.registryr   r   r   r   r   r   Ú<module>   s   