U
    h                     @   sH   d dl Z d dlmZmZmZ d dlmZ d dlmZ G dd deZ	dS )    N)CallableIteratorOptional)Document)
BaseLoaderc                   @   sN   e Zd ZdZd
eee ee eeegef  dddZe	e
 ddd	ZdS )	GitLoadera  Load `Git` repository files.

    The Repository can be local on disk available at `repo_path`,
    or remote at `clone_url` that will be cloned to `repo_path`.
    Currently, supports only text files.

    Each document represents one file in the repository. The `path` points to
    the local Git repository, and the `branch` specifies the branch to load
    files from. By default, it loads from the `main` branch.
    Nmain	repo_path	clone_urlbranchfile_filterc                 C   s   || _ || _|| _|| _dS )a  

        Args:
            repo_path: The path to the Git repository.
            clone_url: Optional. The URL to clone the repository from.
            branch: Optional. The branch to load files from. Defaults to `main`.
            file_filter: Optional. A function that takes a file path and returns
              a boolean indicating whether to load the file. Defaults to None.
        Nr	   )selfr
   r   r   r    r   L/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/document_loaders/git.py__init__   s    zGitLoader.__init__)returnc                 c   s   zddl m}m} W n, tk
r@ } ztd|W 5 d }~X Y nX tj| jsn| jd krnt	d| j dnz| jrtj
tj| jdr|| j}|jjj| jkrt	dn|| j| j}|j | j n|| j}|j | j |  D ]$}t||sqtj| j|j}||g}t|r2q| jrH| |sHqtj|| j}zt|dt}	|	 }
tj|jd	 }z|
d
}W n& tk
r   Y W 5 Q R  W qY nX |||j|d}t||dV  W 5 Q R X W q t k
r } zt!d| d|  W 5 d }~X Y qX qd S )Nr   )BlobRepozTCould not import git python package. Please install it with `pip install GitPython`.zPath z does not existz.gitz6A different repository is already cloned at this path.rb   zutf-8)source	file_path	file_name	file_type)Zpage_contentmetadatazError reading file z: )"gitr   r   ImportErrorospathexistsr
   r   
ValueErrorisdirjoinremotesoriginurlZ
clone_fromcheckoutr   treeZtraverse
isinstanceZignoredlenr   relpathopenreadsplitextnamedecodeUnicodeDecodeErrorr   	Exceptionprint)r   r   r   exrepoitemr   Zignored_filesZrel_file_pathfcontentr   Ztext_contentr   er   r   r   	lazy_load*   s^    


zGitLoader.lazy_load)Nr   N)__name__
__module____qualname____doc__strr   r   boolr   r   r   r:   r   r   r   r   r   	   s      r   )
r   typingr   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   r   r   r   r   r   <module>   s   