U
    hi                     @  sp   d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ G dd deZdS )    )annotationsN)AnyDictIteratorListOptionalTuple)Document)
BaseLoaderc                   @  s   e Zd ZdZdddddddddZd	d
ddZddddddZddddddZdddddZd	ddddZ	dd
ddZ
dS )AthenaLoaderaY  Load documents from `AWS Athena`.

    Each document represents one row of the result.
    - By default, all columns are written into the `page_content` of the document
    and none into the `metadata` of the document.
    - If `metadata_columns` are provided then these columns are written
    into the `metadata` of the document while the rest of the columns
    are written into the `page_content` of the document.

    To authenticate, the AWS client uses this method to automatically load credentials:
    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html

    If a specific credential profile should be used, you must pass
    the name of the profile from the ~/.aws/credentials file that is to be used.

    Make sure the credentials / roles used have the required policies to
    access the Amazon Textract service.
    NstrzOptional[str]zOptional[List[str]])querydatabases3_output_uriprofile_namemetadata_columnsc           	   
   C  s   || _ || _|| _|dk	r|ng | _zddl}W n tk
rL   tdY nX z |dk	rd|j|dn| }W n, tk
r } ztd|W 5 d}~X Y nX |	d| _
|	d| _dS )ag  Initialize Athena document loader.

        Args:
            query: The query to run in Athena.
            database: Athena database.
            s3_output_uri: Athena output path.
            profile_name: Optional. AWS credential profile, if profiles are being used.
            metadata_columns: Optional. Columns written to Document `metadata`.
        Nr   zRCould not import boto3 python package. Please install it with `pip install boto3`.)r   zCould not load credentials to authenticate with AWS client. Please check that credentials in the specified profile name are valid.ZathenaZs3)r   r   r   r   boto3ImportErrorSession	Exception
ValueErrorclientathena_client	s3_client)	selfr   r   r   r   r   r   sessione r   O/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/document_loaders/athena.py__init__!   s.    
zAthenaLoader.__init__zList[Dict[str, Any]])returnc                 C  s   | j j| jd| jid| jid}|d }| j j|d}|d d d }|d	krRqn@|d
kr|d d }|d }d| }t|n|dkrtdtd q(| 	|}t
|jddS )NZDatabaseZOutputLocation)ZQueryStringZQueryExecutionContextZResultConfigurationQueryExecutionId)r!   ZQueryExecutionZStatusStateZ	SUCCEEDEDZFAILEDZStateChangeReasonzQuery Failed: Z	CANCELLEDz Query was cancelled by the user.   records)Zorient)r   Zstart_query_executionr   r   r   Zget_query_executionr   timesleep_get_result_setjsonloadsZto_json)r   responsequery_execution_idstateZresp_statusZstate_change_reasonerrZ
result_setr   r   r   _execute_queryO   s,    


zAthenaLoader._execute_query)input_stringsuffixr    c                 C  s$   |r | |r |d t|  S |S N)endswithlenr   r/   r0   r   r   r   _remove_suffixi   s    zAthenaLoader._remove_suffixc                 C  s"   |r| |r|t|d  S |S r1   )
startswithr3   r4   r   r   r   _remove_prefixn   s    zAthenaLoader._remove_prefixr   )r+   r    c           	      C  s   zdd l }W n tk
r(   tdY nX | j}| | |ddd}|d }d|dd  |g d }| jj||d}|j	t
|d  d	d
}|S )Nr   zTCould not import pandas python package. Please install it with `pip install pandas`./zs3://r#   z.csv)ZBucketZKeyZBodyutf8)encoding)Zpandasr   r   r7   r5   splitjoinr   Z
get_objectZread_csvioBytesIOread)	r   r+   pdZ
output_uritokensZbucketkeyobjZdfr   r   r   r'   s   s$    

 zAthenaLoader._get_result_setzTuple[List[str], List[str]])query_resultr    c                 C  sJ   g }g }t |d  }|D ]$}|| jkr6|| q|| q||fS )Nr   )listkeysr   append)r   rD   content_columnsr   Zall_columnsrB   r   r   r   _get_columns   s    
zAthenaLoader._get_columnszIterator[Document]c                 #  sh   |   }| |\ |D ]H}d fdd| D }fdd| D }t||d}|V  qd S )N
c                 3  s(   | ] \}}| kr| d | V  qdS )z: Nr   .0kv)rH   r   r   	<genexpr>   s     z)AthenaLoader.lazy_load.<locals>.<genexpr>c                   s&   i | ]\}}| kr|d k	r||qS r1   r   rK   )r   r   r   
<dictcomp>   s
       z*AthenaLoader.lazy_load.<locals>.<dictcomp>)page_contentmetadata)r.   rI   r<   itemsr	   )r   rD   rowrQ   rR   docr   )rH   r   r   	lazy_load   s    
zAthenaLoader.lazy_load)NN)__name__
__module____qualname____doc__r   r.   r5   r7   r'   rI   rV   r   r   r   r   r      s     .r   )
__future__r   r=   r(   r%   typingr   r   r   r   r   r   Zlangchain_core.documentsr	   Z)langchain_community.document_loaders.baser
   r   r   r   r   r   <module>   s    