U
    hX                     @  sR   d dl mZ d dlZd dlZd dlmZmZmZmZ d dl	m
Z
 G dd dZdS )    )annotationsN)AnyDictListOptional)Documentc                      s   e Zd Zd)ddd fddZeddd	d
dZedddddddZddd	ddZd*dddddddZd+ddddddZ	d,ddddd!d"d#Z
d-dddd$d%d&d'd(Z  ZS ).RecursiveJsonSplitter  NintzOptional[int])max_chunk_sizemin_chunk_sizec                   s0   t    || _|d k	r|nt|d d| _d S )N   2   )super__init__r   maxr   )selfr   r   	__class__ A/tmp/pip-unpacked-wheel-a648t6hw/langchain_text_splitters/json.pyr      s    
zRecursiveJsonSplitter.__init__r   )datareturnc                 C  s   t t| S )z1Calculate the size of the serialized JSON object.)lenjsondumps)r   r   r   r   
_json_size   s    z RecursiveJsonSplitter._json_sizez	List[str]r   None)dpathvaluer   c                 C  s.   |dd D ]}|  |i } q|| |d < dS )z;Set a value in a nested dictionary based on the given path.N)
setdefault)r   r   r    keyr   r   r   _set_nested_dict   s    z&RecursiveJsonSplitter._set_nested_dictc                   sH   t |tr  fdd| D S t |tr@ fddt|D S |S d S )Nc                   s   i | ]\}}|  |qS r   )_list_to_dict_preprocessing).0kvr   r   r   
<dictcomp>%   s      zERecursiveJsonSplitter._list_to_dict_preprocessing.<locals>.<dictcomp>c                   s    i | ]\}}t | |qS r   )strr%   )r&   iitemr)   r   r   r*   (   s    )
isinstancedictitemslist	enumerate)r   r   r   r)   r   r%   "   s    


z1RecursiveJsonSplitter._list_to_dict_preprocessingzDict[str, Any]zOptional[List[str]]zOptional[List[Dict]]z
List[Dict])r   current_pathchunksr   c           
      C  s   |pg }|dk	r|ni g}t |tr| D ]v\}}||g }| |d }| ||i}| j| }	||	k r| |d || q,|| jkr|i  | ||| q,n| |d || |S )zW
        Split json into maximum size dictionaries while preserving structure.
        Nr!   )	r.   r/   r0   r   r   r$   r   append_json_split)
r   r   r3   r4   r#   r    new_path
chunk_sizesize	remainingr   r   r   r6   0   s    	




z!RecursiveJsonSplitter._json_splitFbool)	json_dataconvert_listsr   c                 C  s4   |r|  | |}n
|  |}|d s0|  |S )z&Splits JSON into a list of JSON chunksr!   )r6   r%   pop)r   r<   r=   r4   r   r   r   
split_jsonQ   s    
z RecursiveJsonSplitter.split_jsonT)r<   r=   ensure_asciir   c                   s    | j ||d} fdd|D S )z1Splits JSON into a list of JSON formatted strings)r<   r=   c                   s   g | ]}t j| d qS )r@   )r   r   )r&   chunkrA   r   r   
<listcomp>m   s     z4RecursiveJsonSplitter.split_text.<locals>.<listcomp>)r?   )r   r<   r=   r@   r4   r   rA   r   
split_textb   s    z RecursiveJsonSplitter.split_textzOptional[List[dict]]zList[Document])textsr=   r@   	metadatasr   c                 C  sf   |pi gt | }g }t|D ]B\}}| j|||dD ](}	t|| }
t|	|
d}|| q6q|S )z4Create documents from a list of json objects (Dict).)r<   r=   r@   )Zpage_contentmetadata)r   r2   rD   copydeepcopyr   r5   )r   rE   r=   r@   rF   Z
_metadatasZ	documentsr,   textrB   rG   Znew_docr   r   r   create_documentso   s      
z&RecursiveJsonSplitter.create_documents)r	   N)NN)F)FT)FTN)__name__
__module____qualname__r   staticmethodr   r$   r%   r6   r?   rD   rK   __classcell__r   r   r   r   r   
   s(        $      r   )
__future__r   rH   r   typingr   r   r   r   Zlangchain_core.documentsr   r   r   r   r   r   <module>   s
   