U
    hX                     @  sx   d dl mZ d dlZd dlmZmZmZmZmZ d dl	m
Z
mZ G dd deZdddd	d
ddZG dd deZdS )    )annotationsN)AnyListLiteralOptionalUnion)LanguageTextSplitterc                      s>   e Zd ZdZdddddd fd	d
ZdddddZ  ZS )CharacterTextSplitterz(Splitting text that looks at characters.

Fstrboolr   None)	separatoris_separator_regexkwargsreturnc                   s   t  jf | || _|| _dS )Create a new TextSplitter.N)super__init__
_separator_is_separator_regex)selfr   r   r   	__class__ F/tmp/pip-unpacked-wheel-a648t6hw/langchain_text_splitters/character.pyr      s    zCharacterTextSplitter.__init__	List[str]textr   c                 C  sB   | j r| jn
t| j}t||| j}| jr0dn| j}| ||S )&Split incoming text and return chunks. )r   r   reescape_split_text_with_regex_keep_separator_merge_splits)r   r   r   splitsr   r   r   r   
split_text   s
    z CharacterTextSplitter.split_text)r   F)__name__
__module____qualname____doc__r   r(   __classcell__r   r   r   r   r
   	   s
      r
   r   &Union[bool, Literal[('start', 'end')]]r   )r   r   keep_separatorr   c                   s   |r|rt d| d|  |dkrF fddtdt d dD n fd	dtdt dD }t d dkr| d
d  7 }|dkr| d
 g n d g| }qt || }nt| }dd |D S )N()endc                   s    g | ]} |  |d    qS    r   .0iZ_splitsr   r   
<listcomp>(   s     z*_split_text_with_regex.<locals>.<listcomp>r   r4      c                   s    g | ]} |  |d    qS r3   r   r5   r8   r   r   r9   *   s     c                 S  s   g | ]}|d kr|qS )r!   r   )r6   sr   r   r   r9   7   s      )r"   splitrangelenlist)r   r   r/   r'   r   r8   r   r$      s     $r$   c                      s|   e Zd ZdZdddddd	d
 fddZddddddZdddddZeddd dddZe	dddddZ
  ZS )RecursiveCharacterTextSplitterzSplitting text by recursively look at characters.

    Recursively tries to split by different characters to find one
    that works.
    NTFzOptional[List[str]]r.   r   r   r   )
separatorsr/   r   r   r   c                   s2   t  jf d|i| |p$ddddg| _|| _dS )r   r/   r   
 r!   N)r   r   _separatorsr   )r   rB   r/   r   r   r   r   r   r   A   s    z'RecursiveCharacterTextSplitter.__init__r   r   )r   rB   r   c                 C  s(  g }|d }g }t |D ]P\}}| jr*|nt|}|dkrD|} qjt||r|}||d d } qjq| jrt|nt|}t||| j}	g }
| jrdn|}|	D ]d}| || jk r|
	| q|
r| 
|
|}|| g }
|s|	| q| ||}|| q|
r$| 
|
|}|| |S )r    r;   r!   r4   N)	enumerater   r"   r#   searchr$   r%   Z_length_functionZ_chunk_sizeappendr&   extend_split_text)r   r   rB   Zfinal_chunksr   Znew_separatorsr7   Z_sr   r'   Z_good_splitsr<   Zmerged_textZ
other_infor   r   r   rJ   M   s>    

z*RecursiveCharacterTextSplitter._split_textr   c                 C  s   |  || jS )N)rJ   rE   )r   r   r   r   r   r(   u   s    z)RecursiveCharacterTextSplitter.split_textr   )languager   r   c                 K  s   |  |}| f |dd|S )NT)rB   r   )get_separators_for_language)clsrK   r   rB   r   r   r   from_languagex   s    
z,RecursiveCharacterTextSplitter.from_language)rK   r   c                 C  s  | t jks| t jkr4ddddddddd	d
ddddgS | t jkrZddddddd	d
ddddgS | t jkrddddddddd	d
ddddgS | t jkrdddddddddddddd
dddddgS | t jkrddddddddd	d
dddddgS | t jkrd d!d"dddddddddd	d
dddddgS | t jkrBdddd#dd$d	d
ddddgS | t j	krfd%d&d d'd(d)ddddg
S | t j
krdd*d+ddddgS | t jkrd,d-d.d/ddddgS | t jkrd*ddd0ddd$d1d2ddddgS | t jkrd*d3d4d5d6d7dd0dd
d8d9dd$ddddgS | t jkr,d:dddddd;d<dddddgS | t jkrXdd=d*dddddd<d
ddddgS | t jkrddd>d dddd$d	d
ddddgS | t jkrd?d@dAdBdCddddg	S | t jkrdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRddgS | t jkrdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldgS | t jkrhd!d dmdndoddpdddddqddrdd#dd	dsd
ddtdudvdwddddgS | t jkrdxdydzd!d{d|dddod}d~d>d dddddddddgS | t jkrddddddddddddddddddddddddgS | t jkrddddddddddg
S | t jkrNddddd$ddddddddd(ddddd
ddddddddgS | t jkr|dddd#ddd	ddtdwdvddddgS | t jkrtd|  dntd|  dtt  d S )Nz
class z
void z
int z
float z
double z
if z
for z
while z
switch z
case r   rC   rD   r!   z
func z
var z
const z
type z
public z
protected z	
private z
static z

internal z
companion z
fun z
val z
when z
else z

function z
let z	
default z
enum z
interface z
namespace z	
foreach z
do z	
message z	
service z
option z
import z
syntax z
def z
	def z
=+
z
-+
z
\*+
z

.. *

z
unless z
begin z
rescue z
defp z
defmodule z
defprotocol z

defmacro z
defmacrop z
cond z
with z
fn z
loop z
match z
object z
struct z
#{1,6} z```
z	
\*\*\*+
z
---+
z
___+
z
\\chapter{z
\\section{z
\\subsection{z
\\subsubsection{z
\\begin{enumerate}z
\\begin{itemize}z
\\begin{description}z
\\begin{list}z
\\begin{quote}z
\\begin{quotation}z
\\begin{verse}z
\\begin{verbatim}z
\egin{align}z$$$z<bodyz<divz<pz<brz<liz<h1z<h2z<h3z<h4z<h5z<h6z<spanz<tablez<trz<tdz<thz<ulz<olz<headerz<footerz<navz<headz<stylez<scriptz<metaz<titlez
implements z

delegate z
event z

abstract z
return z

continue z
break z
try z
throw z	
finally z
catch z
pragma z
using z

contract z	
library z
constructor z

modifier z
error z

do while z

assembly z
IDENTIFICATION DIVISION.z
ENVIRONMENT DIVISION.z
DATA DIVISION.z
PROCEDURE DIVISION.z
WORKING-STORAGE SECTION.z
LINKAGE SECTION.z
FILE SECTION.z
INPUT-OUTPUT SECTION.z
OPEN z
CLOSE z
READ z
WRITE z
IF z
ELSE z
MOVE z	
PERFORM z
UNTIL z	
VARYING z
ACCEPT z	
DISPLAY z

STOP RUN.z
local z
repeat z	
main :: z
main = z
in z
where z
:: z
= z
data z	
newtype z
module z
qualified z
import qualified z

instance z
| z
= {z
, z
param z	Language z is not implemented yet!z& is not supported! Please choose from )r   CCPPZGOZJAVAZKOTLINZJSZTSZPHPPROTOPYTHONZRSTZRUBYZELIXIRZRUSTZSCALAZSWIFTZMARKDOWNZLATEXZHTMLZCSHARPZSOLZCOBOLZLUAZHASKELLZ
POWERSHELL_value2member_map_
ValueErrorr@   )rK   r   r   r   rL      sf   



$&z:RecursiveCharacterTextSplitter.get_separators_for_language)NTF)r)   r*   r+   r,   r   rJ   r(   classmethodrN   staticmethodrL   r-   r   r   r   r   rA   :   s      (rA   )
__future__r   r"   typingr   r   r   r   r   Zlangchain_text_splitters.baser   r	   r
   r$   rA   r   r   r   r   <module>   s   