1st_langchain / data_parsing.py
jfeng1115's picture
init commit
72a5f6e
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Module-level splitter used to break text into overlapping chunks.
# All sizes below are measured with the built-in len(), i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # character length of each chunk
    chunk_overlap=100,  # character length shared between neighbouring chunks
    length_function=len,  # length is counted in characters via len()
)