from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, TokenTextSplitter


def setup_chunking_and_embedding(documents, chunking_strategy: str, chunk_size: int,
                                 chunk_overlap: int, embedding_model: str):
    """Split documents into chunks and build the requested embedding backend.

    Both selector names are validated before any work is done, and the
    embedding object is only constructed after chunking has succeeded, so
    an invalid argument or an empty result never pays the cost of
    instantiating the embedding model.

    Args:
        documents: Documents passed to the splitter's ``split_documents``.
        chunking_strategy: One of ``'Recursive'``, ``'Character'`` or
            ``'Token'``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
        embedding_model: Name of the embedding backend; currently only
            ``'HuggingFace'`` is supported.

    Returns:
        A ``(doc_chunks, embeddings)`` tuple: the chunks produced by the
        splitter and the constructed embeddings object.

    Raises:
        ValueError: If the embedding model or chunking strategy is not
            supported, or if splitting produced no chunks.
    """
    # Map names to factories (not instances) so nothing is constructed
    # until the request is known to be valid.
    embedding_factories = {
        'HuggingFace': HuggingFaceEmbeddings,
    }
    splitter_classes = {
        'Recursive': RecursiveCharacterTextSplitter,
        'Character': CharacterTextSplitter,
        'Token': TokenTextSplitter,
    }

    # The embedding name is checked first to keep the original error
    # precedence when both arguments are invalid.
    embedding_factory = embedding_factories.get(embedding_model)
    if embedding_factory is None:
        raise ValueError("Unsupported embedding model.")

    splitter_class = splitter_classes.get(chunking_strategy)
    if splitter_class is None:
        raise ValueError("Unsupported chunking strategy.")

    splitter_instance = splitter_class(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    doc_chunks = splitter_instance.split_documents(documents)
    if not doc_chunks:
        raise ValueError("No document chunks created.")

    # Construct the embedding backend last: it is the expensive step and is
    # pointless if any of the checks above failed.
    embeddings = embedding_factory()
    return doc_chunks, embeddings