from abc import ABC, abstractmethod from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings.openai import OpenAIEmbeddings from langchain.vectorstores import FAISS class TextProcessor(ABC): @abstractmethod def split_text(self, text): pass @abstractmethod def create_embeddings(self, chunks): pass class DefaultTextProcessor(TextProcessor): def __init__(self,chunk_size,chunk_overlap): self.chunk_overlap = chunk_overlap self.chunk_size = chunk_size def split_text(self, text): text_splitter = RecursiveCharacterTextSplitter( chunk_size=self.chunk_size , chunk_overlap=self.chunk_overlap, separators=[" ", ",", "\n"], length_function=len ) chunks = text_splitter.split_text(text) return chunks def create_embeddings(self, chunks): if not chunks: return None embeddings = OpenAIEmbeddings() try: return FAISS.from_texts(chunks, embeddings) except Exception as e: print(f"Error creating embeddings: {e}") return None