Spaces:
Runtime error
Runtime error
| import os | |
| from langchain_community.document_loaders import TextLoader | |
| from langchain.vectorstores import Chroma | |
| from langchain.chains.query_constructor.base import AttributeInfo | |
| from langchain.retrievers.self_query.base import SelfQueryRetriever | |
| from langchain_text_splitters import CharacterTextSplitter | |
| from llm.gemini import Gemini | |
| from utils.questions_parser import parse_question | |
class Retriever:
    """Build Chroma-backed retrievers over the text file named by ``DATA_PATH``.

    Attributes set by ``__init__``:
        vectorstore: Chroma store created with ``persist_directory="./chroma_db"``.
        metadata_field_info: ``AttributeInfo`` entries passed to the
            self-query retriever.
        retriever: ``SelfQueryRetriever`` built on top of ``vectorstore``.
        docs_retriever: retriever obtained from a second Chroma store that
            is created without a persist directory.
    """

    # Created once when the class body executes and shared by every instance.
    _model = Gemini()

    def __init__(self):
        """Load, split and index the data file, then create both retrievers.

        Raises:
            ValueError: if the ``DATA_PATH`` environment variable is not set.
        """
        data_path = os.environ.get("DATA_PATH")
        if data_path is None:
            raise ValueError("DATA_PATH environment variable is not set")

        # Read the file as UTF-8 and cut it into non-overlapping chunks.
        raw_documents = TextLoader(data_path, encoding="UTF-8").load()
        splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
        chunks = splitter.split_documents(raw_documents)

        embeddings = self._model.embeddings

        # Store that backs the self-query retriever.
        self.vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory="./chroma_db",
        )

        # (name, description) pairs; every attribute is declared as "string".
        field_specs = (
            ("topico", "A materia escolar da qual a questão pertence."),
            ("assunto", "O assunto da materia fornecida anteriormente."),
            ("dificuldade", "O nivel de dificuldade para resolver a questao."),
            (
                "tipo",
                "O tipo da questao. Pode ser ou Multipla Escolha ou Justificativa",
            ),
        )
        self.metadata_field_info = [
            AttributeInfo(name=field_name, description=field_text, type="string")
            for field_name, field_text in field_specs
        ]

        content_description = "Questões de matérias do ensino médio."

        # NOTE(review): the same chunks are embedded a second time here, into
        # a store created without a persist directory. Presumably one store
        # could serve both retrievers -- confirm before merging them.
        plain_store = Chroma.from_documents(documents=chunks, embedding=embeddings)

        self.retriever = SelfQueryRetriever.from_llm(
            self._model.llm,
            self.vectorstore,
            content_description,
            self.metadata_field_info,
            verbose=True,
        )
        self.docs_retriever = plain_store.as_retriever()