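# NOTE(review): "Spaces: Sleeping" — page-status text captured with this file when it was exported from the web; it is not part of the program.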
import argparse
import json
import os
from dataclasses import asdict

import streamlit as st
from datasets import load_dataset

from data_driven_characters.character import get_character_definition
from data_driven_characters.chatbots import (
    RetrievalChatBot,
    SummaryChatBot,
    SummaryRetrievalChatBot,
)
from data_driven_characters.corpus import (
    get_corpus_summaries,
    load_docs,
)
from data_driven_characters.interfaces import CommandLine, Streamlit
# Root directory under which create_chatbot() places its per-corpus
# summaries and character-definition directories.
OUTPUT_ROOT = "output"
def create_chatbot(corpus, character_name, chatbot_type, retrieval_docs, summary_type):
    """Build a character chatbot from a text corpus.

    The corpus is loaded with ``load_docs``, summarized with
    ``get_corpus_summaries``, turned into a character definition with
    ``get_character_definition`` and finally wrapped in one of the chatbot
    classes. The summaries and character definitions are handed directories
    under ``OUTPUT_ROOT/<corpus file stem>/summarytype_<summary_type>`` as
    their ``cache_dir``.

    Args:
        corpus: Path to the corpus file.
        character_name: Name passed to ``get_character_definition``.
        chatbot_type: One of ``"summary"``, ``"retrieval"`` or
            ``"summary_retrieval"``.
        retrieval_docs: ``"raw"`` to retrieve over small raw corpus chunks,
            ``"summarized"`` to retrieve over the corpus summaries.
        summary_type: Forwarded to ``get_corpus_summaries``; also part of the
            output directory name.

    Returns:
        The constructed chatbot instance.

    Raises:
        ValueError: If ``retrieval_docs`` or ``chatbot_type`` is not one of
            the supported values.
    """
    # Validate both selectors before doing any work, so a bad value fails
    # immediately instead of after the corpus has been loaded and summarized.
    # The order (retrieval docs first) decides which error wins when both are
    # invalid.
    if retrieval_docs not in ("raw", "summarized"):
        raise ValueError(f"Unknown retrieval docs type: {retrieval_docs}")
    if chatbot_type not in ("summary", "retrieval", "summary_retrieval"):
        raise ValueError(f"Unknown chatbot type: {chatbot_type}")

    # Output layout. In this app the corpus is always the Dov Tzamir text,
    # which main() passes in.
    corpus_name = os.path.splitext(os.path.basename(corpus))[0]
    output_dir = f"{OUTPUT_ROOT}/{corpus_name}/summarytype_{summary_type}"
    os.makedirs(output_dir, exist_ok=True)
    summaries_dir = f"{output_dir}/summaries"
    character_definitions_dir = f"{output_dir}/character_definitions"
    os.makedirs(character_definitions_dir, exist_ok=True)

    # Load the corpus in large chunks for summarization.
    docs = load_docs(corpus_path=corpus, chunk_size=2048, chunk_overlap=64)

    # Generate the summaries.
    corpus_summaries = get_corpus_summaries(
        docs=docs, summary_type=summary_type, cache_dir=summaries_dir
    )

    # Derive the character definition and print it as JSON for inspection.
    character_definition = get_character_definition(
        name=character_name,
        corpus_summaries=corpus_summaries,
        cache_dir=character_definitions_dir,
    )
    print(json.dumps(asdict(character_definition), indent=4))

    # Build the retrieval documents: either a second, finer-grained chunking
    # of the raw corpus or the summaries produced above.
    if retrieval_docs == "raw":
        documents = [
            doc.page_content
            for doc in load_docs(corpus_path=corpus, chunk_size=256, chunk_overlap=16)
        ]
    else:
        documents = corpus_summaries

    # Instantiate the requested chatbot.
    if chatbot_type == "summary":
        return SummaryChatBot(character_definition=character_definition)
    if chatbot_type == "retrieval":
        return RetrievalChatBot(
            character_definition=character_definition,
            documents=documents,
        )
    # Only "summary_retrieval" can remain after the validation at the top.
    return SummaryRetrievalChatBot(
        character_definition=character_definition,
        documents=documents,
    )
# Usage:
#   python -m streamlit run chat_dov.py -- --corpus data/tzamir.txt --character_name Dov --chatbot_type retrieval --retrieval_docs raw --interface streamlit
# NOTE: main() below does not parse these flags; it hard-codes the same values.
def main():
    """Render the Streamlit chat page for the Dov Tzamir avatar.

    Writes the page header, builds the chatbot through
    ``st.cache_resource(create_chatbot)`` with fixed arguments, writes the
    usage instructions and then runs the ``Streamlit`` interface on that
    chatbot.

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
    """
    # Fixed parameters for Dov Tzamir. The files are already processed,
    # except the index, which is kept in memory.
    st.title("Converse com o avatar do Dov Tzamir")
    st.write("Baseado no texto do livro Fragmentos de Memória do Tito")
    st.write(" ")

    # Check for the API key before building the chatbot, so a missing key is
    # reported immediately rather than after the chatbot construction. The
    # value itself is not used here, so it is not bound to a local.
    if "OPENAI_API_KEY" not in os.environ:
        raise KeyError("OPENAI_API_KEY")

    chatbot = st.cache_resource(create_chatbot)(
        "data/tzamir.txt",  # formerly args.corpus
        "Dov",  # formerly args.character_name
        "retrieval",  # formerly args.chatbot_type
        "raw",  # formerly args.retrieval_docs
        "map_reduce",  # formerly args.summary_type
    )

    st.write(" ")
    st.write("Digite o seu diálogo aqui finalizando a linha com ENTER")
    st.write("Voce pode continuar o diálogo, apagando sua perguntanda anterior e digitando aqui novamente")

    app = Streamlit(chatbot=chatbot)
    app.run()
# Script entry point: run the app only when this file is executed directly
# (for example via `streamlit run`), not when it is imported.
if __name__ == "__main__":
    main()