Dov_Tzamir / chat_dov.py
rubensmau's picture
updates
33e91c3
raw
history blame
3.59 kB
import argparse
from dataclasses import asdict
import json
import os
import streamlit as st
from datasets import load_dataset
from data_driven_characters.character import get_character_definition
from data_driven_characters.corpus import (
get_corpus_summaries,
load_docs,
)
from data_driven_characters.chatbots import (
SummaryChatBot,
RetrievalChatBot,
SummaryRetrievalChatBot,
)
from data_driven_characters.interfaces import CommandLine, Streamlit
# Root directory under which create_chatbot() places its per-corpus output
# (the summaries cache and the character-definition cache directories).
OUTPUT_ROOT = "output"
def create_chatbot(corpus, character_name, chatbot_type, retrieval_docs, summary_type):
    """Build a character chatbot from a text corpus.

    Loads the corpus, obtains its summaries and a character definition
    (passing cache directories located under ``OUTPUT_ROOT``), prints the
    character definition as JSON, and wraps everything in the requested
    chatbot class.

    Args:
        corpus: Path to the corpus file. Its base name without extension is
            used as the name of the output sub-directory.
        character_name: Name forwarded to ``get_character_definition``.
        chatbot_type: One of ``"summary"``, ``"retrieval"`` or
            ``"summary_retrieval"``.
        retrieval_docs: ``"raw"`` to retrieve over small chunks of the corpus
            text, or ``"summarized"`` to retrieve over the corpus summaries.
        summary_type: Forwarded to ``get_corpus_summaries``; also part of the
            output directory name.

    Returns:
        The constructed chatbot instance.

    Raises:
        ValueError: If ``retrieval_docs`` or ``chatbot_type`` is not one of
            the supported values.
    """
    # Validate both options before doing any work, so a bad value fails
    # before directories are created or summaries are generated. The
    # retrieval-docs check comes first to keep the same error precedence as
    # when both values are invalid.
    if retrieval_docs not in ("raw", "summarized"):
        raise ValueError(f"Unknown retrieval docs type: {retrieval_docs}")
    if chatbot_type not in ("summary", "retrieval", "summary_retrieval"):
        raise ValueError(f"Unknown chatbot type: {chatbot_type}")

    # Output layout: <OUTPUT_ROOT>/<corpus name>/summarytype_<summary_type>/
    # NOTE: in this app the corpus is always the Dov Tzamir text; the path is
    # supplied by main().
    corpus_name = os.path.splitext(os.path.basename(corpus))[0]
    output_dir = f"{OUTPUT_ROOT}/{corpus_name}/summarytype_{summary_type}"
    os.makedirs(output_dir, exist_ok=True)
    summaries_dir = f"{output_dir}/summaries"
    character_definitions_dir = f"{output_dir}/character_definitions"
    os.makedirs(character_definitions_dir, exist_ok=True)

    # Load the corpus in large chunks for summarization.
    docs = load_docs(corpus_path=corpus, chunk_size=2048, chunk_overlap=64)

    # Generate (or reuse, via cache_dir) the corpus summaries.
    corpus_summaries = get_corpus_summaries(
        docs=docs, summary_type=summary_type, cache_dir=summaries_dir
    )

    # Derive the character definition from the summaries.
    character_definition = get_character_definition(
        name=character_name,
        corpus_summaries=corpus_summaries,
        cache_dir=character_definitions_dir,
    )
    print(json.dumps(asdict(character_definition), indent=4))

    # The summary-only chatbot takes no retrieval documents, so skip
    # building them.
    if chatbot_type == "summary":
        return SummaryChatBot(character_definition=character_definition)

    # Build the retrieval documents: either small raw chunks of the corpus or
    # the summaries computed above.
    if retrieval_docs == "raw":
        documents = [
            doc.page_content
            for doc in load_docs(corpus_path=corpus, chunk_size=256, chunk_overlap=16)
        ]
    else:
        documents = corpus_summaries

    if chatbot_type == "retrieval":
        return RetrievalChatBot(
            character_definition=character_definition,
            documents=documents,
        )
    return SummaryRetrievalChatBot(
        character_definition=character_definition,
        documents=documents,
    )
# Usage: python -m streamlit run chat_dov.py -- --corpus data/tzamir.txt --character_name Dov --chatbot_type retrieval --retrieval_docs raw --interface streamlit
# NOTE: main() currently hard-codes these values, so the arguments after "--" are not read.
def main():
    """Render the Streamlit chat page for the Dov Tzamir avatar.

    Writes the page header, builds the chatbot through
    ``st.cache_resource(create_chatbot)`` with fixed arguments, writes the
    usage hints and then hands control to the ``Streamlit`` interface.

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
    """
    # Original author's note: the parameters are fixed for Dov Tzamir and the
    # files are already processed, except the indices, which are in memory.
    st.title("Converse com o avatar do Dov Tzamir")
    st.write("Baseado no texto do livro Fragmentos de Memória do Tito")
    st.write(" ")

    # Require the API key before building the chatbot rather than after it,
    # so a missing key fails immediately. The exception type is the same one
    # the os.environ lookup raised.
    if "OPENAI_API_KEY" not in os.environ:
        raise KeyError("OPENAI_API_KEY")

    chatbot = st.cache_resource(create_chatbot)(
        "data/tzamir.txt",  # corpus (formerly args.corpus)
        "Dov",  # character_name (formerly args.character_name)
        "retrieval",  # chatbot_type (formerly args.chatbot_type)
        "raw",  # retrieval_docs (formerly args.retrieval_docs)
        "map_reduce",  # summary_type (formerly args.summary_type)
    )

    st.write(" ")
    st.write("Digite o seu diálogo aqui finalizando a linha com ENTER")
    st.write("Voce pode continuar o diálogo, apagando sua pergunta anterior e digitando aqui novamente")

    app = Streamlit(chatbot=chatbot)
    app.run()


# Script entry point: build the page only when the file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()