chatgirl / app.py
uyen13's picture
Update app.py
9786194
raw
history blame
4.8 kB
import streamlit as st
from streamlit_chat import message
import tempfile
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain
from ctransformers import AutoModelForCausalLM
from langchain_g4f import G4FLLM
from g4f import Provider, models
import spacy
import requests
# Path where the FAISS index built from the CSV documents is written
# (passed to `db.save_local(...)` later in this script).
DB_FAISS_PATH = 'vectorstore/db_faiss'
# Load the model of choice
def load_llm():
    """Create the language model used by the conversational chain.

    Returns:
        A ``G4FLLM`` instance configured with ``models.gpt_35_turbo`` and
        the ``Provider.DeepAi`` provider.

    Note:
        A previous, disabled variant of this function loaded a local
        Llama-2-7B-Chat GGML file (``llama-2-7b-chat.ggmlv3.q4_K_M.bin``)
        through ``CTransformers`` with ``temperature=0.2``, ``top_k=10``,
        ``top_p=0.9``, ``repetition_penalty=1.0``, ``max_new_tokens=512``,
        ``seed=42``, ``reset=True`` and ``stream=False``.
    """
    return G4FLLM(model=models.gpt_35_turbo, provider=Provider.DeepAi)
# --- Page chrome -----------------------------------------------------------
# CSS injected into the page to hide Streamlit's main menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Set the title for the Streamlit app
st.title("Zendo美女チャットボックス")

# --- Knowledge base download ----------------------------------------------
# NOTE(review): everything below runs at module level, so it is repeated each
# time Streamlit re-executes the script (download, embedding, index build and
# model loading). Consider wrapping it in a cached factory (e.g.
# `st.cache_resource`) if the deployed Streamlit version provides one.
csv_url = "https://huggingface.co/spaces/uyen13/chatgirl/raw/main/testchatdata.csv"
# Alternative source kept for reference:
# csv_url="https://docs.google.com/uc?export=download&id=1fQ2v2n9zQcoi6JoOU3lCBDHRt3a1PmaE"

# Local path where the downloaded CSV is stored.
tmp_file_path = "testchatdata.csv"

# Seconds to wait for the server before giving up. Without a timeout
# `requests.get` may block indefinitely on a stalled connection.
CSV_DOWNLOAD_TIMEOUT = 30

# Download the CSV file; anything other than HTTP 200 is treated as a failure.
response = requests.get(csv_url, timeout=CSV_DOWNLOAD_TIMEOUT)
if response.status_code != 200:
    raise Exception(f"Failed to download the CSV file from {csv_url}")
with open(tmp_file_path, 'wb') as csv_file:
    csv_file.write(response.content)

# --- Retrieval pipeline ----------------------------------------------------
# Load CSV data using CSVLoader
loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8", csv_args={'delimiter': ','})
data = loader.load()

# Create embeddings using Sentence Transformers (CPU only)
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

# Create a FAISS vector store from the documents and persist it to disk
db = FAISS.from_documents(data, embeddings)
db.save_local(DB_FAISS_PATH)

# Load the language model
llm = load_llm()

# Create a conversational chain that retrieves context from the FAISS store
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=db.as_retriever())

# Initialize spaCy with the Japanese model (used to score answer similarity)
nlp = spacy.load("ja_core_news_sm")
# Function for conversational chat
def conversational_chat(query, similarity_threshold=0.8, max_attempts=3):
    """Answer *query* with the retrieval chain and record the exchange.

    The chain is asked up to ``max_attempts`` times. After each attempt the
    answer is compared with the query using spaCy's ``Doc.similarity``; the
    first answer scoring strictly above ``similarity_threshold`` is accepted
    immediately. If no attempt clears the threshold, the highest-scoring
    answer seen is used instead of retrying forever.

    Args:
        query: The user's question.
        similarity_threshold: Score an answer must exceed to be accepted
            without further retries.
        max_attempts: Upper bound on the number of chain invocations
            (each one is a call to the LLM). Must be at least 1.

    Returns:
        The selected answer text.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.

    Side effects:
        Appends ``(query, answer)`` to ``st.session_state['history']``.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    history = st.session_state['history']
    # The query does not change between attempts, so parse it only once.
    query_doc = nlp(query)

    # NOTE(review): the similarity score comes from the spaCy pipeline loaded
    # as `nlp`; small spaCy models typically ship without word vectors, which
    # makes this score a weak signal - confirm the threshold is meaningful.
    best_answer = None
    best_score = float("-inf")
    for _ in range(max_attempts):
        result = chain({"question": query, "chat_history": history})
        answer = result["answer"]
        score = nlp(answer).similarity(query_doc)

        # Always keep the first answer so a result exists even if every
        # score is unusable for comparison.
        if best_answer is None or score > best_score:
            best_answer, best_score = answer, score
        if score > similarity_threshold:
            break

    history.append((query, best_answer))
    return best_answer
# --- Session state ---------------------------------------------------------
# Chat history as (question, answer) tuples, passed back to the chain.
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Bot messages shown in the transcript, seeded with a greeting.
if 'generated' not in st.session_state:
    st.session_state['generated'] = ["こんにちは!zendo美女です。何かお探しですか?... 🤗"]

# User messages shown in the transcript; kept in lockstep with 'generated'.
if 'past' not in st.session_state:
    st.session_state['past'] = ["チャットはここから"]

# --- Layout ----------------------------------------------------------------
# The transcript container is created first so it renders above the input form.
response_container = st.container()
container = st.container()

# User input form
with container:
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_input("ChatBox", placeholder="質問をご記入ください... ", key='input')
        submit_button = st.form_submit_button(label='Send')

    # Only query the chain when the form was submitted with non-empty text.
    if submit_button and user_input:
        output = conversational_chat(user_input)
        st.session_state['past'].append(user_input)
        st.session_state['generated'].append(output)

# Display chat history: one user bubble followed by one bot bubble per turn.
if st.session_state['generated']:
    with response_container:
        turns = zip(st.session_state['past'], st.session_state['generated'])
        for i, (user_text, bot_text) in enumerate(turns):
            message(user_text, is_user=True, key=f"{i}_user", avatar_style="big-smile")
            message(bot_text, key=str(i), avatar_style="thumbs")