import streamlit as st | |
from streamlit_chat import message | |
import tempfile | |
from langchain.document_loaders.csv_loader import CSVLoader | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.vectorstores import FAISS | |
from langchain.llms import CTransformers | |
from langchain.chains import ConversationalRetrievalChain | |
from ctransformers import AutoModelForCausalLM | |
from langchain_g4f import G4FLLM | |
from g4f import Provider, models | |
# import spacy | |
import requests | |
# Directory where the FAISS vector store built from the CSV data is saved
# (it is the argument given to `db.save_local`).
DB_FAISS_PATH = 'vectorstore/db_faiss'
# Load the model of choice | |
def load_llm():
    """Build and return the language model used by the retrieval chain.

    Returns:
        A ``G4FLLM`` instance configured with the ``gpt_35_turbo`` model and
        the ``DeepAi`` provider.
    """
    # Disabled alternative, kept for reference: a local Llama-2 7B chat GGML
    # model loaded through CTransformers.
    #   url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 2.87G
    #   model_loc, file_size = dl_hf_model(url)
    #   llm = CTransformers(
    #       model=model_loc, model_type="llama", temperature=0.2,
    #       top_k=10, top_p=0.9, repetition_penalty=1.0,
    #       max_new_tokens=512,  # adjust as needed
    #       seed=42,
    #       reset=True,  # reset history (cache)
    #       stream=False,
    #       # threads=cpu_count,
    #       # stop=prompt_prefix[1:2],
    #   )
    return G4FLLM(
        model=models.gpt_35_turbo,
        provider=Provider.DeepAi,
    )
hide_streamlit_style = """ | |
<style> | |
#MainMenu {visibility: hidden;} | |
footer {visibility: hidden;} | |
</style> | |
""" | |
st.markdown(hide_streamlit_style, unsafe_allow_html=True) | |
# Set the title for the Streamlit app | |
st.title("ZendoηΎε₯³γγ£γγγγγ―γΉ") | |
csv_url = "https://huggingface.co/spaces/uyen13/chatgirl/raw/main/testchatdata.csv" | |
# csv_url="https://docs.google.com/uc?export=download&id=1fQ2v2n9zQcoi6JoOU3lCBDHRt3a1PmaE" | |
# Define the path where you want to save the downloaded file | |
tmp_file_path = "testchatdata.csv" | |
# Download the CSV file | |
response = requests.get(csv_url) | |
if response.status_code == 200: | |
with open(tmp_file_path, 'wb') as file: | |
file.write(response.content) | |
else: | |
raise Exception(f"Failed to download the CSV file from {csv_url}") | |
# Load CSV data using CSVLoader | |
loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8", csv_args={'delimiter': ','}) | |
data = loader.load() | |
# Create embeddings using Sentence Transformers | |
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': 'cpu'}) | |
# Create a FAISS vector store and save embeddings | |
db = FAISS.from_documents(data, embeddings) | |
db.save_local(DB_FAISS_PATH) | |
# Load the language model | |
llm = load_llm() | |
# Create a conversational chain | |
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=db.as_retriever()) | |
# Initialize spaCy with the Japanese model | |
# nlp = spacy.load("ja_core_news_sm") | |
# Function for conversational chat | |
def conversational_chat(query):
    """Answer ``query`` with the retrieval chain and record the exchange.

    The chain is called with the question and the chat history stored in
    ``st.session_state['history']``; the ``(query, answer)`` pair is then
    appended to that history.

    Args:
        query: The user's question.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    # Disabled experiment, kept for reference: re-ask the chain in a loop
    # until the spaCy similarity between the answer and the query exceeded a
    # threshold of 0.8:
    #   result = None
    #   similarity_score = 0
    #   similarity_threshold = 0.8
    #   while similarity_score <= similarity_threshold:
    #       result = chain({"question": query, "chat_history": st.session_state['history']})
    #       doc = nlp(result["answer"])
    #       target_phrase = query
    #       similarity_score = doc.similarity(nlp(target_phrase))
    history = st.session_state['history']
    answer = chain({"question": query, "chat_history": history})["answer"]
    history.append((query, answer))
    return answer
if 'history' not in st.session_state: | |
st.session_state['history'] = [] | |
# Initialize session state if not already done | |
if 'past' not in st.session_state: | |
st.session_state['past'] = ["γγ£γγγ―γγγγ"] | |
if 'generated' not in st.session_state: | |
st.session_state['generated'] = ["γγγ«γ‘γ―οΌzendoηΎε₯³γ§γγδ½γγζ’γγ§γγοΌ... π€"] | |
# Create a container for the chat history with a fixed height (half of the body height) | |
chat_history_container = st.container() | |
chat_history_container.style.height = '50vh' # Set the height to 50% of the viewport height | |
# Create a form for user input | |
with st.form(key='my_form', clear_on_submit=True): | |
user_input = st.text_input("ChatBox", placeholder="θ³ͺεγγθ¨ε ₯γγ γγ...", key='input') | |
submit_button = st.form_submit_button(label='Send') | |
# Process user input and update chat history | |
if submit_button and user_input: | |
output = conversational_chat(user_input) | |
st.session_state['past'].append(user_input) | |
st.session_state['generated'].append(output) | |
# Display chat history within the container | |
if st.session_state['generated']: | |
with chat_history_container: | |
for i in range(len(st.session_state['generated'])): | |
message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile") | |
message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs") | |
# Ensure chat history is always scrollable | |
st.markdown(""" | |
<style> | |
.stText { | |
overflow-y: auto; | |
height: 100%; /* Ensure the chat history container takes up full available height */ | |
} | |
</style> | |
""", unsafe_allow_html=True) | |
# Initialize chat history | |
# if 'history' not in st.session_state: | |
# st.session_state['history'] = [] | |
# # Initialize messages | |
# if 'generated' not in st.session_state: | |
# st.session_state['generated'] = ["γγγ«γ‘γ―οΌzendoηΎε₯³γ§γγδ½γγζ’γγ§γγοΌ... π€"] | |
# if 'past' not in st.session_state: | |
# st.session_state['past'] = ["γγ£γγγ―γγγγ"] | |
# # Create containers for chat history and user input | |
# response_container = st.container() | |
# container = st.container() | |
# # User input form | |
# with container: | |
# with st.form(key='my_form', clear_on_submit=True): | |
# user_input = st.text_input("ChatBox", placeholder="θ³ͺεγγθ¨ε ₯γγ γγ... ", key='input') | |
# submit_button = st.form_submit_button(label='Send') | |
# if submit_button and user_input: | |
# output = conversational_chat(user_input) | |
# st.session_state['past'].append(user_input) | |
# st.session_state['generated'].append(output) | |
# # Display chat history | |
# if st.session_state['generated']: | |
# with response_container: | |
# for i in range(len(st.session_state['generated'])): | |
# message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile") | |
# message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs") |