File size: 4,800 Bytes
b61f3ac 9786194 b61f3ac ea7acc8 b61f3ac 6c32741 b61f3ac a7b6011 d48402b b61f3ac 9786194 b61f3ac 9786194 b61f3ac 6c32741 b61f3ac 84bb3f7 b61f3ac 84bb3f7 b61f3ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import streamlit as st
from streamlit_chat import message
import tempfile
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain
from ctransformers import AutoModelForCausalLM
from langchain_g4f import G4FLLM
from g4f import Provider, models
import spacy
import requests
# Directory where the FAISS index built from the CSV is persisted
# (passed to db.save_local() further down in this script).
DB_FAISS_PATH = 'vectorstore/db_faiss'
# Load the model of choice
def load_llm():
    """Build and return the language model used by the retrieval chain.

    The model is a ``G4FLLM`` wrapper configured with the g4f
    ``gpt_35_turbo`` model and the ``DeepAi`` provider.

    A previous, now-disabled variant loaded a local Llama-2-7B-Chat GGML file
    (https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin,
    about 2.87G) through ``CTransformers`` with model_type="llama",
    temperature=0.2, top_k=10, top_p=0.9, repetition_penalty=1.0,
    max_new_tokens=512, seed=42, reset=True and stream=False.
    """
    g4f_settings = {
        "model": models.gpt_35_turbo,
        "provider": Provider.DeepAi,
    }
    return G4FLLM(**g4f_settings)
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# Set the title for the Streamlit app
st.title("ZendoηΎε₯³γγ£γγγγγ―γΉ")
# Remote location of the CSV that serves as the chatbot's knowledge base.
csv_url = "https://huggingface.co/spaces/uyen13/chatgirl/raw/main/testchatdata.csv"
# Alternative source (disabled):
# csv_url="https://docs.google.com/uc?export=download&id=1fQ2v2n9zQcoi6JoOU3lCBDHRt3a1PmaE"

# Local file the downloaded CSV is written to (relative to the working directory).
tmp_file_path = "testchatdata.csv"

# Download the CSV. An explicit timeout is passed because requests waits
# indefinitely by default, which would leave the app hanging at start-up on a
# stalled connection.
response = requests.get(csv_url, timeout=30)
if response.status_code != 200:
    raise Exception(f"Failed to download the CSV file from {csv_url}")

# Binary mode: the payload is written exactly as received; decoding is left to
# the CSV loader.
with open(tmp_file_path, 'wb') as file:
    file.write(response.content)
# NOTE(review): everything below runs at module top level; Streamlit
# re-executes the script on user interaction, so the index and models are
# presumably rebuilt each time -- consider caching. TODO confirm.

# Parse the downloaded CSV into LangChain documents.
loader = CSVLoader(
    file_path=tmp_file_path,
    encoding="utf-8",
    csv_args={'delimiter': ','},
)
data = loader.load()

# Sentence-Transformers embedding model, pinned to the CPU.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

# Index the documents in FAISS and persist the index under DB_FAISS_PATH.
db = FAISS.from_documents(data, embeddings)
db.save_local(DB_FAISS_PATH)

# Language model that writes the answers.
llm = load_llm()

# Conversational retrieval chain: the FAISS index is the retriever, the LLM
# produces the answer.
faiss_retriever = db.as_retriever()
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=faiss_retriever)

# Japanese spaCy pipeline; conversational_chat uses it to score how similar an
# answer is to the question.
nlp = spacy.load("ja_core_news_sm")
# Function for conversational chat
def conversational_chat(query, max_attempts=3, similarity_threshold=0.8):
    """Answer *query* with the retrieval chain and record the exchange.

    The chain is called repeatedly until spaCy scores the answer as more
    similar to the query than ``similarity_threshold``, or until
    ``max_attempts`` calls have been made. The previous implementation had no
    upper bound on retries, so an answer that never cleared the threshold
    blocked the app forever while re-querying the LLM on every iteration.

    Args:
        query: The user's question.
        max_attempts: Maximum number of chain calls. Values below 1 are
            treated as 1 so that an answer is always produced.
        similarity_threshold: Score an answer must exceed to be accepted
            before the attempts run out.

    Returns:
        The first answer whose score exceeds the threshold, otherwise the
        highest-scoring answer seen.

    Side effects:
        Appends ``(query, answer)`` to ``st.session_state['history']``.
    """
    # NOTE(review): "ja_core_news_sm" is a small pipeline; spaCy's small
    # models presumably ship without real word vectors, which would make
    # these similarity scores unreliable -- confirm, or switch to a model
    # with vectors.
    # The query does not change between attempts, so it is parsed only once.
    query_doc = nlp(query)
    history = st.session_state['history']

    best_answer = None
    best_score = float("-inf")
    for _ in range(max(1, max_attempts)):
        answer = chain({"question": query, "chat_history": history})["answer"]
        score = nlp(answer).similarity(query_doc)
        if best_answer is None or score > best_score:
            best_answer, best_score = answer, score
        if score > similarity_threshold:
            break

    history.append((query, best_answer))
    return best_answer
# Initialize chat history
if 'history' not in st.session_state:
st.session_state['history'] = []
# Initialize messages
if 'generated' not in st.session_state:
st.session_state['generated'] = ["γγγ«γ‘γ―οΌzendoηΎε₯³γ§γγδ½γγζ’γγ§γγοΌ... π€"]
if 'past' not in st.session_state:
st.session_state['past'] = ["γγ£γγγ―γγγγ"]
# Two containers: the first shows the conversation, the second holds the
# input form. Creating the reply container first places it above the form.
response_container = st.container()
container = st.container()

with container:
    # clear_on_submit=True empties the text box after each send.
    with st.form(key='my_form', clear_on_submit=True):
        # The placeholder literal had been stored as mojibake and was split
        # across two lines by a stray line break, leaving an unterminated
        # string. It is restored to the Japanese text the bytes encode.
        user_input = st.text_input("ChatBox", placeholder="質問をご記入ください... ", key='input')
        submit_button = st.form_submit_button(label='Send')

    # Only query the chain when the form was submitted with non-empty text.
    if submit_button and user_input:
        output = conversational_chat(user_input)
        st.session_state['past'].append(user_input)
        st.session_state['generated'].append(output)
# Render the conversation: for each turn, the user's message followed by the
# bot's reply. A stray trailing "|" after the last call (a syntax error) has
# been removed.
generated_messages = st.session_state['generated']
if generated_messages:
    with response_container:
        for idx, bot_reply in enumerate(generated_messages):
            # Distinct keys per widget: "<idx>_user" for the user bubble and
            # "<idx>" for the bot bubble.
            message(st.session_state["past"][idx], is_user=True, key=f"{idx}_user", avatar_style="big-smile")
            message(bot_reply, key=str(idx), avatar_style="thumbs")