File size: 4,800 Bytes
b61f3ac
 
 
 
 
 
 
 
 
 
 
9786194
b61f3ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea7acc8
 
b61f3ac
 
 
 
 
 
 
 
 
 
 
6c32741
b61f3ac
a7b6011
d48402b
b61f3ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9786194
 
b61f3ac
 
 
9786194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b61f3ac
 
 
 
 
 
 
 
6c32741
b61f3ac
 
84bb3f7
b61f3ac
 
 
 
 
 
 
 
84bb3f7
b61f3ac
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import streamlit as st
from streamlit_chat import message
import tempfile
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain
from ctransformers import AutoModelForCausalLM
from langchain_g4f import G4FLLM
from g4f import Provider, models
import spacy
import requests
# Define the path for generated embeddings
DB_FAISS_PATH = 'vectorstore/db_faiss'

# Load the model of choice
def load_llm():
    """Build and return the LLM that backs the retrieval chain.

    Returns:
        A ``G4FLLM`` instance wrapping the gpt4free GPT-3.5-turbo model via
        the DeepAi provider. (An earlier local llama-2 / CTransformers setup
        was removed in favor of this hosted model.)
    """
    # NOTE(review): g4f providers are unofficial and break frequently;
    # confirm Provider.DeepAi still works with the installed g4f version.
    llm = G4FLLM(
        model=models.gpt_35_turbo,
        provider=Provider.DeepAi,
    )
    return llm
# Hide Streamlit's default hamburger menu and footer chrome.
hide_streamlit_style = """
        <style>
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
        </style>
        """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Set the title for the Streamlit app
st.title("Zendo美ε₯³γƒγƒ£γƒƒγƒˆγƒœγƒƒγ‚―γ‚Ή")

# Source CSV of chat/QA data used to build the retrieval index.
csv_url = "https://huggingface.co/spaces/uyen13/chatgirl/raw/main/testchatdata.csv"

# Local path where the downloaded CSV is saved.
tmp_file_path = "testchatdata.csv"

# Download the CSV file. The timeout keeps the app from hanging forever
# on an unresponsive host (the original call had none).
response = requests.get(csv_url, timeout=30)
if response.status_code == 200:
    with open(tmp_file_path, 'wb') as file:
        file.write(response.content)
else:
    raise Exception(f"Failed to download the CSV file from {csv_url}")

# Load CSV rows as LangChain documents, one document per row.
loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8", csv_args={'delimiter': ','})
data = loader.load()

# Create sentence embeddings on CPU (no GPU assumed in this deployment).
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': 'cpu'})

# Build a FAISS vector store from the documents and persist it for reuse.
db = FAISS.from_documents(data, embeddings)
db.save_local(DB_FAISS_PATH)


# Load the language model
llm = load_llm()

# Conversational RAG chain: the LLM answers using the FAISS retriever.
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=db.as_retriever())
# Initialize spaCy with the Japanese model (used to score answer relevance).
nlp = spacy.load("ja_core_news_sm")

# Function for conversational chat
def conversational_chat(query):
    """Run one turn of the retrieval chain for *query* and return the answer.

    The chain is re-queried until the spaCy similarity between answer and
    query exceeds a threshold, but only up to a fixed number of attempts.
    The original unbounded ``while`` loop could spin forever: each retry is
    a fresh LLM call, and ``ja_core_news_sm`` ships without static word
    vectors, so ``Doc.similarity`` scores may never clear the threshold.

    Args:
        query: The user's question (Japanese text expected).

    Returns:
        The chain's answer string from the last attempt; the (query, answer)
        pair is also appended to ``st.session_state['history']``.
    """
    similarity_threshold = 0.8  # minimum acceptable answer/query similarity
    max_attempts = 3            # hard cap so the loop always terminates
    result = None
    for _ in range(max_attempts):
        result = chain({"question": query, "chat_history": st.session_state['history']})
        # Score the generated answer against the user's query.
        answer_doc = nlp(result["answer"])
        if answer_doc.similarity(nlp(query)) > similarity_threshold:
            break

    st.session_state['history'].append((query, result["answer"]))
    return result["answer"]

# Initialize chat history
# history: list of (query, answer) tuples consumed by ConversationalRetrievalChain.
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Initialize messages
# generated: bot-side messages, seeded with a Japanese greeting.
if 'generated' not in st.session_state:
    st.session_state['generated'] = ["こんにけは!zendo美ε₯³γ§γ™γ€‚δ½•γ‹γŠζŽ’γ—γ§γ™γ‹οΌŸ... πŸ€—"]

# past: user-side messages, seeded with a Japanese "chat starts here" placeholder
# so the two lists stay index-aligned for the transcript renderer below.
if 'past' not in st.session_state:
    st.session_state['past'] = ["γƒγƒ£γƒƒγƒˆγ―γ“γ“γ‹γ‚‰"]

# Create containers for chat history and user input
response_container = st.container()
container = st.container()

# --- User input form -------------------------------------------------------
with container:
    # clear_on_submit empties the text box after each send.
    with st.form(key='my_form', clear_on_submit=True):
        query_text = st.text_input("ChatBox", placeholder="θ³ͺε•γ‚’γ”θ¨˜ε…₯ください...  ", key='input')
        send_clicked = st.form_submit_button(label='Send')

    # Only run the chain when the button was pressed with non-empty input.
    if send_clicked and query_text:
        answer = conversational_chat(query_text)
        st.session_state['past'].append(query_text)
        st.session_state['generated'].append(answer)

# --- Chat transcript -------------------------------------------------------
# Render the full conversation, user bubble then bot bubble per turn.
if st.session_state['generated']:
    with response_container:
        for idx, bot_msg in enumerate(st.session_state['generated']):
            message(st.session_state["past"][idx], is_user=True, key=f"{idx}_user", avatar_style="big-smile")
            message(bot_msg, key=f"{idx}", avatar_style="thumbs")