File size: 4,800 Bytes
b61f3ac
 
 
 
 
 
 
 
 
 
 
9786194
b61f3ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea7acc8
 
b61f3ac
 
 
 
 
 
 
 
 
 
 
6c32741
b61f3ac
a7b6011
d48402b
b61f3ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9786194
 
b61f3ac
 
 
9786194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b61f3ac
 
 
 
 
 
 
 
6c32741
b61f3ac
 
84bb3f7
b61f3ac
 
 
 
 
 
 
 
84bb3f7
b61f3ac
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import streamlit as st
from streamlit_chat import message
import tempfile
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain
from ctransformers import AutoModelForCausalLM
from langchain_g4f import G4FLLM
from g4f import Provider, models
import spacy
import requests
# Define the path for generated embeddings
DB_FAISS_PATH = 'vectorstore/db_faiss'

# Load the model of choice
def load_llm():
    """Build and return the LLM that backs the retrieval chain.

    Returns:
        A ``G4FLLM`` instance wrapping the gpt4free GPT-3.5-turbo model via
        the DeepAi provider. (An earlier local llama-2 / CTransformers setup
        was removed in favor of this hosted model.)
    """
    # NOTE(review): g4f providers are unofficial and break frequently;
    # confirm Provider.DeepAi still works with the installed g4f version.
    llm = G4FLLM(
        model=models.gpt_35_turbo,
        provider=Provider.DeepAi,
    )
    return llm
# Hide Streamlit's default hamburger menu and footer chrome.
hide_streamlit_style = """
        <style>
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
        </style>
        """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Set the title for the Streamlit app
st.title("Zendo美ε₯³γƒγƒ£γƒƒγƒˆγƒœγƒƒγ‚―γ‚Ή")

# Source CSV of chat/QA data used to build the retrieval index.
csv_url = "https://huggingface.co/spaces/uyen13/chatgirl/raw/main/testchatdata.csv"

# Local path where the downloaded CSV is saved.
tmp_file_path = "testchatdata.csv"

# Download the CSV file. The timeout keeps the app from hanging forever
# on an unresponsive host (the original call had none).
response = requests.get(csv_url, timeout=30)
if response.status_code == 200:
    with open(tmp_file_path, 'wb') as file:
        file.write(response.content)
else:
    raise Exception(f"Failed to download the CSV file from {csv_url}")

# Load CSV rows as LangChain documents, one document per row.
loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8", csv_args={'delimiter': ','})
data = loader.load()

# Create sentence embeddings on CPU (no GPU assumed in this deployment).
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': 'cpu'})

# Build a FAISS vector store from the documents and persist it for reuse.
db = FAISS.from_documents(data, embeddings)
db.save_local(DB_FAISS_PATH)


# Load the language model
llm = load_llm()

# Conversational RAG chain: the LLM answers using the FAISS retriever.
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=db.as_retriever())
# Initialize spaCy with the Japanese model (used to score answer relevance).
nlp = spacy.load("ja_core_news_sm")

# Function for conversational chat
def conversational_chat(query):
    """Run one turn of the retrieval chain for *query* and return the answer.

    The chain is re-queried until the spaCy similarity between answer and
    query exceeds a threshold, but only up to a fixed number of attempts.
    The original unbounded ``while`` loop could spin forever: each retry is
    a fresh LLM call, and ``ja_core_news_sm`` ships without static word
    vectors, so ``Doc.similarity`` scores may never clear the threshold.

    Args:
        query: The user's question (Japanese text expected).

    Returns:
        The chain's answer string from the last attempt; the (query, answer)
        pair is also appended to ``st.session_state['history']``.
    """
    similarity_threshold = 0.8  # minimum acceptable answer/query similarity
    max_attempts = 3            # hard cap so the loop always terminates
    result = None
    for _ in range(max_attempts):
        result = chain({"question": query, "chat_history": st.session_state['history']})
        # Score the generated answer against the user's query.
        answer_doc = nlp(result["answer"])
        if answer_doc.similarity(nlp(query)) > similarity_threshold:
            break

    st.session_state['history'].append((query, result["answer"]))
    return result["answer"]

# Initialize chat history
# history: list of (query, answer) tuples consumed by ConversationalRetrievalChain.
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Initialize messages
# generated: bot-side messages, seeded with a Japanese greeting.
if 'generated' not in st.session_state:
    st.session_state['generated'] = ["こんにけは!zendo美ε₯³γ§γ™γ€‚δ½•γ‹γŠζŽ’γ—γ§γ™γ‹οΌŸ... πŸ€—"]

# past: user-side messages, seeded with a Japanese "chat starts here" placeholder
# so the two lists stay index-aligned for the transcript renderer below.
if 'past' not in st.session_state:
    st.session_state['past'] = ["γƒγƒ£γƒƒγƒˆγ―γ“γ“γ‹γ‚‰"]

# Create containers for chat history and user input
response_container = st.container()
container = st.container()

# --- User input form -------------------------------------------------------
with container:
    # clear_on_submit empties the text box after each send.
    with st.form(key='my_form', clear_on_submit=True):
        query_text = st.text_input("ChatBox", placeholder="θ³ͺε•γ‚’γ”θ¨˜ε…₯ください...  ", key='input')
        send_clicked = st.form_submit_button(label='Send')

    # Only run the chain when the button was pressed with non-empty input.
    if send_clicked and query_text:
        answer = conversational_chat(query_text)
        st.session_state['past'].append(query_text)
        st.session_state['generated'].append(answer)

# --- Chat transcript -------------------------------------------------------
# Render the full conversation, user bubble then bot bubble per turn.
if st.session_state['generated']:
    with response_container:
        for idx, bot_msg in enumerate(st.session_state['generated']):
            message(st.session_state["past"][idx], is_user=True, key=f"{idx}_user", avatar_style="big-smile")
            message(bot_msg, key=f"{idx}", avatar_style="thumbs")