# Spaces:
# Sleeping
# Sleeping
import streamlit as st

# CSS injected into the page: grey background for text inputs and a light
# grey page body. Kept flush-left so the <style> tag starts its own HTML block
# when passed through the markdown renderer.
_CUSTOM_CSS = """
<style>
.stTextInput > div > div > input {
background-color: #d3d3d3;
}
body {
background-color: #f0f0f0;
}
</style>
"""

_PAGE_TITLE = 'Watson Assistant VDF TOBi improvement'
_PAGE_INTRO = 'The model is trained on the TOBi 🤖 intents in Romanian language.'

# Page layout: rule, style overrides, header, rule, one-line description.
# The rules were bare '---' string statements (Streamlit "magic"); they are
# written out explicitly here.
st.write('---')
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
st.header(_PAGE_TITLE)
st.write('---')
st.write(_PAGE_INTRO)
import os
import re
import warnings
from time import time

import pandas as pd

from src.E_Model_utils import load_model, train_model, get_embeddings
from src.E_Faiss_utils import load_embeddings_and_index, normalize_embeddings
from src.A_Preprocess import load_data, clean_text

warnings.filterwarnings("ignore", category=FutureWarning)

# Sidebar model selector. "other" is the first option, so it is the initial
# selection; it means "do not load any model".
model_name = st.sidebar.radio(
    "Selectează modelul 👇",
    [
        "other",
        "e5_small_fine_tuned_model",
        "multilingual-e5-small",
        "all-MiniLM-L6-v2",
        "all-distilroberta-v1",
    ],
)

# Sidebar labels that are loaded under a different identifier than the label
# itself; every other label is passed to load_model() unchanged.
# NOTE(review): "e5_small_fine_tuned_model" loads the base checkpoint, exactly
# as the original did (loading from `model_path` was commented out there) --
# confirm whether the fine-tuned weights should be used instead.
_MODEL_IDS = {
    "multilingual-e5-small": "intfloat/multilingual-e5-small",
    "e5_small_fine_tuned_model": "intfloat/multilingual-e5-small",
}

if model_name != "other":
    # Displayed for information only; nothing is loaded from this path.
    model_path = "output/fine-tuned-model"
    st.write("Model path:", model_path)

    # NOTE(review): this load runs each time the script executes with a model
    # selected; consider caching the loaded model.
    with st.spinner("Se încarcă modelul..."):
        model = load_model(_MODEL_IDS.get(model_name, model_name))

    st.write(f"Modelul selectat: {model_name}")
    st.write("Model loaded successfully!")

# NOTE(review): the original indentation of this call was lost. It is placed
# at top level, which halts the script here for every selection, so the code
# that follows in this file does not run -- confirm this is intended.
st.stop()
# Intent identification against an uploaded CSV of intents.
#
# Reads `model` and `model_name` from earlier in the script. The CSV must
# provide an 'intent' column; row i is taken to be the label of vector i in
# the saved index (presumably built from the same file -- verify).
uploaded_file = st.file_uploader("Încarcă fișierul cu intenții", type="csv")
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write("CSV file successfully uploaded!")
    # Keep the table in session state so it is still available on later runs
    # of the script where no file is passed by the uploader.
    st.session_state.data = data
else:
    # No file on this run: fall back to a previously stored table, if any.
    data = st.session_state.data if "data" in st.session_state else None

user_text = st.text_input("Te rog introdu un text.")
if not user_text:
    st.write("Te rog introdu un text.")
elif st.button("Identifică Intenția"):
    if data is None or "intent" not in data.columns:
        # Previously this path crashed on data['intent'] when no CSV had been
        # uploaded (data is None) or the column was missing.
        st.error("Încarcă mai întâi un fișier CSV care conține coloana 'intent'.")
    else:
        start = time()
        st.write("Procesare text...")

        cleaned_text = clean_text(user_text)
        input_embedding = get_embeddings(model, [cleaned_text])
        normalized_embedding = normalize_embeddings(input_embedding)
        embeddings, index = load_embeddings_and_index(
            f"embeddings/{model_name}_vector_db.index"
        )
        # Look up the single nearest neighbour.
        D, I = index.search(normalized_embedding, 1)

        intents = data["intent"].tolist()
        nearest = int(I[0][0])
        if not 0 <= nearest < len(intents):
            # A negative position (e.g. -1 for "no neighbour") would silently
            # wrap around to the last row; a too-large one means the CSV and
            # the index are out of sync.
            st.error("Nu a fost găsită nicio intenție potrivită.")
        else:
            intent = intents[nearest]
            distance = D[0][0]
            # NOTE(review): this treats D as a distance (smaller is better).
            # If the index returns inner-product scores, the mapping is
            # inverted -- confirm the index metric.
            similarity = 1 / (1 + distance)
            st.write(f"Intenția identificată: {intent}")
            st.write(f"Nivel de încredere: {similarity:.4f}")
            st.write(f"Timp de răspuns: {time() - start:.4f} secunde")

# Halts the script; any code after this call in the file does not execute.
st.stop()
# Endpoint for intent identification (second variant of the lookup UI).
#
# Reads `model`, `model_name` and `data` from earlier in the script.
input_text = st.text_input("Introdu mai jos textul! 👇", label_visibility="visible")

# An explicit key keeps this button distinct from the identically labelled
# button earlier in the file.
if st.button("Identifică Intenția", key="identify_intent_legacy"):
    if not input_text:
        st.write("Te rog introdu un text.")
    elif data is None or "intent" not in data.columns:
        st.error("Încarcă mai întâi un fișier CSV care conține coloana 'intent'.")
    else:
        # Time the lookup itself, starting at the click.
        start = time()

        cleaned_text = clean_text(input_text)
        input_embedding = get_embeddings(model, [cleaned_text])
        normalized_embedding = normalize_embeddings(input_embedding)
        # `index` was never bound in this section of the original; load it the
        # same way the earlier lookup in this file does.
        embeddings, index = load_embeddings_and_index(
            f"embeddings/{model_name}_vector_db.index"
        )
        # Look up the single nearest neighbour.
        D, I = index.search(normalized_embedding, 1)

        intents = data["intent"].tolist()
        nearest = int(I[0][0])
        if not 0 <= nearest < len(intents):
            # Guard against a negative "no neighbour" position wrapping to the
            # last row, and against an index larger than the CSV.
            st.error("Nu a fost găsită nicio intenție potrivită.")
        else:
            # The original read from `intentions`, a name that is never
            # defined; the list built above is `intents`.
            intent = intents[nearest]
            distance = D[0][0]
            # NOTE(review): assumes D is a distance (smaller is better) --
            # confirm the index metric.
            similarity = 1 / (1 + distance)
            st.write(f"Intenția identificată: {intent}")
            st.write(f"Nivel de încredere: {similarity:.4f}")
            st.write(f"Timp de răspuns: {time() - start:.4f} secunde")

st.stop()