# Streamlit page for the "Watson Assistant VDF TOBi improvement" demo.
#
# Flow: pick an embedding model in the sidebar, upload a CSV with `utterance`
# and `intent` columns, type a text, and look up its nearest neighbour in the
# vector index stored at embeddings/<model_name>_vector_db.index.
#
# NOTE: the header is written as comments rather than a module docstring so
# nothing here can be picked up and rendered on the page.

import os
import re
import warnings
from time import time

import pandas as pd
import streamlit as st

from src.E_Model_utils import load_model, train_model, get_embeddings
from src.E_Faiss_utils import load_embeddings_and_index, normalize_embeddings
from src.A_Preprocess import load_data, clean_text

warnings.filterwarnings("ignore", category=FutureWarning)

# Sidebar entry that means "no model selected"; it is the radio's default.
NO_MODEL = "other"
MODEL_CHOICES = [
    NO_MODEL,
    "e5_small_fine_tuned_model",
    "multilingual-e5-small",
    "all-MiniLM-L6-v2",
    "all-distilroberta-v1",
]

# Sidebar choice -> identifier handed to load_model. Choices that are not
# listed here are passed to load_model unchanged.
E5_BASE_MODEL = "intfloat/multilingual-e5-small"
MODEL_SOURCES = {
    "multilingual-e5-small": E5_BASE_MODEL,
    # TODO: load the checkpoint in FINE_TUNED_MODEL_PATH once it is wired in;
    # until then the fine-tuned entry uses the base E5 model, as before.
    "e5_small_fine_tuned_model": E5_BASE_MODEL,
}
FINE_TUNED_MODEL_PATH = "output/fine-tuned-model"

# Columns the uploaded intents CSV must provide.
REQUIRED_COLUMNS = ("utterance", "intent")


def _load_selected_model(choice):
    """Load the embedding model that corresponds to a sidebar choice."""
    return load_model(MODEL_SOURCES.get(choice, choice))


def _nearest_intent(model, index, intents, text):
    """Return ``(intent, similarity)`` for the indexed entry closest to *text*.

    Args:
        model: Embedding model passed through to ``get_embeddings``.
        index: Object exposing ``search(vectors, k)`` that returns
            ``(distances, positions)`` -- presumably a FAISS index; confirm
            against ``load_embeddings_and_index``.
        intents: Intent labels, one per indexed row, in index order.
        text: Raw user input; it is cleaned with ``clean_text`` first.

    Raises:
        IndexError: If the returned position does not address an element of
            *intents* (for example when the CSV and the index are out of
            sync, or the index signals "no result" with a negative position).
    """
    cleaned_text = clean_text(text)
    input_embedding = get_embeddings(model, [cleaned_text])
    normalized_embedding = normalize_embeddings(input_embedding)

    distances, positions = index.search(normalized_embedding, 1)
    position = int(positions[0][0])
    # Reject out-of-range positions explicitly: a negative value would
    # otherwise be accepted by list indexing and return the wrong intent.
    if not 0 <= position < len(intents):
        raise IndexError(
            f"The vector index returned position {position}, but the uploaded "
            f"data has {len(intents)} intents; the CSV and the index are "
            "probably out of sync."
        )

    # Maps a distance in [0, inf) onto (0, 1].
    # NOTE(review): assumes the index returns a distance (smaller is closer),
    # not an inner-product score -- confirm against how the index is built.
    similarity = 1 / (1 + distances[0][0])
    return intents[position], similarity


# ---- Page header -----------------------------------------------------------
st.markdown("---")
# Placeholder for custom HTML/CSS; the markup is currently blank.
st.markdown(
    """ """,
    unsafe_allow_html=True
)
st.header('Watson Assistant VDF TOBi improvement')
st.markdown("---")
st.write('The model is trained on the TOBi 🤖 intents in Romanian language.')

# ---- Model selection -------------------------------------------------------
model_name = st.sidebar.radio("Selectează modelul 👇", MODEL_CHOICES)

if model_name == NO_MODEL:
    # Without a model there is nothing to embed with, so end this run here
    # instead of failing later on an unbound `model`.
    st.stop()

# future improvement: add a loading spinner
st.write("Model path:", FINE_TUNED_MODEL_PATH)
model = _load_selected_model(model_name)
st.write(f"Modelul selectat: {model_name}")
st.write("Model loaded successfully!")

# ---- Intents data ----------------------------------------------------------
uploaded_file = st.file_uploader("Încarcă fișierul cu intenții", type="csv")
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    missing_columns = [col for col in REQUIRED_COLUMNS if col not in data.columns]
    if missing_columns:
        st.error(
            "CSV file is missing required column(s): "
            + ", ".join(missing_columns)
        )
        st.stop()
    st.write("CSV file successfully uploaded!")
    # Keep the validated data so it survives reruns without a new upload.
    st.session_state.data = data
else:
    # If no file is uploaded, try to load data from session state.
    data = st.session_state.data if 'data' in st.session_state else None

if data is None:
    # Nothing to search against yet.
    st.stop()

intents = data['intent'].tolist()

# ---- Intent identification -------------------------------------------------
user_text = st.text_input("Te rog introdu un text.")
if not user_text:
    st.write("Te rog introdu un text.")
elif st.button("Identifică Intenția"):
    start = time()
    st.write("Procesare text...")
    # Only the index is needed here; the stored embeddings are ignored.
    _embeddings, index = load_embeddings_and_index(
        f"embeddings/{model_name}_vector_db.index"
    )
    try:
        # Search for the single nearest neighbour.
        intent, similarity = _nearest_intent(model, index, intents, user_text)
    except IndexError as err:
        st.error(str(err))
    else:
        st.write(f"Intenția identificată: {intent}")
        st.write(f"Nivel de încredere: {similarity:.4f}")
        st.write(f"Timp de răspuns: {time() - start:.4f} secunde")