# Source: HQ_Project_EN/pages/Project_1.1_-_LLM.py
# Uploaded by 1mpreccable
# Commit cff87c7: "Initial commit with cleared history"
import pandas as pd
import streamlit as st
import datetime
import socket
from src.functions_llm import load_model, compare_label_results
from src.functions_db import connect_to_db, Activity, LLM
# Choices offered in the sidebar model selector. The last entry is not a real
# model name: the page treats it as a request to type a custom model URL.
list_of_models = [
    "1mpreccable/10k_trained_bert",
    "naloui/results",
    "another model from hub",
]

# Connect to the SQLite database. Five values come back; the last one is not
# needed on this page and is discarded into `_`.
_DB_ADDRESS = "sqlite:///src/databases/main.db"
session, user, activity, llm, _ = connect_to_db(address=_DB_ADDRESS)
## --------------
## SIDEBAR PARAMS
## --------------
# Everything created inside this context manager is rendered in the sidebar.
with st.sidebar:
    st.title("App parameters")
    model_url = st.selectbox("Choose your model", list_of_models)
    st.divider()
    imported_df = st.file_uploader(
        "Upload your df in format csv or xlsx",
        type=["csv", 'xlsx'],
    )

## --------------
## MAINBAR PARAMS
## --------------
tab1, tab2 = st.tabs(["LLM", "DB_Extraction"])

# Widgets of the first tab: title, selected-model reminder, the free-text
# sentence input and the button that launches a prediction.
with tab1:
    st.title("LLM project")

    # Display model description
    if model_url:
        st.write(f"Selected model: **{model_url}**")

    # Sentence for analysis
    input_sentence = st.text_area("Enter a sentence for sentiment analysis")
    button_launch = st.button('Launch Prediction')
# Logic for a single sentence typed in manually by the user.

# Ask for the custom model URL as soon as the placeholder entry is selected,
# whether or not a sentence has been typed yet. Doing it here (instead of only
# when a sentence is present) guarantees that the placeholder string itself is
# never handed to load_model(), neither by the single-sentence logic nor by the
# file-upload logic further down. While the text input is empty, model_url is
# "" and every `if model_url ...` check on this page is skipped.
if model_url == "another model from hub":
    model_url = tab1.text_input("Please provide the model URL here:")

if model_url and input_sentence and button_launch:
    # Resolving the local hostname raises OSError (socket.gaierror) on machines
    # whose hostname has no DNS/hosts entry. Logging the activity must not take
    # the whole page down, so fall back to a marker value in that case.
    # NOTE(review): assumes add_activity accepts any string for the address.
    try:
        host_address = socket.gethostbyname(socket.gethostname())
    except OSError:
        host_address = "unknown"

    # Add activity to the database
    activity.add_activity(
        session,
        "admin",
        datetime.date.today(),
        datetime.datetime.now(),
        host_address,
        "LLM project",
    )

    sentiment_analyzer = load_model(model_url)
    if sentiment_analyzer:
        # Get sentiment prediction; only the first prediction is used below.
        result = sentiment_analyzer(input_sentence)

        # Display result
        if result:
            sentiment = result[0]['label']
            score = result[0]['score']
            tab1.write(f"Sentiment: **{sentiment}** with a confidence score of **{score:.2f}**")

            # Translate the raw label into a readable verdict; any other label
            # is shown only in its raw form above.
            if sentiment in ('LABEL_1', 'POSITIVE'):
                tab1.write("Positive")
            elif sentiment in ('LABEL_0', 'NEGATIVE'):
                tab1.write("Negative")

            # Option to download results
            results_df = pd.DataFrame([{'Sentence': input_sentence, 'Sentiment': sentiment, 'Score': score}])
            csv = results_df.to_csv(index=False).encode('utf-8')
            tab1.download_button(
                label="Download results as CSV",
                data=csv,
                file_name='sentiment_analysis_results.csv',
                mime='text/csv',
                # The file-upload logic renders a download button with the same
                # label, file name and mime type; an explicit key keeps the two
                # widgets distinct when both are shown in the same run.
                key="download_single_sentence_result",
            )

            # Add LLM history to the database
            llm.add_llm(
                session,
                "admin",
                input_sentence,
                sentiment,
                model_url,
                "LLM project",
            )
# Logic for a file uploaded by the user.


def _read_uploaded_table(uploaded_file):
    """Load an uploaded csv/xlsx file into a DataFrame.

    Args:
        uploaded_file: Object returned by the sidebar file uploader. Its
            ``name`` attribute selects the parser and the object itself is
            passed to pandas as the file to read.

    Returns:
        The parsed DataFrame, or ``None`` when the file name ends with neither
        ``csv`` nor ``xlsx`` (compared case-insensitively, so ``DATA.CSV`` is
        accepted as well).
    """
    file_name = uploaded_file.name.lower()
    if file_name.endswith('csv'):
        return pd.read_csv(uploaded_file, encoding='unicode_escape')
    if file_name.endswith('xlsx'):
        return pd.read_excel(uploaded_file)
    return None


def _classify_texts(analyzer, texts, on_progress=None):
    """Run ``analyzer`` on every text and collect the first prediction of each.

    Args:
        analyzer: Callable that takes one text and returns a sequence whose
            first item is a mapping with ``'label'`` and ``'score'`` keys.
        texts: List of texts to classify.
        on_progress: Optional callable receiving the completed fraction
            (0 < value <= 1) after each text.

    Returns:
        A list with one prediction mapping per input text, in input order.
    """
    total = len(texts)
    predictions = []
    for done, text in enumerate(texts, start=1):
        predictions.append(analyzer(text)[0])
        if on_progress is not None:
            on_progress(done / total)
    return predictions


if model_url and imported_df:
    df = _read_uploaded_table(imported_df)
    if df is None:
        tab1.write("Please upload a file in csv or xlsx format")
    else:
        # Limit to 500 rows while testing. The explicit copy makes the column
        # assignments below operate on an independent frame instead of a slice.
        df = df.iloc[:500].copy()

        name_of_the_column = tab1.selectbox("Select the column for sentiment analysis", df.columns)
        column_with_real_results = tab1.selectbox("Select the column with real results", df.columns)

        # Compare against None rather than relying on truthiness, so that a
        # column whose name is falsy (for example the integer 0) is accepted.
        columns_chosen = name_of_the_column is not None and column_with_real_results is not None

        if button_launch and columns_chosen:
            # Load the model only once a prediction is actually requested.
            sentiment_analyzer = load_model(model_url)
            if sentiment_analyzer:
                # Missing cells become empty strings and every other value is
                # converted to text before being handed to the analyzer.
                # NOTE(review): assumes the analyzer only accepts strings.
                text_data = [
                    "" if pd.isna(value) else str(value)
                    for value in df[name_of_the_column].tolist()
                ]

                # Initialize progress bar and update it after every row
                progress_bar = tab1.progress(0)
                results = _classify_texts(sentiment_analyzer, text_data, progress_bar.progress)

                df['Sentiment'] = [res['label'] for res in results]
                df['Score'] = [res['score'] for res in results]
                df['Sentiment_Label'] = df['Sentiment'].apply(
                    lambda x: 'Positive' if x in ['LABEL_1', 'POSITIVE'] else 'Negative'
                )
                tab1.write(df)

                # Compare the predicted labels with the column holding the real results.
                result = compare_label_results(df, 'Sentiment_Label', column_with_real_results)
                tab1.write(f"Accuracy: {result:.2%}")

                # Option to download results
                csv = df.to_csv(index=False).encode('utf-8')
                tab1.download_button(
                    label="Download results as CSV",
                    data=csv,
                    file_name='sentiment_analysis_results.csv',
                    mime='text/csv',
                    # The single-sentence logic renders a download button with
                    # the same label, file name and mime type; an explicit key
                    # keeps the two widgets distinct when both are shown.
                    key="download_file_results",
                )
                # TODO: also show the share of positive and negative reviews
                # next to the accuracy figure.
################################################################################
# TAB 2 DB
################################################################################
# Attributes copied from every stored LLM record, in the order in which they
# appear as table columns.
_history_fields = (
    'id',
    'input_sentence',
    'output_label',
    'model_url',
    'type_of_activity',
    'user_id',
)

# Fetch all LLM records and turn each one into a plain dict row.
llm_histories = session.query(LLM).all()
_history_rows = [
    {field: getattr(record, field) for field in _history_fields}
    for record in llm_histories
]
llm_histories_df = pd.DataFrame(_history_rows)

# Show the history in the second tab.
tab2.write("LLM history:")
tab2.data_editor(llm_histories_df)