Spaces:

EN-collab
/

HQ_Project_EN

Running

App Files Files Community

HQ_Project_EN / pages /Project_1.1_-_LLM.py

1mpreccable

Initial commit with cleared history

cff87c7 10 months ago

raw

history blame contribute delete

7.55 kB

	import pandas as pd
	import streamlit as st
	import datetime
	import socket

	from src.functions_llm import load_model, compare_label_results
	from src.functions_db import connect_to_db, Activity, LLM


	# Model identifiers offered in the sidebar selectbox. The last entry is not a
	# real model: further down the page it is compared against to switch to a
	# free-text field where the user types the model URL.
	list_of_models = ["1mpreccable/10k_trained_bert", "naloui/results", "another model from hub"]

	# Connect to the application database. connect_to_db returns five values;
	# the fifth is not needed on this page and is discarded.
	session, user, activity, llm, _ = connect_to_db(
	    address="sqlite:///src/databases/main.db",
	)


	## --------------
	## SIDEBAR PARAMS
	## --------------
	# Everything created inside this context is rendered in the sidebar.
	with st.sidebar:
	    st.title("App parameters")
	    model_url = st.selectbox("Choose your model", list_of_models)
	    st.divider()
	    imported_df = st.file_uploader(
	        "Upload your df in format csv or xlsx",
	        type=["csv", 'xlsx'],
	    )

	## --------------
	## MAINBAR PARAMS
	## --------------
	tab1, tab2 = st.tabs(["LLM", "DB_Extraction"])

	with tab1:
	    st.title("LLM project")

	    # Echo the model currently picked in the sidebar.
	    if model_url:
	        st.write(f"Selected model: {model_url}")

	    # Free-text input for the single-sentence mode, plus the button that
	    # triggers a prediction (it is True only on the rerun caused by a click).
	    input_sentence = st.text_area("Enter a sentence for sentiment analysis")
	    button_launch = st.button('Launch Prediction')

	# Single-sentence mode: classify the sentence typed by the user.
	#
	# Streamlit re-executes this script on every interaction, so the prediction
	# only runs on the rerun triggered by a click on "Launch Prediction".
	if model_url and input_sentence:
	    if model_url == "another model from hub":
	        # Placeholder entry selected: ask for the real model identifier.
	        # The module-level `model_url` is rebound on purpose so that code
	        # further down the page sees the custom value as well.
	        model_url = tab1.text_input("Please provide the model URL here:").strip()

	    if button_launch and not model_url:
	        # An empty custom URL used to be handed straight to load_model().
	        tab1.warning("Please provide a model URL before launching the prediction.")
	    elif button_launch:
	        # One clock read, so the date and the timestamp can never disagree
	        # (two separate calls could straddle midnight).
	        launched_at = datetime.datetime.now()

	        # Hostname resolution fails on some hosts/containers; that must not
	        # take the whole page down just because of activity logging.
	        # NOTE(review): this is the address of the machine running the app,
	        # not of the visitor's browser - confirm that is what should be logged.
	        try:
	            host_ip = socket.gethostbyname(socket.gethostname())
	        except OSError:
	            host_ip = "unknown"

	        # Add activity to the database.
	        # Positional arguments are presumably (session, user, date, time, ip,
	        # activity type) - TODO confirm against src.functions_db.
	        activity.add_activity(
	            session,
	            "admin",
	            launched_at.date(),
	            launched_at,
	            host_ip,
	            "LLM project",
	        )

	        sentiment_analyzer = load_model(model_url)
	        # A falsy analyzer is skipped silently, as before.
	        result = sentiment_analyzer(input_sentence) if sentiment_analyzer else None

	        if result:
	            # Only the first prediction is used.
	            sentiment = result[0]['label']
	            score = result[0]['score']
	            tab1.write(f"Sentiment: {sentiment} with a confidence score of {score:.2f}")

	            # Human-readable verdict; matching is case-insensitive so labels
	            # such as "positive" are recognised too. Unknown labels print nothing.
	            normalized_label = str(sentiment).upper()
	            if normalized_label in ("LABEL_1", "POSITIVE"):
	                tab1.write("Positive")
	            elif normalized_label in ("LABEL_0", "NEGATIVE"):
	                tab1.write("Negative")

	            # Option to download results.
	            results_df = pd.DataFrame(
	                [{'Sentence': input_sentence, 'Sentiment': sentiment, 'Score': score}]
	            )
	            csv = results_df.to_csv(index=False).encode('utf-8')
	            # Explicit key: the batch section renders a download button with
	            # the same label/file name, and two widgets with identical
	            # parameters in one run collide.
	            tab1.download_button(
	                label="Download results as CSV",
	                data=csv,
	                file_name='sentiment_analysis_results.csv',
	                mime='text/csv',
	                key="download_single_sentence_result",
	            )

	            # Add LLM history to the database (the raw model label is stored).
	            llm.add_llm(
	                session,
	                "admin",
	                input_sentence,
	                sentiment,
	                model_url,
	                "LLM project",
	            )

	# Batch mode: classify one column of an uploaded CSV/XLSX file and compare
	# the predictions with a column holding the expected labels.
	#
	# The CSV and XLSX paths used to be two copies of the same code that had
	# already drifted apart; they now share the helpers below.


	def _read_uploaded_table(uploaded_file):
	    """Load the uploaded file into a DataFrame.

	    The format is chosen from the file name, case-insensitively (so
	    "DATA.CSV" is accepted). Returns None when the name ends with neither
	    "csv" nor "xlsx".
	    """
	    lowered_name = uploaded_file.name.lower()
	    if lowered_name.endswith("csv"):
	        try:
	            # UTF-8 first: the previous unconditional 'unicode_escape'
	            # decoding garbles non-ASCII UTF-8 text.
	            return pd.read_csv(uploaded_file)
	        except UnicodeDecodeError:
	            # Not valid UTF-8: rewind and fall back to the original decoding.
	            uploaded_file.seek(0)
	            return pd.read_csv(uploaded_file, encoding='unicode_escape')
	    if lowered_name.endswith("xlsx"):
	        return pd.read_excel(uploaded_file)
	    return None


	def _run_batch_prediction(table, text_column, truth_column, analyzer):
	    """Classify `text_column` row by row and render the outcome in tab1.

	    Adds 'Sentiment', 'Score' and 'Sentiment_Label' columns to `table`,
	    shows the table, the accuracy against `truth_column` and a CSV
	    download button.
	    """
	    # Missing cells become "" and everything is passed as text.
	    # Presumably the analyzer is a text pipeline that rejects NaN/numeric
	    # input - TODO confirm against load_model.
	    texts = table[text_column].fillna("").astype(str).tolist()

	    progress_bar = tab1.progress(0)
	    predictions = []
	    for done, text in enumerate(texts, start=1):
	        # Only the first prediction of each call is kept.
	        predictions.append(analyzer(text)[0])
	        progress_bar.progress(done / len(texts))

	    table['Sentiment'] = [prediction['label'] for prediction in predictions]
	    table['Score'] = [prediction['score'] for prediction in predictions]

	    # Any label that is not recognised as positive is reported as 'Negative'
	    # (unchanged behaviour); matching is now case-insensitive.
	    table['Sentiment_Label'] = table['Sentiment'].apply(
	        lambda raw: 'Positive' if str(raw).upper() in ('LABEL_1', 'POSITIVE') else 'Negative'
	    )

	    tab1.write(table)

	    # Accuracy of the predicted labels against the user-selected column.
	    accuracy = compare_label_results(table, 'Sentiment_Label', truth_column)
	    tab1.write(f"Accuracy: {accuracy:.2%}")

	    # Option to download results. Explicit key: the single-sentence section
	    # renders a download button with the same label and file name.
	    tab1.download_button(
	        label="Download results as CSV",
	        data=table.to_csv(index=False).encode('utf-8'),
	        file_name='sentiment_analysis_results.csv',
	        mime='text/csv',
	        key="download_batch_results",
	    )


	if model_url and imported_df:
	    df = _read_uploaded_table(imported_df)
	    if df is None:
	        tab1.write("Please upload a file in csv or xlsx format")
	    else:
	        # Limit to 500 rows while testing; copy so that adding columns does
	        # not write into a slice of the full frame.
	        df = df.iloc[:500].copy()

	        if model_url == "another model from hub":
	            # The placeholder is only replaced by the single-sentence section
	            # when a sentence was typed; without this, the placeholder text
	            # itself was passed to load_model().
	            model_url = tab1.text_input(
	                "Please provide the model URL here:",
	                key="custom_model_url_batch",
	            ).strip()

	        name_of_the_column = tab1.selectbox("Select the column for sentiment analysis", df.columns)
	        column_with_real_results = tab1.selectbox("Select the column with real results", df.columns)

	        # `is not None` rather than truthiness so a column named 0 or "" works.
	        columns_chosen = name_of_the_column is not None and column_with_real_results is not None

	        if button_launch and columns_chosen:
	            if not model_url:
	                tab1.warning("Please provide a model URL before launching the prediction.")
	            else:
	                # Load the model only once a prediction is actually requested
	                # instead of on every rerun of the script.
	                sentiment_analyzer = load_model(model_url)
	                if sentiment_analyzer:
	                    _run_batch_prediction(
	                        df,
	                        name_of_the_column,
	                        column_with_real_results,
	                        sentiment_analyzer,
	                    )


	################################################################################
	# TAB 2 DB
	################################################################################

	# Attributes of each LLM history record shown in the table, in display order.
	llm_history_columns = [
	    'id',
	    'input_sentence',
	    'output_label',
	    'model_url',
	    'type_of_activity',
	    'user_id',
	]

	# Fetch every stored LLM history record.
	llm_histories = session.query(LLM).all()

	# Passing `columns` explicitly keeps the table headers even when there are
	# no records yet; without it an empty result produced a frame with no columns.
	llm_histories_df = pd.DataFrame(
	    [
	        {column: getattr(llm_history, column) for column in llm_history_columns}
	        for llm_history in llm_histories
	    ],
	    columns=llm_history_columns,
	)

	tab2.write("LLM history:")
	# The editor's return value is discarded, so edits made in the UI are not
	# written back to the database.
	tab2.data_editor(llm_histories_df)