import streamlit as st
import string
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import stylecloud
from stop_words import get_stop_words
from PIL import Image
from pysentimiento import create_analyzer
import altair as alt
from sentence_transformers import SentenceTransformer
#CLEAN SENTENCES
def clean_string(text):
    if text == "nan":
        return ""
    # Strip punctuation character by character, then lowercase
    text = ''.join([char for char in text if char not in string.punctuation])
    text = text.lower()
    return text
#LOAD MODELS
roberta = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
model = AutoModelForSequenceClassification.from_pretrained(roberta)
tokenizer = AutoTokenizer.from_pretrained(roberta)
modelS = SentenceTransformer('hiiamsid/sentence_similarity_spanish_es')
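# The sentiment model is a 3-class classifier; its output order is assumed (from the
# cardiffnlp model card) to be index 0 = negative, 1 = neutral, 2 = positive, which the
# chart-building code below relies on. modelS produces Spanish sentence embeddings that
# are compared against reference texts with cosine similarity.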
#READ AND CLASSIFY THE ANSWERS
data = pd.read_csv(r'data.csv')
person1_objetives, person1_difficulties, person1_utilities, person2_objetives, person2_difficulties, person2_utilities = [], [], [], [], [], []
person1_all_text, person2_all_text = [], []
for index, row in data.iterrows():
    if row["DNI"] == "72838728M":
        person1_objetives.append(str(row["objeto_si"]))
        person1_difficulties.append(str(row["objeto_no"]) + " " + str(row["que_necesito"]))
        person1_utilities.append(str(row["para_que"]) + " " + str(row["como_uso"]))
        person1_all_text.append(clean_string(str(row["objeto_si"])) + ". " + clean_string(str(row["objeto_no"])) + ". " + clean_string(str(row["que_necesito"])) + ". " + clean_string(str(row["para_que"])) + ". " + clean_string(str(row["como_uso"])))
    elif row["DNI"] == "73233278J":
        person2_objetives.append(str(row["objeto_si"]))
        person2_difficulties.append(str(row["objeto_no"]) + " " + str(row["que_necesito"]))
        person2_utilities.append(str(row["para_que"]) + " " + str(row["como_uso"]))
        person2_all_text.append(str(row["objeto_si"]) + ". " + str(row["objeto_no"]) + ". " + str(row["que_necesito"]) + ". " + str(row["para_que"]) + ". " + str(row["como_uso"]))
#WORDCLOUDS
person1_wordcloud = " ".join(person1_objetives)
person2_wordcloud = " ".join(person2_objetives)
irrelevant_words = get_stop_words("spanish")
custom_irrelevant_words = irrelevant_words[:]
custom_irrelevant_words.extend(["hacer","realizar","aprender","aprendido"])
stylecloud.gen_stylecloud(text=person1_wordcloud, custom_stopwords=custom_irrelevant_words, icon_name="fas fa-circle", output_name="person1.png")
person1 = Image.open("person1.png")
stylecloud.gen_stylecloud(text=person2_wordcloud, custom_stopwords=custom_irrelevant_words, icon_name="fas fa-circle", output_name="person2.png")
person2 = Image.open("person2.png")
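# Each word cloud is rendered to a PNG shaped by the Font Awesome circle icon and
# reloaded with PIL so Streamlit can display it further down.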
#LEER OBJETIVOS COMPETENCIAS Y AUTONOMIA
f1 = open('daw_obj.txt','r', encoding="utf8")
objetivos1 = f1.read()
f2 = open('mark_obj.txt','r', encoding="utf8")
objetivos2 = f2.read()
f3 = open('autonomia.txt','r', encoding="utf8")
autonomia = f3.read()
f4 = open('participacion.txt','r', encoding="utf8")
participacion = f4.read()
f5 = open('compromiso.txt','r', encoding="utf8")
compromiso = f5.read()
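# Each .txt file holds a reference description (course objectives for DAW and MARK, plus
# the autonomy / participation / commitment descriptors) that the weekly answers are
# compared against with cosine similarity below.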
#CHATGPT CALL (disabled)
# session_token = "eyJhbGciOiJkaXIiLCJlbmMiOiJBMjU2R0NNIn0..1K4xDB69QDCvq957.8XFvLu5dFg23jOdjkDyT-B_LE826oFzkcnRUJmx-poHDheX45HTf0m3cKKSgRp2B6QXxMR01ELGOHb0ZdeS5TGXC_8qyl9xTX1MvvFIkxLDVEc884xroPBFJdne2d-xoQrriAkDWZQFhE87tJSLlID-BZBKgUS_leaCbxJL87_KTxBKU4F_DNI-P_RMUL8ErLNZEFVs_CISJMMQLSpPA1GDAtecSPll55_FGuoNI3iYEYT-Rro3pFBOXdJhiEgmoKvWfVoItdN8NemVtXxXHFGl3XlZUgh5F7b6LT6id2MO5y5uZv_04lkw6mSl-Bh7ziBmXw2qtQY9vGX5s2p4SKI4CduaEMZtLslZlPM0p23fnGoIt2BYC7ijSw2nwqOLnl_axGJK0Sw1Jpmy5moNRs8yQcusQ2qMPl8g3r9WIosfuaoIz8qRiiP2nSzYUwfGI3-fRzsnXC3XtN-sfeywH3TFIMWo9MvKa0mU3SWfQZ8H2PZvXAUZ7-_j8Eopz6fYxpwAImJD1gIrG2JGTKyU4Mffh8_IBAo7yt9W8T6NLdXyCT9t5536i751Ga9CW6ahTBb7f3RWPwcsNnIB7VMwxwy996uwBquHiGWua-gepZw2PsO7yEQB3xZKCdafur-MegcxcWep_qbpmGo6-8AEGKLgLKD8Ed6MS4rnrcPLKfcHAvboO7SNmBuB4-lBUltaWTPDEfiXK25OXbwpQ7qychURy9OLd4fPuYtP3gwGOVi6k1Mni2rI4oa_XAhlJTvH6MMaYZxQHVXSTNOcgLXx9cz1JTqnkmk4mRHnvlj8uoyVszXQi2EFq1ozz4bxFCiir4wYzBdCwC2bp5S--i7E89xL3RQ8DvCMKO3q3Ro3nPU8hD4QItoCHgHaxpexrtiq_4feHmVl9A4cAFEkTGyjC4ZuNT0Ety0fsM0JtytFNiTnBHGqB7ZNOSLMyjNqEs7IpnBxRlzCB5afLDG5cP3ipOIMILSVyEv2je8yWSEx2E5ogSL-inO2p-EcThnT5KxySZMmCDT25qQmLI0Gk8afm8M--c1PUd3Z2ZXx50ouqvftZyEjlTocQfoAITVIUc6cCXhEuCsIL4RuyVvz5Ps73WuM0K9MmESn8iQRddz_03MxyHHsDdGoiT97TaGz-ivOjoO2eRdzOwU5k0JbJH5xZOWSvpVg2wnYWKLv2_gOjEMrTYxx_4kSBxpeYKyiIhFKug6nuRUmRwfCksPiWjNEVLjPo_x0_K_thH0jii76WQcq-224VibB3hAMTMxdr7aLVwqPJNVHOVI0Log4vJcledthlzilRGiw4kNBOYqo95DyYjXZJ5haKnUdsQrb9pwCBmXeK0PFxssZ904Wwpd22tH9w5ZvJZCz5p39tniakd9UeOHaPQmY3N26jzXfV4h1w3lkdzjrBEEMwxuUFjaaolQE6GKpswRiDdHZm4mbGOHkQYeMYebEVhy17r9drLvTc4QrlwNuP8HA4vfgQTUvqo64QM6RvIvqHftawkVazxNYUEhTmWsuUemZXI-GHLTbDrfD9BafGI9yk3hp8bG2u8R9ZvPZAA4R1wkBCQiY1BumfaGN49ETuQRJF0HTf4mRVJ9b6BSbgEO7tzAN2adQ0T22ePh2FkUqmOmDHHp_QwPFja5NCfLebcLyBLqxDkdcLANfpS0g-BkraV0ZpuU4_SZrb3Qu1Et8tnF5coVJwZWm6A-1PHhClHpf3KEz5F5MVfkAPCAiSMPqD8UaTCGWbMY7CHUav9IyuX7-uoAOzz9ZyoUQuDC9-OGHQb8x15XzsNWffCRpJwjWMBTRBB_rw5HwXuWBBWk-GWzXZtSHhWRXhouRhoUjKloqhgUfeNmL1gg4lusel5NQF6phwVek1V3oJknO1XezLjyVeio69_OOzkqosSkHs2ZskisqnFfL--LG0m5TO9-o88OYESeZIO4tQuUSN8HYxyqtaWo0iiJHxMDumo8fJypiR5z5L13aGrNA8ZPm2S_tg-Mmz34wqgLihFhgDRMMqu87dYXrF78oz3uKbYmhYsCk-jY519yUgwOfiC3CrfG6LqTcbWCVbmh-yogcstCV-nLfTeosGIZUHNI_H5didKNzh6hzUjoYHUhpiCDFD7mM89lm5EoeEa3S1ZNoAhO-4QXYIA5AajgxtES9SX7hPfP94zh9rm-l__9eJkWg7KOblWBYTq1eT71FxtN9fEIJZQ7pa9h1UEvgJL8aP2EJ3yWY5KFZ6GGcKpFf9x_omFxDxF8AdQ5n9uSKNcK0I8wz43FF8HRbWYWumUV3n7GpEZS7hGgB7ynRkYZ-X6ZtQj9VADJtUojMcAwYM7KdCqO1FBBKOd8McBirXkjzYfS3-LEjeYCdu67ogQYF3toxzR3Xhc_rcfcHnHh601I4opVb9mZBiws_d31146_CqJ-r5NF1PDnm9JfOURLua2ySMDd2B13uzP3L0BFveLR2Dq62BO2grKLEbn97__2HtESrVp-ozDqc9yLHSxhPjmWUAy9xCPDBlQNt8KrA.FxxuIKliXcdC8v7F1lX9sQ"
# api = ChatGPT(session_token)
# text = api.send_message(f'objetivos y competencias en un grado superior de desarrolllo de aplicaciones web')
#SPANISH STOP WORDS
stopwords = get_stop_words('spanish')
#COSINE SIMILARITY
def cosine_sim_vectors(vec1, vec2):
    vec1 = vec1.reshape(1, -1)
    vec2 = vec2.reshape(1, -1)
    return cosine_similarity(vec1, vec2)[0][0]
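# Minimal usage sketch (assumed shapes): both arguments are 1-D embedding vectors, e.g.
# cosine_sim_vectors(modelS.encode("frase uno"), modelS.encode("frase dos")) returns a
# float in [-1, 1]. The loops below call sklearn's cosine_similarity directly on 2-D
# inputs instead of going through this helper.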
#TEST WITH PERSON 1'S ANSWERS
#objectives
person1_objetives.append(objetivos1)
cleaned2 = list(map(clean_string, person1_objetives))
embeddings = modelS.encode(cleaned2)
aut = []
for idx, answer in enumerate(cleaned2[:-1]):
    aut.append(cosine_similarity([embeddings[-1]], [embeddings[idx]]))
similarities1 = []
for answer in aut:
    similarities1.append(float(answer[0][0]))
index = list(range(len(similarities1)))
chart_objetives1 = pd.DataFrame({
    'x': index,
    'y': similarities1
})
o1 = alt.Chart(chart_objetives1).mark_area().encode(
    x=alt.X('x', title="Semanas"),
    y=alt.Y('y', title=""),
    color=alt.value("#3399ff")
)
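# The reference objectives text is appended as the last entry, every answer and the
# reference are embedded together, and the area chart plots the per-week cosine
# similarity of each answer to that reference.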
#difficulties
difficulties1 = []
cleanedD1 = list(map(clean_string, person1_difficulties))
for idx, answer in enumerate(cleanedD1):
    encoded_text1 = tokenizer(answer, return_tensors='pt')
    output1 = model(**encoded_text1)
    scores1 = output1[0][0].detach().numpy()
    scores1 = softmax(scores1)
    difficulties1.append(scores1)
color_scale = alt.Scale(
    domain=[
        "positivo",
        "neutral",
        "negativo",
    ],
    range=["#33cc33", "#6699ff", "#ff0000"]
)
y_axis = alt.Axis(
    title='Semanas',
    offset=5,
    ticks=False,
    minExtent=60,
    domain=False
)
source1 = []
for idx, d in enumerate(difficulties1):
    start, end = -d[1] / 2, d[1] / 2
    source1.append(
        {
            "question": idx + 1,
            "type": "neutral",
            "value": d[1],
            "start": start,
            "end": end
        }
    )
    source1.append(
        {
            "question": idx + 1,
            "type": "negativo",
            "value": d[0],
            "start": start,
            "end": start - d[0]
        }
    )
    source1.append(
        {
            "question": idx + 1,
            "type": "positivo",
            "value": d[2],
            "start": end,
            "end": end + d[2]
        }
    )
source1 = pd.DataFrame(source1)
d1 = alt.Chart(source1).mark_bar().encode(
    x=alt.X('start:Q', title=""),
    x2='end:Q',
    y=alt.Y('question:N', axis=y_axis),
    color=alt.Color(
        'type:N',
        legend=alt.Legend(title='Sentimiento:'),
        scale=color_scale,
    )
)
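# Diverging stacked bar: for each week the neutral probability is centred on zero
# (from -neutral/2 to +neutral/2), the negative probability extends to the left of that
# band and the positive probability to the right, so each row spans the full probability
# mass returned by softmax.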
#usefulness
utilities1 = []
cleanedU1 = list(map(clean_string, person1_utilities))
for idx, answer in enumerate(cleanedU1):
    encoded_text1 = tokenizer(answer, return_tensors='pt')
    output1 = model(**encoded_text1)
    scores1 = output1[0][0].detach().numpy()
    scores1 = softmax(scores1)
    utilities1.append(scores1)
source2 = []
for idx, d in enumerate(utilities1):
    start, end = -d[1] / 2, d[1] / 2
    source2.append(
        {
            "question": idx + 1,
            "type": "neutral",
            "value": d[1],
            "start": start,
            "end": end
        }
    )
    source2.append(
        {
            "question": idx + 1,
            "type": "negativo",
            "value": d[0],
            "start": start,
            "end": start - d[0]
        }
    )
    source2.append(
        {
            "question": idx + 1,
            "type": "positivo",
            "value": d[2],
            "start": end,
            "end": end + d[2]
        }
    )
source2 = pd.DataFrame(source2)
u1 = alt.Chart(source2).mark_bar().encode(
    x=alt.X('start:Q', title=""),
    x2='end:Q',
    y=alt.Y('question:N', axis=y_axis),
    color=alt.Color(
        'type:N',
        legend=alt.Legend(title='Sentimiento:'),
        scale=color_scale,
    )
)
#emotion
emotion_analyzer = create_analyzer(task="emotion", lang="es")
emotions = emotion_analyzer.predict(person1_all_text)
emotions_data = []
for emotion in emotions:
    emotion = emotion.probas
    emotions_data.append([emotion["joy"], emotion["sadness"], emotion["anger"], emotion["surprise"], emotion["disgust"], emotion["fear"], emotion["others"]])
chart_data = pd.DataFrame(
    emotions_data,
    columns=["1-alegria", "2-tristeza", "3-enfado", "4-sorpresa", "5-disgusto", "6-miedo", "7-otros"]
)
data1 = pd.melt(chart_data.reset_index(), id_vars=["index"])
chart = (
    alt.Chart(data1)
    .mark_bar()
    .encode(
        x=alt.X("value", type="quantitative", title=""),
        y=alt.Y("index", type="nominal", title="", axis=y_axis),
        color=alt.Color("variable", type="nominal", title="", legend=alt.Legend(title='Emociones:')),
        order=alt.Order("variable", sort="ascending"),
    )
)
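# pysentimiento's emotion analyzer accepts a list of texts and returns one result per
# text; .probas is a dict of per-emotion probabilities, which are melted into long form
# so Altair can draw one stacked bar per week.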
#autonomy
person1_all_text.append(autonomia)
embeddings = modelS.encode(person1_all_text)
aut = []
for idx, answer in enumerate(person1_all_text[:-1]):
    aut.append(cosine_similarity([embeddings[-1]], [embeddings[idx]]))
aut_similarities1 = []
for answer in aut:
    aut_similarities1.append(float(answer[0][0]))
index = list(range(len(aut_similarities1)))
chart_autonomia1 = pd.DataFrame({
    'x': index,
    'y': aut_similarities1
})
a1 = alt.Chart(chart_autonomia1).mark_area().encode(
    x=alt.X('x', title="Semanas"),
    y=alt.Y('y', title=""),
    color=alt.value("#660033")
)
person1_all_text.pop()
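# Pattern used for the autonomy / participation / commitment charts: append the
# reference text, encode everything, compare each weekly answer to the reference, then
# pop the reference off again so person1_all_text is left intact for the next metric.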
#participation
person1_all_text.append(participacion)
cleaned1 = list(map(clean_string, person1_all_text))
embeddings = modelS.encode(cleaned1)
par = []
for idx, answer in enumerate(cleaned1[:-1]):
    par.append(cosine_similarity([embeddings[-1]], [embeddings[idx]]))
par_similarities1 = []
for answer in par:
    par_similarities1.append(float(answer[0][0]))
chart_participacion1 = pd.DataFrame({
    'x': index,
    'y': par_similarities1
})
p1 = alt.Chart(chart_participacion1).mark_area().encode(
    x=alt.X('x', title="Semanas"),
    y=alt.Y('y', title=""),
    color=alt.value("#33cc33")
)
person1_all_text.pop()
#commitment
person1_all_text.append(compromiso)
cleaned1 = list(map(clean_string, person1_all_text))
embeddings = modelS.encode(cleaned1)
com = []
for idx, answer in enumerate(cleaned1[:-1]):
    com.append(cosine_similarity([embeddings[-1]], [embeddings[idx]]))
com_similarities = []
for answer in com:
    com_similarities.append(float(answer[0][0]))
chart_compromiso1 = pd.DataFrame({
    'x': index,
    'y': com_similarities
})
c1 = alt.Chart(chart_compromiso1).mark_area().encode(
    x=alt.X('x', title="Semanas"),
    y=alt.Y('y', title=""),
    color=alt.value("#ff6600")
)
person1_all_text.pop()
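# The same pipeline (objectives, difficulties, usefulness, emotions, autonomy,
# participation, commitment) is repeated below for the second person's answers.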
#PERSON 2
#objectives
person2_objetives.append(objetivos2)
cleaned2 = list(map(clean_string, person2_objetives))
embeddings = modelS.encode(cleaned2)
aut = []
for idx, answer in enumerate(cleaned2[:-1]):
    aut.append(cosine_similarity([embeddings[-1]], [embeddings[idx]]))
similarities2 = []
for answer in aut:
    similarities2.append(float(answer[0][0]))
index = list(range(len(similarities2)))
chart_objetives2 = pd.DataFrame({
    'x': index,
    'y': similarities2
})
o2 = alt.Chart(chart_objetives2).mark_area().encode(
    x=alt.X('x', title="Semanas"),
    y=alt.Y('y', title=""),
    color=alt.value("#3399ff")
)
#difficulties
difficulties2 = []
cleanedD2 = list(map(clean_string, person2_difficulties))
for idx, answer in enumerate(cleanedD2):
    encoded_text2 = tokenizer(answer, return_tensors='pt')
    output2 = model(**encoded_text2)
    scores2 = output2[0][0].detach().numpy()
    scores2 = softmax(scores2)
    difficulties2.append(scores2)
source3 = []
for idx, d in enumerate(difficulties2):
    start, end = -d[1] / 2, d[1] / 2
    source3.append(
        {
            "question": idx + 1,
            "type": "neutral",
            "value": d[1],
            "start": start,
            "end": end
        }
    )
    source3.append(
        {
            "question": idx + 1,
            "type": "negativo",
            "value": d[0],
            "start": start,
            "end": start - d[0]
        }
    )
    source3.append(
        {
            "question": idx + 1,
            "type": "positivo",
            "value": d[2],
            "start": end,
            "end": end + d[2]
        }
    )
source3 = pd.DataFrame(source3)
d2 = alt.Chart(source3).mark_bar().encode(
    x=alt.X('start:Q', title=""),
    x2='end:Q',
    y=alt.Y('question:N', axis=y_axis),
    color=alt.Color(
        'type:N',
        legend=alt.Legend(title='Sentimiento:'),
        scale=color_scale,
    )
)
#usefulness
utilities2 = []
cleanedU2 = list(map(clean_string, person2_utilities))
for idx, answer in enumerate(cleanedU2):
    encoded_text2 = tokenizer(answer, return_tensors='pt')
    output2 = model(**encoded_text2)
    scores2 = output2[0][0].detach().numpy()
    scores2 = softmax(scores2)
    utilities2.append(scores2)
source4 = []
for idx, d in enumerate(utilities2):
    start, end = -d[1] / 2, d[1] / 2
    source4.append(
        {
            "question": idx + 1,
            "type": "neutral",
            "value": d[1],
            "start": start,
            "end": end
        }
    )
    source4.append(
        {
            "question": idx + 1,
            "type": "negativo",
            "value": d[0],
            "start": start,
            "end": start - d[0]
        }
    )
    source4.append(
        {
            "question": idx + 1,
            "type": "positivo",
            "value": d[2],
            "start": end,
            "end": end + d[2]
        }
    )
source4 = pd.DataFrame(source4)
u2 = alt.Chart(source4).mark_bar().encode(
    x=alt.X('start:Q', title=""),
    x2='end:Q',
    y=alt.Y('question:N', axis=y_axis),
    color=alt.Color(
        'type:N',
        legend=alt.Legend(title='Sentimiento:'),
        scale=color_scale,
    )
)
#emotion
emotions2 = emotion_analyzer.predict(person2_all_text)
emotions_data2 = []
for emotion in emotions2:
    emotion = emotion.probas
    emotions_data2.append([emotion["joy"], emotion["sadness"], emotion["anger"], emotion["surprise"], emotion["disgust"], emotion["fear"], emotion["others"]])
chart_data2 = pd.DataFrame(
    emotions_data2,
    columns=["1-alegria", "2-tristeza", "3-enfado", "4-sorpresa", "5-disgusto", "6-miedo", "7-otros"]
)
data2 = pd.melt(chart_data2.reset_index(), id_vars=["index"])
chart2 = (
    alt.Chart(data2)
    .mark_bar()
    .encode(
        x=alt.X("value", type="quantitative", title=""),
        y=alt.Y("index", type="nominal", title="", axis=y_axis),
        color=alt.Color("variable", type="nominal", title="", legend=alt.Legend(title='Emociones:')),
        order=alt.Order("variable", sort="ascending"),
    )
)
#autonomy
person2_all_text.append(autonomia)
embeddings2 = modelS.encode(person2_all_text)
aut2 = []
for idx, answer in enumerate(person2_all_text[:-1]):
    aut2.append(cosine_similarity([embeddings2[-1]], [embeddings2[idx]]))
aut_similarities2 = []
for answer in aut2:
    aut_similarities2.append(float(answer[0][0]))
index = list(range(len(aut_similarities2)))
chart_autonomia2 = pd.DataFrame({
    'x': index,
    'y': aut_similarities2
})
a2 = alt.Chart(chart_autonomia2).mark_area().encode(
    x=alt.X('x', title="Semanas"),
    y=alt.Y('y', title=""),
    color=alt.value("#660033")
)
person2_all_text.pop()
#participation
person2_all_text.append(participacion)
cleaned1 = list(map(clean_string, person2_all_text))
embeddings = modelS.encode(cleaned1)
par = []
for idx, answer in enumerate(cleaned1[:-1]):
    par.append(cosine_similarity([embeddings[-1]], [embeddings[idx]]))
par_similarities2 = []
for answer in par:
    par_similarities2.append(float(answer[0][0]))
chart_participacion2 = pd.DataFrame({
    'x': index,
    'y': par_similarities2
})
p2 = alt.Chart(chart_participacion2).mark_area().encode(
    x=alt.X('x', title="Semanas"),
    y=alt.Y('y', title=""),
    color=alt.value("#33cc33")
)
person2_all_text.pop()
#commitment
person2_all_text.append(compromiso)
cleaned1 = list(map(clean_string, person2_all_text))
embeddings = modelS.encode(cleaned1)
com = []
for idx, answer in enumerate(cleaned1[:-1]):
    com.append(cosine_similarity([embeddings[-1]], [embeddings[idx]]))
com_similarities2 = []
for answer in com:
    com_similarities2.append(float(answer[0][0]))
chart_compromiso2 = pd.DataFrame({
    'x': index,
    'y': com_similarities2
})
c2 = alt.Chart(chart_compromiso2).mark_area().encode(
    x=alt.X('x', title="Semanas"),
    y=alt.Y('y', title=""),
    color=alt.value("#ff6600")
)
person2_all_text.pop()
#charts
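# Streamlit layout: st.columns(2, gap="large") splits the first row into a chart column
# and a word-cloud column; every remaining chart gets its own st.container().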
st.header("Persona 1 (DAW)")
with st.container():
    col1, col2 = st.columns(2, gap="large")
    with col1:
        st.text("Analisis de objetivos:")
        st.altair_chart(o1, use_container_width=True)
    with col2:
        st.text("Word Cloud de objetivos:")
        st.image(person1)
with st.container():
    st.text("Sentimiento de dificultad:")
    st.altair_chart(d1, use_container_width=True)
with st.container():
    st.text("Sentimiento de utilidad:")
    st.altair_chart(u1, use_container_width=True)
with st.container():
    st.text("Analisis de emociones:")
    st.altair_chart(chart, use_container_width=True)
with st.container():
    st.text("Analisis de autonomia:")
    st.altair_chart(a1, use_container_width=True)
with st.container():
    st.text("Analisis de participacion:")
    st.altair_chart(p1, use_container_width=True)
with st.container():
    st.text("Analisis de compromiso:")
    st.altair_chart(c1, use_container_width=True)
st.header("Persona 2 (MARK)")
with st.container():
    col1, col2 = st.columns(2, gap="large")
    with col1:
        st.text("Analisis de objetivos:")
        st.altair_chart(o2, use_container_width=True)
    with col2:
        st.text("Word Cloud de objetivos:")
        st.image(person2)
with st.container():
    st.text("Sentimiento de dificultad:")
    st.altair_chart(d2, use_container_width=True)
with st.container():
    st.text("Sentimiento de utilidad:")
    st.altair_chart(u2, use_container_width=True)
with st.container():
    st.text("Analisis de emociones:")
    st.altair_chart(chart2, use_container_width=True)
with st.container():
    st.text("Analisis de autonomia:")
    st.altair_chart(a2, use_container_width=True)
with st.container():
    st.text("Analisis de participacion:")
    st.altair_chart(p2, use_container_width=True)
with st.container():
    st.text("Analisis de compromiso:")
    st.altair_chart(c2, use_container_width=True)