import streamlit as st import string from sklearn.feature_extraction.text import CountVectorizer from nltk.corpus import stopwords from sklearn.metrics.pairwise import cosine_similarity import pandas as pd import numpy as np from transformers import AutoTokenizer, AutoModelForSequenceClassification from scipy.special import softmax import stylecloud from stop_words import get_stop_words from PIL import Image from pysentimiento import create_analyzer import altair as alt from sentence_transformers import SentenceTransformer #LIMPIAR FRASES def clean_string(text): if text == "nan": return "" text = ''.join([word for word in text if word not in string.punctuation]) text = text.lower() return text #CARGAR MODELO roberta = f"cardiffnlp/twitter-xlm-roberta-base-sentiment" model = AutoModelForSequenceClassification.from_pretrained(roberta) tokenizer = AutoTokenizer.from_pretrained(roberta) modelS = SentenceTransformer('hiiamsid/sentence_similarity_spanish_es') #LEER Y CLASIFICAR LAS RESPUESTAS data = pd.read_csv(r'data.csv') person1_objetives, person1_difficulties, person1_utilities, person2_objetives, person2_difficulties, person2_utilities = [], [], [], [], [], [] person1_all_text, person2_all_text = [], [] for index, row in data.iterrows(): if row["DNI"] == "72838728M": person1_objetives.append(str((row["objeto_si"]))) person1_difficulties.append(str((row["objeto_no"])) + " " + str((row["que_necesito"]))) person1_utilities.append(str((row["para_que"])) + " " + str((row["como_uso"]))) person1_all_text.append(clean_string(str((row["objeto_si"]))) + ". " + clean_string(str((row["objeto_no"]))) + ". " + clean_string(str((row["que_necesito"]))) + ". " + clean_string(str((row["para_que"]))) + ". " + clean_string(str((row["como_uso"])))) elif row["DNI"] == "73233278J": person2_objetives.append(str((row["objeto_si"]))) person2_difficulties.append(str((row["objeto_no"])) + " " + str((row["que_necesito"]))) person2_utilities.append(str((row["para_que"])) + " " + str((row["como_uso"]))) person2_all_text.append(str((row["objeto_si"])) + ". " + str((row["objeto_no"])) + ". " + str((row["que_necesito"])) + ". " + str((row["para_que"])) + ". " + str((row["como_uso"]))) #WORDCLOUDS person1_wordcloud = " ".join(person1_objetives) person2_wordcloud = " ".join(person2_objetives) irrelevant_words = get_stop_words("spanish") custom_irrelevant_words = irrelevant_words[:] custom_irrelevant_words.extend(["hacer","realizar","aprender","aprendido"]) stylecloud.gen_stylecloud(text=person1_wordcloud, custom_stopwords=custom_irrelevant_words, icon_name="fas fa-circle", output_name="person1.png") person1 = Image.open("person1.png") stylecloud.gen_stylecloud(text=person2_wordcloud, custom_stopwords=custom_irrelevant_words, icon_name="fas fa-circle", output_name="person2.png") person2 = Image.open("person2.png") #LEER OBJETIVOS COMPETENCIAS Y AUTONOMIA f1 = open('daw_obj.txt','r', encoding="utf8") objetivos1 = f1.read() f2 = open('mark_obj.txt','r', encoding="utf8") objetivos2 = f2.read() f3 = open('autonomia.txt','r', encoding="utf8") autonomia = f3.read() f4 = open('participacion.txt','r', encoding="utf8") participacion = f4.read() f5 = open('compromiso.txt','r', encoding="utf8") compromiso = f5.read() #LLAMADA AL CHATGPT # session_token = "eyJhbGciOiJkaXIiLCJlbmMiOiJBMjU2R0NNIn0..1K4xDB69QDCvq957.8XFvLu5dFg23jOdjkDyT-B_LE826oFzkcnRUJmx-poHDheX45HTf0m3cKKSgRp2B6QXxMR01ELGOHb0ZdeS5TGXC_8qyl9xTX1MvvFIkxLDVEc884xroPBFJdne2d-xoQrriAkDWZQFhE87tJSLlID-BZBKgUS_leaCbxJL87_KTxBKU4F_DNI-P_RMUL8ErLNZEFVs_CISJMMQLSpPA1GDAtecSPll55_FGuoNI3iYEYT-Rro3pFBOXdJhiEgmoKvWfVoItdN8NemVtXxXHFGl3XlZUgh5F7b6LT6id2MO5y5uZv_04lkw6mSl-Bh7ziBmXw2qtQY9vGX5s2p4SKI4CduaEMZtLslZlPM0p23fnGoIt2BYC7ijSw2nwqOLnl_axGJK0Sw1Jpmy5moNRs8yQcusQ2qMPl8g3r9WIosfuaoIz8qRiiP2nSzYUwfGI3-fRzsnXC3XtN-sfeywH3TFIMWo9MvKa0mU3SWfQZ8H2PZvXAUZ7-_j8Eopz6fYxpwAImJD1gIrG2JGTKyU4Mffh8_IBAo7yt9W8T6NLdXyCT9t5536i751Ga9CW6ahTBb7f3RWPwcsNnIB7VMwxwy996uwBquHiGWua-gepZw2PsO7yEQB3xZKCdafur-MegcxcWep_qbpmGo6-8AEGKLgLKD8Ed6MS4rnrcPLKfcHAvboO7SNmBuB4-lBUltaWTPDEfiXK25OXbwpQ7qychURy9OLd4fPuYtP3gwGOVi6k1Mni2rI4oa_XAhlJTvH6MMaYZxQHVXSTNOcgLXx9cz1JTqnkmk4mRHnvlj8uoyVszXQi2EFq1ozz4bxFCiir4wYzBdCwC2bp5S--i7E89xL3RQ8DvCMKO3q3Ro3nPU8hD4QItoCHgHaxpexrtiq_4feHmVl9A4cAFEkTGyjC4ZuNT0Ety0fsM0JtytFNiTnBHGqB7ZNOSLMyjNqEs7IpnBxRlzCB5afLDG5cP3ipOIMILSVyEv2je8yWSEx2E5ogSL-inO2p-EcThnT5KxySZMmCDT25qQmLI0Gk8afm8M--c1PUd3Z2ZXx50ouqvftZyEjlTocQfoAITVIUc6cCXhEuCsIL4RuyVvz5Ps73WuM0K9MmESn8iQRddz_03MxyHHsDdGoiT97TaGz-ivOjoO2eRdzOwU5k0JbJH5xZOWSvpVg2wnYWKLv2_gOjEMrTYxx_4kSBxpeYKyiIhFKug6nuRUmRwfCksPiWjNEVLjPo_x0_K_thH0jii76WQcq-224VibB3hAMTMxdr7aLVwqPJNVHOVI0Log4vJcledthlzilRGiw4kNBOYqo95DyYjXZJ5haKnUdsQrb9pwCBmXeK0PFxssZ904Wwpd22tH9w5ZvJZCz5p39tniakd9UeOHaPQmY3N26jzXfV4h1w3lkdzjrBEEMwxuUFjaaolQE6GKpswRiDdHZm4mbGOHkQYeMYebEVhy17r9drLvTc4QrlwNuP8HA4vfgQTUvqo64QM6RvIvqHftawkVazxNYUEhTmWsuUemZXI-GHLTbDrfD9BafGI9yk3hp8bG2u8R9ZvPZAA4R1wkBCQiY1BumfaGN49ETuQRJF0HTf4mRVJ9b6BSbgEO7tzAN2adQ0T22ePh2FkUqmOmDHHp_QwPFja5NCfLebcLyBLqxDkdcLANfpS0g-BkraV0ZpuU4_SZrb3Qu1Et8tnF5coVJwZWm6A-1PHhClHpf3KEz5F5MVfkAPCAiSMPqD8UaTCGWbMY7CHUav9IyuX7-uoAOzz9ZyoUQuDC9-OGHQb8x15XzsNWffCRpJwjWMBTRBB_rw5HwXuWBBWk-GWzXZtSHhWRXhouRhoUjKloqhgUfeNmL1gg4lusel5NQF6phwVek1V3oJknO1XezLjyVeio69_OOzkqosSkHs2ZskisqnFfL--LG0m5TO9-o88OYESeZIO4tQuUSN8HYxyqtaWo0iiJHxMDumo8fJypiR5z5L13aGrNA8ZPm2S_tg-Mmz34wqgLihFhgDRMMqu87dYXrF78oz3uKbYmhYsCk-jY519yUgwOfiC3CrfG6LqTcbWCVbmh-yogcstCV-nLfTeosGIZUHNI_H5didKNzh6hzUjoYHUhpiCDFD7mM89lm5EoeEa3S1ZNoAhO-4QXYIA5AajgxtES9SX7hPfP94zh9rm-l__9eJkWg7KOblWBYTq1eT71FxtN9fEIJZQ7pa9h1UEvgJL8aP2EJ3yWY5KFZ6GGcKpFf9x_omFxDxF8AdQ5n9uSKNcK0I8wz43FF8HRbWYWumUV3n7GpEZS7hGgB7ynRkYZ-X6ZtQj9VADJtUojMcAwYM7KdCqO1FBBKOd8McBirXkjzYfS3-LEjeYCdu67ogQYF3toxzR3Xhc_rcfcHnHh601I4opVb9mZBiws_d31146_CqJ-r5NF1PDnm9JfOURLua2ySMDd2B13uzP3L0BFveLR2Dq62BO2grKLEbn97__2HtESrVp-ozDqc9yLHSxhPjmWUAy9xCPDBlQNt8KrA.FxxuIKliXcdC8v7F1lX9sQ" # api = ChatGPT(session_token) # text = api.send_message(f'objetivos y competencias en un grado superior de desarrolllo de aplicaciones web') #LIMPIAR FRASES stopwords = get_stop_words('spanish') #SIMILITUD DE COSENOS def cosine_sim_vectors(vec1, vec2): vec1 = vec1.reshape(1, -1) vec2 = vec2.reshape(1, -1) return cosine_similarity(vec1, vec2)[0][0] #PROBAR CON RESPUESTAS DE PERSONA1 #objetivos person1_objetives.append(objetivos1) cleaned2 = list(map(clean_string, person1_objetives)) embeddings = modelS.encode(cleaned2) aut = [] for idx,answer in enumerate(cleaned2[:-1]): aut.append(cosine_similarity([embeddings[-1]],[embeddings[idx]])) similarities1 = [] for answer in aut: similarities1.append(float(answer[0][0])) index = [*range(0,len(similarities1),1)] chart_objetives1 = pd.DataFrame({ 'x':index, 'y':similarities1 } ) o1 = alt.Chart(chart_objetives1).mark_area().encode( x=alt.X('x', title="Semanas"), y=alt.Y('y', title=""), color=alt.value("#3399ff") ) #dificultades difficulties1 = [] cleanedD1 = list(map(clean_string, person1_difficulties)) for idx,answer in enumerate(cleanedD1): encoded_text1 = tokenizer(answer, return_tensors='pt') output1 = model(**encoded_text1) scores1 = output1[0][0].detach().numpy() scores1 = softmax(scores1) difficulties1.append(scores1) color_scale = alt.Scale( domain=[ "positivo", "neutral", "negativo", ], range=["#33cc33", "#6699ff", "#ff0000"] ) y_axis = alt.Axis( title='Semanas', offset=5, ticks=False, minExtent=60, domain=False ) source1 = [] for idx,d in enumerate(difficulties1): start,end = -d[1]/2,d[1]/2 source1.append( { "question":idx+1, "type":"neutral", "value":d[1], "start":start, "end":end } ) source1.append( { "question":idx+1, "type":"negativo", "value":d[0], "start":start, "end":start-d[0] } ) source1.append( { "question":idx+1, "type":"positivo", "value":d[2], "start":end, "end":end+d[2] } ) source1 = alt.pd.DataFrame(source1) d1 = alt.Chart(source1).mark_bar().encode( x=alt.X('start:Q', title=""), x2='end:Q', y=alt.Y('question:N', axis=y_axis), color=alt.Color( 'type:N', legend=alt.Legend( title='Sentimiento:'), scale=color_scale, ) ) #utilidad utilities1 = [] cleanedU1 = list(map(clean_string, person1_utilities)) for idx,answer in enumerate(cleanedU1): encoded_text1 = tokenizer(answer, return_tensors='pt') output1 = model(**encoded_text1) scores1 = output1[0][0].detach().numpy() scores1 = softmax(scores1) utilities1.append(scores1) source2 = [] for idx,d in enumerate(utilities1): start,end = -d[1]/2,d[1]/2 source2.append( { "question":idx+1, "type":"neutral", "value":d[1], "start":start, "end":end } ) source2.append( { "question":idx+1, "type":"negativo", "value":d[0], "start":start, "end":start-d[0] } ) source2.append( { "question":idx+1, "type":"positivo", "value":d[2], "start":end, "end":end+d[2] } ) source2 = alt.pd.DataFrame(source2) u1 = alt.Chart(source2).mark_bar().encode( x=alt.X('start:Q', title=""), x2='end:Q', y=alt.Y('question:N', axis=y_axis), color=alt.Color( 'type:N', legend=alt.Legend( title='Sentimiento:'), scale=color_scale, ) ) #emocion emotion_analyzer = create_analyzer(task="emotion", lang="es") emotions = emotion_analyzer.predict(person1_all_text) emotions_data = [] for emotion in emotions: emotion = emotion.probas emotions_data.append([emotion["joy"], emotion["sadness"], emotion["anger"], emotion["surprise"], emotion["disgust"], emotion["fear"], emotion["others"]]) chart_data = pd.DataFrame( emotions_data, columns=["1-alegria","2-tristeza","3-enfado","4-sorpresa","5-disgusto","6-miedo","7-otros"] ) data1 = pd.melt(chart_data.reset_index(), id_vars=["index"]) chart = ( alt.Chart(data1) .mark_bar() .encode( x=alt.X("value", type="quantitative", title=""), y=alt.Y("index", type="nominal", title="", axis=y_axis), color=alt.Color("variable", type="nominal", title="", legend=alt.Legend( title='Emociones:')), order=alt.Order("variable", sort="ascending"), ) ) #autonomia person1_all_text.append(autonomia) embeddings = modelS.encode(person1_all_text) aut = [] for idx,answer in enumerate(person1_all_text[:-1]): aut.append(cosine_similarity([embeddings[-1]],[embeddings[idx]])) aut_similarities1 = [] for answer in aut: aut_similarities1.append(float(answer[0][0])) index = [*range(0,len(aut_similarities1),1)] chart_autonomia1 = pd.DataFrame({ 'x':index, 'y':aut_similarities1 }) a1 = alt.Chart(chart_autonomia1).mark_area().encode( x=alt.X('x', title="Semanas"), y=alt.Y('y', title=""), color=alt.value("#660033") ) person1_all_text.pop() #participacion person1_all_text.append(participacion) cleaned1 = list(map(clean_string, person1_all_text)) embeddings = modelS.encode(cleaned1) par = [] for idx,answer in enumerate(cleaned1[:-1]): par.append(cosine_similarity([embeddings[-1]],[embeddings[idx]])) par_similarities1 = [] for answer in par: par_similarities1.append(float(answer[0][0])) chart_participacion1 = pd.DataFrame({ 'x':index, 'y':par_similarities1 }) p1 = alt.Chart(chart_participacion1).mark_area().encode( x=alt.X('x', title="Semanas"), y=alt.Y('y', title=""), color=alt.value("#33cc33") ) person1_all_text.pop() #compromiso person1_all_text.append(compromiso) cleaned1 = list(map(clean_string, person1_all_text)) embeddings = modelS.encode(cleaned1) com = [] for idx,answer in enumerate(cleaned1[:-1]): com.append(cosine_similarity([embeddings[-1]],[embeddings[idx]])) com_similarities = [] for answer in com: com_similarities.append(float(answer[0][0])) chart_compromiso1 = pd.DataFrame({ 'x':index, 'y':com_similarities }) c1 = alt.Chart(chart_compromiso1).mark_area().encode( x=alt.X('x', title="Semanas"), y=alt.Y('y', title=""), color=alt.value("#ff6600") ) person1_all_text.pop() #PERSONA2 #objetivos person2_objetives.append(objetivos2) cleaned2 = list(map(clean_string, person2_objetives)) embeddings = modelS.encode(cleaned2) aut = [] for idx,answer in enumerate(cleaned2[:-1]): aut.append(cosine_similarity([embeddings[-1]],[embeddings[idx]])) similarities2 = [] for answer in aut: similarities2.append(float(answer[0][0])) index = [*range(0,len(similarities2),1)] chart_objetives2 = pd.DataFrame({ 'x':index, 'y':similarities2 } ) o2 = alt.Chart(chart_objetives2).mark_area().encode( x=alt.X('x', title="Semanas"), y=alt.Y('y', title=""), color=alt.value("#3399ff") ) #dicultades difficulties2 = [] cleanedD2 = list(map(clean_string, person2_difficulties)) for idx,answer in enumerate(cleanedD2): encoded_text2 = tokenizer(answer, return_tensors='pt') output2 = model(**encoded_text2) scores2 = output2[0][0].detach().numpy() scores2 = softmax(scores2) difficulties2.append(scores2) source3 = [] for idx,d in enumerate(difficulties2): start,end = -d[1]/2,d[1]/2 source3.append( { "question":idx+1, "type":"neutral", "value":d[1], "start":start, "end":end } ) source3.append( { "question":idx+1, "type":"negativo", "value":d[0], "start":start, "end":start-d[0] } ) source3.append( { "question":idx+1, "type":"positivo", "value":d[2], "start":end, "end":end+d[2] } ) source3 = alt.pd.DataFrame(source3) d2 = alt.Chart(source3).mark_bar().encode( x=alt.X('start:Q', title=""), x2='end:Q', y=alt.Y('question:N', axis=y_axis), color=alt.Color( 'type:N', legend=alt.Legend( title='Sentimiento:'), scale=color_scale, ) ) #utilidad utilities2 = [] cleanedU2 = list(map(clean_string, person2_utilities)) for idx,answer in enumerate(cleanedU2): encoded_text2 = tokenizer(answer, return_tensors='pt') output2 = model(**encoded_text2) scores2 = output2[0][0].detach().numpy() scores2 = softmax(scores2) utilities2.append(scores2) source4 = [] for idx,d in enumerate(utilities2): start,end = -d[1]/2,d[1]/2 source4.append( { "question":idx+1, "type":"neutral", "value":d[1], "start":start, "end":end } ) source4.append( { "question":idx+1, "type":"negativo", "value":d[0], "start":start, "end":start-d[0] } ) source4.append( { "question":idx+1, "type":"positivo", "value":d[2], "start":end, "end":end+d[2] } ) source4 = alt.pd.DataFrame(source4) u2 = alt.Chart(source4).mark_bar().encode( x=alt.X('start:Q', title=""), x2='end:Q', y=alt.Y('question:N', axis=y_axis), color=alt.Color( 'type:N', legend=alt.Legend( title='Sentimiento:'), scale=color_scale, ) ) #emocion emotions2 = emotion_analyzer.predict(person2_all_text) emotions_data2 = [] for emotion in emotions2: emotion = emotion.probas emotions_data2.append([emotion["joy"], emotion["sadness"], emotion["anger"], emotion["surprise"], emotion["disgust"], emotion["fear"], emotion["others"]]) chart_data2 = pd.DataFrame( emotions_data2, columns=["1-alegria","2-tristeza","3-enfado","4-sorpresa","5-disgusto","6-miedo","7-otros"] ) data2 = pd.melt(chart_data2.reset_index(), id_vars=["index"]) chart2 = ( alt.Chart(data2) .mark_bar() .encode( x=alt.X("value", type="quantitative", title=""), y=alt.Y("index", type="nominal", title="", axis=y_axis), color=alt.Color("variable", type="nominal", title="", legend=alt.Legend( title='Emociones:')), order=alt.Order("variable", sort="ascending"), ) ) #autonomia person2_all_text.append(autonomia) embeddings2 = modelS.encode(person2_all_text) aut2 = [] for idx,answer in enumerate(person2_all_text[:-1]): aut2.append(cosine_similarity([embeddings2[-1]],[embeddings2[idx]])) aut_similarities2 = [] for answer in aut2: aut_similarities2.append(float(answer[0][0])) index = [*range(0,len(aut_similarities2),1)] chart_autonomia2 = pd.DataFrame({ 'x':index, 'y':aut_similarities2 }) a2 = alt.Chart(chart_autonomia2).mark_area().encode( x=alt.X('x', title="Semanas"), y=alt.Y('y', title=""), color=alt.value("#660033") ) person2_all_text.pop() #participacion person2_all_text.append(participacion) cleaned1 = list(map(clean_string, person2_all_text)) embeddings = modelS.encode(cleaned1) par = [] for idx,answer in enumerate(cleaned1[:-1]): par.append(cosine_similarity([embeddings[-1]],[embeddings[idx]])) par_similarities2 = [] for answer in par: par_similarities2.append(float(answer[0][0])) chart_participacion2 = pd.DataFrame({ 'x':index, 'y':par_similarities2 }) p2 = alt.Chart(chart_participacion2).mark_area().encode( x=alt.X('x', title="Semanas"), y=alt.Y('y', title=""), color=alt.value("#33cc33") ) person2_all_text.pop() #compromiso person2_all_text.append(compromiso) cleaned1 = list(map(clean_string, person2_all_text)) embeddings = modelS.encode(cleaned1) com = [] for idx,answer in enumerate(cleaned1[:-1]): com.append(cosine_similarity([embeddings[-1]],[embeddings[idx]])) com_similarities2 = [] for answer in com: com_similarities2.append(float(answer[0][0])) chart_compromiso2 = pd.DataFrame({ 'x':index, 'y':com_similarities2 }) c2 = alt.Chart(chart_compromiso2).mark_area().encode( x=alt.X('x', title="Semanas"), y=alt.Y('y', title=""), color=alt.value("#ff6600") ) person2_all_text.pop() #graficas st.header("Persona 1 (DAW)") with st.container(): col1, col2 = st.columns(2, gap="large") with col1: st.text("Analisis de objetivos:") st.altair_chart(o1, use_container_width=True) with col2: st.text("Word Cloud de objetivos:") st.image(person1) with st.container(): st.text("Sentimiento de dificultad:") st.altair_chart(d1, use_container_width=True) with st.container(): st.text("Sentimiento de utilidad:") st.altair_chart(u1, use_container_width=True) with st.container(): st.text("Analisis de emociones:") st.altair_chart(chart, use_container_width=True) with st.container(): st.text("Analisis de autonomia:") st.altair_chart(a1, use_container_width=True) with st.container(): st.text("Analisis de participacion:") st.altair_chart(p1, use_container_width=True) with st.container(): st.text("Analisis de compromiso:") st.altair_chart(c1, use_container_width=True) st.header("Persona 2 (MARK)") with st.container(): col1, col2 = st.columns(2, gap="large") with col1: st.text("Analisis de objetivos:") st.altair_chart(o2, use_container_width=True) with col2: st.text("Word Cloud de objetivos:") st.image(person2) with st.container(): st.text("Sentimiento de dificultad:") st.altair_chart(d2, use_container_width=True) with st.container(): st.text("Sentimiento de utilidad:") st.altair_chart(u2, use_container_width=True) with st.container(): st.text("Analisis de emociones:") st.altair_chart(chart2, use_container_width=True) with st.container(): st.text("Analisis de autonomia:") st.altair_chart(a2, use_container_width=True) with st.container(): st.text("Analisis de participacion:") st.altair_chart(p2, use_container_width=True) with st.container(): st.text("Analisis de compromiso:") st.altair_chart(c2, use_container_width=True)