Spaces:
Sleeping
Sleeping
fcernafukuzaki
committed on
Upload 2 files
Browse files- app.py +197 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
|
2 |
+
from tqdm import tqdm
|
3 |
+
|
4 |
+
# Pandas
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
# Expresiones regulares
|
8 |
+
import re
|
9 |
+
|
10 |
+
# Matplotlib, Seaborn y Plotly
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
import seaborn as sns
|
13 |
+
|
14 |
+
# NLTK
|
15 |
+
import nltk
|
16 |
+
from nltk.corpus import stopwords
|
17 |
+
|
18 |
+
# spaCy
|
19 |
+
import spacy
|
20 |
+
|
21 |
+
# PySentimiento y Transformers
|
22 |
+
from pysentimiento import create_analyzer
|
23 |
+
from sentence_transformers import SentenceTransformer
|
24 |
+
|
25 |
+
# Word cloud
|
26 |
+
from PIL import Image
|
27 |
+
|
28 |
+
import uuid
|
29 |
+
import gradio as gr
|
30 |
+
|
31 |
+
# Download the NLTK corpora needed at runtime: the Spanish stop-word list and
# the 'punkt' tokenizer models (no-ops when already present in the data dir).
nltk.download('stopwords')
nltk.download('punkt')
|
33 |
+
|
34 |
+
|
35 |
+
### Reformado. Antes hacia reproceso para obtener output y probas. Se puede hacer en un paso.
|
36 |
+
### Refactored: single prediction pass; columns extracted without re-running the model.
def get_sentiment(df, column):
    """Run Spanish sentiment analysis (pysentimiento) over ``df[column]``.

    Mutates ``df`` in place (and returns it) with the new columns:
      - 'Polaridad': predicted label for each row
      - 'sent_NEU', 'sent_NEG', 'sent_POS': per-class probabilities
        (None when a class is missing from the analyzer output)
    """
    analyzer = create_analyzer(task="sentiment", lang="es")
    # Wrap the iterable with tqdm directly instead of driving a manual pbar.
    outputs = [
        analyzer.predict(text)
        for text in tqdm(df[column], desc="Analyzing Comments")
    ]

    # Extract label and probabilities from the cached outputs — no reprocessing.
    df['Polaridad'] = [out.output for out in outputs]
    df['sent_NEU'] = [out.probas.get('NEU') for out in outputs]
    df['sent_NEG'] = [out.probas.get('NEG') for out in outputs]
    df['sent_POS'] = [out.probas.get('POS') for out in outputs]
    return df
|
61 |
+
|
62 |
+
### Reformado. Antes hacia reproceso para obtener output y probas. Se puede hacer en un paso.
|
63 |
+
### Refactored: single prediction pass; columns extracted without re-running the model.
def get_emotions(df, column):
    """Run Spanish emotion analysis (pysentimiento) over ``df[column]``.

    Mutates ``df`` in place (and returns it) with the new columns:
      - 'Emocion': predicted emotion label for each row
      - 'emo_<emotion>' for each of anger, sadness, surprise, disgust, joy,
        fear, others: per-class probabilities (None when a class is missing)
    """
    analyzer = create_analyzer(task="emotion", lang="es")
    # Wrap the iterable with tqdm directly instead of driving a manual pbar.
    outputs = [
        analyzer.predict(text)
        for text in tqdm(df[column], desc="Analyzing Comments")
    ]

    df['Emocion'] = [out.output for out in outputs]
    # One loop over the known emotion classes replaces seven copy-pasted passes.
    for emotion in ('anger', 'sadness', 'surprise', 'disgust', 'joy', 'fear', 'others'):
        df[f'emo_{emotion}'] = [out.probas.get(emotion) for out in outputs]
    return df
|
96 |
+
|
97 |
+
class ProcesamientoLenguaje:
    """Spanish text-processing helpers: POS-based token filtering and regex cleanup."""

    def __init__(self):
        # Only the tagger is needed for POS filtering; parser/NER are disabled for speed.
        self.nlp = spacy.load('es_core_news_md', disable=["parser", "ner"])

    def postags_and_stopwords(self, texts, allowed_postags=('NOUN', 'ADJ', 'PROPN', 'VERB', 'X')):
        '''Tokenize and POS-tag *texts* with the spaCy pipeline, then keep only
        tokens longer than 2 characters that are not stop words and whose
        coarse POS tag is allowed: noun, adjective, proper noun, verb, or
        anything spaCy could not categorize (OOV words, unrecognized proper
        names, etc). Returns the filtered tokens joined into one string.

        Bug fix: the original default used 'VB' (a Penn Treebank fine-grained
        tag); spaCy's ``token.pos_`` holds Universal POS tags, where verbs are
        'VERB', so verbs were never kept despite being documented. The default
        is also now a tuple to avoid a mutable default argument.

        NOTE(review): relies on the module-level ``stop_words`` list being
        defined before this method is called.
        '''
        return ' '.join(
            token.text for token in self.nlp(texts)
            if token.pos_ in allowed_postags
            and token.text not in stop_words
            and len(token.text) > 2
        )

    def cleaner(self, word):
        '''Strip noise from a raw comment string (URLs, symbols, repeated
        letters, Twitter handles, very short words). Returns the cleaned string.
        '''
        word = re.sub(r'https?\S+', '', word)  # remove all URLs
        word = re.sub(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', "", word)  # remove text emoticons (:-), ;D, =P, ...)
        word = re.sub(r'ee.uu', 'eeuu', word, flags=re.IGNORECASE)  # normalize every EE.UU variant (any separator) to EEUU
        word = re.sub(r'\#\.', '', word)
        word = re.sub(r'\n', ' ', word)  # replace line breaks with spaces
        word = re.sub(r',', '', word)  # remove commas
        word = re.sub(r'\-', ' ', word)  # replace hyphens with spaces
        word = re.sub(r'\.{3}', ' ', word)  # remove ellipses
        word = re.sub(r'a{2,}', 'a', word)  # collapse repeated 'a' (e.g. aaaaaaah, holaaaaaa)
        word = re.sub(r'é{2,}', 'é', word)  # collapse repeated 'é' (e.g. volvééééé)
        word = re.sub(r'i{2,}', 'i', word)  # collapse repeated 'i' (e.g. salíiiiiii)
        word = re.sub(r'ja{2,}', 'ja', word)  # collapse laughter (e.g. jaaaaaa)
        word = re.sub(r'[^\w\s@ñ]', '', word, flags=re.UNICODE)  # drop non-alphanumeric symbols except @ and ñ
        word = re.sub(r'\b@\w+\b', '', word)  # remove Twitter handles
        word = re.sub(r'\b\w{1,2}\b', '', word)  # remove one- and two-letter words
        return word
|
137 |
+
|
138 |
+
def grafico_pie(df, column_name='Polaridad'):
    """Render a pie chart of the value distribution of *column_name* and save
    it to a uniquely named JPG file. Returns the saved file's path."""
    counts = df[column_name].value_counts()
    destino = f"{uuid.uuid4()}_sentimiento.jpg"
    plt.figure(figsize=(8, 6))
    plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=140)
    plt.title("Distribución de Polaridad")
    plt.savefig(destino, bbox_inches="tight")
    plt.close()
    return destino
|
147 |
+
|
148 |
+
def grafico_barras(df, column_name='Emocion'):
    """Render a labelled count (bar) chart for *column_name* and save it to a
    uniquely named JPG file. Returns the saved file's path."""
    # Fixed copy-paste from grafico_pie: this chart is about emotions, so the
    # file suffix is now "_emocion.jpg" (callers only use the returned path).
    file_path = f"{uuid.uuid4()}_emocion.jpg"
    plt.figure(figsize=(8, 6))
    ax = sns.countplot(x=column_name, data=df)
    # Annotate each bar with its count, offset 10 points above the bar top.
    for p in ax.patches:
        ax.annotate(
            format(p.get_height()),
            (p.get_x() + p.get_width() / 2., p.get_height()),
            ha='center', va='center',
            xytext=(0, 10), textcoords='offset points',
        )
    plt.xlabel("Emocion")
    plt.ylabel("Cantidad")
    plt.title("Histograma de Emocion")
    plt.savefig(file_path, bbox_inches="tight")
    plt.close()
    return file_path
|
160 |
+
|
161 |
+
# Module-level singletons shared by procesar_csv: the spaCy-backed text
# processor and the NLTK Spanish stop-word list.
pln = ProcesamientoLenguaje()
stop_words = stopwords.words('spanish')
|
163 |
+
|
164 |
+
# Función que lee el archivo CSV
|
165 |
+
# Gradio handler: reads and processes the uploaded CSV file.
def procesar_csv(file):
    """Read a ';'-delimited CSV with 'Fecha' and 'Comentario' columns, run
    sentiment and emotion analysis, clean the comments, and return
    (10-row preview, processed CSV path, pie chart path, bar chart path).
    """
    if file is None:
        # Bug fix: the interface declares four outputs, so returning a single
        # string here broke Gradio's output mapping. Raise a user-visible
        # error instead.
        raise gr.Error("No se ha cargado ningún archivo.")
    df = pd.read_csv(file.name, delimiter=';')
    df['Fecha'] = pd.to_datetime(df['Fecha'], format='%d/%m/%y')
    df = get_sentiment(df, "Comentario")
    df = get_emotions(df, "Comentario")

    # Clean comments: regex cleanup, stop-word removal, then POS filtering.
    stop_set = set(stop_words)  # O(1) membership instead of scanning a list per word
    df['Comentario_clean'] = df['Comentario'].apply(pln.cleaner)
    df['Comentario_clean'] = df['Comentario_clean'].apply(
        lambda x: ' '.join(w for w in x.split() if w.lower() not in stop_set)
    )
    df['Comentario_clean'] = df['Comentario_clean'].apply(pln.postags_and_stopwords)

    output_file = f"{uuid.uuid4()}_processed_output.csv"
    df.to_csv(output_file, index=False)

    grafico_pie_path = grafico_pie(df)
    grafico_barras_path = grafico_barras(df)
    return df.head(10), output_file, grafico_pie_path, grafico_barras_path  # show the first rows
|
182 |
+
|
183 |
+
|
184 |
+
# Build the Gradio interface: one CSV file in, four outputs out (preview
# dataframe, downloadable processed CSV, and the two saved chart images).
interface = gr.Interface(
    fn=procesar_csv,
    inputs=gr.File(label="Archivo CSV"),
    outputs=[gr.Dataframe(label="Vista previa del archivo procesado"),
             gr.File(label="Descargar CSV procesado"),
             gr.Image(type="filepath", label="Gráfico de torta"),
             gr.Image(type="filepath", label="Gráfico de barras")],
    title="Cargar y visualizar CSV",
    description="Sube un archivo CSV para ver los primeros registros. El archivo CSV debe tener los campos Fecha y Comentario."
)

# Run the Gradio app.
interface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib==3.9.2
|
2 |
+
seaborn==0.13.2
|
3 |
+
nltk==3.9.1
|
4 |
+
pysentimiento==0.7.3
|
5 |
+
sentence-transformers==3.2.1
|
6 |
+
spacy==3.7.5
|
7 |
+
tqdm==4.66.6
|
8 |
+
es-core-news-md @ https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.7.0/es_core_news_md-3.7.0-py3-none-any.whl#sha256=0d6d6ebed875869a9759c8c096f2cef581fa32d861646030f771c83e5799de82
|