Spaces:

Essio
/

demo

Sleeping

App Files Files Community

xerubin committed on Feb 25

Commit

1f9bcbb

•

1 Parent(s): f5b4a41

commit inicial

Browse files

Files changed (7) hide show

app.py +67 -0
as_bert_df.py +126 -0
assets/favicon.ico +0 -0
assets/flags.jpg +0 -0
assets/header.jpg +0 -0
page1.py +150 -0
page2_.py +149 -0

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+# -*- coding: utf-8 -*-
+"""
+Created on Sat Dec 23 13:22:00 2023
+@author: Essio Rubin C.
+"""
+from PIL import Image
+import streamlit as st
+from st_pages import Page, show_pages, add_page_title
+# icon
+im = Image.open("assets/favicon.ico")
+st.set_page_config(
+    page_title="Analisis de sentimiento de reseñas de Hoteles",
+    page_icon=im,
+    layout="wide",
+)
+# img header
+img = Image.open("assets/header.jpg")
+st.image(img, width=1000)
+# show links to pages
+show_pages(
+    [
+        Page("app.py", "Presentación", "🏠"),
+        Page("page1.py", "Análisis de reseñas uno a uno", ":blue_book:"),
+        Page("page2_.py", "Análisis de reseñas en lotes", ":books:"),
+    ]
+)
+# show text
+css_text_1 = '''
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
+<p style="font-family: Verdana; font-size: 15px; font-weight: 400; color: #494848;">
+<span style="font-family: Georgia, serif; font-size: 20px; font-weight: 400; color: blue; font-style: italic; ">IA Hotel Reviews</span> es una aplicación que utiliza Inteligencia Artificial para realizar un análisis inteligente de las reseñas que dejan tus clientes. Ayuda a identificar el sentimiento positivo o negativo que hay detrás de estos comentarios. Obtendrás un análisis certero de la opiniones que vierten tus clientes y con ello puedes tomar mejores decisiones sobre tu personal, las instalaciones y otros aspectos de tu hotel con el fin de ganar más clientes e incrementar tus ganancias.
+</p>
+<p style="font-family: Verdana; font-size: 3px;">&nbsp;</p>
+<p style="font-family: Verdana; font-size: 15px; font-weight: 300; ; color: #494848;">
+Este programa es multidioma ya que Las reseñas pueden estar escritas en idioma español, holandés, italiano, alemán, francés o inglés.
+</p>
+'''
+st.write(css_text_1, unsafe_allow_html=True)
+# show image
+img = Image.open("assets/flags.jpg")
+st.image(img, width=350)
+css_text_2 = '''
+<p style="font-family: Verdana; font-size: 3px;">&nbsp;</p>
+<p style="font-family: Verdana; font-size: 15px; font-weight: 400; color: #494848;">
+Te ofrecemos dos opciones de análisis:
+</p>
+<i class="fa-solid fa-gear"></i>
+&nbsp;&nbsp;
+<a style="font-family: Verdana; font-size: 17px; font-weight: 200; text-decoration: none; ; color: #6c88f4;" href="http://localhost:8501/An%C3%A1lisis%20de%20rese%C3%B1as%20uno%20a%20uno">Análisis de reseñas uno a uno</a>
+<i class="fa-solid fa-gear"></i>
+&nbsp;&nbsp;
+<a style="font-family: Verdana; font-size: 17px; font-weight: 200; text-decoration: none; ; color: #6c88f4;" href="http://localhost:8501/An%C3%A1lisis%20de%20rese%C3%B1as%20en%20lotes">Análisis de reseñas en lotes</a>
+'''
+st.write(css_text_2, unsafe_allow_html=True)

as_bert_df.py ADDED Viewed

	@@ -0,0 +1,126 @@

+# -*- coding: utf-8 -*-
+"""
+Created on Sun Dec  3 09:26:33 2023
+@author: ideaUser
+Modelo (huggingface)
+https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment
+"""
+#%%
+import torch
+from transformers import BertTokenizer, BertForSequenceClassification
+from transformers import pipeline
+import pandas as pd
+#%%
+# Download pretrained BERT from Hugging Face for sentiment analisis
+model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
+tokenizer = BertTokenizer.from_pretrained(model_name)
+model = BertForSequenceClassification.from_pretrained(model_name)
+# function predict sentiment using BERT pretrained
+def get_review_sentiment(text):
+    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
+    results = classifier(text)
+    return int(results[0]["label"][0])
+#%%
+# function for batch predict stars
+def batch_predict_sentiment_stars(data):
+    sentiment = []
+    for index, row in data.iterrows():
+        sentiment.append(get_review_sentiment(row["review"]))
+    data_result = pd.DataFrame(
+        {
+            "review": list(data["review"]),
+            "predict": sentiment
+        })
+    return data_result
+#%%
+# function for batch predict sentiment label [NEGATIVE, NEUTRAL, POSITIVE]
+def batch_predict_sentiment_3_label(data):
+    sentiment = []
+    for index, row in data.iterrows():
+        stars = get_review_sentiment(row["review"])
+        label = ""
+        if stars < 3:
+            label = "negative"
+        elif stars == 3:
+            label = "neutral"
+        elif stars > 3 & stars <= 5:
+            label = "positive"
+        sentiment.append(label)
+    data_result = pd.DataFrame(
+        {
+            "review": list(data["review"]),
+            "label": list(data["label"]),
+            "predict": sentiment
+        })
+    return data_result
+#%%
+# function for batch predict sentiment label [NEGATIVE, POSITIVE]
+def batch_predict_sentiment_2_label(data):
+    total_rows =  data.shape[0]
+    sentiment = []
+    i = 1
+    for index, row in data.iterrows():
+        stars = get_review_sentiment(row["review"])
+        label = ""
+        if stars < 3:
+            label = "negative"
+        elif stars >= 3 & stars <= 5:
+            label = "positive"
+        sentiment.append(label)
+        if i % 100 == 0:
+            print(i, " / " , total_rows )
+        i += 1
+    # return a dataframe
+    data_result = pd.DataFrame(
+        {
+            "review": list(data["review"]),
+            "label": list(data["label"]),
+            "predict": sentiment
+        })
+    return data_result
+#%%
+if __name__ == "__main__":
+    hotel_reviews = ["El fin de semana mi pareja y yo hicimos una reserva en este hotel, con el fin de descansar y desconectar, fue sólo una noche y menos mal.  Nos llevaron a un ala bastante apartada del hotel porque nos dijeron que era mejor para descansar ya que la parte de fuera era muy “jaleosa”. Nos pareció bien porque era justo lo que buscábamos, y cuál fue nuestra sorpresa? Desde las 6 de la mañana con ruidos, primero lo que suponemos que eran unos tacones en la habitación de arriba (de eso no tiene culpa el hotel, obviamente) y después sobre las 7 o poco más, las limpiadoras moviendo muebles y arrastrando sofás o lo que fuera. Habíamos cogido sólo alojamiento para descansar, pensando en no tener que madrugar como habitualmente, pero fue IMPOSIBLE por los ruidos constantes.  Por destacar algo…",
+    "El hotel en general está bien, las habtiaciones son espaciosas y el personal es muy amable (sobretodo el encargado del roof-top y la piscina) y la zona de la piscina es curiosa. Pero tiene dos grandes fallos: El primero es que el wifi no llegaba bien a la habitación ya que se cortaba continuamente. El segundo fallo es que no se les ocurre otra cosa que poner un edredón nórdico en vez de una sábana fina en pleno agosto en Sevilla.",
+    "El hotel es moderno, amplio y limpio, pero no hemos podido disfrutar de la experiencia porque con tanto ruido no hemos podido descansar. Además la piscina estaba llena de gente y no la hemos podido usar. Los empleados muy amables y la ubicación perfecta."
+    ]
+    data_df = pd.DataFrame(
+        {
+            "review": hotel_reviews,
+            "label":['positive','negative','positive']
+        }
+    )
+    # save dataframe
+    data_df.to_csv('data/sa_data.csv', index=False)
+#%%
+    # pruebas
+    # read csv
+    data_df = pd.read_csv('data/sa_data.csv')
+    #result = batch_predict_sentiment_stars(data_df)
+    #result = batch_predict_sentiment_3_label(data_df)
+    result = batch_predict_sentiment_2_label(data_df)
+    print("Columns:\n", result.dtypes)
+    print("\n(rows, cols): ", result.shape)
+    print("\nData:\n",result)
+#%%

assets/favicon.ico ADDED Viewed

assets/flags.jpg ADDED Viewed

assets/header.jpg ADDED Viewed

page1.py ADDED Viewed

	@@ -0,0 +1,150 @@

+# -*- coding: utf-8 -*-
+"""
+Created on Sat Dec 23 13:22:00 2023
+@author: Essio Rubin C.
+"""
+from PIL import Image
+import streamlit as st
+from st_pages import Page, show_pages, add_page_title
+import time
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+from as_bert_df import batch_predict_sentiment_stars
+c1 = st.container()
+c2 = st.container()
+data_df = pd.DataFrame(
+    {
+        "review": ["",], "predict":[0,]
+    }
+)
+# function to show base page
+def show_base_page():
+    with c1:
+        # the title and icon to the current page
+        add_page_title()
+        # show text
+        text_css = """
+        <p style="font-family: Verdana; font-size: 15px; font-weight: 400; ; color: #494848;">
+        Una reseña debe ser un texto que tenga una o varias oraciones que incluyan opiniones referidas a servicios que brinda
+        un Hotel.
+        La reseña puede estar escrita en idioma español, holandés, italiano, alemán, francés o inglés.
+        </p>
+        """
+        st.write(text_css, unsafe_allow_html=True)
+        # show image
+        img = Image.open("assets/flags.jpg")
+        st.image(img, width=200)
+# function to show data editor
+def show_data_editor(data):
+    with c1:
+        # dataframe editor
+        global st_data
+        global placeholder
+        placeholder = st.empty()
+        #st_data = st.data_editor(
+        st_data = placeholder.data_editor(
+            data,
+            column_config={
+                "review": st.column_config.TextColumn(
+                    "review",
+                    help="Ingrese la reseña.",
+                    width="large",
+                    required=True,
+                    max_chars=500,
+                    validate="[a-zA-Záéíóúñ.,]+$"
+                ),
+                "predict": st.column_config.NumberColumn(
+                    "predict",
+                    help="Sentimiento expresado en cantidad de estrellas.",
+                    width="small",
+                    required=False,
+                    default=0,
+                    min_value=0,
+                    max_value=5,
+                    format="%d ⭐",
+                )
+            },
+            hide_index=True,
+            num_rows="dynamic",
+            height = 260,
+            width = 900,
+        )
+def show_button():
+    global placeholder2
+    placeholder2 = st.empty()
+    with c1:
+        # show buttons
+        global button_1
+        button_1 = placeholder2.button(" :gear: Predecir", type="primary", key='but_1')
+# define action funtion for button
+def predecir():
+    #st.session_state.disabled = True
+    data_df = st_data
+    if data_df.shape[0] > 0:
+        st.write(data_df.shape[0])
+        # progress bar
+        progress_text = "Procesando. Espere."
+        my_bar = st.progress(0, text=progress_text)
+        for percent_complete in range(100):
+            time.sleep(0.01)
+            my_bar.progress(percent_complete + 1, text=progress_text)
+        time.sleep(1)
+        my_bar.empty()
+        result = batch_predict_sentiment_stars(data_df)
+        placeholder.empty()
+        placeholder2.empty()
+        show_data_editor(result)
+        show_chart(result)
+    else:
+        st.error("Debe ingresar una reseña.")
+def show_chart(data):
+    with c2:
+        fig, ax = plt.subplots()
+        fig.set_figwidth(5)
+        fig.set_figheight(3)
+        # title
+        ax.set_title("Histograma de Frecuencias", fontsize = 8)
+        # axis
+        ax.set_xlim([0, 5])
+        # x label
+        ax.set_xlabel('Sentimiento (Número de estrellas)', fontsize = 6)
+        ax.set_ylabel('Frecuencia', fontsize = 6)
+        # Crear un histograma
+        ax.hist(data['predict'], bins=20, color ="green")
+        # Mostrar el gráfico en Streamlit
+        st.pyplot(fig)
+#------------------------------------------
+# main flow
+#------------------------------------------
+# draw the static part of the page, the editor seeded with data_df and the button
+show_base_page()
+show_data_editor(data_df)
+show_button()
+# button_1 is set by show_button(); run the prediction when it is truthy
+if button_1:
+    predecir()

page2_.py ADDED Viewed

	@@ -0,0 +1,149 @@

+# -*- coding: utf-8 -*-
+"""
+Created on Sat Dec 23 13:22:00 2023
+@author: Essio Rubin C.
+"""
+from PIL import Image
+import streamlit as st
+from st_pages import add_page_title
+import time
+import pandas as pd
+import matplotlib.pyplot as plt
+from as_bert_df import batch_predict_sentiment_stars
+# layout containers: show_base_page() draws into c1, show_data_editor() into c2
+c1 = st.container()
+c2 = st.container()
+# function to show base page
+def show_base_page():
+    with c1:
+        # show title and icon to the current page
+        add_page_title()
+        # show text
+        text_css = """
+        <p style="font-family: Verdana; font-size: 15px; font-weight: 400; ; color: #494848;">
+        El archivo de reseñas debe ser un archivo de texto que contenga una reseña por cada línea.
+        La reseña puede estar escrita en idioma español, holandés, italiano, alemán, francés o inglés.
+        </p>
+        <p>&nbsp;</p>
+        """
+        st.write(text_css, unsafe_allow_html=True)
+        # show image
+        img = Image.open("assets/flags.jpg")
+        st.image(img, width=200)
+        # placeholder for hide widget
+        global placeholder, placeholder2
+        placeholder = st.empty()
+        placeholder2 = st.empty()
+        # file upload
+        global uploaded_file
+        uploaded_file = placeholder.file_uploader("Cargar archivo de reseñas ...", type=['txt', 'csv'], key='uploader')
+        global button_1
+        button_1 = placeholder2.button(" :gear: Predecir", type="primary", key='but_1')
+# define action funtion for button
+def predecir():
+    if uploaded_file is not None:
+        global data_df
+        # progress bar
+        progress_text = "Procesando. Espere."
+        my_bar = st.progress(0, text=progress_text)
+        for percent_complete in range(100):
+            time.sleep(0.01)
+            my_bar.progress(percent_complete + 1, text=progress_text)
+        time.sleep(1)
+        my_bar.empty()
+        data_df = pd.read_csv(uploaded_file, sep=";")
+        if data_df.shape[0] > 0:
+            # formatear columnas
+            if data_df.shape[1] < 2:
+                data_df['review'] = 0
+                data_df.columns = ["review","predict"]
+            result = batch_predict_sentiment_stars(data_df)
+            # mostrar resulyados
+            show_data_editor(result)
+            show_chart(result)
+            placeholder.empty()
+            placeholder2.empty()
+        else:
+            st.error("Archivo se encuentra vacío.")
+    else:
+        st.error("Debe subir un archivo de texto")
+def show_data_editor(data_df):
+    with c2:
+        st.data_editor(
+            data_df,
+            column_config={
+                "review": st.column_config.TextColumn(
+                    "review",
+                    help="Ingrese la reseña.",
+                    width="large",
+                    required=True,
+                    max_chars=500,
+                    validate="[a-zA-Z]+$"
+                ),
+                "predict": st.column_config.NumberColumn(
+                    "predict",
+                    help="Sentimiento expresado en cantidad de estrellas.",
+                    width="small",
+                    required=False,
+                    default=0,
+                    min_value=0,
+                    max_value=5,
+                    format="%d ⭐",
+                )
+            },
+            hide_index=True,
+            num_rows="dynamic",
+            height = 260,
+            width = 900,
+        )
+def show_chart(data):
+    fig, ax = plt.subplots()
+    fig.set_figwidth(5)
+    fig.set_figheight(3)
+    # title
+    ax.set_title("Histograma de Frecuencias", fontsize = 8)
+    # axis
+    ax.set_xlim([0, 5])
+    # x label
+    ax.set_xlabel('Sentimiento (Número de estrellas)', fontsize = 6)
+    ax.set_ylabel('Frecuencia', fontsize = 6)
+    # Crear un histograma
+    ax.hist(data['predict'], bins=20, color ="green")
+    # Mostrar el gráfico en Streamlit
+    st.pyplot(fig)
+#------------------------------------------
+# main flow
+#------------------------------------------
+# draw the page; show_base_page() also sets the globals uploaded_file and button_1
+show_base_page()
+# run the prediction when button_1 is truthy
+if button_1:
+    predecir()