Spaces:

Kuautli
/

ProyectoDS-AnalizaTube

Runtime error

App Files Files Community

Kuautli committed 29 days ago

Commit

63f9eaa

verified ·

1 Parent(s): 14e6508

Create app.py

Browse files

Files changed (1) hide show

app.py +79 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import os
+import pandas as pd
+import plotly.io as pio
+import gradio as gr
+from app_clustering import clustering
+from dotenv import load_dotenv
+if os.getenv("HUGGINGFACE_HUB_CACHE") is None:
+    load_dotenv()
+api_key = os.getenv("youtube_api_key")
+RANDOM_STATE = 333
+def convert_graph_to_html(graph, full_html=False):
+    return pio.to_html(graph, full_html=full_html) if graph else None
+def process_video(url):
+    video_details = None
+    sentiment_daily_graph = None
+    sentiment_count = None
+    sankey_graph = None
+    scores_graph = None
+    if url:
+        video_details = clustering.get_youtube_video_details(url, api_key)
+        comments_df = clustering.get_youtube_comments(api_key, url)
+        comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")
+        comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date
+        comments_df = clustering.classify_sentiment_df(comments_df)
+        # Sentiment count
+        sentiment_count = comments_df["sentimiento"].value_counts().to_dict()
+        # Plot daily sentiment
+        sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
+        sentiment_daily_graph_html = convert_graph_to_html(sentiment_daily_graph)
+        umap_df, min_eps, max_eps = clustering.transform_embeddings(comments_df, embeddings_col="embeddings")
+        total = comments_df.shape[0]
+        min_items_by_cluster = clustering.determine_min_items_by_cluster(total)
+        cluster_assignments, cluster_counts, calinski_harabasz_scores, silhouette_scores, most_similar_comments, umap_df = clustering.perform_clustering(
+            umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
+        )
+        # Build Sankey data and plot
+        labels, source, target, values, comments = clustering.build_sankey_data(
+            cluster_assignments, cluster_counts, most_similar_comments, min_items_by_cluster=min_items_by_cluster
+        )
+        sankey_graph = clustering.plot_sankey(labels, source, target, values, comments, height=1000, width=1200)
+        sankey_graph_html = convert_graph_to_html(sankey_graph)
+        # Plot clustering metrics
+        scores_graph, _ = clustering.plot_clustering_metric(silhouette_scores, calinski_harabasz_scores)
+        scores_graph_html = convert_graph_to_html(scores_graph)
+    return video_details, sentiment_daily_graph_html, sentiment_count, sankey_graph_html, scores_graph_html
+# Gradio Interface
+iface = gr.Interface(
+    fn=process_video,
+    inputs=gr.inputs.Textbox(label="YouTube Video URL", placeholder="Ingresa la URL del video..."),
+    outputs=[
+        gr.outputs.JSON(label="Video Details"),
+        gr.outputs.HTML(label="Sentiment Daily Graph"),
+        gr.outputs.JSON(label="Sentiment Count"),
+        gr.outputs.HTML(label="Sankey Graph"),
+        gr.outputs.HTML(label="Clustering Scores Graph")
+    ],
+    title="YouTube Video Sentiment Analysis",
+    description="Ingresa la URL de un video de YouTube para analizar los comentarios y visualizar los resultados."
+)
+if __name__ == "__main__":
+    iface.launch()