Kuautli committed on
Commit
63f9eaa
·
verified ·
1 Parent(s): 14e6508

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import plotly.io as pio
4
+ import gradio as gr
5
+ from app_clustering import clustering
6
+ from dotenv import load_dotenv
7
+
8
# Only read a local .env file when HUGGINGFACE_HUB_CACHE is absent from the
# environment (presumably that variable marks a hosted Hugging Face runtime
# where secrets are already injected -- confirm against the deployment).
if "HUGGINGFACE_HUB_CACHE" not in os.environ:
    load_dotenv()

# API key handed to the clustering helpers for their YouTube requests;
# None when the ``youtube_api_key`` environment variable is not set.
api_key = os.environ.get("youtube_api_key")

# Fixed seed value; not referenced by any code visible in this file.
RANDOM_STATE = 333
14
+
15
+
16
def convert_graph_to_html(graph, full_html=False):
    """Render a Plotly figure as an HTML string.

    Args:
        graph: Figure to render. Any falsy value (for example ``None``)
            is treated as "no figure available".
        full_html: Forwarded unchanged to ``plotly.io.to_html``.

    Returns:
        The markup produced by ``plotly.io.to_html``, or ``None`` when
        ``graph`` is falsy.
    """
    if not graph:
        return None
    return pio.to_html(graph, full_html=full_html)
18
+
19
+
20
def process_video(url):
    """Analyse the comments of a YouTube video and build the UI outputs.

    The comments are fetched, embedded, labelled with a sentiment and
    clustered through the ``clustering`` helper module; the resulting
    Plotly figures are rendered to HTML fragments for display.

    Args:
        url: URL of the YouTube video. A falsy value (empty string or
            ``None``) skips all processing.

    Returns:
        A 5-tuple ``(video_details, sentiment_daily_graph_html,
        sentiment_count, sankey_graph_html, scores_graph_html)``.
        Every element is ``None`` when ``url`` is falsy.
    """
    if not url:
        # Previously this path reached the final ``return`` with the
        # ``*_html`` names never assigned, raising UnboundLocalError as soon
        # as the form was submitted empty.
        return None, None, None, None, None

    # NOTE(review): the two helpers below take (url, api_key) and
    # (api_key, url) respectively -- kept as in the original; confirm the
    # signatures in app_clustering.
    video_details = clustering.get_youtube_video_details(url, api_key)
    comments_df = clustering.get_youtube_comments(api_key, url)
    comments_df = clustering.add_normalized_embeddings_to_dataframe(comments_df, "comment")
    # Keep only the calendar date of each comment's publication timestamp.
    comments_df["published_at"] = pd.to_datetime(comments_df["published_at"]).dt.date
    comments_df = clustering.classify_sentiment_df(comments_df)

    # Sentiment count: number of comments per value of the "sentimiento" column.
    sentiment_count = comments_df["sentimiento"].value_counts().to_dict()

    # Plot daily sentiment
    sentiment_daily_graph = clustering.plot_sentiment_daily(comments_df)
    sentiment_daily_graph_html = convert_graph_to_html(sentiment_daily_graph)

    umap_df, min_eps, max_eps = clustering.transform_embeddings(
        comments_df, embeddings_col="embeddings"
    )
    total = comments_df.shape[0]
    min_items_by_cluster = clustering.determine_min_items_by_cluster(total)

    (
        cluster_assignments,
        cluster_counts,
        calinski_harabasz_scores,
        silhouette_scores,
        most_similar_comments,
        umap_df,
    ) = clustering.perform_clustering(
        umap_df, min_eps, max_eps, n=10, embeddings_col="embeddings"
    )

    # Build Sankey data and plot
    labels, source, target, values, comments = clustering.build_sankey_data(
        cluster_assignments,
        cluster_counts,
        most_similar_comments,
        min_items_by_cluster=min_items_by_cluster,
    )
    sankey_graph = clustering.plot_sankey(
        labels, source, target, values, comments, height=1000, width=1200
    )
    sankey_graph_html = convert_graph_to_html(sankey_graph)

    # Plot clustering metrics
    scores_graph, _ = clustering.plot_clustering_metric(
        silhouette_scores, calinski_harabasz_scores
    )
    scores_graph_html = convert_graph_to_html(scores_graph)

    return (
        video_details,
        sentiment_daily_graph_html,
        sentiment_count,
        sankey_graph_html,
        scores_graph_html,
    )
61
+
62
+
63
# Gradio Interface
# Components come from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` / ``gr.outputs`` modules were deprecated in Gradio 3.x and
# are no longer available in Gradio 4.x.
# The output components are listed in the same order as the tuple returned
# by ``process_video``.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Textbox(label="YouTube Video URL", placeholder="Ingresa la URL del video..."),
    outputs=[
        gr.JSON(label="Video Details"),
        gr.HTML(label="Sentiment Daily Graph"),
        gr.JSON(label="Sentiment Count"),
        gr.HTML(label="Sankey Graph"),
        gr.HTML(label="Clustering Scores Graph"),
    ],
    title="YouTube Video Sentiment Analysis",
    description="Ingresa la URL de un video de YouTube para analizar los comentarios y visualizar los resultados.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()