| import streamlit as st |
| import pandas as pd |
| import re |
| import plotly.express as px |
|
|
| |
| from src.predict import predict_sentiments |
| from src.youtube import get_video_comments |
|
|
|
|
def extract_video_id(url_or_id: str):
    """Return the 11-character YouTube video ID contained in *url_or_id*.

    Recognised inputs:

    * watch URLs, with ``v`` anywhere in the query string
      (``...watch?v=ID`` or ``...watch?feature=share&v=ID``)
    * short links (``youtu.be/ID``)
    * ``/embed/ID``, ``/shorts/ID`` and ``/live/ID`` URLs
    * a bare video ID

    Args:
        url_or_id: Text typed or pasted by the user. Leading and trailing
            whitespace is ignored.

    Returns:
        The video ID as a string, or ``None`` when the input is empty, is not
        a string, or does not contain anything shaped like a video ID.
    """
    if not url_or_id or not isinstance(url_or_id, str):
        return None

    # Pasted values frequently carry a trailing space or newline.
    candidate = url_or_id.strip()

    # A video ID is exactly 11 characters from this alphabet.
    video_id = r"([a-zA-Z0-9_-]{11})"
    url_patterns = (
        r"[?&]v=" + video_id,      # watch?v=ID, watch?feature=share&v=ID
        r"youtu\.be/" + video_id,
        r"embed/" + video_id,
        r"shorts/" + video_id,
        r"live/" + video_id,
    )
    for pattern in url_patterns:
        match = re.search(pattern, candidate)
        if match:
            return match.group(1)

    # Bare ID: validate the whole string against the ID alphabet instead of
    # only excluding a few URL punctuation characters, so that arbitrary
    # 11-character text (e.g. "hello world") is not mistaken for an ID.
    if re.fullmatch(video_id, candidate):
        return candidate

    return None
|
|
|
|
def analyze_youtube_video(video_url_or_id: str):
    """Fetch a video's comments, predict their sentiment and summarise the result.

    Progress, warnings and errors are reported to the user through Streamlit
    (``st.spinner``, ``st.info``, ``st.warning``, ``st.error``).

    Args:
        video_url_or_id: A YouTube URL or bare video ID as entered by the user.

    Returns:
        ``None`` when the input is not recognised by ``extract_video_id`` or
        when any step raises. Otherwise a dict with two keys:

        * ``"summary"``: the video title, the number of comments fetched and
          analysed, the number of valid predictions, and the count and
          percentage of positive, neutral and negative predictions.
          Percentages are relative to the valid predictions and are 0 when
          there are none.
        * ``"comments_data"``: one dict per analysed comment with the keys
          ``"Comment Text"``, ``"Predicted Sentiment"`` and ``"Confidence"``
          (the highest score, or 0.0 when no scores are present). Empty when
          the video has no comments.
    """
    video_id = extract_video_id(video_url_or_id)
    if not video_id:
        # Both examples in the message are inputs extract_video_id accepts.
        st.error(
            "Oops! That doesn't look like a valid YouTube URL or Video ID. "
            "Please check and try again. "
            "Example: Z9kGRMglw-I or youtu.be/Z9kGRMglw-I"
        )
        return None

    def percentage(count, total):
        # Guard against division by zero when nothing could be classified.
        return (count / total) * 100 if total > 0 else 0

    try:
        with st.spinner(f"Fetching comments & title for video ID: {video_id}..."):
            video_data = get_video_comments(video_id)
            comments_text_list = video_data.get("comments", [])
            video_title = video_data.get("title", "Video Title Not Found")
            print(
                f"DEBUG (streamlit_app.py): Received title from youtube.py: '{video_title}'"
            )

        if not comments_text_list:
            st.warning(
                "Hmm, no comments found for this video. Are comments enabled? Or is it a very new video?"
            )
            # Keep the title so the caller can still show which video was
            # looked up, matching the shape of the non-empty summary.
            return {
                "summary": {
                    "video_title": video_title,
                    "num_comments_fetched": 0,
                    "num_comments_analyzed": 0,
                    "positive": 0,
                    "neutral": 0,
                    "negative": 0,
                    "positive_percentage": 0,
                    "neutral_percentage": 0,
                    "negative_percentage": 0,
                    "num_valid_predictions": 0,
                },
                "comments_data": [],
            }

        st.info(
            f"Great! Found {len(comments_text_list)} comments. Now thinking about their feelings (sentiments)..."
        )
        with st.spinner("Analyzing sentiments with the model... Please wait."):
            prediction_results = predict_sentiments(comments_text_list)

        # Tally the three known labels; anything else counts as a failure.
        counts = {"positive": 0, "negative": 0, "neutral": 0}
        error_count = 0
        for result in prediction_results:
            label = result.get("label")
            if label in counts:
                counts[label] += 1
            else:
                error_count += 1

        num_valid_predictions = sum(counts.values())
        if error_count > 0:
            st.warning(
                f"Could not predict sentiment properly for {error_count} comments."
            )

        summary_data = {
            "video_title": video_title,
            "num_comments_fetched": len(comments_text_list),
            "num_comments_analyzed": len(prediction_results),
            "num_valid_predictions": num_valid_predictions,
            "positive": counts["positive"],
            "negative": counts["negative"],
            "neutral": counts["neutral"],
            "positive_percentage": percentage(
                counts["positive"], num_valid_predictions
            ),
            "neutral_percentage": percentage(
                counts["neutral"], num_valid_predictions
            ),
            "negative_percentage": percentage(
                counts["negative"], num_valid_predictions
            ),
        }

        # zip() pairs each comment with its prediction and stops at the
        # shorter sequence, so a predictor that returns fewer results than
        # comments yields a partial table instead of an IndexError. The
        # fetched/analysed counts in the summary expose any such gap.
        comments_data_for_df = []
        for comment_text, result in zip(comments_text_list, prediction_results):
            scores = result.get("scores", {})
            comments_data_for_df.append(
                {
                    "Comment Text": comment_text,
                    "Predicted Sentiment": result.get("label", "Error"),
                    "Confidence": max(scores.values()) if scores else 0.0,
                }
            )

        return {"summary": summary_data, "comments_data": comments_data_for_df}

    except Exception as e:
        # UI boundary: show a short message to the user and send the full
        # traceback to the server console.
        st.error(f"Uh oh! An error popped up during analysis: {str(e)}")
        print(f"Full error in analyze_youtube_video: {e}")
        import traceback

        traceback.print_exc()
        return None
|
|
|
|
| |
|
|
| |
# --- Page setup and introduction --------------------------------------------
# Emoji are written as \N{...} escapes so this source stays pure ASCII and the
# glyphs cannot be corrupted by an encoding round-trip.
st.set_page_config(page_title="Social Sentiment Analysis", layout="centered")

# Light-grey page background, injected as raw CSS.
st.markdown(
    """
<style>
.stApp {
    background-color: #d6d6d6;
}
</style>
""",
    unsafe_allow_html=True,
)

st.title("\N{BAR CHART} SOCIAL SENTIMENT ANALYSIS")

st.write(
    """
Welcome to the **Social Sentiment Analyzer!** \N{WAVING HAND SIGN}

This application uses a fine-tuned RoBERTa model to predict the sentiment (Positive, Neutral, or Negative) expressed in text.

Use the tabs below to choose your input method:
* **Analyze Text Input:** Paste or type any English text directly.
* **YouTube Analysis:** Enter a YouTube video URL or ID to analyze its comments.
* **Twitter/X Analysis:** Support for analyzing Twitter/X posts is coming soon!

Select a tab to begin!
"""
)

# One tab per input method; each tab's content is rendered further down.
tab_text_input, tab_youtube, tab_twitter = st.tabs(
    ["Analyze Text Input", "YouTube Analysis", "Twitter/X Analysis (Coming Soon!)"]
)
|
|
# --- Tab 1: free-text sentiment analysis -------------------------------------
# Emoji are written as \N{...} escapes so this source stays pure ASCII and the
# glyphs cannot be corrupted by an encoding round-trip.
_TEXT_TAB_EMOJI = {
    "positive": "\N{SMILING FACE WITH SMILING EYES}",
    "neutral": "\N{NEUTRAL FACE}",
    "negative": "\N{ANGRY FACE}",
}


def _show_text_probability_columns(scores):
    """Render one column per sentiment showing its probability as a percentage."""
    columns = st.columns(3)
    palette = (("negative", "red"), ("neutral", "grey"), ("positive", "green"))
    for column, (sentiment, colour) in zip(columns, palette):
        # A sentiment missing from the scores is displayed as 0.0%.
        probability = scores.get(sentiment, 0.0)
        with column:
            st.markdown(
                f"**{sentiment.capitalize()} {_TEXT_TAB_EMOJI[sentiment]}:**"
            )
            st.markdown(
                f"<p style='font-size: 22px; font-weight: bold; color:{colour};'>{probability:.1%}</p>",
                unsafe_allow_html=True,
            )


def _show_text_probability_pie(scores):
    """Render the per-class probabilities as a pie chart."""
    df_scores = pd.DataFrame(
        list(scores.items()), columns=["Sentiment", "Probability"]
    )
    df_scores["Probability"] = pd.to_numeric(df_scores["Probability"])
    df_scores["Sentiment"] = df_scores["Sentiment"].str.capitalize()

    # Colours are keyed by the displayed (capitalised) label, as in the
    # YouTube tab's charts.
    fig_pie_text = px.pie(
        df_scores,
        values="Probability",
        names="Sentiment",
        title="Probability Distribution per Class",
        color="Sentiment",
        color_discrete_map={
            "Positive": "green",
            "Neutral": "grey",
            "Negative": "red",
        },
    )
    fig_pie_text.update_traces(
        textposition="inside",
        textinfo="percent+label",
        hovertemplate="Sentiment: %{label}<br>Probability: %{percent}",
    )
    fig_pie_text.update_layout(uniformtext_minsize=16, uniformtext_mode="hide")
    st.plotly_chart(fig_pie_text, use_container_width=True)


def _show_text_result(result):
    """Render the prediction for a single text, or an error if it is unusable.

    A result is usable when it has a label other than "Error" and a non-empty
    dict of scores.
    """
    predicted_label = result.get("label")
    scores = result.get("scores")
    usable = (
        predicted_label
        and predicted_label != "Error"
        and isinstance(scores, dict)
        and scores
    )
    if not usable:
        st.error(f"Sentiment analysis failed for the input text. Result: {result}")
        return

    st.subheader("Predicted Sentiment:")
    headline = (
        f"The model thinks the sentiment is: **{predicted_label.capitalize()}** "
    )
    if predicted_label == "positive":
        st.success(headline + _TEXT_TAB_EMOJI["positive"])
    elif predicted_label == "negative":
        st.error(headline + _TEXT_TAB_EMOJI["negative"])
    else:
        st.info(headline + _TEXT_TAB_EMOJI["neutral"])

    st.write("---")
    st.subheader("Detailed Probabilities:")
    _show_text_probability_columns(scores)

    st.write("---")
    st.subheader("Sentiment Probabilities:")
    _show_text_probability_pie(scores)


with tab_text_input:
    st.header("Analyze Sentiment of Your Text")
    st.write(
        "Enter a sentence or a short paragraph below to see its predicted sentiment distribution."
    )

    user_text = st.text_area(
        "Enter text here:",
        key="text_input_area_key",
        height=100,
        placeholder="Type or paste your text...",
    )

    if st.button("Analyze Text", key="text_input_analyze_btn"):
        if not user_text or user_text.isspace():
            st.warning("Please enter some text in the text area first!")
        else:
            with st.spinner("Analyzing your text..."):
                try:
                    # The predictor takes a list of texts; only the first
                    # (and only) result is displayed.
                    prediction_results = predict_sentiments([user_text])
                    if isinstance(prediction_results, list) and prediction_results:
                        _show_text_result(prediction_results[0])
                    else:
                        st.error(
                            "Received no valid result from the prediction function."
                        )
                except Exception as analysis_e:
                    # UI boundary: short message for the user, full traceback
                    # on the server console.
                    st.error(
                        f"An error occurred during text analysis: {str(analysis_e)}"
                    )
                    print(f"Full error during text input analysis: {analysis_e}")
                    import traceback

                    traceback.print_exc()
|
|
# --- Tab 2: YouTube comment analysis -----------------------------------------
# Emoji are written as \N{...} escapes so this source stays pure ASCII and the
# glyphs cannot be corrupted by an encoding round-trip.
_YT_EMOJI = {
    "positive": "\N{SMILING FACE WITH SMILING EYES}",
    "neutral": "\N{NEUTRAL FACE}",
    "negative": "\N{ANGRY FACE}",
    "summary": "\N{BAR CHART}",
    "charts": "\N{CHART WITH UPWARDS TREND}",
    "comments": "\N{SPEECH BALLOON}",
}


def _show_summary_metric(column, label, value, colour=None):
    """Render a caption and a large bold value inside *column*.

    When *colour* is given it is applied to the value as a CSS colour.
    """
    colour_css = f" color:{colour};" if colour else ""
    with column:
        st.markdown(
            f"<p style='font-size: 24px; margin-bottom: 0px;'>{label}</p>",
            unsafe_allow_html=True,
        )
        st.markdown(
            f"<p style='font-size: 28px; font-weight: bold;{colour_css} margin-top: 0px;'>{value}</p>",
            unsafe_allow_html=True,
        )


def _show_sentiment_charts(summary):
    """Render pie and bar charts of the sentiment counts, if there are any."""
    if not summary.get("num_valid_predictions", 0) > 0:
        st.write("Not enough valid sentiment data to display distribution charts.")
        return

    sentiment_counts_df = pd.DataFrame(
        [
            {"Sentiment": "Positive", "Count": summary.get("positive", 0)},
            {"Sentiment": "Neutral", "Count": summary.get("neutral", 0)},
            {"Sentiment": "Negative", "Count": summary.get("negative", 0)},
        ]
    )
    # Drop empty categories so they do not appear as zero-width slices/bars.
    plot_df = sentiment_counts_df[sentiment_counts_df["Count"] > 0].copy()
    if plot_df.empty:
        st.write(
            "No sentiment data (Positive, Neutral, Negative all zero) to display in charts."
        )
        return

    color_map = {"Positive": "green", "Neutral": "grey", "Negative": "red"}
    st.subheader(f"{_YT_EMOJI['charts']} Sentiment Distribution Charts")

    fig_pie = px.pie(
        plot_df,
        values="Count",
        names="Sentiment",
        title="Pie Chart: Comment Sentiments",
        color="Sentiment",
        color_discrete_map=color_map,
    )
    fig_pie.update_traces(
        textposition="inside",
        textinfo="percent+label",
        hovertemplate="Sentiment: %{label}<br>Count: %{value}<br>Percentage: %{percent}",
    )
    fig_pie.update_layout(uniformtext_minsize=16, uniformtext_mode="hide")
    st.plotly_chart(fig_pie, use_container_width=True)

    fig_bar = px.bar(
        plot_df,
        x="Sentiment",
        y="Count",
        title="Bar Chart: Comment Sentiments",
        color="Sentiment",
        color_discrete_map=color_map,
        labels={
            "Count": "Number of Comments",
            "Sentiment": "Sentiment Category",
        },
    )
    st.plotly_chart(fig_bar, use_container_width=True)


def _show_comments_table(comments_data):
    """Render the per-comment predictions as a table."""
    if not comments_data:
        st.write("No comments were analyzed to display.")
        return

    st.subheader(
        f"{_YT_EMOJI['comments']} Analyzed Comments (showing first {len(comments_data)} results)"
    )
    comments_display_df = pd.DataFrame(comments_data)
    if "Confidence" in comments_display_df.columns:
        try:
            # Show confidence as a percentage string, e.g. 0.873 -> "87.3%".
            comments_display_df["Confidence"] = comments_display_df[
                "Confidence"
            ].map("{:.1%}".format)
        except (TypeError, ValueError):
            st.warning("Could not format confidence scores.")
    st.dataframe(comments_display_df, use_container_width=True, height=400)


def _show_youtube_results(summary, comments_data):
    """Render title, headline metrics, charts and comment table for one video."""
    video_title_display = summary.get("video_title", "Video Title Not Available")

    st.markdown("---")
    st.markdown(f"### Analyzing Video: **{video_title_display}**")
    st.markdown("---")

    st.subheader(f"{_YT_EMOJI['summary']} Sentiment Summary")

    # (caption, summary key, value colour) for each of the five columns.
    metrics = (
        ("Comments Fetched", "num_comments_fetched", None),
        ("Comments Analyzed", "num_comments_analyzed", None),
        (f"Positive {_YT_EMOJI['positive']}", "positive", "green"),
        (f"Neutral {_YT_EMOJI['neutral']}", "neutral", "grey"),
        (f"Negative {_YT_EMOJI['negative']}", "negative", "red"),
    )
    for column, (caption, key, colour) in zip(st.columns(len(metrics)), metrics):
        _show_summary_metric(column, caption, summary.get(key, 0), colour)

    st.markdown("---")
    _show_sentiment_charts(summary)
    _show_comments_table(comments_data)


with tab_youtube:
    st.header("YouTube Comment Sentiment Analyzer")

    video_url_input = st.text_input(
        "Enter YouTube Video URL or Video ID:",
        key="youtube_url_input_key",
        placeholder="e.g., Z9kGRMglw-I or full URL",
    )

    if st.button("Analyze YouTube Comments", key="youtube_analyze_button_key"):
        if not video_url_input:
            st.warning("Please enter a YouTube URL or Video ID first!")
        else:
            # analyze_youtube_video reports its own errors and returns None
            # on failure, so nothing more is rendered in that case.
            analysis_results = analyze_youtube_video(video_url_input)
            if analysis_results and analysis_results["summary"]:
                _show_youtube_results(
                    analysis_results["summary"],
                    analysis_results["comments_data"],
                )
|
|
# --- Tab 3: Twitter/X (placeholder) ------------------------------------------
# Only a heading and a notice are rendered in this tab.
with tab_twitter:
    twitter_heading = "Twitter/X Post Analysis"
    twitter_notice = (
        "This feature is currently under construction. Please check back later!"
    )
    st.header(twitter_heading)
    st.info(twitter_notice)
| |
| |
| |
| |
|
|