Spaces:
Runtime error
Runtime error
File size: 5,924 Bytes
03f7867 38abd5a 03f7867 cf33f7d d76fb4c 03f7867 cf33f7d 03f7867 cf33f7d 03f7867 cf33f7d 03f7867 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import os
import re
import streamlit as st
import googleapiclient.discovery
import pandas as pd
from transformers import pipeline
import matplotlib.pyplot as plt
import seaborn as sns
# Page title of the Streamlit app (the Russian text reads "YouTube comment analyzer").
st.title('Анализатор комментариев :red[YouTube] :sunglasses:')
# Set up the Hugging Face model used for text sentiment analysis.
# The resource is cached so the model is loaded once for all sessions.
@st.cache_resource
def load_model():
    """
    Build the sentiment-analysis pipeline used by the app.
    The pipeline wraps the 'blanchefort/rubert-base-cased-sentiment'
    checkpoint from Hugging Face; the ``st.cache_resource`` decorator keeps
    the result cached for consecutive loads.
    Returns: the object returned by ``transformers.pipeline``.
    """
    task = "sentiment-analysis"
    checkpoint = "blanchefort/rubert-base-cased-sentiment"
    return pipeline(task, checkpoint)
# Patterns tried in order; each one captures the video ID in group 1.
_VIDEO_ID_PATTERNS = (
    # "v" query parameter: "...watch?v=<id>", "...&v=<id>" or a bare "v=<id>".
    # Anchoring on start/"?"/"&" keeps parameters such as "rev=" from matching.
    re.compile(r"(?:^|[?&])v=([\w-]+)"),
    # ID carried in the path: youtu.be/<id>, youtube.com/shorts/<id>, etc.
    re.compile(
        r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:shorts|embed|live|v)/)"
        r"([\w-]+)"
    ),
)


def extract_video_id(url: str) -> str:
    """
    Extracts the video ID from a YouTube video URL.
    Recognised forms are the ``v=`` query parameter of a watch URL and the
    path-based forms ``youtu.be/<id>``, ``/shorts/<id>``, ``/embed/<id>``,
    ``/live/<id>`` and ``/v/<id>`` on youtube.com / youtube-nocookie.com.
    The whole run of ID characters (letters, digits, ``_``, ``-``) is
    returned, so an ID that ends with ``-`` is not cut short.
    Args: url (str): The YouTube video URL.
    Returns: str: The extracted video ID,
    or an empty string if the URL is empty or no ID is found.
    """
    if not url:
        return ""
    # Surrounding whitespace from a pasted URL must not defeat the "^" anchor.
    candidate = url.strip()
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(candidate)
        if match:
            return match.group(1)
    return ""
def download_comments(video_id: str, max_comments: int = 100) -> pd.DataFrame:
    """
    Downloads top-level comments of a YouTube video based on the provided
    video ID and returns them as a DataFrame.
    The API key is read from the ``API_KEY_YOUTUBE`` environment variable.
    Result pages are requested one after another until ``max_comments`` rows
    are collected or the API reports no further page.
    Args: video_id (str): The video ID of the YouTube video.
    max_comments (int): Upper bound on the number of comments returned
    (default 100, i.e. a single request of up to 100 comments).
    Returns: DataFrame: A DataFrame with the columns 'author', 'published_at',
    'updated_at', 'like_count' and 'text', one row per comment.
    Raises: RuntimeError: If ``API_KEY_YOUTUBE`` is not set.
    """
    api_key = os.getenv('API_KEY_YOUTUBE')
    if not api_key:
        # Fail early with an actionable message instead of sending a keyless
        # request to the API.
        raise RuntimeError(
            "Environment variable API_KEY_YOUTUBE is not set; "
            "it is required to query the YouTube Data API."
        )
    youtube = googleapiclient.discovery.build("youtube",
                                              "v3",
                                              developerKey=api_key)
    threads = youtube.commentThreads()
    # A page holds at most 100 items, so clamp the page size to 1..100.
    page_size = max(1, min(max_comments, 100))
    request = threads.list(part="snippet",
                           videoId=video_id,
                           maxResults=page_size)
    rows = []
    while request is not None and len(rows) < max_comments:
        response = request.execute()
        for item in response.get('items', []):
            snippet = item['snippet']['topLevelComment']['snippet']
            rows.append([snippet['authorDisplayName'],
                         snippet['publishedAt'],
                         snippet['updatedAt'],
                         snippet['likeCount'],
                         snippet['textDisplay']])
        # list_next builds the request for the following page and yields
        # None once there is no further page.
        request = threads.list_next(request, response)
    # The last page may overshoot the requested bound; trim it.
    return pd.DataFrame(rows[:max(max_comments, 0)],
                        columns=['author',
                                 'published_at',
                                 'updated_at',
                                 'like_count',
                                 'text'])
def analyze_emotions_in_comments(df: pd.DataFrame) -> tuple:
    """
    Takes a DataFrame with comments and runs the sentiment model returned by
    ``load_model()`` over the 'text' column.
    Only the 'text', 'author' and 'published_at' columns are kept; the model
    output is placed in front of them, matched row by row.
    Args: df (pandas.DataFrame): DataFrame containing comments to analyze;
    must have the columns 'text', 'author' and 'published_at'.
    Returns: tuple: (DataFrame with the model output columns followed by the
    comment columns, number of comments the model processed).
    """
    model = load_model()
    # reset_index yields a new frame with a 0..n-1 index, so the model output
    # (also indexed 0..k-1) lines up row by row in the concat below, whatever
    # index the caller's frame had.
    comments = df[['text', 'author', 'published_at']].reset_index(drop=True)
    # NOTE(review): the 513-row cap is carried over unchanged from the
    # original code - confirm whether a row limit was really intended.
    texts = comments['text'].iloc[:513].to_list()
    if texts:
        # truncation=True asks the tokenizer to clip over-long comments to the
        # model's maximum input length rather than passing them on in full.
        predictions = model(texts, truncation=True)
        predictions_df = pd.DataFrame(predictions)
    else:
        # Nothing to analyze: skip the model call but keep the output columns
        # so code that reads 'label' still finds the column.
        predictions = []
        predictions_df = pd.DataFrame(columns=['label', 'score'])
    full_df = pd.concat([predictions_df, comments], axis=1)
    return (full_df, len(predictions))
def plot_heatmap_from_dataframe(df: pd.DataFrame) -> plt:
    """
    Draws a heatmap of the number of comments per hour of day and date.
    The input DataFrame is left unmodified; the date and hour columns are
    derived on a temporary copy.
    Args: df (DataFrame): The input DataFrame; must have the columns
    'published_at' (parseable by ``pd.to_datetime``) and 'text'.
    Returns: plt: The ``matplotlib.pyplot`` module, whose current figure
    holds the heatmap.
    """
    published = pd.to_datetime(df['published_at'])
    # assign() returns a new frame, so the caller's DataFrame keeps its
    # original columns and dtypes.
    binned = df.assign(Date=published.dt.date, Hour=published.dt.hour)
    pivot_table = binned.pivot_table(index='Hour',
                                     columns='Date',
                                     values='text',
                                     aggfunc='count')
    plt.figure(figsize=(10, 6))
    # With no comments there is nothing to draw; keep the titled empty figure.
    if not pivot_table.empty:
        sns.heatmap(pivot_table,
                    cmap='YlGnBu')
    plt.title('Количество комментариев по часам и датам')
    plt.xlabel('Дата')
    plt.ylabel('Час')
    return plt
def visualize_data(df: pd.DataFrame):
    """
    Draws a pie chart of how often each value occurs in the 'label' column.
    Args: df (DataFrame): The input DataFrame; must have a 'label' column.
    Returns: fig: A matplotlib figure object containing the pie chart.
    """
    counts = df['label'].value_counts()
    fig, ax = plt.subplots()
    ax.set_title("Эмоциональная окраска комментариев на YouTube")
    # Take the wedge labels from the counts' own index so that every label is
    # paired with its own count; value_counts() orders by frequency, which
    # need not match the order in which labels first appear in the column.
    ax.pie(counts, labels=counts.index, autopct='%1.1f%%')
    return fig
def change_url():
    """
    Resets the ``start`` flag in the Streamlit session state to False.
    Passed as the ``on_change`` callback of the URL text input.
    """
    st.session_state["start"] = False
# Make sure the flag exists before it is read further down.
if "start" not in st.session_state:
    st.session_state.start = False

# Get the video id from the URL; it is needed to send the request.
url = st.text_input(label="Enter URL from YouTube", on_change=change_url)
video_id = extract_video_id(url)

if video_id != "":
    # The button is offered only once a video id could be extracted.
    if st.button('Загрузить комментарии'):
        st.session_state.start = True

    if st.session_state.start:
        comments_df = download_comments(video_id)
        if comments_df.empty:
            # Nothing to analyze or plot; tell the user instead of running
            # the model and the charts on an empty table.
            st.warning('Комментарии не найдены.')
        else:
            with st.spinner('Analyzing comments...'):
                full_df, num_comments = analyze_emotions_in_comments(comments_df)
            st.success(f'Готово! Обработано {num_comments} комментариев.')
            # Show the table with the results on the page.
            st.write(full_df)
            st.markdown('***')
            # Show the heatmap of comments by hour and date.
            st.pyplot(plot_heatmap_from_dataframe(full_df))
            st.markdown('***')
            # Show the pie chart.
            st.pyplot(visualize_data(full_df))
|