boompack committed on
Commit
282dd48
·
verified ·
1 Parent(s): e5c8ff6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +271 -228
app.py CHANGED
@@ -3,250 +3,293 @@ import re
3
  from collections import Counter
4
  from datetime import datetime
5
  import emoji
6
- from transformers import pipeline
7
  import logging
8
  from typing import Tuple, List, Optional
 
9
 
10
- # Set up logging
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
13
 
14
- class CommentAnalyzer:
15
- def __init__(self):
16
- """Initialize the analyzer with sentiment model and compile regex patterns"""
17
- try:
18
- self.sentiment_model = pipeline("sentiment-analysis")
19
- except Exception as e:
20
- logger.error(f"Failed to load sentiment model: {e}")
21
- raise
22
 
23
- # Compile regex patterns for better performance
24
- self.mention_pattern = re.compile(r'@[\w\.]+')
25
- self.comment_pattern = re.compile(
26
- r'Фото профиля\s+(.+?)\s+' # Username
27
- r'((?:(?!Фото профиля).)+?)\s+' # Comment text
28
- r'(\d+)?\s*(?:нравится|like[s]?)?\s*' # Likes count
29
- r'(\d+)\s*(?:н|w)' # Week number
30
- , re.DOTALL
31
- )
32
-
33
- def clean_text(self, text: str) -> str:
34
- """Clean text by removing extra whitespace and normalizing line breaks"""
35
- return ' '.join(text.split())
36
 
37
- def count_emojis(self, text: str) -> int:
38
- """Count the number of emoji characters in text"""
39
- return len([c for c in text if c in emoji.EMOJI_DATA])
40
 
41
- def extract_mentions(self, text: str) -> List[str]:
42
- """Extract @mentions from text"""
43
- return self.mention_pattern.findall(text)
 
 
44
 
45
- def analyze_sentiment(self, text: str) -> str:
46
- """Analyze text sentiment using the loaded model"""
47
- try:
48
- result = self.sentiment_model(text[:512]) # Limit text length for model
49
- sentiment = result[0]['label']
50
- if sentiment == 'POSITIVE':
51
- return 'positive'
52
- elif sentiment == 'NEGATIVE':
53
- return 'negative'
54
- return 'neutral'
55
- except Exception as e:
56
- logger.warning(f"Sentiment analysis failed: {e}")
57
- return 'neutral'
 
 
 
 
 
 
 
 
 
 
58
 
59
- def extract_comment_data(self, comment_block: str) -> Tuple[Optional[str], Optional[str], int, int]:
60
- """
61
- Extract structured data from a comment block
62
- Returns: (username, comment_text, likes_count, week_number)
63
- """
64
- match = self.comment_pattern.search(comment_block)
65
- if not match:
 
 
66
  return None, None, 0, 0
67
-
68
- username, comment, likes, week = match.groups()
69
- return (
70
- username.strip(),
71
- self.clean_text(comment),
72
- int(likes or 0),
73
- int(week or 0)
74
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- def analyze_post(self,
77
- content_type: str,
78
- link_to_post: str,
79
- post_likes: int,
80
- post_date: str,
81
- description: str,
82
- comment_count: int,
83
- all_comments: str) -> Tuple[str, str, str, str, str]:
84
- """
85
- Analyze Instagram post comments and generate comprehensive analytics
86
-
87
- Args:
88
- content_type: Type of content ("Photo" or "Video")
89
- link_to_post: URL of the post
90
- post_likes: Number of likes on the post
91
- post_date: Date of post publication
92
- description: Post description/caption
93
- comment_count: Total number of comments
94
- all_comments: Raw text containing all comments
95
-
96
- Returns:
97
- Tuple containing:
98
- - Analytics summary
99
- - List of usernames
100
- - List of comments
101
- - Chronological list of likes
102
- - Total likes count
103
- """
104
- try:
105
- # Split comments into blocks
106
- comments_blocks = [block for block in re.split(r'(?=Фото профиля)', all_comments) if block.strip()]
107
-
108
- # Initialize data containers
109
- data = {
110
- 'usernames': [],
111
- 'comments': [],
112
- 'likes': [],
113
- 'weeks': [],
114
- 'emojis': 0,
115
- 'mentions': [],
116
- 'sentiments': [],
117
- 'lengths': []
118
- }
119
-
120
- # Process each comment block
121
- for block in comments_blocks:
122
- username, comment, like_count, week = self.extract_comment_data(block)
123
- if username and comment:
124
- data['usernames'].append(username)
125
- data['comments'].append(comment)
126
- data['likes'].append(like_count)
127
- data['weeks'].append(week)
128
-
129
- # Collect metrics
130
- data['emojis'] += self.count_emojis(comment)
131
- data['mentions'].extend(self.extract_mentions(comment))
132
- data['sentiments'].append(self.analyze_sentiment(comment))
133
- data['lengths'].append(len(comment))
134
-
135
- # Calculate analytics
136
- total_comments = len(data['comments'])
137
- if total_comments == 0:
138
- raise ValueError("No valid comments found in input")
139
 
140
- analytics = {
141
- 'avg_length': sum(data['lengths']) / total_comments,
142
- 'sentiment_dist': Counter(data['sentiments']),
143
- 'active_users': Counter(data['usernames']).most_common(5),
144
- 'top_mentions': Counter(data['mentions']).most_common(5),
145
- 'avg_likes': sum(data['likes']) / total_comments,
146
- 'weeks_range': (min(data['weeks']), max(data['weeks'])),
147
- 'total_likes': sum(data['likes'])
148
- }
149
-
150
- # Generate summary
151
- summary = self._format_analytics_summary(
152
- content_type, link_to_post, data, analytics, total_comments
153
- )
154
-
155
- return (
156
- summary,
157
- '\n'.join(data['usernames']),
158
- '\n'.join(data['comments']),
159
- '\n'.join(map(str, data['likes'])),
160
- str(analytics['total_likes'])
161
- )
 
 
 
 
 
 
 
 
 
 
162
 
163
- except Exception as e:
164
- logger.error(f"Error analyzing post: {e}", exc_info=True)
165
- return (f"Error during analysis: {str(e)}", "", "", "", "0")
166
-
167
- def _format_analytics_summary(self, content_type, link, data, analytics, total_comments):
168
- """Format analytics data into a readable summary"""
169
- return f"""
170
- Content Type: {content_type}
171
- Link to Post: {link}
172
-
173
- ОСНОВНАЯ СТАТИСТИКА:
174
- - Всего комментариев: {total_comments}
175
- - Всего лайков на комментариях: {analytics['total_likes']}
176
- - Среднее количество лайков: {analytics['avg_likes']:.1f}
177
- - Период активности: {analytics['weeks_range'][0]}-{analytics['weeks_range'][1]} недель
178
-
179
- АНАЛИЗ КОНТЕНТА:
180
- - Средняя длина комментария: {analytics['avg_length']:.1f} символов
181
- - Всего эмодзи использовано: {data['emojis']}
182
- - Тональность комментариев:
183
- * Позитивных: {analytics['sentiment_dist']['positive']}
184
- * Нейтральных: {analytics['sentiment_dist']['neutral']}
185
- * Негативных: {analytics['sentiment_dist']['negative']}
186
-
187
- АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:
188
- Самые активные комментаторы:
189
- {chr(10).join(f"- {user}: {count} комментариев" for user, count in analytics['active_users'])}
190
-
191
- Самые упоминаемые пользователи:
192
- {chr(10).join(f"- {user}: {count} упоминаний" for user, count in analytics['top_mentions'] if user)}
193
-
194
- ВОВЛЕЧЕННОСТЬ:
195
- - Процент комментариев с лайками: {(sum(1 for l in data['likes'] if l > 0) / total_comments * 100):.1f}%
196
- - Процент комментариев с эмодзи: {(sum(1 for c in data['comments'] if self.count_emojis(c) > 0) / total_comments * 100):.1f}%
197
- """
198
-
199
- def create_interface():
200
- """Create and configure the Gradio interface"""
201
- analyzer = CommentAnalyzer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- iface = gr.Interface(
204
- fn=analyzer.analyze_post,
205
- inputs=[
206
- gr.Radio(
207
- choices=["Photo", "Video"],
208
- label="Content Type",
209
- value="Photo"
210
- ),
211
- gr.Textbox(
212
- label="Link to Post",
213
- placeholder="Введите ссылку на пост"
214
- ),
215
- gr.Number(
216
- label="Likes",
217
- value=0
218
- ),
219
- gr.Textbox(
220
- label="Post Date",
221
- placeholder="Введите дату публикации"
222
- ),
223
- gr.Textbox(
224
- label="Description",
225
- placeholder="Введите описание поста",
226
- lines=3
227
- ),
228
- gr.Number(
229
- label="Total Comment Count",
230
- value=0
231
- ),
232
- gr.Textbox(
233
- label="All Comments",
234
- placeholder="Вставьте комментарии",
235
- lines=10
236
- )
237
- ],
238
- outputs=[
239
- gr.Textbox(label="Analytics Summary", lines=20),
240
- gr.Textbox(label="Usernames (Output 1)", lines=5),
241
- gr.Textbox(label="Comments (Output 2)", lines=5),
242
- gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
243
- gr.Textbox(label="Total Likes on Comments (Output 4)")
244
- ],
245
- title="Instagram Comment Analyzer Pro",
246
- description="Расширенный анализатор комментариев Instagram с детальной аналитикой"
247
- )
248
- return iface
 
 
 
 
 
249
 
250
  if __name__ == "__main__":
251
- iface = create_interface()
252
  iface.launch()
 
3
  from collections import Counter
4
  from datetime import datetime
5
  import emoji
 
6
  import logging
7
  from typing import Tuple, List, Optional
8
+ import statistics
9
 
10
# Logging setup: INFO-level root configuration plus a module-level logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
13
 
14
def clean_text(text):
    """Collapse every run of whitespace in *text* into a single space.

    Splitting on whitespace also drops leading and trailing whitespace, so
    line breaks and padding disappear from the result.
    """
    tokens = text.split()
    return ' '.join(tokens)
 
 
 
 
 
17
 
18
def count_emojis(text):
    """Return how many characters of *text* are keys of ``emoji.EMOJI_DATA``.

    The text is inspected one character at a time, so each matching
    character contributes exactly one to the total.
    """
    return sum(1 for symbol in text if symbol in emoji.EMOJI_DATA)
 
 
 
 
 
 
 
 
 
 
21
 
22
def extract_mentions(text):
    """Return every ``@mention`` found in *text*, in order of appearance.

    A mention is an ``@`` followed by one or more word characters or dots;
    the leading ``@`` is kept in each returned item.
    """
    mention_pattern = re.compile(r'@[\w\.]+')
    return mention_pattern.findall(text)
25
 
26
def get_comment_words(text):
    """Return the lower-cased words of *text* that are longer than two characters.

    Words are maximal runs of word characters, so punctuation and other
    non-word symbols act as separators and never appear in the result.
    """
    lowered = text.lower()
    # Tokens of one or two characters are skipped as noise.
    return [token for token in re.findall(r'\w+', lowered) if len(token) > 2]
31
 
32
def analyze_sentiment(text):
    """Classify *text* as ``'positive'``, ``'negative'`` or ``'neutral'``.

    The verdict is based on how many distinct positive and negative
    indicators (emoji and Russian keywords) occur in the text; each
    indicator counts at most once. The side with more hits wins and a tie
    (including no hits at all) is ``'neutral'``.

    Emoji and longer keywords are matched as substrings of the lower-cased
    text, which also catches inflected forms. Keywords of three letters or
    fewer must match a whole word, otherwise they fire inside unrelated
    words (for example ``'фу'`` inside "футболка" or ``'топ'`` inside
    "стоп").

    Args:
        text: Comment text to classify.

    Returns:
        One of the strings ``'positive'``, ``'negative'``, ``'neutral'``.
    """
    positive_emojis = ('🔥', '❤️', '👍', '😊', '💪', '👏', '🎉', '♥️', '😍', '🙏')
    positive_words = ('круто', 'супер', 'класс', 'огонь', 'пушка', 'отлично', 'здорово',
                      'прекрасно', 'молодец', 'красота', 'спасибо', 'топ')
    negative_emojis = ('👎', '😢', '😞', '😠', '😡', '💔', '😕', '😑')
    negative_words = ('плохо', 'ужас', 'отстой', 'фу', 'жесть', 'ужасно',
                      'разочарован', 'печаль', 'грустно')

    text_lower = text.lower()
    # Whole words of the comment, used for the strict short-keyword check.
    tokens = set(re.findall(r'\w+', text_lower))

    def count_hits(emojis, words):
        """Count how many of the given indicators are present in the text."""
        hits = sum(1 for symbol in emojis if symbol in text_lower)
        for word in words:
            if len(word) <= 3:
                # Short keywords are too ambiguous for substring matching.
                present = word in tokens
            else:
                present = word in text_lower
            if present:
                hits += 1
        return hits

    positive_count = count_hits(positive_emojis, positive_words)
    negative_count = count_hits(negative_emojis, negative_words)

    if positive_count > negative_count:
        return 'positive'
    if negative_count > positive_count:
        return 'negative'
    return 'neutral'
55
 
56
def extract_comment_data(comment_text):
    """Parse one pasted comment block into its fields.

    The block is expected to contain the marker "Фото профиля " followed by
    the username on the same line, then the comment body, then an age marker
    such as "3 нед." and optionally a like counter.

    Args:
        comment_text: Raw text of a single comment block.

    Returns:
        A tuple ``(username, comment, likes, weeks)``. ``comment`` is the
        whitespace-normalised body with a leading repeat of the username and
        one leading ``@mention`` removed; it is ``""`` when no body could be
        located. ``likes`` and ``weeks`` default to ``0`` when absent.
        ``(None, None, 0, 0)`` is returned when the username marker is
        missing or when parsing fails unexpectedly.
    """
    try:
        username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
        if not username_match:
            return None, None, 0, 0

        username = username_match.group(1).strip()
        # The username comes straight from pasted text. Escape it so that
        # characters such as "." or "(" are matched literally instead of
        # being interpreted as regex syntax (which could mis-match or raise).
        username_re = re.escape(username)

        # Body: everything between the username line and the "N нед." marker.
        comment_match = re.search(
            fr"{username_re}\n(.*?)(?:\d+ нед\.)", comment_text, re.DOTALL
        )
        if comment_match:
            comment = clean_text(comment_match.group(1))
            # The pasted block repeats the username before the body; drop it.
            comment = re.sub(fr'^{username_re}\s*', '', comment)
            # Drop a leading reply mention such as "@someone ".
            comment = re.sub(r'^@[\w\.]+ ', '', comment)
        else:
            comment = ""

        # Age of the comment in weeks.
        week_match = re.search(r'(\d+) нед\.', comment_text)
        weeks = int(week_match.group(1)) if week_match else 0

        # Like counter: the first pattern that matches wins.
        likes = 0
        likes_patterns = [
            r"(\d+) отметк[аи] \"Нравится\"",
            r"Нравится: (\d+)",
        ]
        for pattern in likes_patterns:
            likes_match = re.search(pattern, comment_text)
            if likes_match:
                likes = int(likes_match.group(1))
                break

        return username, comment.strip(), likes, weeks
    except Exception as e:
        # Best effort: one malformed block must not abort the whole analysis.
        logger.error(f"Error extracting comment data: {e}")
        return None, None, 0, 0
100
 
101
def analyze_post(content_type, link_to_post, post_likes, post_date, description, comment_count, all_comments):
    """Analyse pasted post comments and build a text report.

    Args:
        content_type: Content type label, echoed in the report.
        link_to_post: Post URL, echoed in the report.
        post_likes: Accepted for the interface; not used in the analysis.
        post_date: Accepted for the interface; not used in the analysis.
        description: Accepted for the interface; not used in the analysis.
        comment_count: Accepted for the interface; not used in the analysis.
        all_comments: Raw text of all comments; it is split into blocks in
            front of every "Фото профиля" marker.

    Returns:
        A 5-tuple of strings: the analytics summary, usernames (one per
        line), comments (one per line), per-comment like counts (one per
        line) and the total number of likes. If anything fails, including
        the case where no valid comment is found, the first four items all
        carry the error message and the last one is ``"0"``.
    """
    try:
        raw_blocks = re.split(r'(?=Фото профиля)', all_comments)

        # Parallel per-comment series; index i always refers to the same comment.
        usernames, comments, like_counts, weeks = [], [], [], []
        emoji_counts, mention_lists, sentiments = [], [], []
        comment_lengths, words_per_comment, all_words = [], [], []
        user_engagement = {}  # per-user running statistics

        for block in raw_blocks:
            if not block.strip():
                continue
            username, comment, like_count, week_number = extract_comment_data(block)
            # Blocks without a username or without a comment body are ignored.
            if not (username and comment):
                continue

            emoji_total = count_emojis(comment)
            found_mentions = extract_mentions(comment)
            sentiment = analyze_sentiment(comment)
            words = get_comment_words(comment)
            length = len(comment)

            usernames.append(username)
            comments.append(comment)
            like_counts.append(like_count)
            weeks.append(week_number)
            emoji_counts.append(emoji_total)
            mention_lists.append(found_mentions)
            sentiments.append(sentiment)
            comment_lengths.append(length)
            words_per_comment.append(len(words))
            all_words.extend(words)

            user_stats = user_engagement.setdefault(username, {
                'comments': 0,
                'total_likes': 0,
                'emoji_usage': 0,
                'avg_length': 0,
                'sentiments': [],
            })
            user_stats['comments'] += 1
            user_stats['total_likes'] += like_count
            user_stats['emoji_usage'] += emoji_total
            user_stats['avg_length'] += length  # running sum, averaged below
            user_stats['sentiments'].append(sentiment)

        total_comments = len(comments)
        if total_comments == 0:
            raise ValueError("No valid comments found")

        # Turn the per-user running sums into averages and ratios.
        for stats in user_engagement.values():
            authored = stats['comments']
            stats['avg_length'] /= authored
            stats['engagement_rate'] = stats['total_likes'] / authored
            stats['sentiment_ratio'] = stats['sentiments'].count('positive') / len(stats['sentiments'])

        # Basic statistics
        total_likes_sum = sum(like_counts)
        total_emojis = sum(emoji_counts)
        mentions = [mention for found in mention_lists for mention in found]
        avg_comment_length = sum(comment_lengths) / total_comments
        sentiment_distribution = Counter(sentiments)
        most_active_users = Counter(usernames).most_common(5)
        most_mentioned = Counter(mentions).most_common(5)
        avg_likes = total_likes_sum / len(like_counts) if like_counts else 0
        # Weeks are "weeks ago", so the earliest comment has the largest value.
        earliest_week = max(weeks) if weeks else 0
        latest_week = min(weeks) if weeks else 0

        # Extended statistics
        median_comment_length = statistics.median(comment_lengths)
        avg_words_per_comment = sum(words_per_comment) / total_comments
        common_words = Counter(all_words).most_common(10)

        # Engagement
        engagement_metrics = {
            'comments_with_likes': sum(1 for count in like_counts if count > 0),
            'comments_with_emoji': sum(1 for count in emoji_counts if count > 0),
            'comments_with_mentions': sum(1 for found in mention_lists if found),
            'avg_engagement_rate': statistics.mean(
                [stats['engagement_rate'] for stats in user_engagement.values()]
            ),
        }

        # Time distribution: the three weeks with the most comments.
        most_active_weeks = Counter(weeks).most_common(3)

        def share(count):
            """Express *count* as a percentage of all parsed comments."""
            return count / total_comments * 100

        words_block = "\n".join([f"- {word}: {count} раз" for word, count in common_words])
        users_block = "\n".join([f"- {user}: {count} комментариев" for user, count in most_active_users])
        mentions_block = "\n".join([f"- {user}: {count} упоминаний" for user, count in most_mentioned if user])
        weeks_block = "\n".join([f"- {week} неделя: {count} комментариев" for week, count in most_active_weeks])

        summary_parts = [
            f"Content Type: {content_type}\n",
            f"Link to Post: {link_to_post}\n\n",
            f"ОСНОВНАЯ СТАТИСТИКА:\n",
            f"- Всего комментариев: {total_comments}\n",
            f"- Всего лайков на комментариях: {total_likes_sum}\n",
            f"- Среднее количество лайков: {avg_likes:.1f}\n",
            f"- Период активности: {earliest_week}-{latest_week} недель\n\n",
            f"АНАЛИЗ КОНТЕНТА:\n",
            f"- Средняя длина комментария: {avg_comment_length:.1f} символов\n",
            f"- Медианная длина комментария: {median_comment_length} символов\n",
            f"- Среднее количество слов: {avg_words_per_comment:.1f}\n",
            f"- Всего эмодзи использовано: {total_emojis}\n",
            f"- Тональность комментариев:\n",
            f" * Позитивных: {sentiment_distribution['positive']}\n",
            f" * Нейтральных: {sentiment_distribution['neutral']}\n",
            f" * Негативных: {sentiment_distribution['negative']}\n\n",
            f"ПОПУЛЯРНЫЕ СЛОВА:\n",
            words_block,
            "\n\n",
            f"АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:\n",
            f"Самые активные комментаторы:\n",
            users_block,
            "\n\n",
            f"Самые упоминаемые пользователи:\n",
            mentions_block,
            "\n\n",
            f"ВОВЛЕЧЕННОСТЬ:\n",
            f"- Процент комментариев с лайками: {share(engagement_metrics['comments_with_likes']):.1f}%\n",
            f"- Процент комментариев с эмодзи: {share(engagement_metrics['comments_with_emoji']):.1f}%\n",
            f"- Процент комментариев с упоминаниями: {share(engagement_metrics['comments_with_mentions']):.1f}%\n",
            f"- Средний рейтинг вовлеченности: {engagement_metrics['avg_engagement_rate']:.2f}\n\n",
            f"ВРЕМЕННАЯ АКТИВНОСТЬ:\n",
            f"Самые активные недели:\n",
            weeks_block,
        ]
        analytics_summary = "".join(summary_parts)

        return (
            analytics_summary,
            "\n".join(usernames),
            "\n".join(comments),
            "\n".join(str(count) for count in like_counts),
            str(total_likes_sum),
        )

    except Exception as e:
        logger.error(f"Error in analyze_post: {e}", exc_info=True)
        error_message = f"Произошла ошибка при обработке: {str(e)}\n{str(type(e))}"
        # The same message is shown in every text output; the total stays "0".
        return (error_message,) * 4 + ("0",)
246
+
247
# Build the Gradio interface.
# NOTE(review): `gr` is not imported in the visible part of the file;
# presumably `import gradio as gr` sits above the shown hunk — confirm.

# Input widgets, in the positional order of analyze_post's parameters.
input_components = [
    gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
    gr.Textbox(label="Link to Post", placeholder="Введите ссылку на пост"),
    gr.Number(label="Likes", value=0),
    gr.Textbox(label="Post Date", placeholder="Введите дату публикации"),
    gr.Textbox(label="Description", placeholder="Введите описание поста", lines=3),
    gr.Number(label="Total Comment Count", value=0),
    gr.Textbox(label="All Comments", placeholder="Вставьте комментарии", lines=10),
]

# Output widgets, one per element of the tuple returned by analyze_post.
output_components = [
    gr.Textbox(label="Analytics Summary", lines=20),
    gr.Textbox(label="Usernames (Output 1)", lines=5),
    gr.Textbox(label="Comments (Output 2)", lines=5),
    gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
    gr.Textbox(label="Total Likes on Comments (Output 4)"),
]

iface = gr.Interface(
    fn=analyze_post,
    inputs=input_components,
    outputs=output_components,
    title="Instagram Comment Analyzer Pro",
    description="Расширенный анализатор комментариев Instagram с детальной аналитикой",
)

if __name__ == "__main__":
    iface.launch()