YixuanWang committed on
Commit
596f852
·
verified ·
1 Parent(s): 6fc36a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -66
app.py CHANGED
@@ -6,6 +6,8 @@ from typing import List, Dict, Tuple
6
  from dataclasses import dataclass
7
  from pathlib import Path
8
  import logging
 
 
9
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
@@ -18,10 +20,12 @@ class RecommendationWeights:
18
 
19
  class TweetPreprocessor:
20
  def __init__(self, data_path: Path):
 
21
  self.data = self._load_data(data_path)
22
 
23
  @staticmethod
24
  def _load_data(data_path: Path) -> pd.DataFrame:
 
25
  try:
26
  data = pd.read_csv(data_path)
27
  required_columns = {'Text', 'Retweets', 'Likes'}
@@ -32,14 +36,29 @@ class TweetPreprocessor:
32
  logger.error(f"Error loading data: {e}")
33
  raise
34
 
 
 
 
 
 
 
 
 
 
 
35
  def calculate_metrics(self) -> pd.DataFrame:
36
- self.data['Sentiment'] = self.data['Text'].apply(self._get_sentiment)
 
 
 
 
37
  self.data['Popularity'] = self._normalize_popularity()
38
- self.data['Credibility'] = np.random.choice([0, 1], size=len(self.data), p=[0.3, 0.7])
39
  return self.data
40
 
41
  @staticmethod
42
  def _get_sentiment(text: str) -> float:
 
43
  try:
44
  return TextBlob(str(text)).sentiment.polarity
45
  except Exception as e:
@@ -47,6 +66,7 @@ class TweetPreprocessor:
47
  return 0.0
48
 
49
  def _normalize_popularity(self) -> pd.Series:
 
50
  popularity = self.data['Retweets'] + self.data['Likes']
51
  return (popularity - popularity.min()) / (popularity.max() - popularity.min() + 1e-6)
52
 
@@ -57,20 +77,28 @@ class RecommendationSystem:
57
  self.setup_system()
58
 
59
  def setup_system(self):
 
60
  self.data = self.preprocessor.calculate_metrics()
61
 
62
- def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> Dict:
63
- if not self._validate_weights(weights):
64
- return {"error": "Invalid weights provided"}
65
-
66
  normalized_weights = self._normalize_weights(weights)
67
 
 
 
68
  self.data['Final_Score'] = (
69
  self.data['Credibility'] * normalized_weights.visibility +
70
  self.data['Sentiment'] * normalized_weights.sentiment +
71
  self.data['Popularity'] * normalized_weights.popularity
72
  )
73
 
 
 
 
 
 
 
 
74
  top_recommendations = (
75
  self.data.nlargest(num_recommendations, 'Final_Score')
76
  )
@@ -78,18 +106,19 @@ class RecommendationSystem:
78
  return self._format_recommendations(top_recommendations)
79
 
80
  def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
 
81
  formatted_results = []
82
  for _, row in recommendations.iterrows():
83
  score_details = {
84
- "总分": f"{row['Final_Score']:.2f}",
85
- "可信度": "可信" if row['Credibility'] > 0 else "存疑",
86
- "情感倾向": self._get_sentiment_label(row['Sentiment']),
87
- "热度": f"{row['Popularity']:.2f}",
88
- "互动数": f"点赞 {row['Likes']} · 转发 {row['Retweets']}"
89
  }
90
 
91
  formatted_results.append({
92
- "text": row['Text'],
93
  "scores": score_details
94
  })
95
 
@@ -100,18 +129,21 @@ class RecommendationSystem:
100
 
101
  @staticmethod
102
  def _get_sentiment_label(sentiment_score: float) -> str:
 
103
  if sentiment_score > 0.3:
104
- return "积极"
105
  elif sentiment_score < -0.3:
106
- return "消极"
107
- return "中性"
108
 
109
  @staticmethod
110
  def _validate_weights(weights: RecommendationWeights) -> bool:
 
111
  return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__)
112
 
113
  @staticmethod
114
  def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
 
115
  total = weights.visibility + weights.sentiment + weights.popularity
116
  if total == 0:
117
  return RecommendationWeights(1/3, 1/3, 1/3)
@@ -123,14 +155,15 @@ class RecommendationSystem:
123
 
124
  @staticmethod
125
  def _get_score_explanation() -> Dict[str, str]:
 
126
  return {
127
- "可信度": "内容可信度评估",
128
- "情感倾向": "文本的情感分析结果",
129
- "热度": "基于点赞和转发的热度分数"
130
  }
131
 
132
-
133
  def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface:
 
134
  with gr.Blocks(theme=gr.themes.Soft()) as interface:
135
  gr.Markdown("""
136
  # Tweet Recommendation System
@@ -153,7 +186,7 @@ def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.I
153
  html = '<div style="font-family: sans-serif;">'
154
 
155
  html += '''
156
- <div style="margin-bottom: 20px; padding: 15px; background-color: #f5f5f5; border-radius: 8px;">
157
  <h3 style="margin-top: 0;">Score Guide</h3>
158
  <ul style="margin: 0;">
159
  <li><strong>Credibility</strong>: Assessment of content reliability</li>
@@ -170,19 +203,19 @@ def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.I
170
  <div style="margin-bottom: 10px; font-size: 1.1em;">{rec["text"]}</div>
171
  <div style="display: flex; flex-wrap: wrap; gap: 10px; font-size: 0.9em;">
172
  <span style="padding: 3px 8px; background-color: #1976d2; color: white; border-radius: 4px;">
173
- Score: {scores["总分"]}
174
  </span>
175
  <span style="padding: 3px 8px; background-color: #2e7d32; color: white; border-radius: 4px;">
176
- Credibility: {scores["可信度"]}
177
  </span>
178
  <span style="padding: 3px 8px; background-color: #ed6c02; color: white; border-radius: 4px;">
179
- Sentiment: {scores["情感倾向"]}
180
  </span>
181
  <span style="padding: 3px 8px; background-color: #d32f2f; color: white; border-radius: 4px;">
182
- Popularity: {scores["热度"]}
183
  </span>
184
  <span style="padding: 3px 8px; background-color: #7b1fa2; color: white; border-radius: 4px;">
185
- Engagement: {scores["互动数"]}
186
  </span>
187
  </div>
188
  </div>
@@ -190,55 +223,21 @@ def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.I
190
  html += '</div>'
191
  return html
192
 
 
 
 
 
 
193
  submit_btn.click(
194
- fn=lambda v, s, p: format_recommendations(
195
- recommendation_system.get_recommendations(RecommendationWeights(v, s, p))
196
- ),
197
  inputs=[visibility_weight, sentiment_weight, popularity_weight],
198
  outputs=output_html
199
  )
200
 
201
  return interface
202
 
203
-
204
- @staticmethod
205
- def _get_sentiment_label(sentiment_score: float) -> str:
206
- if sentiment_score > 0.3:
207
- return "Positive"
208
- elif sentiment_score < -0.3:
209
- return "Negative"
210
- return "Neutral"
211
-
212
- def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
213
- formatted_results = []
214
- for _, row in recommendations.iterrows():
215
- score_details = {
216
- "总分": f"{row['Final_Score']:.2f}",
217
- "可信度": "Reliable" if row['Credibility'] > 0 else "Uncertain",
218
- "情感倾向": self._get_sentiment_label(row['Sentiment']),
219
- "热度": f"{row['Popularity']:.2f}",
220
- "互动数": f"Likes {row['Likes']} · Retweets {row['Retweets']}"
221
- }
222
-
223
- formatted_results.append({
224
- "text": row['Text'],
225
- "scores": score_details
226
- })
227
-
228
- return {
229
- "recommendations": formatted_results,
230
- "score_explanation": self._get_score_explanation()
231
- }
232
-
233
- @staticmethod
234
- def _get_score_explanation() -> Dict[str, str]:
235
- return {
236
- "Credibility": "Content reliability assessment",
237
- "Sentiment": "Text emotional analysis result",
238
- "Popularity": "Score based on likes and retweets"
239
- }
240
-
241
  def main():
 
242
  try:
243
  recommendation_system = RecommendationSystem(
244
  data_path=Path('twitter_dataset.csv')
 
6
  from dataclasses import dataclass
7
  from pathlib import Path
8
  import logging
9
+ import re
10
+ from datetime import datetime
11
 
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
 
20
 
21
  class TweetPreprocessor:
22
  def __init__(self, data_path: Path):
23
+ """Initialize the preprocessor with data path."""
24
  self.data = self._load_data(data_path)
25
 
26
  @staticmethod
27
  def _load_data(data_path: Path) -> pd.DataFrame:
28
+ """Load and validate the dataset."""
29
  try:
30
  data = pd.read_csv(data_path)
31
  required_columns = {'Text', 'Retweets', 'Likes'}
 
36
  logger.error(f"Error loading data: {e}")
37
  raise
38
 
39
+ def _clean_text(self, text: str) -> str:
40
+ """Clean text content."""
41
+ if pd.isna(text) or len(str(text).strip()) < 10:
42
+ return ""
43
+
44
+ text = re.sub(r'http\S+|www.\S+', '', str(text))
45
+ text = re.sub(r'[^\w\s]', '', text)
46
+ text = ' '.join(text.split())
47
+ return text
48
+
49
  def calculate_metrics(self) -> pd.DataFrame:
50
+ """Calculate all metrics for tweets."""
51
+ self.data['Clean_Text'] = self.data['Text'].apply(self._clean_text)
52
+ self.data = self.data[self.data['Clean_Text'].str.len() > 0]
53
+
54
+ self.data['Sentiment'] = self.data['Clean_Text'].apply(self._get_sentiment)
55
  self.data['Popularity'] = self._normalize_popularity()
56
+
57
  return self.data
58
 
59
  @staticmethod
60
  def _get_sentiment(text: str) -> float:
61
+ """Calculate sentiment polarity for a text."""
62
  try:
63
  return TextBlob(str(text)).sentiment.polarity
64
  except Exception as e:
 
66
  return 0.0
67
 
68
  def _normalize_popularity(self) -> pd.Series:
69
+ """Normalize popularity scores."""
70
  popularity = self.data['Retweets'] + self.data['Likes']
71
  return (popularity - popularity.min()) / (popularity.max() - popularity.min() + 1e-6)
72
 
 
77
  self.setup_system()
78
 
79
  def setup_system(self):
80
+ """Initialize the system with preprocessed data."""
81
  self.data = self.preprocessor.calculate_metrics()
82
 
83
+ def recalculate_scores(self, weights: RecommendationWeights):
84
+ """Recalculate scores based on new weights."""
 
 
85
  normalized_weights = self._normalize_weights(weights)
86
 
87
+ self.data['Credibility'] = np.random.choice([0, 1], size=len(self.data), p=[0.3, 0.7])
88
+
89
  self.data['Final_Score'] = (
90
  self.data['Credibility'] * normalized_weights.visibility +
91
  self.data['Sentiment'] * normalized_weights.sentiment +
92
  self.data['Popularity'] * normalized_weights.popularity
93
  )
94
 
95
+ def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> Dict:
96
+ """Get tweet recommendations based on weights."""
97
+ if not self._validate_weights(weights):
98
+ return {"error": "Invalid weights provided"}
99
+
100
+ self.recalculate_scores(weights)
101
+
102
  top_recommendations = (
103
  self.data.nlargest(num_recommendations, 'Final_Score')
104
  )
 
106
  return self._format_recommendations(top_recommendations)
107
 
108
  def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
109
+ """Format recommendations for display."""
110
  formatted_results = []
111
  for _, row in recommendations.iterrows():
112
  score_details = {
113
+ "score": f"{row['Final_Score']:.2f}",
114
+ "credibility": "Reliable" if row['Credibility'] > 0 else "Uncertain",
115
+ "sentiment": self._get_sentiment_label(row['Sentiment']),
116
+ "popularity": f"{row['Popularity']:.2f}",
117
+ "engagement": f"Likes {row['Likes']} · Retweets {row['Retweets']}"
118
  }
119
 
120
  formatted_results.append({
121
+ "text": row['Clean_Text'],
122
  "scores": score_details
123
  })
124
 
 
129
 
130
  @staticmethod
131
  def _get_sentiment_label(sentiment_score: float) -> str:
132
+ """Convert sentiment score to label."""
133
  if sentiment_score > 0.3:
134
+ return "Positive"
135
  elif sentiment_score < -0.3:
136
+ return "Negative"
137
+ return "Neutral"
138
 
139
  @staticmethod
140
  def _validate_weights(weights: RecommendationWeights) -> bool:
141
+ """Validate that weights are non-negative."""
142
  return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__)
143
 
144
  @staticmethod
145
  def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
146
+ """Normalize weights to sum to 1."""
147
  total = weights.visibility + weights.sentiment + weights.popularity
148
  if total == 0:
149
  return RecommendationWeights(1/3, 1/3, 1/3)
 
155
 
156
  @staticmethod
157
  def _get_score_explanation() -> Dict[str, str]:
158
+ """Provide explanation for different score components."""
159
  return {
160
+ "Credibility": "Content reliability assessment",
161
+ "Sentiment": "Text emotional analysis result",
162
+ "Popularity": "Score based on likes and retweets"
163
  }
164
 
 
165
  def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface:
166
+ """Create and configure the Gradio interface."""
167
  with gr.Blocks(theme=gr.themes.Soft()) as interface:
168
  gr.Markdown("""
169
  # Tweet Recommendation System
 
186
  html = '<div style="font-family: sans-serif;">'
187
 
188
  html += '''
189
+ <div style="margin-bottom: 20px; padding: 15px; background-color: #1a1a1a; color: white; border-radius: 8px;">
190
  <h3 style="margin-top: 0;">Score Guide</h3>
191
  <ul style="margin: 0;">
192
  <li><strong>Credibility</strong>: Assessment of content reliability</li>
 
203
  <div style="margin-bottom: 10px; font-size: 1.1em;">{rec["text"]}</div>
204
  <div style="display: flex; flex-wrap: wrap; gap: 10px; font-size: 0.9em;">
205
  <span style="padding: 3px 8px; background-color: #1976d2; color: white; border-radius: 4px;">
206
+ Score: {scores["score"]}
207
  </span>
208
  <span style="padding: 3px 8px; background-color: #2e7d32; color: white; border-radius: 4px;">
209
+ Credibility: {scores["credibility"]}
210
  </span>
211
  <span style="padding: 3px 8px; background-color: #ed6c02; color: white; border-radius: 4px;">
212
+ Sentiment: {scores["sentiment"]}
213
  </span>
214
  <span style="padding: 3px 8px; background-color: #d32f2f; color: white; border-radius: 4px;">
215
+ Popularity: {scores["popularity"]}
216
  </span>
217
  <span style="padding: 3px 8px; background-color: #7b1fa2; color: white; border-radius: 4px;">
218
+ Engagement: {scores["engagement"]}
219
  </span>
220
  </div>
221
  </div>
 
223
  html += '</div>'
224
  return html
225
 
226
+ def get_recommendations_with_weights(v, s, p):
227
+ """Get recommendations with current weights."""
228
+ weights = RecommendationWeights(v, s, p)
229
+ return format_recommendations(recommendation_system.get_recommendations(weights))
230
+
231
  submit_btn.click(
232
+ fn=get_recommendations_with_weights,
 
 
233
  inputs=[visibility_weight, sentiment_weight, popularity_weight],
234
  outputs=output_html
235
  )
236
 
237
  return interface
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  def main():
240
+ """Main function to run the application."""
241
  try:
242
  recommendation_system = RecommendationSystem(
243
  data_path=Path('twitter_dataset.csv')