Spaces:

YixuanWang
/

Interactive-Recommendation-System

Sleeping

App Files Files Community

YixuanWang committed on Nov 15, 2024

Commit

596f852

verified ·

1 Parent(s): 6fc36a2

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -66

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ from typing import List, Dict, Tuple
 from dataclasses import dataclass
 from pathlib import Path
 import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -18,10 +20,12 @@ class RecommendationWeights:
 class TweetPreprocessor:
     def __init__(self, data_path: Path):
         self.data = self._load_data(data_path)
     @staticmethod
     def _load_data(data_path: Path) -> pd.DataFrame:
         try:
             data = pd.read_csv(data_path)
             required_columns = {'Text', 'Retweets', 'Likes'}
@@ -32,14 +36,29 @@ class TweetPreprocessor:
             logger.error(f"Error loading data: {e}")
             raise
     def calculate_metrics(self) -> pd.DataFrame:
-        self.data['Sentiment'] = self.data['Text'].apply(self._get_sentiment)
         self.data['Popularity'] = self._normalize_popularity()
-        self.data['Credibility'] = np.random.choice([0, 1], size=len(self.data), p=[0.3, 0.7])
         return self.data
     @staticmethod
     def _get_sentiment(text: str) -> float:
         try:
             return TextBlob(str(text)).sentiment.polarity
         except Exception as e:
@@ -47,6 +66,7 @@ class TweetPreprocessor:
             return 0.0
     def _normalize_popularity(self) -> pd.Series:
         popularity = self.data['Retweets'] + self.data['Likes']
         return (popularity - popularity.min()) / (popularity.max() - popularity.min() + 1e-6)
@@ -57,20 +77,28 @@ class RecommendationSystem:
         self.setup_system()
     def setup_system(self):
         self.data = self.preprocessor.calculate_metrics()
-    def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> Dict:
-        if not self._validate_weights(weights):
-            return {"error": "Invalid weights provided"}
         normalized_weights = self._normalize_weights(weights)
         self.data['Final_Score'] = (
             self.data['Credibility'] * normalized_weights.visibility +
             self.data['Sentiment'] * normalized_weights.sentiment +
             self.data['Popularity'] * normalized_weights.popularity
         )
         top_recommendations = (
             self.data.nlargest(num_recommendations, 'Final_Score')
         )
@@ -78,18 +106,19 @@ class RecommendationSystem:
         return self._format_recommendations(top_recommendations)
     def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
         formatted_results = []
         for _, row in recommendations.iterrows():
             score_details = {
-                "总分": f"{row['Final_Score']:.2f}",
-                "可信度": "可信" if row['Credibility'] > 0 else "存疑",
-                "情感倾向": self._get_sentiment_label(row['Sentiment']),
-                "热度": f"{row['Popularity']:.2f}",
-                "互动数": f"点赞 {row['Likes']} · 转发 {row['Retweets']}"
             }
             formatted_results.append({
-                "text": row['Text'],
                 "scores": score_details
             })
@@ -100,18 +129,21 @@ class RecommendationSystem:
     @staticmethod
     def _get_sentiment_label(sentiment_score: float) -> str:
         if sentiment_score > 0.3:
-            return "积极"
         elif sentiment_score < -0.3:
-            return "消极"
-        return "中性"
     @staticmethod
     def _validate_weights(weights: RecommendationWeights) -> bool:
         return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__)
     @staticmethod
     def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
         total = weights.visibility + weights.sentiment + weights.popularity
         if total == 0:
             return RecommendationWeights(1/3, 1/3, 1/3)
@@ -123,14 +155,15 @@ class RecommendationSystem:
     @staticmethod
     def _get_score_explanation() -> Dict[str, str]:
         return {
-            "可信度": "内容可信度评估",
-            "情感倾向": "文本的情感分析结果",
-            "热度": "基于点赞和转发的热度分数"
         }
 def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface:
     with gr.Blocks(theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # Tweet Recommendation System
@@ -153,7 +186,7 @@ def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.I
             html = '<div style="font-family: sans-serif;">'
             html += '''
-            <div style="margin-bottom: 20px; padding: 15px; background-color: #f5f5f5; border-radius: 8px;">
                 <h3 style="margin-top: 0;">Score Guide</h3>
                 <ul style="margin: 0;">
                     <li><strong>Credibility</strong>: Assessment of content reliability</li>
@@ -170,19 +203,19 @@ def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.I
                     <div style="margin-bottom: 10px; font-size: 1.1em;">{rec["text"]}</div>
                     <div style="display: flex; flex-wrap: wrap; gap: 10px; font-size: 0.9em;">
                         <span style="padding: 3px 8px; background-color: #1976d2; color: white; border-radius: 4px;">
-                            Score: {scores["总分"]}
                         </span>
                         <span style="padding: 3px 8px; background-color: #2e7d32; color: white; border-radius: 4px;">
-                            Credibility: {scores["可信度"]}
                         </span>
                         <span style="padding: 3px 8px; background-color: #ed6c02; color: white; border-radius: 4px;">
-                            Sentiment: {scores["情感倾向"]}
                         </span>
                         <span style="padding: 3px 8px; background-color: #d32f2f; color: white; border-radius: 4px;">
-                            Popularity: {scores["热度"]}
                         </span>
                         <span style="padding: 3px 8px; background-color: #7b1fa2; color: white; border-radius: 4px;">
-                            Engagement: {scores["互动数"]}
                         </span>
                     </div>
                 </div>
@@ -190,55 +223,21 @@ def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.I
             html += '</div>'
             return html
         submit_btn.click(
-            fn=lambda v, s, p: format_recommendations(
-                recommendation_system.get_recommendations(RecommendationWeights(v, s, p))
-            ),
             inputs=[visibility_weight, sentiment_weight, popularity_weight],
             outputs=output_html
         )
     return interface
-@staticmethod
-def _get_sentiment_label(sentiment_score: float) -> str:
-    if sentiment_score > 0.3:
-        return "Positive"
-    elif sentiment_score < -0.3:
-        return "Negative"
-    return "Neutral"
-def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
-    formatted_results = []
-    for _, row in recommendations.iterrows():
-        score_details = {
-            "总分": f"{row['Final_Score']:.2f}",
-            "可信度": "Reliable" if row['Credibility'] > 0 else "Uncertain",
-            "情感倾向": self._get_sentiment_label(row['Sentiment']),
-            "热度": f"{row['Popularity']:.2f}",
-            "互动数": f"Likes {row['Likes']} · Retweets {row['Retweets']}"
-        }
-        formatted_results.append({
-            "text": row['Text'],
-            "scores": score_details
-        })
-    return {
-        "recommendations": formatted_results,
-        "score_explanation": self._get_score_explanation()
-    }
-@staticmethod
-def _get_score_explanation() -> Dict[str, str]:
-    return {
-        "Credibility": "Content reliability assessment",
-        "Sentiment": "Text emotional analysis result",
-        "Popularity": "Score based on likes and retweets"
-    }
 def main():
     try:
         recommendation_system = RecommendationSystem(
             data_path=Path('twitter_dataset.csv')

 from dataclasses import dataclass
 from pathlib import Path
 import logging
+import re
+from datetime import datetime
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class TweetPreprocessor:
     def __init__(self, data_path: Path):
+        """Initialize the preprocessor with data path."""
         self.data = self._load_data(data_path)
     @staticmethod
     def _load_data(data_path: Path) -> pd.DataFrame:
+        """Load and validate the dataset."""
         try:
             data = pd.read_csv(data_path)
             required_columns = {'Text', 'Retweets', 'Likes'}
             logger.error(f"Error loading data: {e}")
             raise
+    def _clean_text(self, text: str) -> str:
+        """Return *text* stripped of URLs, punctuation and extra whitespace.
+
+        Missing values and texts shorter than 10 characters (after trimming)
+        yield an empty string.
+        """
+        if pd.isna(text) or len(str(text).strip()) < 10:
+            return ""
+        # Escape the dot so only a literal "www." prefix is treated as a URL;
+        # an unescaped "." also matched ordinary words such as "awwwesome".
+        text = re.sub(r'http\S+|www\.\S+', '', str(text))
+        # Drop everything that is neither a word character nor whitespace.
+        text = re.sub(r'[^\w\s]', '', text)
+        # Collapse the whitespace runs left behind by the removals above.
+        text = ' '.join(text.split())
+        return text
     def calculate_metrics(self) -> pd.DataFrame:
+        """Calculate all metrics for tweets.
+
+        Adds ``Clean_Text``, ``Sentiment`` and ``Popularity`` columns, drops
+        rows whose cleaned text is empty, and returns the resulting frame.
+        """
+        self.data['Clean_Text'] = self.data['Text'].apply(self._clean_text)
+        # Copy the filtered rows so the column assignments below write to an
+        # independent frame instead of a slice of the unfiltered data.
+        self.data = self.data[self.data['Clean_Text'].str.len() > 0].copy()
+        self.data['Sentiment'] = self.data['Clean_Text'].apply(self._get_sentiment)
         self.data['Popularity'] = self._normalize_popularity()
         return self.data
     @staticmethod
     def _get_sentiment(text: str) -> float:
+        """Calculate sentiment polarity for a text."""
         try:
             return TextBlob(str(text)).sentiment.polarity
         except Exception as e:
             return 0.0
     def _normalize_popularity(self) -> pd.Series:
+        """Normalize popularity scores."""
         popularity = self.data['Retweets'] + self.data['Likes']
         return (popularity - popularity.min()) / (popularity.max() - popularity.min() + 1e-6)
         self.setup_system()
     def setup_system(self):
+        """Initialize the system with preprocessed data."""
         self.data = self.preprocessor.calculate_metrics()
+    def recalculate_scores(self, weights: RecommendationWeights):
+        """Recalculate scores based on new weights."""
         normalized_weights = self._normalize_weights(weights)
+        self.data['Credibility'] = np.random.choice([0, 1], size=len(self.data), p=[0.3, 0.7])
         self.data['Final_Score'] = (
             self.data['Credibility'] * normalized_weights.visibility +
             self.data['Sentiment'] * normalized_weights.sentiment +
             self.data['Popularity'] * normalized_weights.popularity
         )
+    def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> Dict:
+        """Get tweet recommendations based on weights."""
+        if not self._validate_weights(weights):
+            return {"error": "Invalid weights provided"}
+        self.recalculate_scores(weights)
         top_recommendations = (
             self.data.nlargest(num_recommendations, 'Final_Score')
         )
         return self._format_recommendations(top_recommendations)
     def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
+        """Format recommendations for display."""
         formatted_results = []
         for _, row in recommendations.iterrows():
             score_details = {
+                "score": f"{row['Final_Score']:.2f}",
+                "credibility": "Reliable" if row['Credibility'] > 0 else "Uncertain",
+                "sentiment": self._get_sentiment_label(row['Sentiment']),
+                "popularity": f"{row['Popularity']:.2f}",
+                "engagement": f"Likes {row['Likes']} · Retweets {row['Retweets']}"
             }
             formatted_results.append({
+                "text": row['Clean_Text'],
                 "scores": score_details
             })
     @staticmethod
     def _get_sentiment_label(sentiment_score: float) -> str:
+        """Convert sentiment score to label."""
         if sentiment_score > 0.3:
+            return "Positive"
         elif sentiment_score < -0.3:
+            return "Negative"
+        return "Neutral"
     @staticmethod
     def _validate_weights(weights: RecommendationWeights) -> bool:
+        """Validate that weights are non-negative."""
         return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__)
     @staticmethod
     def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
+        """Normalize weights to sum to 1."""
         total = weights.visibility + weights.sentiment + weights.popularity
         if total == 0:
             return RecommendationWeights(1/3, 1/3, 1/3)
     @staticmethod
     def _get_score_explanation() -> Dict[str, str]:
+        """Provide explanation for different score components."""
         return {
+            "Credibility": "Content reliability assessment",
+            "Sentiment": "Text emotional analysis result",
+            "Popularity": "Score based on likes and retweets"
         }
 def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface:
+    """Create and configure the Gradio interface."""
     with gr.Blocks(theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # Tweet Recommendation System
             html = '<div style="font-family: sans-serif;">'
             html += '''
+            <div style="margin-bottom: 20px; padding: 15px; background-color: #1a1a1a; color: white; border-radius: 8px;">
                 <h3 style="margin-top: 0;">Score Guide</h3>
                 <ul style="margin: 0;">
                     <li><strong>Credibility</strong>: Assessment of content reliability</li>
                     <div style="margin-bottom: 10px; font-size: 1.1em;">{rec["text"]}</div>
                     <div style="display: flex; flex-wrap: wrap; gap: 10px; font-size: 0.9em;">
                         <span style="padding: 3px 8px; background-color: #1976d2; color: white; border-radius: 4px;">
+                            Score: {scores["score"]}
                         </span>
                         <span style="padding: 3px 8px; background-color: #2e7d32; color: white; border-radius: 4px;">
+                            Credibility: {scores["credibility"]}
                         </span>
                         <span style="padding: 3px 8px; background-color: #ed6c02; color: white; border-radius: 4px;">
+                            Sentiment: {scores["sentiment"]}
                         </span>
                         <span style="padding: 3px 8px; background-color: #d32f2f; color: white; border-radius: 4px;">
+                            Popularity: {scores["popularity"]}
                         </span>
                         <span style="padding: 3px 8px; background-color: #7b1fa2; color: white; border-radius: 4px;">
+                            Engagement: {scores["engagement"]}
                         </span>
                     </div>
                 </div>
             html += '</div>'
             return html
+        def get_recommendations_with_weights(v, s, p):
+            """Get recommendations with current weights."""
+            weights = RecommendationWeights(v, s, p)
+            return format_recommendations(recommendation_system.get_recommendations(weights))
         submit_btn.click(
+            fn=get_recommendations_with_weights,
             inputs=[visibility_weight, sentiment_weight, popularity_weight],
             outputs=output_html
         )
     return interface
 def main():
+    """Main function to run the application."""
     try:
         recommendation_system = RecommendationSystem(
             data_path=Path('twitter_dataset.csv')