Mohammed Foud committed on
Commit
b3ed9e6
·
1 Parent(s): 7f87155

first commit

Browse files
Files changed (1) hide show
  1. app.py +99 -10
app.py CHANGED
@@ -11,6 +11,8 @@ import base64
11
  from textblob import TextBlob
12
  from collections import defaultdict
13
  from tabulate import tabulate
 
 
14
 
15
  # Load models and initialize components
16
  model_path = "./final_model"
@@ -48,9 +50,51 @@ def get_initial_summary():
48
  return "Error: Could not load dataset.csv"
49
 
50
  try:
51
- sample_reviews = df['reviews.text'].sample(n=min(50, len(df))).fillna('').tolist()
52
- sample_text = '\n'.join(sample_reviews)
53
- return generate_category_summary(sample_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  except Exception as e:
55
  return f"Error generating initial summary: {str(e)}"
56
 
@@ -146,10 +190,34 @@ def analyze_reviews(reviews_text):
146
  # Original sentiment analysis
147
  df, plot_html = analyze_reviews_sentiment(reviews_text)
148
 
149
- # Generate summary
150
- summary = generate_category_summary(reviews_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- return df, plot_html, summary
153
 
154
  def analyze_reviews_sentiment(reviews_text):
155
  reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
@@ -193,7 +261,7 @@ def create_interface():
193
  with gr.Tab("Review Analysis"):
194
  # Add initial dataset summary
195
  gr.Markdown("## Dataset Overview")
196
- gr.Markdown(initial_summary)
197
 
198
  gr.Markdown("## Analyze New Reviews")
199
  reviews_input = gr.Textbox(
@@ -211,9 +279,8 @@ def create_interface():
211
  plot_output = gr.HTML(label="Sentiment Distribution")
212
 
213
  with gr.Column():
214
- summary_output = gr.Textbox(
215
- label="Review Summary",
216
- lines=5
217
  )
218
 
219
  analyze_button.click(
@@ -224,6 +291,28 @@ def create_interface():
224
 
225
  return demo
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  # Create and launch the interface
228
  if __name__ == "__main__":
229
  demo = create_interface()
 
11
  from textblob import TextBlob
12
  from collections import defaultdict
13
  from tabulate import tabulate
14
+ from sklearn.feature_extraction.text import TfidfVectorizer
15
+ from sklearn.cluster import KMeans
16
 
17
  # Load models and initialize components
18
  model_path = "./final_model"
 
50
  return "Error: Could not load dataset.csv"
51
 
52
  try:
53
+ # Generate summaries for all categories
54
+ summaries = generate_category_summaries(df)
55
+
56
+ # Convert summaries to HTML format for Gradio
57
+ html_output = []
58
+ for category, tables in summaries.items():
59
+ html_output.append(f"<h2>CATEGORY: {category}</h2>")
60
+
61
+ for table in tables:
62
+ html_output.append(f"<h3>{table['section']}</h3>")
63
+ # Convert table to HTML using tabulate
64
+ table_html = tabulate(
65
+ table['data'],
66
+ headers=table['headers'],
67
+ tablefmt="html",
68
+ stralign="left",
69
+ numalign="center"
70
+ )
71
+ # Add some CSS styling
72
+ styled_table = f"""
73
+ <style>
74
+ table {{
75
+ border-collapse: collapse;
76
+ margin: 15px 0;
77
+ width: 100%;
78
+ }}
79
+ th, td {{
80
+ padding: 8px;
81
+ border: 1px solid #ddd;
82
+ text-align: left;
83
+ }}
84
+ th {{
85
+ background-color: #f5f5f5;
86
+ }}
87
+ tr:nth-child(even) {{
88
+ background-color: #f9f9f9;
89
+ }}
90
+ </style>
91
+ {table_html}
92
+ """
93
+ html_output.append(styled_table)
94
+
95
+ html_output.append("<hr>") # Add separator between categories
96
+
97
+ return "\n".join(html_output)
98
  except Exception as e:
99
  return f"Error generating initial summary: {str(e)}"
100
 
 
190
  # Original sentiment analysis
191
  df, plot_html = analyze_reviews_sentiment(reviews_text)
192
 
193
+ # Create a temporary DataFrame with the new reviews
194
+ temp_df = pd.DataFrame({
195
+ 'text': reviews_text.split('\n'),
196
+ 'rating': [3] * len(reviews_text.split('\n')), # Default neutral rating
197
+ 'name': ['New Review'] * len(reviews_text.split('\n')),
198
+ 'cluster_name': ['New Reviews'] * len(reviews_text.split('\n'))
199
+ })
200
+
201
+ # Generate summary tables
202
+ summaries = generate_category_summaries(temp_df)
203
+
204
+ # Convert summaries to HTML
205
+ html_output = []
206
+ for category, tables in summaries.items():
207
+ for table in tables:
208
+ html_output.append(f"<h3>{table['section']}</h3>")
209
+ table_html = tabulate(
210
+ table['data'],
211
+ headers=table['headers'],
212
+ tablefmt="html",
213
+ stralign="left",
214
+ numalign="center"
215
+ )
216
+ html_output.append(table_html)
217
+
218
+ summary_html = "\n".join(html_output)
219
 
220
+ return df, plot_html, summary_html
221
 
222
  def analyze_reviews_sentiment(reviews_text):
223
  reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
 
261
  with gr.Tab("Review Analysis"):
262
  # Add initial dataset summary
263
  gr.Markdown("## Dataset Overview")
264
+ gr.HTML(initial_summary) # Changed from gr.Markdown to gr.HTML
265
 
266
  gr.Markdown("## Analyze New Reviews")
267
  reviews_input = gr.Textbox(
 
279
  plot_output = gr.HTML(label="Sentiment Distribution")
280
 
281
  with gr.Column():
282
+ summary_output = gr.HTML( # Changed from gr.Textbox to gr.HTML
283
+ label="Review Summary"
 
284
  )
285
 
286
  analyze_button.click(
 
291
 
292
  return demo
293
 
294
+ def add_clusters_to_df(df):
295
+ """Add cluster names to the DataFrame if they don't exist"""
296
+ # Create text features
297
+ vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
298
+ text_features = vectorizer.fit_transform(df['text'])
299
+
300
+ # Perform clustering
301
+ n_clusters = 4 # You can adjust this
302
+ kmeans = KMeans(n_clusters=n_clusters, random_state=42)
303
+ df['cluster_name'] = kmeans.fit_predict(text_features)
304
+
305
+ # Map cluster numbers to names
306
+ cluster_names = {
307
+ 0: "Electronics",
308
+ 1: "Home & Kitchen",
309
+ 2: "Books & Media",
310
+ 3: "Other Products"
311
+ }
312
+ df['cluster_name'] = df['cluster_name'].map(cluster_names)
313
+
314
+ return df
315
+
316
  # Create and launch the interface
317
  if __name__ == "__main__":
318
  demo = create_interface()