Mohammed Foud committed on
Commit
31f3e54
·
1 Parent(s): dc51e14

first commit

Browse files
Files changed (2) hide show
  1. .cursorignore +2 -1
  2. app.py +113 -29
.cursorignore CHANGED
@@ -10,4 +10,5 @@ etc
10
  .vscode
11
  .env
12
  .env.local
13
- dataset.csv
 
 
10
  .vscode
11
  .env
12
  .env.local
13
+ dataset.csv
14
+ final_model
app.py CHANGED
@@ -8,12 +8,19 @@ import torch
8
  from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
9
  import io
10
  import base64
 
 
 
 
11
 
12
  # Load the model and tokenizer
13
  model_path = "./final_model"
14
  tokenizer = AutoTokenizer.from_pretrained(model_path)
15
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
16
 
 
 
 
17
  def predict_sentiment(text):
18
  # Preprocess text
19
  text = text.lower()
@@ -38,14 +45,87 @@ def predict_sentiment(text):
38
 
39
  return sentiment, prob_dict
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def analyze_reviews(reviews_text):
42
- # Split reviews by newline
 
 
 
 
 
 
 
 
 
 
43
  reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
44
 
45
  if not reviews:
46
  return "Please enter at least one review.", None
47
 
48
- # Process each review
49
  results = []
50
  for review in reviews:
51
  sentiment, probs = predict_sentiment(review)
@@ -55,10 +135,8 @@ def analyze_reviews(reviews_text):
55
  'Confidence': probs
56
  })
57
 
58
- # Create DataFrame for display
59
  df = pd.DataFrame(results)
60
 
61
- # Create visualization
62
  plt.figure(figsize=(10, 6))
63
  sentiment_counts = df['Sentiment'].value_counts()
64
  plt.bar(sentiment_counts.index, sentiment_counts.values)
@@ -66,7 +144,6 @@ def analyze_reviews(reviews_text):
66
  plt.xlabel('Sentiment')
67
  plt.ylabel('Count')
68
 
69
- # Save plot to bytes
70
  buf = io.BytesIO()
71
  plt.savefig(buf, format='png')
72
  buf.seek(0)
@@ -76,32 +153,39 @@ def analyze_reviews(reviews_text):
76
  return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
77
 
78
  # Create Gradio interface
79
- with gr.Blocks(title="Amazon Review Sentiment Analysis") as demo:
80
- gr.Markdown("# Amazon Review Sentiment Analysis")
81
- gr.Markdown("Enter one or more reviews (one per line) to analyze their sentiment.")
82
-
83
- with gr.Row():
84
- with gr.Column():
85
  reviews_input = gr.Textbox(
86
- label="Enter Reviews",
87
- placeholder="Enter your reviews here (one per line)...",
88
- lines=10
89
  )
90
- analyze_btn = gr.Button("Analyze Reviews")
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- with gr.Column():
93
- results_table = gr.Dataframe(
94
- headers=["Review", "Sentiment", "Confidence"],
95
- datatype=["str", "str", "str"],
96
- col_count=(3, "fixed")
97
- )
98
- plot_output = gr.HTML()
99
 
100
- analyze_btn.click(
101
- fn=analyze_reviews,
102
- inputs=reviews_input,
103
- outputs=[results_table, plot_output]
104
- )
105
 
106
- if __name__ == "__main__":
107
- demo.launch(share=True)
 
 
8
  from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
9
  import io
10
  import base64
11
+ from textblob import TextBlob
12
+ from collections import defaultdict
13
+ from tabulate import tabulate
14
+ from transformers import pipeline
15
 
16
  # Load the model and tokenizer
17
  model_path = "./final_model"
18
  tokenizer = AutoTokenizer.from_pretrained(model_path)
19
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
20
 
21
+ # Initialize the summarizer
22
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
23
+
24
  def predict_sentiment(text):
25
  # Preprocess text
26
  text = text.lower()
 
45
 
46
  return sentiment, prob_dict
47
 
48
def analyze_sentiment(reviews):
    """Collect the most frequent nouns/adjectives from polarised sentences.

    Every review is split into sentences with TextBlob. A sentence whose
    polarity is above 0.3 contributes its nouns and adjectives to the "pros"
    tally; one whose polarity is below -0.3 contributes them to the "cons"
    tally. Sentences in between are ignored.

    Args:
        reviews: Iterable of review texts; each item is coerced with ``str``.

    Returns:
        A ``(pros, cons)`` tuple of word lists, each ordered from the most to
        the least frequently counted word (ties keep first-seen order).
    """
    keyword_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')  # noun / adjective tags
    pros = defaultdict(int)
    cons = defaultdict(int)

    for review in reviews:
        for sentence in TextBlob(str(review)).sentences:
            polarity = sentence.sentiment.polarity
            if polarity > 0.3:  # Positive
                bucket = pros
            elif polarity < -0.3:  # Negative
                bucket = cons
            else:
                continue

            # Tag only this sentence: using the whole review's tags would
            # credit words from oppositely-polarised sentences to this bucket.
            for word, tag in sentence.tags:
                if tag in keyword_tags:
                    bucket[word] += 1

    # sorted() is stable (also with reverse=True), so equally frequent words
    # stay in the order they were first counted.
    pros_sorted = sorted(pros, key=pros.get, reverse=True)
    cons_sorted = sorted(cons, key=cons.get, reverse=True)

    return pros_sorted, cons_sorted
71
+
72
def generate_category_summary(reviews_text):
    """Build a short pros/cons summary for newline-separated reviews.

    Args:
        reviews_text: Text holding one review per line; blank lines and
            surrounding whitespace are dropped.

    Returns:
        A string. With no usable reviews it is a prompt asking for input.
        Otherwise a pros/cons digest (top five words each, plus the review
        count) is built; when that digest is longer than 100 characters it is
        passed through the module-level ``summarizer`` and the generated text
        is returned, or an error message if summarisation raises.
    """
    reviews = []
    for line in reviews_text.split('\n'):
        cleaned = line.strip()
        if cleaned:
            reviews.append(cleaned)

    if not reviews:
        return "Please enter at least one review."

    pros, cons = analyze_sentiment(reviews)

    if pros:
        pros_line = ', '.join(pros[:5])
    else:
        pros_line = 'No significant positive feedback'

    if cons:
        cons_line = ', '.join(cons[:5])
    else:
        cons_line = 'No major complaints'

    # NOTE(review): line-leading whitespace inside this template could not be
    # recovered from the reviewed diff -- confirm against the committed file.
    summary_text = (
        "\nReview Analysis Summary:\n"
        "\nPROS:\n"
        f"{pros_line}\n"
        "\nCONS:\n"
        f"{cons_line}\n"
        f"\nBased on {len(reviews)} reviews analyzed.\n"
    )

    # Short digests are returned untouched; only longer ones go through BART.
    if len(summary_text) <= 100:
        return summary_text

    try:
        outputs = summarizer(
            summary_text,
            max_length=150,
            min_length=50,
            do_sample=False,
            truncation=True,
        )
        return outputs[0]['summary_text']
    except Exception as e:
        # Surface the failure in the UI text box instead of crashing the app.
        return f"Error generating summary: {str(e)}"
111
+
112
def analyze_reviews(reviews_text):
    """Run the per-review sentiment analysis and the pros/cons summary.

    Args:
        reviews_text: Text holding one review per line.

    Returns:
        A 3-tuple: the two values produced by ``analyze_reviews_sentiment``
        (results table and chart HTML) followed by the string returned by
        ``generate_category_summary``.
    """
    # Sentiment analysis runs first, then the summary, on the same raw input.
    table, chart_html = analyze_reviews_sentiment(reviews_text)
    return table, chart_html, generate_category_summary(reviews_text)
120
+
121
+ # Per-review sentiment table and distribution chart (formerly `analyze_reviews`)
122
+ def analyze_reviews_sentiment(reviews_text):
123
+ # Original implementation
124
  reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
125
 
126
  if not reviews:
127
  return "Please enter at least one review.", None
128
 
 
129
  results = []
130
  for review in reviews:
131
  sentiment, probs = predict_sentiment(review)
 
135
  'Confidence': probs
136
  })
137
 
 
138
  df = pd.DataFrame(results)
139
 
 
140
  plt.figure(figsize=(10, 6))
141
  sentiment_counts = df['Sentiment'].value_counts()
142
  plt.bar(sentiment_counts.index, sentiment_counts.values)
 
144
  plt.xlabel('Sentiment')
145
  plt.ylabel('Count')
146
 
 
147
  buf = io.BytesIO()
148
  plt.savefig(buf, format='png')
149
  buf.seek(0)
 
153
  return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
154
 
155
  # Create Gradio interface
156
def create_interface():
    """Assemble the Gradio UI and return it without launching.

    The layout is a heading plus a single "Review Analysis" tab containing a
    reviews text box, an "Analyze Reviews" button, and a row with the
    sentiment table, the distribution chart and the summary box. Clicking the
    button calls ``analyze_reviews`` with the text box content.

    Returns:
        The constructed ``gr.Blocks`` application.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Review Analysis System")

        with gr.Tab("Review Analysis"):
            # Input side: free-text reviews and the trigger button.
            reviews_input = gr.Textbox(
                label="Enter reviews (one per line)",
                placeholder="Enter product reviews here...",
                lines=5,
            )
            analyze_button = gr.Button("Analyze Reviews")

            # Output side: table + chart on the left, summary on the right.
            with gr.Row():
                with gr.Column():
                    sentiment_output = gr.Dataframe(label="Sentiment Analysis Results")
                    plot_output = gr.HTML(label="Sentiment Distribution")

                with gr.Column():
                    summary_output = gr.Textbox(label="Review Summary", lines=5)

            result_components = [sentiment_output, plot_output, summary_output]
            analyze_button.click(
                fn=analyze_reviews,
                inputs=[reviews_input],
                outputs=result_components,
            )

    return demo
 
 
 
 
188
 
189
# Build the interface at import time so `demo` stays importable, but only
# start the web server when this file is executed as a script.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()