Spaces:

mfoud444
/

oop

Build error

App Files Files Community

Mohammed Foud committed 26 days ago

Commit

31f3e54

1 Parent(s): dc51e14

first commit

Browse files

Files changed (2) hide show

.cursorignore +2 -1
app.py +113 -29

.cursorignore CHANGED Viewed

@@ -10,4 +10,5 @@ etc
 .vscode
 .env
 .env.local
-dataset.csv

 .vscode
 .env
 .env.local
+dataset.csv
+final_model

app.py CHANGED Viewed

@@ -8,12 +8,19 @@ import torch
 from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
 import io
 import base64
 # Load the model and tokenizer
 model_path = "./final_model"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
 def predict_sentiment(text):
     # Preprocess text
     text = text.lower()
@@ -38,14 +45,87 @@ def predict_sentiment(text):
     return sentiment, prob_dict
 def analyze_reviews(reviews_text):
-    # Split reviews by newline
     reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
     if not reviews:
         return "Please enter at least one review.", None
-    # Process each review
     results = []
     for review in reviews:
         sentiment, probs = predict_sentiment(review)
@@ -55,10 +135,8 @@ def analyze_reviews(reviews_text):
             'Confidence': probs
         })
-    # Create DataFrame for display
     df = pd.DataFrame(results)
-    # Create visualization
     plt.figure(figsize=(10, 6))
     sentiment_counts = df['Sentiment'].value_counts()
     plt.bar(sentiment_counts.index, sentiment_counts.values)
@@ -66,7 +144,6 @@ def analyze_reviews(reviews_text):
     plt.xlabel('Sentiment')
     plt.ylabel('Count')
-    # Save plot to bytes
     buf = io.BytesIO()
     plt.savefig(buf, format='png')
     buf.seek(0)
@@ -76,32 +153,39 @@ def analyze_reviews(reviews_text):
     return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
 # Create Gradio interface
-with gr.Blocks(title="Amazon Review Sentiment Analysis") as demo:
-    gr.Markdown("# Amazon Review Sentiment Analysis")
-    gr.Markdown("Enter one or more reviews (one per line) to analyze their sentiment.")
-    with gr.Row():
-        with gr.Column():
             reviews_input = gr.Textbox(
-                label="Enter Reviews",
-                placeholder="Enter your reviews here (one per line)...",
-                lines=10
             )
-            analyze_btn = gr.Button("Analyze Reviews")
-        with gr.Column():
-            results_table = gr.Dataframe(
-                headers=["Review", "Sentiment", "Confidence"],
-                datatype=["str", "str", "str"],
-                col_count=(3, "fixed")
-            )
-            plot_output = gr.HTML()
-    analyze_btn.click(
-        fn=analyze_reviews,
-        inputs=reviews_input,
-        outputs=[results_table, plot_output]
-    )
-if __name__ == "__main__":
-    demo.launch(share=True)

 from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
 import io
 import base64
+from textblob import TextBlob
+from collections import defaultdict
+from tabulate import tabulate
+from transformers import pipeline
 # Load the model and tokenizer
 model_path = "./final_model"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
+# Load the BART summarization pipeline once at import; generate_category_summary reuses it
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 def predict_sentiment(text):
     # Preprocess text
     text = text.lower()
     return sentiment, prob_dict
+def analyze_sentiment(reviews):
+    """Collect the nouns and adjectives that appear in opinionated sentences.
+
+    Each review is split into sentences with TextBlob. Words tagged as nouns
+    or adjectives are counted as "pros" when their sentence has a polarity
+    above 0.3 and as "cons" when it is below -0.3; sentences in between are
+    ignored.
+
+    Args:
+        reviews: Iterable of reviews; each item is converted with ``str()``.
+
+    Returns:
+        A ``(pros, cons)`` tuple of word lists, most frequent word first.
+        Either list is empty when no sentence crossed its threshold.
+    """
+    keyword_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')
+    pros = defaultdict(int)
+    cons = defaultdict(int)
+    for review in reviews:
+        for sentence in TextBlob(str(review)).sentences:
+            polarity = sentence.sentiment.polarity
+            if polarity > 0.3:  # Positive
+                bucket = pros
+            elif polarity < -0.3:  # Negative
+                bucket = cons
+            else:
+                continue  # Neutral sentence: no need to POS-tag it at all.
+            # Tag the sentence itself rather than the whole review, so words
+            # from other sentences are not credited with this polarity.
+            for word, tag in sentence.tags:
+                if tag in keyword_tags:
+                    bucket[word] += 1
+    # sorted() is stable, so equally frequent words keep first-seen order.
+    pros_sorted = sorted(pros, key=lambda word: -pros[word])
+    cons_sorted = sorted(cons, key=lambda word: -cons[word])
+    return pros_sorted, cons_sorted
+def generate_category_summary(reviews_text):
+    """Summarize newline-separated reviews as a short pros/cons text.
+
+    Blank lines are dropped. The top five "pro" and "con" words from
+    ``analyze_sentiment`` are placed into a fixed template, and templates
+    longer than 100 characters are condensed with the module-level
+    ``summarizer`` pipeline.
+
+    Args:
+        reviews_text: Reviews as a single string, one review per line.
+
+    Returns:
+        The generated summary; the raw template when it is 100 characters or
+        shorter; ``"Please enter at least one review."`` when no non-blank
+        line was given; or an ``"Error generating summary: ..."`` message
+        when the summarizer raises.
+    """
+    reviews = []
+    for line in reviews_text.split('\n'):
+        stripped = line.strip()
+        if stripped:
+            reviews.append(stripped)
+    if not reviews:
+        return "Please enter at least one review."
+
+    pros, cons = analyze_sentiment(reviews)
+    if pros:
+        pros_line = ', '.join(pros[:5])
+    else:
+        pros_line = 'No significant positive feedback'
+    if cons:
+        cons_line = ', '.join(cons[:5])
+    else:
+        cons_line = 'No major complaints'
+
+    # The template's leading whitespace is part of the text sent to the model.
+    summary_text = f"""
+    Review Analysis Summary:
+    PROS:
+    {pros_line}
+    CONS:
+    {cons_line}
+    Based on {len(reviews)} reviews analyzed.
+    """
+
+    # Short templates are returned verbatim instead of being summarized.
+    if len(summary_text) <= 100:
+        return summary_text
+
+    try:
+        outputs = summarizer(
+            summary_text,
+            max_length=150,
+            min_length=50,
+            do_sample=False,
+            truncation=True,
+        )
+        return outputs[0]['summary_text']
+    except Exception as e:
+        # Best effort: surface the failure as text rather than raising.
+        return f"Error generating summary: {e!s}"
 def analyze_reviews(reviews_text):
+    """Run the sentiment analysis and the pros/cons summary on the same text.
+
+    Args:
+        reviews_text: Reviews as a single string, one review per line.
+
+    Returns:
+        A ``(table, plot_html, summary)`` tuple: the two values returned by
+        ``analyze_reviews_sentiment`` followed by the text returned by
+        ``generate_category_summary``.
+    """
+    # Sentiment runs first, then the summary.
+    table, chart_html = analyze_reviews_sentiment(reviews_text)
+    return table, chart_html, generate_category_summary(reviews_text)
+# Rename original analyze_reviews to analyze_reviews_sentiment
+def analyze_reviews_sentiment(reviews_text):
+    # Original implementation
     reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
     if not reviews:
         return "Please enter at least one review.", None
     results = []
     for review in reviews:
         sentiment, probs = predict_sentiment(review)
             'Confidence': probs
         })
     df = pd.DataFrame(results)
     plt.figure(figsize=(10, 6))
     sentiment_counts = df['Sentiment'].value_counts()
     plt.bar(sentiment_counts.index, sentiment_counts.values)
     plt.xlabel('Sentiment')
     plt.ylabel('Count')
     buf = io.BytesIO()
     plt.savefig(buf, format='png')
     buf.seek(0)
     return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
 # Create Gradio interface
+def create_interface():
+    """Build the Gradio Blocks UI for the review analysis app.
+
+    The layout is a single "Review Analysis" tab holding a multi-line review
+    box and an analyze button, followed by a row with two columns: the first
+    holds the sentiment table and the chart HTML, the second the summary
+    text. Clicking the button calls ``analyze_reviews`` with the review box
+    contents and routes its three return values to those outputs.
+
+    Returns:
+        The assembled ``gr.Blocks`` app; launching it is left to the caller.
+    """
+    with gr.Blocks() as app:
+        gr.Markdown("# Review Analysis System")
+        with gr.Tab("Review Analysis"):
+            review_box = gr.Textbox(
+                lines=5,
+                label="Enter reviews (one per line)",
+                placeholder="Enter product reviews here...",
+            )
+            run_button = gr.Button("Analyze Reviews")
+            with gr.Row():
+                with gr.Column():
+                    table_out = gr.Dataframe(label="Sentiment Analysis Results")
+                    chart_out = gr.HTML(label="Sentiment Distribution")
+                with gr.Column():
+                    summary_out = gr.Textbox(label="Review Summary", lines=5)
+        # Wired after the layout so all three output components already exist.
+        run_button.click(
+            fn=analyze_reviews,
+            inputs=[review_box],
+            outputs=[table_out, chart_out, summary_out],
+        )
+    return app
+# Build the app at import time so ``demo`` stays available as a module
+# attribute, but start the server only when this file is run as a script.
+demo = create_interface()
+if __name__ == "__main__":
+    demo.launch()