Mohammed Foud
committed on
Commit
·
b3ed9e6
1
Parent(s):
7f87155
first commit
Browse files
app.py
CHANGED
@@ -11,6 +11,8 @@ import base64
|
|
11 |
from textblob import TextBlob
|
12 |
from collections import defaultdict
|
13 |
from tabulate import tabulate
|
|
|
|
|
14 |
|
15 |
# Load models and initialize components
|
16 |
model_path = "./final_model"
|
@@ -48,9 +50,51 @@ def get_initial_summary():
|
|
48 |
return "Error: Could not load dataset.csv"
|
49 |
|
50 |
try:
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
except Exception as e:
|
55 |
return f"Error generating initial summary: {str(e)}"
|
56 |
|
@@ -146,10 +190,34 @@ def analyze_reviews(reviews_text):
|
|
146 |
# Original sentiment analysis
|
147 |
df, plot_html = analyze_reviews_sentiment(reviews_text)
|
148 |
|
149 |
-
#
|
150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
-
return df, plot_html,
|
153 |
|
154 |
def analyze_reviews_sentiment(reviews_text):
|
155 |
reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
|
@@ -193,7 +261,7 @@ def create_interface():
|
|
193 |
with gr.Tab("Review Analysis"):
|
194 |
# Add initial dataset summary
|
195 |
gr.Markdown("## Dataset Overview")
|
196 |
-
gr.
|
197 |
|
198 |
gr.Markdown("## Analyze New Reviews")
|
199 |
reviews_input = gr.Textbox(
|
@@ -211,9 +279,8 @@ def create_interface():
|
|
211 |
plot_output = gr.HTML(label="Sentiment Distribution")
|
212 |
|
213 |
with gr.Column():
|
214 |
-
summary_output = gr.Textbox
|
215 |
-
label="Review Summary"
|
216 |
-
lines=5
|
217 |
)
|
218 |
|
219 |
analyze_button.click(
|
@@ -224,6 +291,28 @@ def create_interface():
|
|
224 |
|
225 |
return demo
|
226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
# Create and launch the interface
|
228 |
if __name__ == "__main__":
|
229 |
demo = create_interface()
|
|
|
11 |
from textblob import TextBlob
|
12 |
from collections import defaultdict
|
13 |
from tabulate import tabulate
|
14 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
15 |
+
from sklearn.cluster import KMeans
|
16 |
|
17 |
# Load models and initialize components
|
18 |
model_path = "./final_model"
|
|
|
50 |
return "Error: Could not load dataset.csv"
|
51 |
|
52 |
try:
|
53 |
+
# Generate summaries for all categories
|
54 |
+
summaries = generate_category_summaries(df)
|
55 |
+
|
56 |
+
# Convert summaries to HTML format for Gradio
|
57 |
+
html_output = []
|
58 |
+
for category, tables in summaries.items():
|
59 |
+
html_output.append(f"<h2>CATEGORY: {category}</h2>")
|
60 |
+
|
61 |
+
for table in tables:
|
62 |
+
html_output.append(f"<h3>{table['section']}</h3>")
|
63 |
+
# Convert table to HTML using tabulate
|
64 |
+
table_html = tabulate(
|
65 |
+
table['data'],
|
66 |
+
headers=table['headers'],
|
67 |
+
tablefmt="html",
|
68 |
+
stralign="left",
|
69 |
+
numalign="center"
|
70 |
+
)
|
71 |
+
# Add some CSS styling
|
72 |
+
styled_table = f"""
|
73 |
+
<style>
|
74 |
+
table {{
|
75 |
+
border-collapse: collapse;
|
76 |
+
margin: 15px 0;
|
77 |
+
width: 100%;
|
78 |
+
}}
|
79 |
+
th, td {{
|
80 |
+
padding: 8px;
|
81 |
+
border: 1px solid #ddd;
|
82 |
+
text-align: left;
|
83 |
+
}}
|
84 |
+
th {{
|
85 |
+
background-color: #f5f5f5;
|
86 |
+
}}
|
87 |
+
tr:nth-child(even) {{
|
88 |
+
background-color: #f9f9f9;
|
89 |
+
}}
|
90 |
+
</style>
|
91 |
+
{table_html}
|
92 |
+
"""
|
93 |
+
html_output.append(styled_table)
|
94 |
+
|
95 |
+
html_output.append("<hr>") # Add separator between categories
|
96 |
+
|
97 |
+
return "\n".join(html_output)
|
98 |
except Exception as e:
|
99 |
return f"Error generating initial summary: {str(e)}"
|
100 |
|
|
|
190 |
# Original sentiment analysis
|
191 |
df, plot_html = analyze_reviews_sentiment(reviews_text)
|
192 |
|
193 |
+
# Create a temporary DataFrame with the new reviews
|
194 |
+
temp_df = pd.DataFrame({
|
195 |
+
'text': reviews_text.split('\n'),
|
196 |
+
'rating': [3] * len(reviews_text.split('\n')), # Default neutral rating
|
197 |
+
'name': ['New Review'] * len(reviews_text.split('\n')),
|
198 |
+
'cluster_name': ['New Reviews'] * len(reviews_text.split('\n'))
|
199 |
+
})
|
200 |
+
|
201 |
+
# Generate summary tables
|
202 |
+
summaries = generate_category_summaries(temp_df)
|
203 |
+
|
204 |
+
# Convert summaries to HTML
|
205 |
+
html_output = []
|
206 |
+
for category, tables in summaries.items():
|
207 |
+
for table in tables:
|
208 |
+
html_output.append(f"<h3>{table['section']}</h3>")
|
209 |
+
table_html = tabulate(
|
210 |
+
table['data'],
|
211 |
+
headers=table['headers'],
|
212 |
+
tablefmt="html",
|
213 |
+
stralign="left",
|
214 |
+
numalign="center"
|
215 |
+
)
|
216 |
+
html_output.append(table_html)
|
217 |
+
|
218 |
+
summary_html = "\n".join(html_output)
|
219 |
|
220 |
+
return df, plot_html, summary_html
|
221 |
|
222 |
def analyze_reviews_sentiment(reviews_text):
|
223 |
reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
|
|
|
261 |
with gr.Tab("Review Analysis"):
|
262 |
# Add initial dataset summary
|
263 |
gr.Markdown("## Dataset Overview")
|
264 |
+
gr.HTML(initial_summary) # Changed from gr.Markdown to gr.HTML
|
265 |
|
266 |
gr.Markdown("## Analyze New Reviews")
|
267 |
reviews_input = gr.Textbox(
|
|
|
279 |
plot_output = gr.HTML(label="Sentiment Distribution")
|
280 |
|
281 |
with gr.Column():
|
282 |
+
summary_output = gr.HTML( # Changed from gr.Textbox to gr.HTML
|
283 |
+
label="Review Summary"
|
|
|
284 |
)
|
285 |
|
286 |
analyze_button.click(
|
|
|
291 |
|
292 |
return demo
|
293 |
|
294 |
+
def add_clusters_to_df(df):
    """Add a ``cluster_name`` column to *df* if it does not already have one.

    Reviews in ``df['text']`` are vectorized with TF-IDF, grouped with
    k-means, and each resulting cluster index is mapped to a display name.

    Args:
        df: DataFrame with a ``text`` column holding the review strings.

    Returns:
        The same DataFrame object, modified in place, with a
        ``cluster_name`` column. If the column was already present the
        frame is returned untouched.
    """
    # Honor the documented contract: never overwrite existing cluster labels.
    if 'cluster_name' in df.columns:
        return df

    # NOTE: k-means label indices are arbitrary, so these names are display
    # placeholders rather than verified product categories.
    cluster_names = {
        0: "Electronics",
        1: "Home & Kitchen",
        2: "Books & Media",
        3: "Other Products"
    }

    # Nothing to vectorize or cluster; still provide the column so callers
    # can rely on it being present.
    if df.empty:
        df['cluster_name'] = []
        return df

    # Missing values are not valid documents for the vectorizer, so treat
    # them as empty reviews.
    texts = df['text'].fillna('').astype(str)

    # Create text features
    vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
    text_features = vectorizer.fit_transform(texts)

    # KMeans rejects n_clusters > n_samples, so clamp for small inputs.
    n_clusters = min(len(cluster_names), len(df))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(text_features)

    # Map cluster numbers to names in one pass, without first storing the
    # raw integer labels in the column.
    df['cluster_name'] = [cluster_names[int(label)] for label in labels]

    return df
|
315 |
+
|
316 |
# Create and launch the interface
|
317 |
if __name__ == "__main__":
|
318 |
demo = create_interface()
|