student-abdullah committed on
Commit
ee275ef
0 Parent(s):

Initial commit

.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,14 @@
+ <component name="InspectionProjectProfileManager">
+   <profile version="1.0">
+     <option name="myName" value="Project Default" />
+     <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+       <option name="ignoredPackages">
+         <value>
+           <list size="1">
+             <item index="0" class="java.lang.String" itemvalue="tf_keras" />
+           </list>
+         </value>
+       </option>
+     </inspection_tool>
+   </profile>
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <settings>
+     <option name="USE_PROJECT_PROFILE" value="false" />
+     <version value="1.0" />
+   </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (senti-analy-repo)" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectModuleManager">
+     <modules>
+       <module fileurl="file://$PROJECT_DIR$/.idea/senti-analy-repo.iml" filepath="$PROJECT_DIR$/.idea/senti-analy-repo.iml" />
+     </modules>
+   </component>
+ </project>
.idea/senti-analy-repo.iml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+   <component name="NewModuleRootManager">
+     <content url="file://$MODULE_DIR$" />
+     <orderEntry type="jdk" jdkName="Python 3.11 (senti-analy-repo)" jdkType="Python SDK" />
+     <orderEntry type="sourceFolder" forTests="false" />
+   </component>
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="VcsDirectoryMappings">
+     <mapping directory="$PROJECT_DIR$" vcs="Git" />
+   </component>
+ </project>
app.py ADDED
@@ -0,0 +1,182 @@
+ import streamlit as st
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from wordcloud import WordCloud, STOPWORDS
+ from reportlab.lib.pagesizes import letter
+ from reportlab.pdfgen import canvas
+ from reportlab.lib.units import inch
+ from io import BytesIO
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch
+ import chardet
+ import os
+
+ # Load model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english")
+ model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english")
+
+ # Function to analyze sentiment
+ def analyze_sentiment(text):
+     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+     with torch.no_grad():
+         outputs = model(**inputs)
+     scores = outputs.logits.softmax(dim=1)
+     labels = ['NEGATIVE', 'POSITIVE']
+     score, label = torch.max(scores, dim=1)
+     return {"label": labels[label.item()], "score": score.item()}
+
+ # Function to detect file encoding
+ def detect_encoding(file):
+     rawdata = file.read()
+     result = chardet.detect(rawdata)
+     return result['encoding']
+
+ def generate_pdf(pie_chart_path, pos_wordcloud_path, neg_wordcloud_path):
+     pdf_output = BytesIO()
+     pdf_height = 16.5 * inch  # Total vertical height calculated
+     pdf_width = 8.27 * inch  # A4 width
+     c = canvas.Canvas(pdf_output, pagesize=(pdf_width, pdf_height))
+
+     # Set starting vertical position
+     y_position = pdf_height - 1 * inch
+
+     # Add title
+     c.setFont("Helvetica-Bold", 20)
+     c.drawString(2.2 * inch, y_position, "Sentiment Analysis Report")
+
+     # Update vertical position after title
+     y_position -= 2 * inch
+
+     # Add pie chart (5 inches wide, 4 inches tall)
+     pie_chart_width = 5 * inch
+     pie_chart_height = 4 * inch
+     c.drawImage(pie_chart_path, 1.5 * inch, y_position - pie_chart_height, width=pie_chart_width, height=pie_chart_height)
+
+     # Update vertical position after pie chart
+     y_position -= (pie_chart_height + 1 * inch)  # Add some spacing
+
+     # Add Positive Keywords heading
+     c.setFont("Helvetica-Bold", 12)
+     c.drawString(3 * inch, y_position, "Positive Keywords")
+
+     # Add positive word cloud
+     c.drawImage(pos_wordcloud_path, 1 * inch, y_position - 3.3 * inch, width=6 * inch, height=3 * inch)  # 2:1 ratio
+
+     # Update vertical position after positive word cloud
+     y_position -= (3 * inch + 1 * inch)  # Add some spacing
+
+     # Add Negative Keywords heading
+     c.setFont("Helvetica-Bold", 12)
+     c.drawString(3 * inch, y_position, "Negative Keywords")
+
+     # Add negative word cloud
+     c.drawImage(neg_wordcloud_path, 1 * inch, y_position - 3.3 * inch, width=6 * inch, height=3 * inch)  # 2:1 ratio
+
+     c.save()
+     pdf_output.seek(0)
+
+     return pdf_output
+
+
+ # Streamlit UI
+ st.title("Sentiment Analysis and Reporting")
+
+ # Initialize session state for button visibility
+ if 'show_pdf_download' not in st.session_state:
+     st.session_state.show_pdf_download = False
+
+ # Sidebar for encoding detection and reset button
+ st.sidebar.header("File Encoding Checker")
+
+ # File uploader in the sidebar
+ uploaded_file = st.sidebar.file_uploader("Upload CSV file for Encoding Check", type=["csv"])
+
+ if uploaded_file:
+     # Detect the encoding
+     encoding = detect_encoding(uploaded_file)
+     st.sidebar.write(f"Detected encoding: {encoding}")
+
+ # Reset button in the sidebar
+ if st.sidebar.button("Reset Analysis"):
+     if os.path.exists("sentiment_pie_chart.png"):
+         os.remove("sentiment_pie_chart.png")
+     if os.path.exists("pos_wordcloud.png"):
+         os.remove("pos_wordcloud.png")
+     if os.path.exists("neg_wordcloud.png"):
+         os.remove("neg_wordcloud.png")
+     st.sidebar.write("Files deleted. Please re-upload a file to start over.")
+
+ # File uploader for sentiment analysis
+ uploaded_file = st.file_uploader("Upload CSV file for Sentiment Analysis", type=["csv"])
+
+ # Dropdown for encoding specification in the main panel
+ encodings = ['utf-8', 'latin-1', 'ISO-8859-1', 'ASCII', 'UTF-16', 'UTF-32', 'ANSI', "Windows-1251", 'Windows-1252']
+ user_encoding = st.selectbox("Select Encoding", options=encodings, index=0)
+
+ # Button to start processing
+ if st.button("Go"):
+     if uploaded_file:
+         try:
+             # Load the CSV file into DataFrame with specified encoding
+             uploaded_file.seek(0)  # Reset the file pointer to the beginning
+             df = pd.read_csv(uploaded_file, encoding=user_encoding)
+         except UnicodeDecodeError:
+             st.error("Error decoding the file. Please specify the correct encoding.")
+         else:
+             # Check if the DataFrame has exactly one column
+             if df.shape[1] != 1:
+                 st.warning("The CSV file should only contain one column with review data.")
+             else:
+                 # Rename the column to 'review'
+                 df.columns = ['review']
+
+                 # Clean up the DataFrame
+                 df['review'] = df['review'].astype(str).str.strip()
+                 df = df[df['review'].apply(len) <= 512]
+
+                 # Apply sentiment analysis
+                 df['sentiment'] = df['review'].apply(analyze_sentiment)
+                 df['sentiment_label'] = df['sentiment'].apply(lambda x: x['label'])
+                 df['sentiment_score'] = df['sentiment'].apply(lambda x: x['score'])
+
+                 # Drop the original 'sentiment' column
+                 df = df.drop(columns=['sentiment'])
+
+                 # Pie chart data
+                 sentiment_counts = df['sentiment_label'].value_counts()
+
+                 # Create pie chart
+                 fig, ax = plt.subplots()
+                 ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=45)
+                 ax.set_title('Distribution of Sentiment')
+                 pie_chart_path = "sentiment_pie_chart.png"
+                 plt.savefig(pie_chart_path)
+
+                 # Create word clouds
+                 stopwords = set(STOPWORDS)
+
+                 pos_reviews = df[df['sentiment_label'] == 'POSITIVE']['review'].str.cat(sep=' ')
+                 neg_reviews = df[df['sentiment_label'] == 'NEGATIVE']['review'].str.cat(sep=' ')
+
+                 pos_wordcloud = WordCloud(max_font_size=80, max_words=10, background_color='white', stopwords=stopwords).generate(pos_reviews)
+                 neg_wordcloud = WordCloud(max_font_size=80, max_words=10, background_color='white', stopwords=stopwords).generate(neg_reviews)
+
+                 # Save word clouds to files
+                 pos_wordcloud_path = "pos_wordcloud.png"
+                 neg_wordcloud_path = "neg_wordcloud.png"
+                 pos_wordcloud.to_file(pos_wordcloud_path)
+                 neg_wordcloud.to_file(neg_wordcloud_path)
+
+                 # Create PDF
+                 pdf_output = generate_pdf(pie_chart_path, pos_wordcloud_path, neg_wordcloud_path)
+
+                 # Display options
+                 st.write("Processing complete!")
+
+                 # Update session state to show the appropriate buttons
+                 st.session_state.show_pdf_download = True
+
+                 # Display buttons
+                 download_pdf = st.download_button("Download PDF Report", pdf_output, file_name="sentiment_analysis_report.pdf", mime="application/pdf")
+     else:
+         st.info("Please upload a CSV file to get started.")
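
As a usage sketch (not part of this commit): app.py expects an uploaded CSV with a single column of review text, and reviews longer than 512 characters are dropped before scoring. The snippet below, with a hypothetical file name and made-up review strings, writes such a CSV locally and notes the standard command for launching the Streamlit app.

# Hypothetical usage sketch -- file name and review texts are illustrative only.
import pandas as pd

pd.DataFrame({"review": [
    "Fast delivery and the product works exactly as described.",
    "Stopped working after two days, very disappointed.",
]}).to_csv("sample_reviews.csv", index=False, encoding="utf-8")

# Launch the app from the repository root:
#   streamlit run app.py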
requirements.txt ADDED
Binary file (3.18 kB).
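
The requirements file is committed as a binary blob, so its exact contents are not visible in this view. Purely as an assumption inferred from the imports in app.py (version pins unknown), an equivalent dependency list would look roughly like this:

# Hypothetical reconstruction -- the actual requirements.txt in this commit is not shown.
streamlit
pandas
matplotlib
wordcloud
reportlab
transformers
torch
chardet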