dejanseo committed on
Commit
fd3439d
·
verified ·
1 Parent(s): eb6289f

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +62 -54
src/streamlit_app.py CHANGED
@@ -92,30 +92,38 @@ def classify_sentences(text):
92
  sentence_scores.append((sentence, sentiment))
93
  return sentence_scores
94
 
95
- # Streamlit UI
96
- st.title("Sentiment Classification Model")
97
-
98
- url = st.text_input("Enter URL:")
99
-
100
- if url:
101
- text = get_text_from_url(url)
102
- if text:
103
- scores, chunk_scores_list, chunks = classify_long_text(text)
104
- scores_dict = {sentiment_labels[i]: scores[i] for i in range(len(sentiment_labels))}
105
-
106
- # Ensure the exact order of labels in the graph
107
- sentiment_order = [
108
- "very positive", "positive", "somewhat positive",
109
- "neutral",
110
- "somewhat negative", "negative", "very negative"
111
- ]
112
- ordered_scores_dict = OrderedDict((label, scores_dict[label]) for label in sentiment_order)
113
-
114
- # Prepare the DataFrame and reindex
115
- df = pd.DataFrame.from_dict(ordered_scores_dict, orient='index', columns=['Likelihood']).reindex(sentiment_order)
116
-
117
- # Use Altair to plot the bar chart
118
- chart = alt.Chart(df.reset_index()).mark_bar().encode(
 
 
 
 
 
 
 
 
119
  x=alt.X('index', sort=sentiment_order, title='Sentiment'),
120
  y='Likelihood'
121
  ).properties(
@@ -123,35 +131,36 @@ if url:
123
  height=400
124
  )
125
 
126
- st.altair_chart(chart, use_container_width=True)
127
-
128
- # Display each chunk and its own chart
129
- for i, (chunk_scores, chunk) in enumerate(zip(chunk_scores_list, chunks)):
130
- chunk_scores_dict = {sentiment_labels[j]: chunk_scores[j] for j in range(len(sentiment_labels))}
131
- ordered_chunk_scores_dict = OrderedDict((label, chunk_scores_dict[label]) for label in sentiment_order)
132
- df_chunk = pd.DataFrame.from_dict(ordered_chunk_scores_dict, orient='index', columns=['Likelihood']).reindex(sentiment_order)
133
-
134
- chunk_chart = alt.Chart(df_chunk.reset_index()).mark_bar().encode(
135
- x=alt.X('index', sort=sentiment_order, title='Sentiment'),
136
- y='Likelihood'
137
- ).properties(
138
- width=600,
139
- height=400
140
- )
141
-
142
- st.write(f"Chunk {i + 1}:")
143
- st.write(chunk)
144
- st.altair_chart(chunk_chart, use_container_width=True)
145
-
146
- # Sentence-level classification with background colors
147
- st.write("Extracted Text with Sentiment Highlights:")
148
- sentence_scores = classify_sentences(text)
149
- for sentence, sentiment in sentence_scores:
150
- bg_color = background_colors[sentiment]
151
- st.markdown(f'<span style="background-color: {bg_color}">{sentence}</span>', unsafe_allow_html=True)
152
-
153
- else:
154
- st.write("Could not extract text from the provided URL.")
 
155
 
156
  # Additional information at the end
157
  st.markdown("""
@@ -161,4 +170,3 @@ The model is designed to be deployed in an automated pipeline capable of classif
161
  Interested in using this in an automated pipeline for bulk sentiment processing?
162
  Please [book an appointment](https://dejan.ai/call/) to discuss your needs.
163
  """)
164
-
 
92
  sentence_scores.append((sentence, sentiment))
93
  return sentence_scores
94
 
95
+ def render_analysis(text):
96
+ scores, chunk_scores_list, chunks = classify_long_text(text)
97
+ scores_dict = {sentiment_labels[i]: scores[i] for i in range(len(sentiment_labels))}
98
+
99
+ # Ensure the exact order of labels in the graph
100
+ sentiment_order = [
101
+ "very positive", "positive", "somewhat positive",
102
+ "neutral",
103
+ "somewhat negative", "negative", "very negative"
104
+ ]
105
+ ordered_scores_dict = OrderedDict((label, scores_dict[label]) for label in sentiment_order)
106
+
107
+ # Prepare the DataFrame and reindex
108
+ df = pd.DataFrame.from_dict(ordered_scores_dict, orient='index', columns=['Likelihood']).reindex(sentiment_order)
109
+
110
+ # Use Altair to plot the bar chart
111
+ chart = alt.Chart(df.reset_index()).mark_bar().encode(
112
+ x=alt.X('index', sort=sentiment_order, title='Sentiment'),
113
+ y='Likelihood'
114
+ ).properties(
115
+ width=600,
116
+ height=400
117
+ )
118
+ st.altair_chart(chart, use_container_width=True)
119
+
120
+ # Display each chunk and its own chart
121
+ for i, (chunk_scores, chunk) in enumerate(zip(chunk_scores_list, chunks)):
122
+ chunk_scores_dict = {sentiment_labels[j]: chunk_scores[j] for j in range(len(sentiment_labels))}
123
+ ordered_chunk_scores_dict = OrderedDict((label, chunk_scores_dict[label]) for label in sentiment_order)
124
+ df_chunk = pd.DataFrame.from_dict(ordered_chunk_scores_dict, orient='index', columns=['Likelihood']).reindex(sentiment_order)
125
+
126
+ chunk_chart = alt.Chart(df_chunk.reset_index()).mark_bar().encode(
127
  x=alt.X('index', sort=sentiment_order, title='Sentiment'),
128
  y='Likelihood'
129
  ).properties(
 
131
  height=400
132
  )
133
 
134
+ st.write(f"Chunk {i + 1}:")
135
+ st.write(chunk)
136
+ st.altair_chart(chunk_chart, use_container_width=True)
137
+
138
+ # Sentence-level classification with background colors
139
+ st.write("Extracted Text with Sentiment Highlights:")
140
+ sentence_scores = classify_sentences(text)
141
+ for sentence, sentiment in sentence_scores:
142
+ bg_color = background_colors[sentiment]
143
+ st.markdown(f'<span style="background-color: {bg_color}">{sentence}</span>', unsafe_allow_html=True)
144
+
145
+ # Streamlit UI
146
+ st.title("Sentiment Classification Model")
147
+
148
+ tab_paste, tab_scrape = st.tabs(["Paste Text", "Scrape URL"])
149
+
150
+ with tab_paste:
151
+ pasted_text = st.text_area("Paste text for analysis:", height=300, placeholder="Paste text here…")
152
+ if pasted_text and pasted_text.strip():
153
+ render_analysis(pasted_text.strip())
154
+
155
+ with tab_scrape:
156
+ st.info("If the site returns 403 (forbidden), use the 'Paste Text' tab to analyze content manually.")
157
+ url = st.text_input("Enter URL:")
158
+ if url:
159
+ text = get_text_from_url(url)
160
+ if text:
161
+ render_analysis(text)
162
+ else:
163
+ st.write("Could not extract text from the provided URL.")
164
 
165
  # Additional information at the end
166
  st.markdown("""
 
170
  Interested in using this in an automated pipeline for bulk sentiment processing?
171
  Please [book an appointment](https://dejan.ai/call/) to discuss your needs.
172
  """)