Spaces:
Runtime error
Runtime error
c2r plot
Browse files- data/df/ist_logs.txt +0 -0
- src/read_logs.py +131 -41
data/df/ist_logs.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
src/read_logs.py
CHANGED
@@ -20,7 +20,7 @@ class LogAnalyser:
|
|
20 |
|
21 |
self.path = container_param.selectbox(
|
22 |
'Select the log path', log_file_paths)
|
23 |
-
self.df_path = f'data/df/{self.path.split("/")[-1]}'
|
24 |
# if 'button1_counter' not in st.session_state:
|
25 |
# st.session_state.button1_counter = 0
|
26 |
# if 'df' not in st.session_state:
|
@@ -43,8 +43,10 @@ class LogAnalyser:
|
|
43 |
def display_logs(self):
|
44 |
# self.container_param.markdown(
|
45 |
# f'st.session_state.button1_counter: {st.session_state.button1_counter}')
|
|
|
|
|
46 |
|
47 |
-
if not exists(self.df_path) or self.container_button.button('Detect Emotion'):
|
48 |
self.df = self.get_log()
|
49 |
# else:
|
50 |
self.df = pd.read_csv(self.df_path)
|
@@ -56,7 +58,45 @@ class LogAnalyser:
|
|
56 |
# st.session_state.path=self.path
|
57 |
|
58 |
self.update_df()
|
|
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
def get_ngram_pattern(self, s, n=2):
|
61 |
gnp = ''
|
62 |
for i in range(len(s)-(n-1)):
|
@@ -64,12 +104,21 @@ class LogAnalyser:
|
|
64 |
return gnp
|
65 |
|
66 |
def update_df(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
reaction_weight = set_input(self.container_param,
|
68 |
label='Reaction Weight w', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
|
69 |
key_slider='w_slider', key_input='w_input',)
|
70 |
self.container_param_rv = self.container_param.columns([1, 1])
|
71 |
random_value_mode = self.container_param_rv[0].radio(
|
72 |
-
"
|
73 |
# random_value = random.random()
|
74 |
if random_value_mode == "Fixed":
|
75 |
random_value = set_input(self.container_param,
|
@@ -80,7 +129,6 @@ class LogAnalyser:
|
|
80 |
step=.01,)
|
81 |
table_mode = self.container_param.radio(
|
82 |
"Table Style:", ["Dataframe", "Table"])
|
83 |
-
self.emotion_type = self.container_param.select_slider('Emotion', ['Max-only', '2', '3', '4', '5', '6', 'All 7'])
|
84 |
self.show_pe_data = self.container_param.checkbox(
|
85 |
'Show Probability Emote', value=True, key='show_pe_data_log')
|
86 |
self.score_threshold = set_input(self.container_param,
|
@@ -89,15 +137,23 @@ class LogAnalyser:
|
|
89 |
|
90 |
df_reaction_pattern = pd.DataFrame()
|
91 |
reaction_pattern_dict = dict()
|
92 |
-
for story_id in
|
93 |
reaction_num = 0
|
94 |
reaction_frequency = 0
|
95 |
probability_emote = 0
|
96 |
-
random_value = 0
|
97 |
reaction_show = False
|
|
|
98 |
|
99 |
def get_subset_condition(data):
|
100 |
return (data.Story == story_id) & (data.Turn == 'user')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
subset_condition = get_subset_condition(self.df)
|
102 |
dfs = self.df[subset_condition]
|
103 |
for i, (index, row) in enumerate(dfs.iterrows()):
|
@@ -117,6 +173,14 @@ class LogAnalyser:
|
|
117 |
self.df.at[index, 'probability_emote'] = probability_emote
|
118 |
self.df.at[index, 'random_value'] = random_value
|
119 |
self.df.at[index, 'reaction_show'] = reaction_show
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
s = ''
|
121 |
df_edit = self.df[get_subset_condition(
|
122 |
self.df)].reaction_show.copy()
|
@@ -130,15 +194,23 @@ class LogAnalyser:
|
|
130 |
reaction_pattern_dict['reaction_pattern'] = s
|
131 |
|
132 |
for i in range(2, 8):
|
133 |
-
reaction_pattern_dict[f'{i}-gram_pattern'] = self.get_ngram_pattern(
|
|
|
134 |
df_reaction_pattern = pd.concat(
|
135 |
[df_reaction_pattern, pd.DataFrame(reaction_pattern_dict, index=[f'Story_{story_id}'])])
|
136 |
# st.markdown(df_edit)
|
137 |
# st.markdown(s)
|
138 |
|
139 |
-
for
|
|
|
|
|
140 |
dfs = self.df[(self.df.Story == story_id)].copy()
|
141 |
columns2hide = ['Unnamed: 0', 'Story', ]
|
|
|
|
|
|
|
|
|
|
|
142 |
if not self.show_pe_data:
|
143 |
columns2hide += [
|
144 |
"reaction_frequency", "probability_emote", "random_value", "reaction_show"]
|
@@ -146,18 +218,16 @@ class LogAnalyser:
|
|
146 |
dfs.drop(c, axis=1, inplace=True)
|
147 |
|
148 |
st.markdown(f'#### Story {story_id}')
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
#
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
#
|
159 |
-
# 'props': 'color: #000000' # background-color: #eeee66;font-size: 1.01em;
|
160 |
-
# }]) # .hide_index()
|
161 |
|
162 |
if table_mode == 'Dataframe':
|
163 |
st.dataframe(dfs)
|
@@ -166,29 +236,47 @@ class LogAnalyser:
|
|
166 |
elif table_mode == 'Table':
|
167 |
st.table(dfs)
|
168 |
# st.table(df_reaction_pattern.iloc[story_id-1])
|
169 |
-
create_dowload_button(
|
|
|
170 |
# print(dfs.render())
|
171 |
if table_mode == 'Dataframe':
|
172 |
st.dataframe(df_reaction_pattern)
|
173 |
elif table_mode == 'Table':
|
174 |
st.table(df_reaction_pattern)
|
175 |
# @st.cache
|
|
|
176 |
def dfstyle_color_text_col(self, s):
|
|
|
177 |
result = ['background-color: white']*len(s)
|
178 |
-
if s.Emotion == 'neutral' and s.Turn == 'user':
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
# printj.red(result)
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
# if s.probability_emote!=s.probability_emote:
|
193 |
# result[5] = 'color: #eeeeee'
|
194 |
return result
|
@@ -237,17 +325,19 @@ class LogAnalyser:
|
|
237 |
if self.emotion_type == 'max':
|
238 |
emotion_type = 'max'
|
239 |
else:
|
240 |
-
emotion_type = 'sorted' #
|
241 |
-
emotion = self.gen.get_emotion(
|
242 |
-
|
|
|
243 |
log_dict['Emotion'] = emotion['label']
|
244 |
log_dict['Score'] = emotion['score']
|
245 |
-
elif emotion_type =='sorted':
|
246 |
-
|
247 |
-
log_dict['Score'] = emotion[0]['score']
|
248 |
-
for sorted_i in range(1, len(emotion)):
|
249 |
log_dict[f'Emotion_{sorted_i+1}'] = emotion[sorted_i]['label']
|
250 |
log_dict[f'Score_{sorted_i+1}'] = emotion[sorted_i]['score']
|
|
|
|
|
|
|
251 |
df = pd.concat(
|
252 |
[df, pd.DataFrame(log_dict, index=[f'idx_{i}'])])
|
253 |
df = df.reset_index(drop=True)
|
|
|
20 |
|
21 |
self.path = container_param.selectbox(
|
22 |
'Select the log path', log_file_paths)
|
23 |
+
self.df_path = f'data/df/{self.path.split("/")[-1].split(".")[0]}.csv'
|
24 |
# if 'button1_counter' not in st.session_state:
|
25 |
# st.session_state.button1_counter = 0
|
26 |
# if 'df' not in st.session_state:
|
|
|
43 |
def display_logs(self):
|
44 |
# self.container_param.markdown(
|
45 |
# f'st.session_state.button1_counter: {st.session_state.button1_counter}')
|
46 |
+
self.emotion_type = self.container_param.select_slider(
|
47 |
+
'Emotion', ['Max-only', '2', '3', '4', '5', '6', 'All 7'])
|
48 |
|
49 |
+
if (not exists(self.df_path) or self.container_button.button('Detect Emotion')) and ('debug' not in self.df_path):
|
50 |
self.df = self.get_log()
|
51 |
# else:
|
52 |
self.df = pd.read_csv(self.df_path)
|
|
|
58 |
# st.session_state.path=self.path
|
59 |
|
60 |
self.update_df()
|
61 |
+
self.get_c2_plot()
|
62 |
|
63 |
+
def get_c2_plot(self):
|
64 |
+
# c2_threshold=0
|
65 |
+
c2_threshold_list = np.arange(0,1,0.01)
|
66 |
+
|
67 |
+
list_stories = self.df.Story.unique()
|
68 |
+
total_num_stories = len(list_stories)
|
69 |
+
num_stories2show = 9 # int(set_input(self.container_param,
|
70 |
+
# label='Number of stories to show', min_value=1, max_value=total_num_stories, value=9, step=1,
|
71 |
+
# key_slider='num_stories2show_slider', key_input='num_stories2show_input',))
|
72 |
+
list_stories2show = list_stories[:num_stories2show]
|
73 |
+
|
74 |
+
c2r_sum_list = []
|
75 |
+
for c2_threshold in c2_threshold_list:
|
76 |
+
for story_id in list_stories2show:
|
77 |
+
subset_condition = self.get_subset_condition(self.df, story_id)
|
78 |
+
dfs = self.df[subset_condition]
|
79 |
+
for i, (index, row) in enumerate(dfs.iterrows()):
|
80 |
+
c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
|
81 |
+
self.df.at[index, 'c2'] = c2
|
82 |
+
review = self.df.e_review[index]
|
83 |
+
self.df.at[index, 'c2r'] = self.get_criteria_review(
|
84 |
+
c2, review=review)
|
85 |
+
c2r_sum_list.append(self.df['c2r'].sum())
|
86 |
+
fig = px.line(x=c2_threshold_list, y=c2r_sum_list, title='c2r')
|
87 |
+
fig.show()
|
88 |
+
pass
|
89 |
+
@staticmethod
|
90 |
+
def get_subset_condition(data, story_id):
|
91 |
+
return (data.Story == story_id) & (data.Turn == 'user')
|
92 |
+
|
93 |
+
@staticmethod
|
94 |
+
def get_criteria_review(c, review):
|
95 |
+
# printj.green(f'{c} {type(c)}')
|
96 |
+
# printj.green(f'{review} {type(review)}')
|
97 |
+
result = int(c == True and (review == 'o' or review == None)) + int(
|
98 |
+
c == False and review == 'x')
|
99 |
+
return result
|
100 |
def get_ngram_pattern(self, s, n=2):
|
101 |
gnp = ''
|
102 |
for i in range(len(s)-(n-1)):
|
|
|
104 |
return gnp
|
105 |
|
106 |
def update_df(self):
|
107 |
+
list_stories = self.df.Story.unique()
|
108 |
+
total_num_stories = len(list_stories)
|
109 |
+
num_stories2show = int(set_input(self.container_param,
|
110 |
+
label='Number of stories to show', min_value=1, max_value=total_num_stories, value=9, step=1,
|
111 |
+
key_slider='num_stories2show_slider', key_input='num_stories2show_input',))
|
112 |
+
list_stories2show = list_stories[:num_stories2show]
|
113 |
+
c2_threshold = set_input(self.container_param,
|
114 |
+
label='c2_threshold', min_value=0.0, max_value=1.0, value=0.7, step=0.01,
|
115 |
+
key_slider='c2_threshold_slider', key_input='c2_threshold_input',)
|
116 |
reaction_weight = set_input(self.container_param,
|
117 |
label='Reaction Weight w', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
|
118 |
key_slider='w_slider', key_input='w_input',)
|
119 |
self.container_param_rv = self.container_param.columns([1, 1])
|
120 |
random_value_mode = self.container_param_rv[0].radio(
|
121 |
+
"Threshold Value:", ["Random", "Fixed"], index=1)
|
122 |
# random_value = random.random()
|
123 |
if random_value_mode == "Fixed":
|
124 |
random_value = set_input(self.container_param,
|
|
|
129 |
step=.01,)
|
130 |
table_mode = self.container_param.radio(
|
131 |
"Table Style:", ["Dataframe", "Table"])
|
|
|
132 |
self.show_pe_data = self.container_param.checkbox(
|
133 |
'Show Probability Emote', value=True, key='show_pe_data_log')
|
134 |
self.score_threshold = set_input(self.container_param,
|
|
|
137 |
|
138 |
df_reaction_pattern = pd.DataFrame()
|
139 |
reaction_pattern_dict = dict()
|
140 |
+
for story_id in list_stories2show:
|
141 |
reaction_num = 0
|
142 |
reaction_frequency = 0
|
143 |
probability_emote = 0
|
144 |
+
# random_value = 0
|
145 |
reaction_show = False
|
146 |
+
# c2 = True
|
147 |
|
148 |
def get_subset_condition(data):
|
149 |
return (data.Story == story_id) & (data.Turn == 'user')
|
150 |
+
|
151 |
+
def get_criteria_review(c, review):
|
152 |
+
# printj.green(f'{c} {type(c)}')
|
153 |
+
# printj.green(f'{review} {type(review)}')
|
154 |
+
result = int(c == True and (review == 'o' or review == None)) + int(
|
155 |
+
c == False and review == 'x')
|
156 |
+
return result
|
157 |
subset_condition = get_subset_condition(self.df)
|
158 |
dfs = self.df[subset_condition]
|
159 |
for i, (index, row) in enumerate(dfs.iterrows()):
|
|
|
173 |
self.df.at[index, 'probability_emote'] = probability_emote
|
174 |
self.df.at[index, 'random_value'] = random_value
|
175 |
self.df.at[index, 'reaction_show'] = reaction_show
|
176 |
+
self.df.at[index, 'c1'] = reaction_show
|
177 |
+
c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
|
178 |
+
self.df.at[index, 'c2'] = c2
|
179 |
+
review = self.df.e_review[index]
|
180 |
+
self.df.at[index, 'c1r'] = get_criteria_review(
|
181 |
+
reaction_show, review=review)
|
182 |
+
self.df.at[index, 'c2r'] = get_criteria_review(
|
183 |
+
c2, review=review)
|
184 |
s = ''
|
185 |
df_edit = self.df[get_subset_condition(
|
186 |
self.df)].reaction_show.copy()
|
|
|
194 |
reaction_pattern_dict['reaction_pattern'] = s
|
195 |
|
196 |
for i in range(2, 8):
|
197 |
+
reaction_pattern_dict[f'{i}-gram_pattern'] = self.get_ngram_pattern(
|
198 |
+
s, n=i)
|
199 |
df_reaction_pattern = pd.concat(
|
200 |
[df_reaction_pattern, pd.DataFrame(reaction_pattern_dict, index=[f'Story_{story_id}'])])
|
201 |
# st.markdown(df_edit)
|
202 |
# st.markdown(s)
|
203 |
|
204 |
+
for c in ['c1r', 'c2r']:
|
205 |
+
st.markdown(f'Sum of {c} : {self.df[c].sum()}')
|
206 |
+
for story_id in list_stories2show:
|
207 |
dfs = self.df[(self.df.Story == story_id)].copy()
|
208 |
columns2hide = ['Unnamed: 0', 'Story', ]
|
209 |
+
if self.emotion_type == 'Max-only':
|
210 |
+
columns2hide += [
|
211 |
+
f'Emotion_{sorted_i+1}' for sorted_i in range(7)]
|
212 |
+
columns2hide += [
|
213 |
+
f'Score_{sorted_i+1}' for sorted_i in range(7)]
|
214 |
if not self.show_pe_data:
|
215 |
columns2hide += [
|
216 |
"reaction_frequency", "probability_emote", "random_value", "reaction_show"]
|
|
|
218 |
dfs.drop(c, axis=1, inplace=True)
|
219 |
|
220 |
st.markdown(f'#### Story {story_id}')
|
221 |
+
|
222 |
+
dfs = dfs.style
|
223 |
+
if self.show_pe_data:
|
224 |
+
dfs = dfs.apply(self.dfstyle_color_text_col, axis=1)
|
225 |
+
# dfs = dfs.applymap(self.dfstyle_color_text)
|
226 |
+
dfs = dfs.apply(self.rower, axis=None)
|
227 |
+
dfs = dfs.set_table_styles([{
|
228 |
+
'selector': 'tr:hover',
|
229 |
+
'props': 'color: #000000' # background-color: #eeee66;font-size: 1.01em;
|
230 |
+
}]) # .hide_index()
|
|
|
|
|
231 |
|
232 |
if table_mode == 'Dataframe':
|
233 |
st.dataframe(dfs)
|
|
|
236 |
elif table_mode == 'Table':
|
237 |
st.table(dfs)
|
238 |
# st.table(df_reaction_pattern.iloc[story_id-1])
|
239 |
+
create_dowload_button(
|
240 |
+
dfs, sheet_name=f'story_{story_id}', file_name=f'data_story_{story_id}.xlsx')
|
241 |
# print(dfs.render())
|
242 |
if table_mode == 'Dataframe':
|
243 |
st.dataframe(df_reaction_pattern)
|
244 |
elif table_mode == 'Table':
|
245 |
st.table(df_reaction_pattern)
|
246 |
# @st.cache
|
247 |
+
|
248 |
def dfstyle_color_text_col(self, s):
|
249 |
+
num_col = len(s)
|
250 |
result = ['background-color: white']*len(s)
|
251 |
+
# if s.Emotion == 'neutral' and s.Turn == 'user':
|
252 |
+
# result[-6:-1] = ['color: #992222'] + \
|
253 |
+
# ['color: #333333']+['color: #fcfcfc']*3
|
254 |
+
for si, sc in enumerate(s):
|
255 |
+
if sc != sc:
|
256 |
+
result[si] = 'color: #fcfcfc'
|
257 |
+
# printj.red.bold_on_white(s)
|
258 |
+
# printj.red.bold_on_cyan(si)
|
259 |
+
# printj.red.bold_on_cyan(sc)
|
260 |
+
# if s.Score < self.score_threshold and s.Turn == 'user':
|
261 |
+
# result[-5:-1] = ['color: #992222'] + ['color: #fcfcfc']*3
|
262 |
# printj.red(result)
|
263 |
+
# printj.red.bold_on_cyan(s)
|
264 |
+
# printj.red.bold_on_cyan(type(s))
|
265 |
+
# printj.red.bold_on_white(s.keys().tolist())
|
266 |
+
# printj.red.bold_on_white(type(s.keys().tolist()))
|
267 |
+
# idx_reaction_show = s.keys().tolist().index("reaction_show")
|
268 |
+
# printj.red.bold_on_white(idx_reaction_show)
|
269 |
+
# if s.reaction_show == 1:
|
270 |
+
# # result[idx_reaction_show] = 'color: #222222'
|
271 |
+
# pass
|
272 |
+
# elif s.reaction_show == 0:
|
273 |
+
# # result[idx_reaction_show] = 'color: #222222'
|
274 |
+
# pass
|
275 |
+
# else:
|
276 |
+
# # print(s.reaction_show)
|
277 |
+
# # print(type(s.reaction_show))
|
278 |
+
# hide_length = 3
|
279 |
+
# result[idx_reaction_show-hide_length:] = ['color: #fcfcfc']*(num_col-idx_reaction_show+hide_length)
|
280 |
# if s.probability_emote!=s.probability_emote:
|
281 |
# result[5] = 'color: #eeeeee'
|
282 |
return result
|
|
|
325 |
if self.emotion_type == 'max':
|
326 |
emotion_type = 'max'
|
327 |
else:
|
328 |
+
emotion_type = 'sorted' #
|
329 |
+
emotion = self.gen.get_emotion(
|
330 |
+
log_dict['Sentence'], filter_by=emotion_type)
|
331 |
+
if emotion_type == 'max':
|
332 |
log_dict['Emotion'] = emotion['label']
|
333 |
log_dict['Score'] = emotion['score']
|
334 |
+
elif emotion_type == 'sorted':
|
335 |
+
for sorted_i in range(len(emotion)):
|
|
|
|
|
336 |
log_dict[f'Emotion_{sorted_i+1}'] = emotion[sorted_i]['label']
|
337 |
log_dict[f'Score_{sorted_i+1}'] = emotion[sorted_i]['score']
|
338 |
+
log_dict['Emotion'] = emotion[0]['label']
|
339 |
+
log_dict['Score'] = emotion[0]['score']
|
340 |
+
log_dict['e_review'] = ' '
|
341 |
df = pd.concat(
|
342 |
[df, pd.DataFrame(log_dict, index=[f'idx_{i}'])])
|
343 |
df = df.reset_index(drop=True)
|