jitesh committed on
Commit
50a12ff
1 Parent(s): bf902a7
Files changed (2) hide show
  1. data/df/ist_logs.txt +0 -0
  2. src/read_logs.py +131 -41
data/df/ist_logs.txt CHANGED
The diff for this file is too large to render. See raw diff
 
src/read_logs.py CHANGED
@@ -20,7 +20,7 @@ class LogAnalyser:
20
 
21
  self.path = container_param.selectbox(
22
  'Select the log path', log_file_paths)
23
- self.df_path = f'data/df/{self.path.split("/")[-1]}'
24
  # if 'button1_counter' not in st.session_state:
25
  # st.session_state.button1_counter = 0
26
  # if 'df' not in st.session_state:
@@ -43,8 +43,10 @@ class LogAnalyser:
43
  def display_logs(self):
44
  # self.container_param.markdown(
45
  # f'st.session_state.button1_counter: {st.session_state.button1_counter}')
 
 
46
 
47
- if not exists(self.df_path) or self.container_button.button('Detect Emotion'):
48
  self.df = self.get_log()
49
  # else:
50
  self.df = pd.read_csv(self.df_path)
@@ -56,7 +58,45 @@ class LogAnalyser:
56
  # st.session_state.path=self.path
57
 
58
  self.update_df()
 
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  def get_ngram_pattern(self, s, n=2):
61
  gnp = ''
62
  for i in range(len(s)-(n-1)):
@@ -64,12 +104,21 @@ class LogAnalyser:
64
  return gnp
65
 
66
  def update_df(self):
 
 
 
 
 
 
 
 
 
67
  reaction_weight = set_input(self.container_param,
68
  label='Reaction Weight w', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
69
  key_slider='w_slider', key_input='w_input',)
70
  self.container_param_rv = self.container_param.columns([1, 1])
71
  random_value_mode = self.container_param_rv[0].radio(
72
- "Random Value:", ["Random", "Fixed"])
73
  # random_value = random.random()
74
  if random_value_mode == "Fixed":
75
  random_value = set_input(self.container_param,
@@ -80,7 +129,6 @@ class LogAnalyser:
80
  step=.01,)
81
  table_mode = self.container_param.radio(
82
  "Table Style:", ["Dataframe", "Table"])
83
- self.emotion_type = self.container_param.select_slider('Emotion', ['Max-only', '2', '3', '4', '5', '6', 'All 7'])
84
  self.show_pe_data = self.container_param.checkbox(
85
  'Show Probability Emote', value=True, key='show_pe_data_log')
86
  self.score_threshold = set_input(self.container_param,
@@ -89,15 +137,23 @@ class LogAnalyser:
89
 
90
  df_reaction_pattern = pd.DataFrame()
91
  reaction_pattern_dict = dict()
92
- for story_id in self.df.Story.unique():
93
  reaction_num = 0
94
  reaction_frequency = 0
95
  probability_emote = 0
96
- random_value = 0
97
  reaction_show = False
 
98
 
99
  def get_subset_condition(data):
100
  return (data.Story == story_id) & (data.Turn == 'user')
 
 
 
 
 
 
 
101
  subset_condition = get_subset_condition(self.df)
102
  dfs = self.df[subset_condition]
103
  for i, (index, row) in enumerate(dfs.iterrows()):
@@ -117,6 +173,14 @@ class LogAnalyser:
117
  self.df.at[index, 'probability_emote'] = probability_emote
118
  self.df.at[index, 'random_value'] = random_value
119
  self.df.at[index, 'reaction_show'] = reaction_show
 
 
 
 
 
 
 
 
120
  s = ''
121
  df_edit = self.df[get_subset_condition(
122
  self.df)].reaction_show.copy()
@@ -130,15 +194,23 @@ class LogAnalyser:
130
  reaction_pattern_dict['reaction_pattern'] = s
131
 
132
  for i in range(2, 8):
133
- reaction_pattern_dict[f'{i}-gram_pattern'] = self.get_ngram_pattern(s, n=i)
 
134
  df_reaction_pattern = pd.concat(
135
  [df_reaction_pattern, pd.DataFrame(reaction_pattern_dict, index=[f'Story_{story_id}'])])
136
  # st.markdown(df_edit)
137
  # st.markdown(s)
138
 
139
- for story_id in self.df.Story.unique():
 
 
140
  dfs = self.df[(self.df.Story == story_id)].copy()
141
  columns2hide = ['Unnamed: 0', 'Story', ]
 
 
 
 
 
142
  if not self.show_pe_data:
143
  columns2hide += [
144
  "reaction_frequency", "probability_emote", "random_value", "reaction_show"]
@@ -146,18 +218,16 @@ class LogAnalyser:
146
  dfs.drop(c, axis=1, inplace=True)
147
 
148
  st.markdown(f'#### Story {story_id}')
149
-
150
-
151
- # dfs = dfs.style
152
- # dfs = dfs.hide_index()
153
- # if self.show_pe_data:
154
- # dfs = dfs.apply(self.dfstyle_color_text_col, axis=1)
155
- # # dfs = dfs.applymap(self.dfstyle_color_text)
156
- # dfs = dfs.apply(self.rower, axis=None)
157
- # dfs = dfs.set_table_styles([{
158
- # 'selector': 'tr:hover',
159
- # 'props': 'color: #000000' # background-color: #eeee66;font-size: 1.01em;
160
- # }]) # .hide_index()
161
 
162
  if table_mode == 'Dataframe':
163
  st.dataframe(dfs)
@@ -166,29 +236,47 @@ class LogAnalyser:
166
  elif table_mode == 'Table':
167
  st.table(dfs)
168
  # st.table(df_reaction_pattern.iloc[story_id-1])
169
- create_dowload_button(dfs, sheet_name=f'story_{story_id}', file_name=f'data_story_{story_id}.xlsx')
 
170
  # print(dfs.render())
171
  if table_mode == 'Dataframe':
172
  st.dataframe(df_reaction_pattern)
173
  elif table_mode == 'Table':
174
  st.table(df_reaction_pattern)
175
  # @st.cache
 
176
  def dfstyle_color_text_col(self, s):
 
177
  result = ['background-color: white']*len(s)
178
- if s.Emotion == 'neutral' and s.Turn == 'user':
179
- result[2:-1] = ['color: #992222'] + \
180
- ['color: #333333']+['color: #fcfcfc']*3
181
- if s.Score < self.score_threshold and s.Turn == 'user':
182
- result[3:-1] = ['color: #992222'] + ['color: #fcfcfc']*3
 
 
 
 
 
 
183
  # printj.red(result)
184
- if s.reaction_show == 1:
185
- result[-1] = 'color: #222222'
186
- elif s.reaction_show == 0:
187
- result[-1] = 'color: #222222'
188
- else:
189
- # print(s.reaction_show)
190
- # print(type(s.reaction_show))
191
- result[4:] = ['color: #fcfcfc']*4
 
 
 
 
 
 
 
 
 
192
  # if s.probability_emote!=s.probability_emote:
193
  # result[5] = 'color: #eeeeee'
194
  return result
@@ -237,17 +325,19 @@ class LogAnalyser:
237
  if self.emotion_type == 'max':
238
  emotion_type = 'max'
239
  else:
240
- emotion_type = 'sorted' #
241
- emotion = self.gen.get_emotion(log_dict['Sentence'], filter_by=emotion_type)
242
- if emotion_type =='max':
 
243
  log_dict['Emotion'] = emotion['label']
244
  log_dict['Score'] = emotion['score']
245
- elif emotion_type =='sorted':
246
- log_dict['Emotion'] = emotion[0]['label']
247
- log_dict['Score'] = emotion[0]['score']
248
- for sorted_i in range(1, len(emotion)):
249
  log_dict[f'Emotion_{sorted_i+1}'] = emotion[sorted_i]['label']
250
  log_dict[f'Score_{sorted_i+1}'] = emotion[sorted_i]['score']
 
 
 
251
  df = pd.concat(
252
  [df, pd.DataFrame(log_dict, index=[f'idx_{i}'])])
253
  df = df.reset_index(drop=True)
 
20
 
21
  self.path = container_param.selectbox(
22
  'Select the log path', log_file_paths)
23
+ self.df_path = f'data/df/{self.path.split("/")[-1].split(".")[0]}.csv'
24
  # if 'button1_counter' not in st.session_state:
25
  # st.session_state.button1_counter = 0
26
  # if 'df' not in st.session_state:
 
43
  def display_logs(self):
44
  # self.container_param.markdown(
45
  # f'st.session_state.button1_counter: {st.session_state.button1_counter}')
46
+ self.emotion_type = self.container_param.select_slider(
47
+ 'Emotion', ['Max-only', '2', '3', '4', '5', '6', 'All 7'])
48
 
49
+ if (not exists(self.df_path) or self.container_button.button('Detect Emotion')) and ('debug' not in self.df_path):
50
  self.df = self.get_log()
51
  # else:
52
  self.df = pd.read_csv(self.df_path)
 
58
  # st.session_state.path=self.path
59
 
60
  self.update_df()
61
+ self.get_c2_plot()
62
 
63
def get_c2_plot(self, num_stories2show=9):
    """Plot the summed ``c2r`` review score against the c2 threshold.

    Every threshold in ``np.arange(0, 1, 0.01)`` is applied to the user
    turns of the first ``num_stories2show`` stories in ``self.df``. A turn
    satisfies criterion c2 when its ``Emotion`` is not ``'neutral'`` and its
    ``Score`` is above the threshold. Each decision is scored against the
    turn's ``e_review`` value with ``get_criteria_review`` and the
    per-threshold totals are drawn as a line chart titled ``'c2r'``.

    The sweep works on a local selection of rows, so the ``c2`` and ``c2r``
    columns that ``update_df`` stored in ``self.df`` are left untouched and
    rows outside the selected stories never leak into the totals.

    Args:
        num_stories2show: How many of the leading stories (in order of
            first appearance in ``self.df.Story``) take part in the sweep.
    """
    c2_threshold_list = np.arange(0, 1, 0.01)

    list_stories2show = self.df.Story.unique()[:num_stories2show]
    shown_user_turns = self.df[
        self.df.Story.isin(list_stories2show) & (self.df.Turn == 'user')]

    # Both of these are independent of the threshold, so compute them once.
    is_emotional = shown_user_turns.Emotion != 'neutral'
    reviews = shown_user_turns.e_review.tolist()

    c2r_sum_list = []
    for c2_threshold in c2_threshold_list:
        c2_flags = (
            is_emotional & (shown_user_turns.Score > c2_threshold)).tolist()
        c2r_sum_list.append(sum(
            self.get_criteria_review(c2, review=review)
            for c2, review in zip(c2_flags, reviews)))

    fig = px.line(x=c2_threshold_list, y=c2r_sum_list, title='c2r')
    # Render inside the Streamlit page; fig.show() would hand the figure to
    # plotly's own renderer instead of the app.
    st.plotly_chart(fig)
89
@staticmethod
def get_subset_condition(data, story_id, turn='user'):
    """Build a boolean row mask for one speaker's turns in one story.

    Args:
        data: Table exposing ``Story`` and ``Turn`` columns as attributes.
        story_id: Value the ``Story`` column must equal.
        turn: Value the ``Turn`` column must equal. Defaults to ``'user'``,
            which is the only turn the two-argument form ever selected.

    Returns:
        The element-wise AND of the two column comparisons, suitable for
        indexing ``data``.
    """
    return (data.Story == story_id) & (data.Turn == turn)
92
+
93
@staticmethod
def get_criteria_review(c, review):
    """Score whether a criterion decision agrees with the review mark.

    Args:
        c: Boolean outcome of the criterion for one row.
        review: Review mark for the same row; ``'o'`` and ``'x'`` are the
            two values that are recognised, and ``None`` is accepted as
            "no mark".

    Returns:
        ``1`` when ``c`` is true and ``review`` is ``'o'`` or ``None``, or
        when ``c`` is false and ``review`` is ``'x'``; ``0`` otherwise
        (including any other mark such as the blank ``' '``).
    """
    if c:
        return int(review == 'o' or review is None)
    return int(review == 'x')
100
  def get_ngram_pattern(self, s, n=2):
101
  gnp = ''
102
  for i in range(len(s)-(n-1)):
 
104
  return gnp
105
 
106
  def update_df(self):
107
+ list_stories = self.df.Story.unique()
108
+ total_num_stories = len(list_stories)
109
+ num_stories2show = int(set_input(self.container_param,
110
+ label='Number of stories to show', min_value=1, max_value=total_num_stories, value=9, step=1,
111
+ key_slider='num_stories2show_slider', key_input='num_stories2show_input',))
112
+ list_stories2show = list_stories[:num_stories2show]
113
+ c2_threshold = set_input(self.container_param,
114
+ label='c2_threshold', min_value=0.0, max_value=1.0, value=0.7, step=0.01,
115
+ key_slider='c2_threshold_slider', key_input='c2_threshold_input',)
116
  reaction_weight = set_input(self.container_param,
117
  label='Reaction Weight w', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
118
  key_slider='w_slider', key_input='w_input',)
119
  self.container_param_rv = self.container_param.columns([1, 1])
120
  random_value_mode = self.container_param_rv[0].radio(
121
+ "Threshold Value:", ["Random", "Fixed"], index=1)
122
  # random_value = random.random()
123
  if random_value_mode == "Fixed":
124
  random_value = set_input(self.container_param,
 
129
  step=.01,)
130
  table_mode = self.container_param.radio(
131
  "Table Style:", ["Dataframe", "Table"])
 
132
  self.show_pe_data = self.container_param.checkbox(
133
  'Show Probability Emote', value=True, key='show_pe_data_log')
134
  self.score_threshold = set_input(self.container_param,
 
137
 
138
  df_reaction_pattern = pd.DataFrame()
139
  reaction_pattern_dict = dict()
140
+ for story_id in list_stories2show:
141
  reaction_num = 0
142
  reaction_frequency = 0
143
  probability_emote = 0
144
+ # random_value = 0
145
  reaction_show = False
146
+ # c2 = True
147
 
148
  def get_subset_condition(data):
149
  return (data.Story == story_id) & (data.Turn == 'user')
150
+
151
+ def get_criteria_review(c, review):
152
+ # printj.green(f'{c} {type(c)}')
153
+ # printj.green(f'{review} {type(review)}')
154
+ result = int(c == True and (review == 'o' or review == None)) + int(
155
+ c == False and review == 'x')
156
+ return result
157
  subset_condition = get_subset_condition(self.df)
158
  dfs = self.df[subset_condition]
159
  for i, (index, row) in enumerate(dfs.iterrows()):
 
173
  self.df.at[index, 'probability_emote'] = probability_emote
174
  self.df.at[index, 'random_value'] = random_value
175
  self.df.at[index, 'reaction_show'] = reaction_show
176
+ self.df.at[index, 'c1'] = reaction_show
177
+ c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
178
+ self.df.at[index, 'c2'] = c2
179
+ review = self.df.e_review[index]
180
+ self.df.at[index, 'c1r'] = get_criteria_review(
181
+ reaction_show, review=review)
182
+ self.df.at[index, 'c2r'] = get_criteria_review(
183
+ c2, review=review)
184
  s = ''
185
  df_edit = self.df[get_subset_condition(
186
  self.df)].reaction_show.copy()
 
194
  reaction_pattern_dict['reaction_pattern'] = s
195
 
196
  for i in range(2, 8):
197
+ reaction_pattern_dict[f'{i}-gram_pattern'] = self.get_ngram_pattern(
198
+ s, n=i)
199
  df_reaction_pattern = pd.concat(
200
  [df_reaction_pattern, pd.DataFrame(reaction_pattern_dict, index=[f'Story_{story_id}'])])
201
  # st.markdown(df_edit)
202
  # st.markdown(s)
203
 
204
+ for c in ['c1r', 'c2r']:
205
+ st.markdown(f'Sum of {c} : {self.df[c].sum()}')
206
+ for story_id in list_stories2show:
207
  dfs = self.df[(self.df.Story == story_id)].copy()
208
  columns2hide = ['Unnamed: 0', 'Story', ]
209
+ if self.emotion_type == 'Max-only':
210
+ columns2hide += [
211
+ f'Emotion_{sorted_i+1}' for sorted_i in range(7)]
212
+ columns2hide += [
213
+ f'Score_{sorted_i+1}' for sorted_i in range(7)]
214
  if not self.show_pe_data:
215
  columns2hide += [
216
  "reaction_frequency", "probability_emote", "random_value", "reaction_show"]
 
218
  dfs.drop(c, axis=1, inplace=True)
219
 
220
  st.markdown(f'#### Story {story_id}')
221
+
222
+ dfs = dfs.style
223
+ if self.show_pe_data:
224
+ dfs = dfs.apply(self.dfstyle_color_text_col, axis=1)
225
+ # dfs = dfs.applymap(self.dfstyle_color_text)
226
+ dfs = dfs.apply(self.rower, axis=None)
227
+ dfs = dfs.set_table_styles([{
228
+ 'selector': 'tr:hover',
229
+ 'props': 'color: #000000' # background-color: #eeee66;font-size: 1.01em;
230
+ }]) # .hide_index()
 
 
231
 
232
  if table_mode == 'Dataframe':
233
  st.dataframe(dfs)
 
236
  elif table_mode == 'Table':
237
  st.table(dfs)
238
  # st.table(df_reaction_pattern.iloc[story_id-1])
239
+ create_dowload_button(
240
+ dfs, sheet_name=f'story_{story_id}', file_name=f'data_story_{story_id}.xlsx')
241
  # print(dfs.render())
242
  if table_mode == 'Dataframe':
243
  st.dataframe(df_reaction_pattern)
244
  elif table_mode == 'Table':
245
  st.table(df_reaction_pattern)
246
  # @st.cache
247
+
248
  def dfstyle_color_text_col(self, s):
249
+ num_col = len(s)
250
  result = ['background-color: white']*len(s)
251
+ # if s.Emotion == 'neutral' and s.Turn == 'user':
252
+ # result[-6:-1] = ['color: #992222'] + \
253
+ # ['color: #333333']+['color: #fcfcfc']*3
254
+ for si, sc in enumerate(s):
255
+ if sc != sc:
256
+ result[si] = 'color: #fcfcfc'
257
+ # printj.red.bold_on_white(s)
258
+ # printj.red.bold_on_cyan(si)
259
+ # printj.red.bold_on_cyan(sc)
260
+ # if s.Score < self.score_threshold and s.Turn == 'user':
261
+ # result[-5:-1] = ['color: #992222'] + ['color: #fcfcfc']*3
262
  # printj.red(result)
263
+ # printj.red.bold_on_cyan(s)
264
+ # printj.red.bold_on_cyan(type(s))
265
+ # printj.red.bold_on_white(s.keys().tolist())
266
+ # printj.red.bold_on_white(type(s.keys().tolist()))
267
+ # idx_reaction_show = s.keys().tolist().index("reaction_show")
268
+ # printj.red.bold_on_white(idx_reaction_show)
269
+ # if s.reaction_show == 1:
270
+ # # result[idx_reaction_show] = 'color: #222222'
271
+ # pass
272
+ # elif s.reaction_show == 0:
273
+ # # result[idx_reaction_show] = 'color: #222222'
274
+ # pass
275
+ # else:
276
+ # # print(s.reaction_show)
277
+ # # print(type(s.reaction_show))
278
+ # hide_length = 3
279
+ # result[idx_reaction_show-hide_length:] = ['color: #fcfcfc']*(num_col-idx_reaction_show+hide_length)
280
  # if s.probability_emote!=s.probability_emote:
281
  # result[5] = 'color: #eeeeee'
282
  return result
 
325
  if self.emotion_type == 'max':
326
  emotion_type = 'max'
327
  else:
328
+ emotion_type = 'sorted' #
329
+ emotion = self.gen.get_emotion(
330
+ log_dict['Sentence'], filter_by=emotion_type)
331
+ if emotion_type == 'max':
332
  log_dict['Emotion'] = emotion['label']
333
  log_dict['Score'] = emotion['score']
334
+ elif emotion_type == 'sorted':
335
+ for sorted_i in range(len(emotion)):
 
 
336
  log_dict[f'Emotion_{sorted_i+1}'] = emotion[sorted_i]['label']
337
  log_dict[f'Score_{sorted_i+1}'] = emotion[sorted_i]['score']
338
+ log_dict['Emotion'] = emotion[0]['label']
339
+ log_dict['Score'] = emotion[0]['score']
340
+ log_dict['e_review'] = ' '
341
  df = pd.concat(
342
  [df, pd.DataFrame(log_dict, index=[f'idx_{i}'])])
343
  df = df.reset_index(drop=True)