File size: 19,959 Bytes
bfa0c67
 
 
 
 
 
 
1115bb5
dfd7729
bfa0c67
 
 
 
 
 
dfd7729
bfa0c67
 
 
 
 
 
50a12ff
83b0d23
 
bfa0c67
 
 
dfd7729
bfa0c67
 
 
 
 
 
 
 
 
 
 
 
dfd7729
bfa0c67
83b0d23
 
50a12ff
93ec191
 
 
dfd7729
bf902a7
 
83b0d23
dfd7729
 
 
 
 
bfa0c67
 
93ec191
 
 
 
 
 
 
 
83b0d23
93ec191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50a12ff
 
93ec191
50a12ff
 
 
93ec191
 
 
50a12ff
93ec191
50a12ff
 
93ec191
50a12ff
93ec191
 
50a12ff
 
93ec191
 
 
50a12ff
93ec191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50a12ff
 
 
93ec191
50a12ff
 
 
 
 
93ec191
 
 
e82c355
 
 
 
 
 
bfa0c67
50a12ff
 
 
93ec191
 
50a12ff
bfa0c67
 
 
 
 
93ec191
bfa0c67
 
 
93ec191
 
bfa0c67
 
 
 
93ec191
 
 
dfd7729
 
83b0d23
 
e82c355
 
 
 
83b0d23
 
50a12ff
bfa0c67
e82c355
 
50a12ff
e82c355
50a12ff
e82c355
93ec191
83b0d23
dfd7729
e82c355
dfd7729
 
 
 
 
 
 
 
 
bfa0c67
dfd7729
 
 
 
 
50a12ff
 
 
 
93ec191
50a12ff
93ec191
50a12ff
83b0d23
93ec191
 
e82c355
83b0d23
 
 
 
e82c355
 
 
 
 
50a12ff
 
83b0d23
 
 
 
 
93ec191
 
 
50a12ff
93ec191
 
50a12ff
93ec191
83b0d23
93ec191
 
50a12ff
 
 
 
 
83b0d23
 
 
 
aa1abb2
83b0d23
bfa0c67
50a12ff
 
 
 
 
 
 
 
 
 
83b0d23
dfd7729
83b0d23
e82c355
 
dfd7729
 
e82c355
50a12ff
 
83b0d23
 
 
 
 
93ec191
50a12ff
e82c355
50a12ff
83b0d23
50a12ff
 
 
 
 
 
 
 
 
 
 
bf902a7
50a12ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83b0d23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfa0c67
dfd7729
 
 
 
83b0d23
 
dfd7729
 
 
 
bfa0c67
 
 
dfd7729
bfa0c67
dfd7729
bfa0c67
 
 
 
 
 
 
 
 
 
 
bf902a7
 
 
 
50a12ff
 
 
 
bf902a7
 
50a12ff
 
bf902a7
 
50a12ff
 
 
bfa0c67
 
 
dfd7729
bfa0c67
 
 
 
 
 
 
 
 
 
 
dfd7729
 
93ec191
 
dfd7729
93ec191
 
bf902a7
93ec191
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
import random
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
import xlsxwriter
from os import listdir
from .lib import set_input, create_dowload_button
from os.path import isfile, join, exists
import printj


class LogAnalyser:
    def __init__(self, gen, container_guide, container_param, container_button):
        """Store the collaborators, let the user pick a log file and show the equation text.

        Args:
            gen: Object used later by ``get_log`` (``initialise_classifier_model``
                and ``get_emotion`` are called on it).
            container_guide: Stored as-is; not used inside this constructor.
            container_param: Streamlit container that receives the log-path
                select box.
            container_button: Stored as-is; ``display_logs`` puts the
                'Detect Emotion' button on it.

        Side effects:
            Lists the ``data`` directory, renders a select box and a markdown
            block, and sets ``self.path`` / ``self.df_path``.
        """
        # Local import: the file-level imports only bring in `join`, `isfile` and `exists`.
        from os.path import basename

        self.gen = gen
        self.container_guide = container_guide
        self.container_param = container_param
        self.container_button = container_button

        dirpath = 'data'
        log_file_paths = sorted(
            join(dirpath, f) for f in listdir(dirpath)
            if f.startswith('ist_log') and isfile(join(dirpath, f)))

        self.path = container_param.selectbox(
            'Select the log path', log_file_paths)

        # `join` uses the platform separator, so splitting on "/" left the
        # directory prefix in the stem on Windows; `basename` handles both.
        log_stem = basename(self.path).split('.')[0]
        self.df_path = f'data/df/{log_stem}.csv'

        st.markdown(self.get_text())

    @staticmethod
    @st.cache
    def get_text():
        """Return the markdown snippet that documents the emote-probability equation."""
        equation_markdown = '''  
                    
        ### Equation
        ```
        frequency_penalty = 1 - emotion_frequency
        probability_emote = w * emotion_confidence + (1 - w) * frequency_penalty
        Show_Emotion = probability_emote > (Random value between 0 and 1)
        ```
        '''
        return equation_markdown

    def display_logs(self):
        """Load (or rebuild) the emotion dataframe for the selected log and render it.

        Sets ``self.emotion_type``, ``self.debug`` and ``self.df``. The CSV at
        ``self.df_path`` is regenerated through ``get_log`` when it is missing
        or the 'Detect Emotion' button is pressed, unless the path contains
        'debug'. In debug mode the two threshold-analysis plots are drawn after
        the tables.
        """
        self.emotion_type = self.container_param.select_slider(
            'How many Emotion data to show?',
            ['Max-only', '2', '3', '4', '5', '6', 'All 7'])
        self.debug = 'debug' in self.df_path

        # `or` short-circuits, so the button is only created when the CSV
        # already exists on disk.
        rebuild_requested = (not exists(self.df_path)
                             or self.container_button.button('Detect Emotion'))
        if rebuild_requested and not self.debug:
            self.df = self.get_log()

        # The frame is always reloaded from the CSV, even right after get_log();
        # presumably so it carries the 'Unnamed: 0' column update_df() hides.
        self.df = pd.read_csv(self.df_path)

        self.update_df()
        if not self.debug:
            return
        self.get_c1_plot()
        self.get_c2_plot()

    def get_c1_plot(self):
        """Plot the summed criteria-1 review score against the PE threshold.

        For every reaction weight in ``np.arange(0, 1, 0.1)`` and every
        threshold in ``np.arange(0, 1, 0.01)`` the reaction simulation is
        replayed on a copy of ``self.df`` for the first nine stories; the
        ``c1r`` column of that copy is summed and drawn as one line per
        reaction weight.

        Reads ``self.df`` and ``self.score_threshold``; renders with
        ``st.plotly_chart`` and returns nothing.
        """
        threshold_grid = np.arange(0, 1, 0.01)
        weight_grid = np.arange(0, 1, 0.1)

        # Only the first nine stories take part in the sweep.
        stories = self.df.Story.unique()[:9]

        df_c1_analysis = pd.DataFrame()
        for raw_weight in weight_grid:
            reaction_weight = np.round(raw_weight, 2)
            for c1_threshold in threshold_grid:
                df_c1 = self.df.copy()

                for story_id in stories:
                    # Running state per story. Frequency and probability keep
                    # their last computed value on turns that are skipped.
                    reaction_num = 0
                    reaction_frequency = 0
                    probability_emote = 0

                    user_turns = df_c1[self.get_subset_condition(df_c1, story_id)]
                    for turn_no, (index, row) in enumerate(user_turns.iterrows(), start=1):
                        skipped = row.Emotion == 'neutral' or row.Score < self.score_threshold
                        if skipped:
                            reaction_show = False
                        else:
                            reaction_frequency = reaction_num / turn_no
                            probability_emote = (row.Score * reaction_weight
                                                 + (1 - reaction_weight) * (1 - reaction_frequency))
                            reaction_show = bool(probability_emote > c1_threshold)
                        reaction_num += int(reaction_show)

                        df_c1.at[index, 'reaction_frequency'] = reaction_frequency
                        df_c1.at[index, 'probability_emote'] = probability_emote
                        df_c1.at[index, 'c1_threshold'] = c1_threshold
                        df_c1.at[index, 'reaction_show'] = reaction_show
                        df_c1.at[index, 'c1'] = reaction_show
                        df_c1.at[index, 'c1r'] = self.get_criteria_review(
                            reaction_show, review=df_c1.e_review[index])

                summary = {
                    'c1_threshold': c1_threshold,
                    'reaction_weight': reaction_weight,
                    'c1r_sum': df_c1['c1r'].sum(),
                }
                df_c1_analysis = pd.concat(
                    [df_c1_analysis, pd.DataFrame(summary, index=[0])])

        fig = px.line(data_frame=df_c1_analysis, x='c1_threshold',
                      y='c1r_sum', color='reaction_weight')
        fig.update_layout(
            title="Criteria 1 analysis  `PE > Threshold`",
            xaxis_title="PE Threshold",
            yaxis_title="Count of good reviews",
            font=dict(size=14, color="#006064"),
        )
        st.plotly_chart(fig, use_container_width=True)
    def get_c2_plot(self):
        """Plot the summed criteria-2 review score against the CS threshold.

        For every threshold in ``np.arange(0, 1, 0.01)`` the user turns of the
        first nine stories are re-evaluated on a copy of ``self.df`` (criteria 2
        holds when the emotion is not 'neutral' and the score exceeds the
        threshold); the ``c2r`` column of that copy is summed and plotted.

        Renders with ``st.plotly_chart`` and returns nothing.
        """
        thresholds = np.arange(0, 1, 0.01)

        # Only the first nine stories take part in the sweep.
        stories = self.df.Story.unique()[:9]

        good_review_counts = []
        for c2_threshold in thresholds:
            df_c2 = self.df.copy()
            for story_id in stories:
                user_turns = df_c2[self.get_subset_condition(df_c2, story_id)]
                for index, row in user_turns.iterrows():
                    c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
                    df_c2.at[index, 'c2'] = c2
                    df_c2.at[index, 'c2r'] = self.get_criteria_review(
                        c2, review=df_c2.e_review[index])
            good_review_counts.append(df_c2['c2r'].sum())

        fig = px.line(x=thresholds, y=good_review_counts)
        fig.update_layout(
            title="Criteria 2 analysis  `CS > Threshold`",
            xaxis_title="CS Threshold",
            yaxis_title="Count of good reviews",
            font=dict(size=14, color="#006064"),
        )
        st.plotly_chart(fig, use_container_width=True)

    @staticmethod
    def get_subset_condition(data, story_id):
        return (data.Story == story_id) & (data.Turn == 'user')

    @staticmethod
    def get_criteria_review(c, review):
        # printj.green(f'{c} {type(c)}')
        # printj.green(f'{review} {type(review)}')
        result = int(c == True and (review == 'o' or review == None)) + int(
            c == False and review == 'x')
        return np.round(result, 0)
        # return str(np.round(result, 0))

    def get_ngram_pattern(self, s, n=2):
        """Slide a window of length ``n`` over ``s`` and flag the windows containing '1'.

        Returns a string with one character per window: '1' when the window
        holds at least one '1', else '0'. The result is empty when ``s`` is
        shorter than ``n``.
        """
        window_count = len(s) - (n - 1)
        return ''.join(
            '1' if '1' in s[start:start + n] else '0'
            for start in range(window_count))

    def update_df(self):
        """Render the tuning widgets, score every user turn and show the per-story tables.

        Widgets (story count, reaction weight, C1 threshold mode/value, C2
        threshold, table style, probability columns toggle, score threshold)
        are created on ``self.container_param`` in that order.

        For each displayed story the user turns are walked in order. A turn
        whose emotion is 'neutral' or whose score is below the score threshold
        never shows a reaction; otherwise the emote probability
        ``Score * w + (1 - w) * (1 - reaction_frequency)`` is compared with the
        C1 threshold (fixed, or a fresh ``random.random()`` per evaluated
        turn). Results are written back into ``self.df`` (columns
        ``reaction_frequency``, ``probability_emote``, ``random_value``,
        ``reaction_show``, ``c1``, ``c2``, ``c1r``, ``c2r``), then each story is
        rendered as a styled table with a download button, followed by a table
        of reaction patterns and their 2- to 7-gram patterns.

        Sets ``self.container_param_rv``, ``self.show_pe_data`` and
        ``self.score_threshold``. Returns nothing.
        """
        list_stories = self.df.Story.unique()
        total_num_stories = len(list_stories)
        num_stories2show = int(set_input(
            self.container_param,
            label='No. of stories to show', min_value=1, max_value=total_num_stories,
            # Keep the default within max_value when fewer than nine stories exist.
            value=min(9, total_num_stories), step=1,
            key_slider='num_stories2show_slider', key_input='num_stories2show_input',))
        list_stories2show = list_stories[:num_stories2show]
        reaction_weight = set_input(self.container_param,
                                    label='Reaction Weight w', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
                                    key_slider='w_slider', key_input='w_input',)
        self.container_param_rv = self.container_param.columns([1, 1])
        random_value_mode = self.container_param_rv[0].radio(
            "C1 Threshold type", ["Random", "Fixed"], index=1)
        if random_value_mode == "Fixed":
            random_value = set_input(self.container_param,
                                     label='C1 Threshold',
                                     key_slider='rand_slider', key_input='rand_input',
                                     min_value=0.,
                                     max_value=1.,
                                     value=.5,
                                     step=.01,)
        else:
            # Seed the threshold so it is defined even when the first user turn
            # is skipped (neutral / low score) before any per-turn draw happens;
            # without this the write to 'random_value' below raised
            # UnboundLocalError.
            random_value = random.random()
        c2_threshold = set_input(self.container_param,
                                 label='C2 Threshold', min_value=0.0, max_value=1.0, value=0.7, step=0.01,
                                 key_slider='c2_threshold_slider', key_input='c2_threshold_input',)
        table_mode = self.container_param.radio(
            "Table Style:", ["Dataframe", "Table"])
        self.show_pe_data = self.container_param.checkbox(
            'Show Probability Emote', value=True, key='show_pe_data_log')
        self.score_threshold = set_input(self.container_param,
                                         label='Score Threshold', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
                                         key_slider='score_threshold_slider', key_input='score_threshold_input',)

        pattern_frames = []
        for story_id in list_stories2show:
            # Running state per story. Frequency and probability keep their
            # last computed value on turns that are skipped.
            reaction_num = 0
            reaction_frequency = 0
            probability_emote = 0

            dfs = self.df[self.get_subset_condition(self.df, story_id)]
            for i, (index, row) in enumerate(dfs.iterrows()):
                if row.Emotion == 'neutral' or row.Score < self.score_threshold:
                    reaction_show = False
                else:
                    reaction_frequency = reaction_num / (i + 1)
                    probability_emote = row.Score * reaction_weight + \
                        (1 - reaction_weight) * (1 - reaction_frequency)
                    if random_value_mode == "Random":
                        random_value = random.random()
                    reaction_show = bool(probability_emote > random_value)
                if reaction_show:
                    reaction_num += 1

                self.df.at[index, 'reaction_frequency'] = reaction_frequency
                self.df.at[index, 'probability_emote'] = probability_emote
                self.df.at[index, 'random_value'] = random_value
                self.df.at[index, 'reaction_show'] = reaction_show
                self.df.at[index, 'c1'] = reaction_show
                c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
                self.df.at[index, 'c2'] = c2
                review = self.df.e_review[index]
                self.df.at[index, 'c1r'] = self.get_criteria_review(
                    reaction_show, review=review)
                self.df.at[index, 'c2r'] = self.get_criteria_review(
                    c2, review=review)

            # Encode the story's shown/not-shown sequence as a string of 1s and 0s.
            shown = self.df[self.get_subset_condition(
                self.df, story_id)].reaction_show.dropna()
            s = ''.join(str(int(v)) for v in shown)

            pattern_row = {
                'reaction_length': len(s),
                'reaction_1': s.count('1'),
                'reaction_pattern': s,
            }
            for n in range(2, 8):
                pattern_row[f'{n}-gram_pattern'] = self.get_ngram_pattern(s, n=n)
            pattern_frames.append(
                pd.DataFrame(pattern_row, index=[f'Story_{story_id}']))

        df_reaction_pattern = pd.concat(pattern_frames) if pattern_frames else pd.DataFrame()

        df_show = self.df.copy()
        for c in ['c1r', 'c2r']:
            df_show[c] = df_show[c].fillna(0).astype(int)
            st.markdown(f'Sum of {c} : {df_show[c].sum()}')

        # The hidden columns do not depend on the story, so compute them once.
        columns2hide = ['Unnamed: 0', 'Story']
        if not self.debug:
            columns2hide += ['e_review']
        if self.emotion_type == 'Max-only':
            columns2hide += [f'Emotion_{rank + 1}' for rank in range(7)]
            columns2hide += [f'Score_{rank + 1}' for rank in range(7)]
        if not self.show_pe_data:
            columns2hide += [
                "reaction_frequency", "probability_emote", "random_value", "reaction_show"]

        for story_id in list_stories2show:
            # errors='ignore': a column that is absent (e.g. fewer than seven
            # ranked emotions were logged) must not abort the rendering.
            story_frame = df_show[df_show.Story == story_id].drop(
                columns=columns2hide, errors='ignore')

            st.markdown(f'#### Story {story_id}')

            styled = story_frame.style
            if self.show_pe_data:
                styled = styled.apply(self.dfstyle_color_text_col, axis=1)
            styled = styled.apply(self.rower, axis=None)
            styled = styled.set_table_styles([{
                'selector': 'tr:hover',
                'props': 'color: #000000'
            }])

            if table_mode == 'Dataframe':
                st.dataframe(styled)
            elif table_mode == 'Table':
                st.table(styled)
            create_dowload_button(
                styled, sheet_name=f'story_{story_id}', file_name=f'data_story_{story_id}.xlsx')

        if table_mode == 'Dataframe':
            st.dataframe(df_reaction_pattern)
        elif table_mode == 'Table':
            st.table(df_reaction_pattern)
    # @st.cache 

    def dfstyle_color_text_col(self, s):
        """Return one CSS declaration per entry of ``s`` for use with ``Styler.apply``.

        Entries that are not equal to themselves (NaN) get a near-white text
        colour; every other entry gets a white background.
        """
        missing_css = 'color: #fcfcfc'
        default_css = 'background-color: white'
        # `value != value` is only true for NaN-like values.
        return [missing_css if value != value else default_css for value in s]
    # @staticmethod
    # @st.cache
    # def dfstyle_color_text(val):
    #     if type(val)==str:
    #         color = 'red' if val =='neutral' else 'black'
    #     # elif type(val)==float:
    #     #     color = 'red' if val > .50000 else 'black'
    #     elif val==None:
    #         color = '#ffffff'
    #     else:
    #         color = None
    #     return 'color: %s' % color if color is not None else ''

    @staticmethod
    @st.cache
    def rower(data):
        """Build a style frame that shades the rows of ``data`` whose index label is odd.

        Returns a DataFrame with the same index and columns as ``data`` holding
        'background-color:#f9f9f9' for odd index labels and '' elsewhere.
        """
        odd_rows = pd.Series(data.index % 2 != 0)
        # Repeat the per-row flag once per column so the mask matches data's shape.
        mask = pd.concat([odd_rows for _ in range(data.shape[1])], axis=1)
        styles = np.where(mask, 'background-color:#f9f9f9', '')
        return pd.DataFrame(styles, index=data.index, columns=data.columns)

    def get_log(self):
        df = pd.DataFrame(data=[], columns=[])
        log_dict = dict()

        with open(self.path) as f:
            lines = f.readlines()
        self.gen.initialise_classifier_model()
        story_num = 0
        for i, line in enumerate(lines):
            if line.startswith('H:'):
                log_dict['Turn'] = 'haru'
            elif line.startswith('U:'):
                log_dict['Turn'] = 'user'
            else:
                story_num += 1
                continue
            log_dict['Sentence'] = line[3:]
            log_dict['Story'] = story_num
            emotion_type = 'sorted'  # 'max'
            if self.emotion_type == 'max':
                emotion_type = 'max'
            else:
                emotion_type = 'sorted'  #
            emotion = self.gen.get_emotion(
                log_dict['Sentence'], filter_by=emotion_type)
            if emotion_type == 'max':
                log_dict['Emotion'] = emotion['label']
                log_dict['Score'] = emotion['score']
            elif emotion_type == 'sorted':
                for sorted_i in range(len(emotion)):
                    log_dict[f'Emotion_{sorted_i+1}'] = emotion[sorted_i]['label']
                    log_dict[f'Score_{sorted_i+1}'] = emotion[sorted_i]['score']
                log_dict['Emotion'] = emotion[0]['label']
                log_dict['Score'] = emotion[0]['score']
            log_dict['e_review'] = ' '
            df = pd.concat(
                [df, pd.DataFrame(log_dict, index=[f'idx_{i}'])])
            df = df.reset_index(drop=True)
            df.to_csv(self.df_path)
        return df


def display_logs(gen, container_guide, container_param, container_button):
    """Build a ``LogAnalyser`` from the given collaborators and render its logs."""
    LogAnalyser(gen, container_guide, container_param,
                container_button).display_logs()


if __name__ == '__main__':
    # Script entry point: hand over to the project's launcher script.
    # `os` is not among the file-level imports (only `listdir` and a few
    # `os.path` helpers are), so it is imported here; without this the call
    # below raised NameError.
    import os

    os.system('./run.sh')