Spaces:

jitesh
/

storytelling

Runtime error

App Files Files Community

jitesh committed on Mar 1, 2022

Commit

50a12ff

•

1 Parent(s): bf902a7

c2r plot

Browse files

Files changed (2) hide show

data/df/ist_logs.txt +0 -0
src/read_logs.py +131 -41

data/df/ist_logs.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

src/read_logs.py CHANGED Viewed

@@ -20,7 +20,7 @@ class LogAnalyser:
         self.path = container_param.selectbox(
             'Select the log path', log_file_paths)
-        self.df_path = f'data/df/{self.path.split("/")[-1]}'
         # if 'button1_counter' not in st.session_state:
         #     st.session_state.button1_counter = 0
         # if 'df' not in st.session_state:
@@ -43,8 +43,10 @@ class LogAnalyser:
     def display_logs(self):
         # self.container_param.markdown(
         #     f'st.session_state.button1_counter: {st.session_state.button1_counter}')
-        if not exists(self.df_path) or self.container_button.button('Detect Emotion'):
             self.df = self.get_log()
         # else:
         self.df = pd.read_csv(self.df_path)
@@ -56,7 +58,45 @@ class LogAnalyser:
         #     st.session_state.path=self.path
         self.update_df()
     def get_ngram_pattern(self, s, n=2):
         gnp = ''
         for i in range(len(s)-(n-1)):
@@ -64,12 +104,21 @@ class LogAnalyser:
         return gnp
     def update_df(self):
         reaction_weight = set_input(self.container_param,
                                     label='Reaction Weight w', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
                                     key_slider='w_slider', key_input='w_input',)
         self.container_param_rv = self.container_param.columns([1, 1])
         random_value_mode = self.container_param_rv[0].radio(
-            "Random Value:", ["Random", "Fixed"])
         # random_value = random.random()
         if random_value_mode == "Fixed":
             random_value = set_input(self.container_param,
@@ -80,7 +129,6 @@ class LogAnalyser:
                                      step=.01,)
         table_mode = self.container_param.radio(
             "Table Style:", ["Dataframe", "Table"])
-        self.emotion_type = self.container_param.select_slider('Emotion', ['Max-only', '2', '3', '4', '5', '6', 'All 7'])
         self.show_pe_data = self.container_param.checkbox(
             'Show Probability Emote', value=True, key='show_pe_data_log')
         self.score_threshold = set_input(self.container_param,
@@ -89,15 +137,23 @@ class LogAnalyser:
         df_reaction_pattern = pd.DataFrame()
         reaction_pattern_dict = dict()
-        for story_id in self.df.Story.unique():
             reaction_num = 0
             reaction_frequency = 0
             probability_emote = 0
-            random_value = 0
             reaction_show = False
             def get_subset_condition(data):
                 return (data.Story == story_id) & (data.Turn == 'user')
             subset_condition = get_subset_condition(self.df)
             dfs = self.df[subset_condition]
             for i, (index, row) in enumerate(dfs.iterrows()):
@@ -117,6 +173,14 @@ class LogAnalyser:
                 self.df.at[index, 'probability_emote'] = probability_emote
                 self.df.at[index, 'random_value'] = random_value
                 self.df.at[index, 'reaction_show'] = reaction_show
             s = ''
             df_edit = self.df[get_subset_condition(
                 self.df)].reaction_show.copy()
@@ -130,15 +194,23 @@ class LogAnalyser:
             reaction_pattern_dict['reaction_pattern'] = s
             for i in range(2, 8):
-                reaction_pattern_dict[f'{i}-gram_pattern'] = self.get_ngram_pattern(s, n=i)
             df_reaction_pattern = pd.concat(
                 [df_reaction_pattern, pd.DataFrame(reaction_pattern_dict, index=[f'Story_{story_id}'])])
             # st.markdown(df_edit)
             # st.markdown(s)
-        for story_id in self.df.Story.unique():
             dfs = self.df[(self.df.Story == story_id)].copy()
             columns2hide = ['Unnamed: 0', 'Story', ]
             if not self.show_pe_data:
                 columns2hide += [
                     "reaction_frequency", "probability_emote", "random_value", "reaction_show"]
@@ -146,18 +218,16 @@ class LogAnalyser:
                 dfs.drop(c, axis=1, inplace=True)
             st.markdown(f'#### Story {story_id}')
-            # dfs = dfs.style
-            # dfs = dfs.hide_index()
-            # if self.show_pe_data:
-            #     dfs = dfs.apply(self.dfstyle_color_text_col, axis=1)
-            # # dfs = dfs.applymap(self.dfstyle_color_text)
-            # dfs = dfs.apply(self.rower, axis=None)
-            # dfs = dfs.set_table_styles([{
-            #     'selector': 'tr:hover',
-            #     'props': 'color: #000000'  # background-color: #eeee66;font-size: 1.01em;
-            # }])   # .hide_index()
             if table_mode == 'Dataframe':
                 st.dataframe(dfs)
@@ -166,29 +236,47 @@ class LogAnalyser:
             elif table_mode == 'Table':
                 st.table(dfs)
                 # st.table(df_reaction_pattern.iloc[story_id-1])
-            create_dowload_button(dfs, sheet_name=f'story_{story_id}', file_name=f'data_story_{story_id}.xlsx')
             # print(dfs.render())
         if table_mode == 'Dataframe':
             st.dataframe(df_reaction_pattern)
         elif table_mode == 'Table':
             st.table(df_reaction_pattern)
     # @st.cache
     def dfstyle_color_text_col(self, s):
         result = ['background-color: white']*len(s)
-        if s.Emotion == 'neutral' and s.Turn == 'user':
-            result[2:-1] = ['color: #992222'] + \
-                ['color: #333333']+['color: #fcfcfc']*3
-        if s.Score < self.score_threshold and s.Turn == 'user':
-            result[3:-1] = ['color: #992222'] + ['color: #fcfcfc']*3
             # printj.red(result)
-        if s.reaction_show == 1:
-            result[-1] = 'color: #222222'
-        elif s.reaction_show == 0:
-            result[-1] = 'color: #222222'
-        else:
-            # print(s.reaction_show)
-            # print(type(s.reaction_show))
-            result[4:] = ['color: #fcfcfc']*4
         # if s.probability_emote!=s.probability_emote:
         #     result[5] = 'color: #eeeeee'
         return result
@@ -237,17 +325,19 @@ class LogAnalyser:
             if self.emotion_type == 'max':
                 emotion_type = 'max'
             else:
-                emotion_type = 'sorted'  #
-            emotion = self.gen.get_emotion(log_dict['Sentence'], filter_by=emotion_type)
-            if emotion_type =='max':
                 log_dict['Emotion'] = emotion['label']
                 log_dict['Score'] = emotion['score']
-            elif emotion_type =='sorted':
-                log_dict['Emotion'] = emotion[0]['label']
-                log_dict['Score'] = emotion[0]['score']
-                for sorted_i in range(1, len(emotion)):
                     log_dict[f'Emotion_{sorted_i+1}'] = emotion[sorted_i]['label']
                     log_dict[f'Score_{sorted_i+1}'] = emotion[sorted_i]['score']
             df = pd.concat(
                 [df, pd.DataFrame(log_dict, index=[f'idx_{i}'])])
             df = df.reset_index(drop=True)

         self.path = container_param.selectbox(
             'Select the log path', log_file_paths)
+        self.df_path = f'data/df/{self.path.split("/")[-1].split(".")[0]}.csv'
         # if 'button1_counter' not in st.session_state:
         #     st.session_state.button1_counter = 0
         # if 'df' not in st.session_state:
     def display_logs(self):
         # self.container_param.markdown(
         #     f'st.session_state.button1_counter: {st.session_state.button1_counter}')
+        self.emotion_type = self.container_param.select_slider(
+            'Emotion', ['Max-only', '2', '3', '4', '5', '6', 'All 7'])
+        if (not exists(self.df_path) or self.container_button.button('Detect Emotion')) and ('debug' not in self.df_path):
             self.df = self.get_log()
         # else:
         self.df = pd.read_csv(self.df_path)
         #     st.session_state.path=self.path
         self.update_df()
+        self.get_c2_plot()
+    def get_c2_plot(self):
+        # c2_threshold=0
+        c2_threshold_list = np.arange(0,1,0.01)
+        list_stories = self.df.Story.unique()
+        total_num_stories = len(list_stories)
+        num_stories2show = 9 # int(set_input(self.container_param,
+                                    # label='Number of stories to show', min_value=1, max_value=total_num_stories, value=9, step=1,
+                                    # key_slider='num_stories2show_slider', key_input='num_stories2show_input',))
+        list_stories2show = list_stories[:num_stories2show]
+        c2r_sum_list = []
+        for c2_threshold in c2_threshold_list:
+            for story_id in list_stories2show:
+                subset_condition = self.get_subset_condition(self.df, story_id)
+                dfs = self.df[subset_condition]
+                for i, (index, row) in enumerate(dfs.iterrows()):
+                    c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
+                    self.df.at[index, 'c2'] = c2
+                    review = self.df.e_review[index]
+                    self.df.at[index, 'c2r'] = self.get_criteria_review(
+                        c2, review=review)
+            c2r_sum_list.append(self.df['c2r'].sum())
+        fig = px.line(x=c2_threshold_list, y=c2r_sum_list, title='c2r')
+        fig.show()
+        pass
+    @staticmethod
+    def get_subset_condition(data, story_id):
+        return (data.Story == story_id) & (data.Turn == 'user')
+    @staticmethod
+    def get_criteria_review(c, review):
+        # printj.green(f'{c} {type(c)}')
+        # printj.green(f'{review} {type(review)}')
+        result = int(c == True and (review == 'o' or review == None)) + int(
+            c == False and review == 'x')
+        return result
     def get_ngram_pattern(self, s, n=2):
         gnp = ''
         for i in range(len(s)-(n-1)):
         return gnp
     def update_df(self):
+        list_stories = self.df.Story.unique()
+        total_num_stories = len(list_stories)
+        num_stories2show = int(set_input(self.container_param,
+                                    label='Number of stories to show', min_value=1, max_value=total_num_stories, value=9, step=1,
+                                    key_slider='num_stories2show_slider', key_input='num_stories2show_input',))
+        list_stories2show = list_stories[:num_stories2show]
+        c2_threshold = set_input(self.container_param,
+                                    label='c2_threshold', min_value=0.0, max_value=1.0, value=0.7, step=0.01,
+                                    key_slider='c2_threshold_slider', key_input='c2_threshold_input',)
         reaction_weight = set_input(self.container_param,
                                     label='Reaction Weight w', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
                                     key_slider='w_slider', key_input='w_input',)
         self.container_param_rv = self.container_param.columns([1, 1])
         random_value_mode = self.container_param_rv[0].radio(
+            "Threshold Value:", ["Random", "Fixed"], index=1)
         # random_value = random.random()
         if random_value_mode == "Fixed":
             random_value = set_input(self.container_param,
                                      step=.01,)
         table_mode = self.container_param.radio(
             "Table Style:", ["Dataframe", "Table"])
         self.show_pe_data = self.container_param.checkbox(
             'Show Probability Emote', value=True, key='show_pe_data_log')
         self.score_threshold = set_input(self.container_param,
         df_reaction_pattern = pd.DataFrame()
         reaction_pattern_dict = dict()
+        for story_id in list_stories2show:
             reaction_num = 0
             reaction_frequency = 0
             probability_emote = 0
+            # random_value = 0
             reaction_show = False
+            # c2 = True
             def get_subset_condition(data):
                 return (data.Story == story_id) & (data.Turn == 'user')
+            def get_criteria_review(c, review):
+                # printj.green(f'{c} {type(c)}')
+                # printj.green(f'{review} {type(review)}')
+                result = int(c == True and (review == 'o' or review == None)) + int(
+                    c == False and review == 'x')
+                return result
             subset_condition = get_subset_condition(self.df)
             dfs = self.df[subset_condition]
             for i, (index, row) in enumerate(dfs.iterrows()):
                 self.df.at[index, 'probability_emote'] = probability_emote
                 self.df.at[index, 'random_value'] = random_value
                 self.df.at[index, 'reaction_show'] = reaction_show
+                self.df.at[index, 'c1'] = reaction_show
+                c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
+                self.df.at[index, 'c2'] = c2
+                review = self.df.e_review[index]
+                self.df.at[index, 'c1r'] = get_criteria_review(
+                    reaction_show, review=review)
+                self.df.at[index, 'c2r'] = get_criteria_review(
+                    c2, review=review)
             s = ''
             df_edit = self.df[get_subset_condition(
                 self.df)].reaction_show.copy()
             reaction_pattern_dict['reaction_pattern'] = s
             for i in range(2, 8):
+                reaction_pattern_dict[f'{i}-gram_pattern'] = self.get_ngram_pattern(
+                    s, n=i)
             df_reaction_pattern = pd.concat(
                 [df_reaction_pattern, pd.DataFrame(reaction_pattern_dict, index=[f'Story_{story_id}'])])
             # st.markdown(df_edit)
             # st.markdown(s)
+        for c in ['c1r', 'c2r']:
+            st.markdown(f'Sum of {c} : {self.df[c].sum()}')
+        for story_id in list_stories2show:
             dfs = self.df[(self.df.Story == story_id)].copy()
             columns2hide = ['Unnamed: 0', 'Story', ]
+            if self.emotion_type == 'Max-only':
+                columns2hide += [
+                    f'Emotion_{sorted_i+1}' for sorted_i in range(7)]
+                columns2hide += [
+                    f'Score_{sorted_i+1}' for sorted_i in range(7)]
             if not self.show_pe_data:
                 columns2hide += [
                     "reaction_frequency", "probability_emote", "random_value", "reaction_show"]
                 dfs.drop(c, axis=1, inplace=True)
             st.markdown(f'#### Story {story_id}')
+            dfs = dfs.style
+            if self.show_pe_data:
+                dfs = dfs.apply(self.dfstyle_color_text_col, axis=1)
+            # dfs = dfs.applymap(self.dfstyle_color_text)
+            dfs = dfs.apply(self.rower, axis=None)
+            dfs = dfs.set_table_styles([{
+                'selector': 'tr:hover',
+                'props': 'color: #000000'  # background-color: #eeee66;font-size: 1.01em;
+            }])   # .hide_index()
             if table_mode == 'Dataframe':
                 st.dataframe(dfs)
             elif table_mode == 'Table':
                 st.table(dfs)
                 # st.table(df_reaction_pattern.iloc[story_id-1])
+            create_dowload_button(
+                dfs, sheet_name=f'story_{story_id}', file_name=f'data_story_{story_id}.xlsx')
             # print(dfs.render())
         if table_mode == 'Dataframe':
             st.dataframe(df_reaction_pattern)
         elif table_mode == 'Table':
             st.table(df_reaction_pattern)
     # @st.cache
     def dfstyle_color_text_col(self, s):
+        num_col = len(s)
         result = ['background-color: white']*len(s)
+        # if s.Emotion == 'neutral' and s.Turn == 'user':
+        #     result[-6:-1] = ['color: #992222'] + \
+        #         ['color: #333333']+['color: #fcfcfc']*3
+        for si, sc in enumerate(s):
+            if sc != sc:
+                result[si] = 'color: #fcfcfc'
+                # printj.red.bold_on_white(s)
+                # printj.red.bold_on_cyan(si)
+                # printj.red.bold_on_cyan(sc)
+        # if s.Score < self.score_threshold and s.Turn == 'user':
+        #     result[-5:-1] = ['color: #992222'] + ['color: #fcfcfc']*3
             # printj.red(result)
+        # printj.red.bold_on_cyan(s)
+        # printj.red.bold_on_cyan(type(s))
+        # printj.red.bold_on_white(s.keys().tolist())
+        # printj.red.bold_on_white(type(s.keys().tolist()))
+        # idx_reaction_show = s.keys().tolist().index("reaction_show")
+        # printj.red.bold_on_white(idx_reaction_show)
+        # if s.reaction_show == 1:
+        #     # result[idx_reaction_show] = 'color: #222222'
+        #     pass
+        # elif s.reaction_show == 0:
+        #     # result[idx_reaction_show] = 'color: #222222'
+        #     pass
+        # else:
+        #     # print(s.reaction_show)
+        #     # print(type(s.reaction_show))
+        #     hide_length = 3
+        #     result[idx_reaction_show-hide_length:] = ['color: #fcfcfc']*(num_col-idx_reaction_show+hide_length)
         # if s.probability_emote!=s.probability_emote:
         #     result[5] = 'color: #eeeeee'
         return result
             if self.emotion_type == 'max':
                 emotion_type = 'max'
             else:
+                emotion_type = 'sorted'  #
+            emotion = self.gen.get_emotion(
+                log_dict['Sentence'], filter_by=emotion_type)
+            if emotion_type == 'max':
                 log_dict['Emotion'] = emotion['label']
                 log_dict['Score'] = emotion['score']
+            elif emotion_type == 'sorted':
+                for sorted_i in range(len(emotion)):
                     log_dict[f'Emotion_{sorted_i+1}'] = emotion[sorted_i]['label']
                     log_dict[f'Score_{sorted_i+1}'] = emotion[sorted_i]['score']
+                log_dict['Emotion'] = emotion[0]['label']
+                log_dict['Score'] = emotion[0]['score']
+            log_dict['e_review'] = ' '
             df = pd.concat(
                 [df, pd.DataFrame(log_dict, index=[f'idx_{i}'])])
             df = df.reset_index(drop=True)