Spaces:

jitesh
/

storytelling

Runtime error

File size: 19,959 Bytes

bfa0c67
 
 
 
 
 
 
1115bb5
dfd7729
bfa0c67
 
 
 
 
 
dfd7729
bfa0c67
 
 
 
 
 
50a12ff
83b0d23
 
bfa0c67
 
 
dfd7729
bfa0c67
 
 
 
 
 
 
 
 
 
 
 
dfd7729
bfa0c67
83b0d23
 
50a12ff
93ec191
 
 
dfd7729
bf902a7
 
83b0d23
dfd7729
 
 
 
 
bfa0c67
 
93ec191
 
 
 
 
 
 
 
83b0d23
93ec191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50a12ff
 
93ec191
50a12ff
 
 
93ec191
 
 
50a12ff
93ec191
50a12ff
 
93ec191
50a12ff
93ec191
 
50a12ff
 
93ec191
 
 
50a12ff
93ec191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50a12ff
 
 
93ec191
50a12ff
 
 
 
 
93ec191
 
 
e82c355
 
 
 
 
 
bfa0c67
50a12ff
 
 
93ec191
 
50a12ff
bfa0c67
 
 
 
 
93ec191
bfa0c67
 
 
93ec191
 
bfa0c67
 
 
 
93ec191
 
 
dfd7729
 
83b0d23
 
e82c355
 
 
 
83b0d23
 
50a12ff
bfa0c67
e82c355
 
50a12ff
e82c355
50a12ff
e82c355
93ec191
83b0d23
dfd7729
e82c355
dfd7729
 
 
 
 
 
 
 
 
bfa0c67
dfd7729
 
 
 
 
50a12ff
 
 
 
93ec191
50a12ff
93ec191
50a12ff
83b0d23
93ec191
 
e82c355
83b0d23
 
 
 
e82c355
 
 
 
 
50a12ff
 
83b0d23
 
 
 
 
93ec191
 
 
50a12ff
93ec191
 
50a12ff
93ec191
83b0d23
93ec191
 
50a12ff
 
 
 
 
83b0d23
 
 
 
aa1abb2
83b0d23
bfa0c67
50a12ff
 
 
 
 
 
 
 
 
 
83b0d23
dfd7729
83b0d23
e82c355
 
dfd7729
 
e82c355
50a12ff
 
83b0d23
 
 
 
 
93ec191
50a12ff
e82c355
50a12ff
83b0d23
50a12ff
 
 
 
 
 
 
 
 
 
 
bf902a7
50a12ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83b0d23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfa0c67
dfd7729
 
 
 
83b0d23
 
dfd7729
 
 
 
bfa0c67
 
 
dfd7729
bfa0c67
dfd7729
bfa0c67
 
 
 
 
 
 
 
 
 
 
bf902a7
 
 
 
50a12ff
 
 
 
bf902a7
 
50a12ff
 
bf902a7
 
50a12ff
 
 
bfa0c67
 
 
dfd7729
bfa0c67
 
 
 
 
 
 
 
 
 
 
dfd7729
 
93ec191
 
dfd7729
93ec191
 
bf902a7
93ec191

import random
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
import xlsxwriter
from os import listdir
from .lib import set_input, create_dowload_button
from os.path import isfile, join, exists
import printj


class LogAnalyser:
    def __init__(self, gen, container_guide, container_param, container_button):
        """Store the collaborators, let the user pick a log file and show the equation.

        Args:
            gen: Stored as ``self.gen``; ``get_log`` later calls its
                ``initialise_classifier_model`` and ``get_emotion`` methods.
            container_guide: Stored as ``self.container_guide`` (not used in
                this constructor).
            container_param: Container on which the parameter widgets are
                created (the log-path select box here).
            container_button: Container on which ``display_logs`` creates the
                'Detect Emotion' button.

        Raises:
            FileNotFoundError: If the select box returns no path, i.e. no file
                in ``data`` has a name starting with ``ist_log``.
        """
        # Local import so the path handling below does not depend on the
        # separator used by os.path.join on the current platform.
        from os.path import basename

        self.gen = gen
        self.container_guide = container_guide
        self.container_param = container_param
        self.container_button = container_button

        dirpath = 'data'
        log_file_paths = sorted(
            join(dirpath, f) for f in listdir(dirpath)
            if isfile(join(dirpath, f)) and f.startswith('ist_log'))

        self.path = container_param.selectbox(
            'Select the log path', log_file_paths)
        if self.path is None:
            # Without a selection there is nothing to derive the CSV name from.
            raise FileNotFoundError(
                f"No log file starting with 'ist_log' found in '{dirpath}'")

        # The cached dataframe lives next to the logs, named after the log
        # file's base name up to its first dot.
        log_name = basename(self.path).split(".")[0]
        self.df_path = f'data/df/{log_name}.csv'
        st.markdown(self.get_text())

    @staticmethod
    @st.cache
    def get_text():
        """Return the markdown snippet describing the emote-probability equation."""
        # Spelled out line by line so the leading/trailing whitespace of the
        # snippet is visible in the source.
        equation_md = (
            '  \n'
            '                    \n'
            '        ### Equation\n'
            '        ```\n'
            '        frequency_penalty = 1 - emotion_frequency\n'
            '        probability_emote = w * emotion_confidence + (1 - w) * frequency_penalty\n'
            '        Show_Emotion = probability_emote > (Random value between 0 and 1)\n'
            '        ```\n'
            '        '
        )
        return equation_md

    def display_logs(self):
        """Load (or regenerate) the emotion dataframe for the selected log and render it.

        Sets ``self.emotion_type`` from the slider, ``self.debug`` from whether
        the CSV path contains ``'debug'``, and ``self.df`` from the CSV at
        ``self.df_path``.  The CSV is regenerated through ``get_log`` when it
        is missing or the 'Detect Emotion' button is pressed, but never for a
        debug log.  The criteria plots are only drawn for debug logs.
        """
        self.emotion_type = self.container_param.select_slider(
            'How many Emotion data to show?', ['Max-only', '2', '3', '4', '5', '6', 'All 7'])
        self.debug = 'debug' in self.df_path

        # `or` short-circuits: the button is only created when the CSV exists.
        detection_requested = (
            not exists(self.df_path)
            or self.container_button.button('Detect Emotion')
        )
        if detection_requested and not self.debug:
            self.df = self.get_log()
        # The frame is always re-read from disk, whether or not it was rebuilt.
        self.df = pd.read_csv(self.df_path)

        self.update_df()
        if not self.debug:
            return
        self.get_c1_plot()
        self.get_c2_plot()

    def get_c1_plot(self):
        """Plot the criterion-1 review score over thresholds, one line per weight.

        For every reaction weight in ``np.arange(0, 1, 0.1)`` and every
        threshold in ``np.arange(0, 1, 0.01)`` the reaction decision
        (``probability_emote > threshold``) is recomputed for the user turns of
        the first nine stories and compared with ``e_review`` through
        ``get_criteria_review``.  The sum of the ``c1r`` column is plotted
        against the threshold.

        The sum runs over the whole ``c1r`` column, so rows that are not
        recomputed here keep whatever value ``self.df`` already holds.

        Reads ``self.df`` and ``self.score_threshold``; the latter is set by
        ``update_df``, which therefore has to run first.
        """
        c1_threshold_list = np.arange(0, 1, 0.01)
        c1_reaction_weight_list = np.arange(0, 1, 0.1)

        num_stories2show = 9
        list_stories2show = self.df.Story.unique()[:num_stories2show]

        # One scratch copy is enough: every pass overwrites the same `c1r`
        # cells and nothing that is read (Emotion, Score, e_review) changes.
        df_c1 = self.df.copy()
        story_rows = [
            df_c1[self.get_subset_condition(df_c1, story_id)]
            for story_id in list_stories2show
        ]

        records = []
        for reaction_weight in c1_reaction_weight_list:
            reaction_weight = np.round(reaction_weight, 2)
            for c1_threshold in c1_threshold_list:
                for dfs in story_rows:
                    reaction_num = 0
                    probability_emote = 0
                    for i, (index, row) in enumerate(dfs.iterrows()):
                        if row.Emotion == 'neutral' or row.Score < self.score_threshold:
                            reaction_show = False
                        else:
                            # Share of earlier turns in this story that reacted.
                            reaction_frequency = reaction_num/(i+1)
                            probability_emote = row.Score*reaction_weight + \
                                (1-reaction_weight)*(1-reaction_frequency)
                            reaction_show = bool(probability_emote > c1_threshold)
                        if reaction_show:
                            reaction_num += 1

                        # Only c1r feeds the plot, so nothing else is stored.
                        df_c1.at[index, 'c1r'] = self.get_criteria_review(
                            reaction_show, review=row.e_review)
                records.append({
                    'c1_threshold': c1_threshold,
                    'reaction_weight': reaction_weight,
                    'c1r_sum': df_c1['c1r'].sum(),
                })
        # Build the analysis frame once instead of concatenating per pass.
        df_c1_analysis = pd.DataFrame(records)

        fig = px.line(data_frame=df_c1_analysis, x='c1_threshold', y='c1r_sum', color='reaction_weight')
        fig.update_layout(
            title="Criteria 1 analysis  `PE > Threshold`",
            xaxis_title="PE Threshold",
            yaxis_title="Count of good reviews",
            font=dict(
                size=14,
                color="#006064"
            ),
        )
        st.plotly_chart(fig, use_container_width=True)
    def get_c2_plot(self):
        """Plot the criterion-2 review score over confidence thresholds.

        For every threshold in ``np.arange(0, 1, 0.01)`` criterion 2
        (``Emotion != 'neutral' and Score > threshold``) is evaluated for the
        user turns of the first nine stories and compared with ``e_review``
        through ``get_criteria_review``.  The sum of the ``c2r`` column is
        plotted against the threshold.

        The sum runs over the whole ``c2r`` column, so rows that are not
        recomputed here keep whatever value ``self.df`` already holds.
        """
        c2_threshold_list = np.arange(0, 1, 0.01)

        num_stories2show = 9
        list_stories2show = self.df.Story.unique()[:num_stories2show]

        # One scratch copy is enough: every pass overwrites the same `c2r`
        # cells and nothing that is read (Emotion, Score, e_review) changes.
        df_c2 = self.df.copy()
        story_rows = [
            df_c2[self.get_subset_condition(df_c2, story_id)]
            for story_id in list_stories2show
        ]

        c2r_sum_list = []
        for c2_threshold in c2_threshold_list:
            for dfs in story_rows:
                for index, row in dfs.iterrows():
                    c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
                    df_c2.at[index, 'c2r'] = self.get_criteria_review(
                        c2, review=row.e_review)
            c2r_sum_list.append(df_c2['c2r'].sum())

        fig = px.line(x=c2_threshold_list, y=c2r_sum_list)
        fig.update_layout(
            title="Criteria 2 analysis  `CS > Threshold`",
            xaxis_title="CS Threshold",
            yaxis_title="Count of good reviews",
            font=dict(
                size=14,
                color="#006064"
            ),
        )
        st.plotly_chart(fig, use_container_width=True)

    @staticmethod
    def get_subset_condition(data, story_id):
        return (data.Story == story_id) & (data.Turn == 'user')

    @staticmethod
    def get_criteria_review(c, review):
        # printj.green(f'{c} {type(c)}')
        # printj.green(f'{review} {type(review)}')
        result = int(c == True and (review == 'o' or review == None)) + int(
            c == False and review == 'x')
        return np.round(result, 0)
        # return str(np.round(result, 0))

    def get_ngram_pattern(self, s, n=2):
        """Return one character per length-``n`` window of ``s``.

        A window contributes ``'1'`` when it contains a ``'1'`` and ``'0'``
        otherwise; a string shorter than ``n`` yields ``''``.
        """
        window_count = len(s) - (n - 1)
        return ''.join(
            '1' if '1' in s[start:start + n] else '0'
            for start in range(window_count)
        )

    def update_df(self):
        """Create the parameter widgets, score every user turn and render the tables.

        For the user turns of each displayed story the method computes, row by
        row, ``probability_emote = Score * w + (1 - w) * (1 - reaction_frequency)``
        and shows a reaction when that value exceeds the C1 threshold (a fixed
        value, or a fresh ``random.random()`` draw per qualifying turn,
        depending on the radio button).  Turns that are ``neutral`` or score
        below the score threshold never react.  Criterion 2 is
        ``Emotion != 'neutral' and Score > C2 threshold``.

        Results are written into ``self.df`` as the columns
        ``reaction_frequency``, ``probability_emote``, ``random_value``,
        ``reaction_show``, ``c1``, ``c2``, ``c1r`` and ``c2r``.  Afterwards one
        styled table plus a download button is rendered per story, followed by
        a table of reaction patterns (the 0/1 string of ``reaction_show``
        values and its 2- to 7-gram patterns).

        Also sets ``self.container_param_rv``, ``self.show_pe_data`` and
        ``self.score_threshold``.
        """
        list_stories = self.df.Story.unique()
        total_num_stories = len(list_stories)
        num_stories2show = int(set_input(self.container_param,
                                         label='No. of stories to show', min_value=1, max_value=total_num_stories,
                                         # keep the default within range when fewer than nine stories exist
                                         value=min(9, total_num_stories), step=1,
                                         key_slider='num_stories2show_slider', key_input='num_stories2show_input',))
        list_stories2show = list_stories[:num_stories2show]
        reaction_weight = set_input(self.container_param,
                                    label='Reaction Weight w', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
                                    key_slider='w_slider', key_input='w_input',)
        self.container_param_rv = self.container_param.columns([1, 1])
        random_value_mode = self.container_param_rv[0].radio(
            "C1 Threshold type", ["Random", "Fixed"], index=1)
        if random_value_mode == "Fixed":
            random_value = set_input(self.container_param,
                                     label='C1 Threshold',
                                     key_slider='rand_slider', key_input='rand_input',
                                     min_value=0.,
                                     max_value=1.,
                                     value=.5,
                                     step=.01,)
        else:
            # "Random" mode draws a fresh value per qualifying turn below.
            # Bind the name up front so turns recorded before the first draw
            # do not hit an unbound local.
            random_value = 0
        c2_threshold = set_input(self.container_param,
                                 label='C2 Threshold', min_value=0.0, max_value=1.0, value=0.7, step=0.01,
                                 key_slider='c2_threshold_slider', key_input='c2_threshold_input',)
        table_mode = self.container_param.radio(
            "Table Style:", ["Dataframe", "Table"])
        self.show_pe_data = self.container_param.checkbox(
            'Show Probability Emote', value=True, key='show_pe_data_log')
        self.score_threshold = set_input(self.container_param,
                                         label='Score Threshold', min_value=0.0, max_value=1.0, value=0.5, step=0.01,
                                         key_slider='score_threshold_slider', key_input='score_threshold_input',)

        pattern_frames = []
        for story_id in list_stories2show:
            # Per-story running state; the last computed values carry over to
            # turns that do not qualify for a reaction.
            reaction_num = 0
            reaction_frequency = 0
            probability_emote = 0
            reaction_show = False

            subset_condition = self.get_subset_condition(self.df, story_id)
            dfs = self.df[subset_condition]
            for i, (index, row) in enumerate(dfs.iterrows()):
                if row.Emotion == 'neutral' or row.Score < self.score_threshold:
                    reaction_show = False
                else:
                    # Share of earlier turns in this story that reacted.
                    reaction_frequency = reaction_num/(i+1)
                    probability_emote = row.Score*reaction_weight + \
                        (1-reaction_weight)*(1-reaction_frequency)
                    if random_value_mode == "Random":
                        random_value = random.random()
                    reaction_show = bool(probability_emote > random_value)
                if reaction_show:
                    reaction_num += 1

                self.df.at[index, 'reaction_frequency'] = reaction_frequency
                self.df.at[index, 'probability_emote'] = probability_emote
                self.df.at[index, 'random_value'] = random_value
                self.df.at[index, 'reaction_show'] = reaction_show
                self.df.at[index, 'c1'] = reaction_show
                c2 = row.Emotion != 'neutral' and row.Score > c2_threshold
                self.df.at[index, 'c2'] = c2
                review = self.df.e_review[index]
                self.df.at[index, 'c1r'] = self.get_criteria_review(
                    reaction_show, review=review)
                self.df.at[index, 'c2r'] = self.get_criteria_review(
                    c2, review=review)

            # 0/1 string of this story's reaction decisions, in turn order.
            shown = self.df.loc[subset_condition, 'reaction_show'].dropna()
            s = ''.join(str(int(v)) for v in shown)
            pattern = {
                'reaction_length': len(s),
                'reaction_1': s.count('1'),
                'reaction_pattern': s,
            }
            for n in range(2, 8):
                pattern[f'{n}-gram_pattern'] = self.get_ngram_pattern(s, n=n)
            pattern_frames.append(
                pd.DataFrame(pattern, index=[f'Story_{story_id}']))
        df_reaction_pattern = pd.concat(
            pattern_frames) if pattern_frames else pd.DataFrame()

        df_show = self.df.copy()
        for c in ['c1r', 'c2r']:
            df_show[c] = df_show[c].fillna(0).astype(int)
            st.markdown(f'Sum of {c} : {df_show[c].sum()}')

        # The hidden columns depend only on the widget state, not on the story.
        columns2hide = ['Unnamed: 0', 'Story', ]
        if not self.debug:
            columns2hide += ['e_review']
        if self.emotion_type == 'Max-only':
            columns2hide += [
                f'Emotion_{sorted_i+1}' for sorted_i in range(7)]
            columns2hide += [
                f'Score_{sorted_i+1}' for sorted_i in range(7)]
        if not self.show_pe_data:
            columns2hide += [
                "reaction_frequency", "probability_emote", "random_value", "reaction_show"]

        for story_id in list_stories2show:
            # A column that is not present is already "hidden"; do not fail on it.
            dfs = df_show[(df_show.Story == story_id)].drop(
                columns=columns2hide, errors='ignore')

            st.markdown(f'#### Story {story_id}')

            styled = dfs.style
            if self.show_pe_data:
                styled = styled.apply(self.dfstyle_color_text_col, axis=1)
            styled = styled.apply(self.rower, axis=None)
            styled = styled.set_table_styles([{
                'selector': 'tr:hover',
                'props': 'color: #000000'
            }])

            if table_mode == 'Dataframe':
                st.dataframe(styled)
            elif table_mode == 'Table':
                st.table(styled)
            create_dowload_button(
                styled, sheet_name=f'story_{story_id}', file_name=f'data_story_{story_id}.xlsx')
        if table_mode == 'Dataframe':
            st.dataframe(df_reaction_pattern)
        elif table_mode == 'Table':
            st.table(df_reaction_pattern)
    # @st.cache 

    def dfstyle_color_text_col(self, s):
        """Return one CSS string per cell of the row ``s``.

        Cells holding NaN get a near-white text colour (``color: #fcfcfc``);
        every other cell gets a white background.
        """
        # `cell != cell` is only true for NaN.
        return [
            'color: #fcfcfc' if cell != cell else 'background-color: white'
            for cell in s
        ]
    # @staticmethod
    # @st.cache
    # def dfstyle_color_text(val):
    #     if type(val)==str:
    #         color = 'red' if val =='neutral' else 'black'
    #     # elif type(val)==float:
    #     #     color = 'red' if val > .50000 else 'black'
    #     elif val==None:
    #         color = '#ffffff'
    #     else:
    #         color = None
    #     return 'color: %s' % color if color is not None else ''

    @staticmethod
    @st.cache
    def rower(data):
        """Return a frame of CSS strings that shades rows with an odd index label.

        The result has the same index and columns as ``data``; cells of rows
        whose index label is odd hold ``background-color:#f9f9f9``, all other
        cells hold an empty string.
        """
        odd_rows = pd.Series(data.index % 2 != 0)
        # Repeat the per-row flag once per column to get a full-size mask.
        mask = pd.concat([odd_rows for _ in range(data.shape[1])], axis=1)
        styles = np.where(mask, 'background-color:#f9f9f9', '')
        return pd.DataFrame(styles, index=data.index, columns=data.columns)

    def get_log(self):
        """Classify every dialogue line of the selected log and save the result as CSV.

        Lines starting with ``H:`` are recorded as turn ``haru`` and lines
        starting with ``U:`` as turn ``user``; any other line increments the
        story counter and is skipped.  For each dialogue line the text after
        the first three characters is passed to ``self.gen.get_emotion`` and
        the returned labels and scores are stored.  ``e_review`` is
        initialised to a single space.

        The frame is written to ``self.df_path`` once, after all lines are
        processed; nothing is written when the log has no dialogue lines.

        Returns:
            pandas.DataFrame: One row per dialogue line with the columns
            ``Turn``, ``Sentence``, ``Story``, the ranked
            ``Emotion_k``/``Score_k`` pairs (sorted mode), ``Emotion``,
            ``Score`` and ``e_review``.
        """
        import os  # local import: the module only imports selected names from os

        with open(self.path) as f:
            lines = f.readlines()
        self.gen.initialise_classifier_model()

        # NOTE(review): the slider in display_logs offers 'Max-only', never
        # 'max', so this looks like it always resolves to 'sorted' - confirm
        # before changing; update_df drops the Emotion_k/Score_k columns.
        emotion_type = 'max' if self.emotion_type == 'max' else 'sorted'

        records = []
        story_num = 0
        for line in lines:
            if line.startswith('H:'):
                turn = 'haru'
            elif line.startswith('U:'):
                turn = 'user'
            else:
                story_num += 1
                continue

            sentence = line[3:]
            # A fresh dict per row, so no value can leak in from an earlier row.
            record = {'Turn': turn, 'Sentence': sentence, 'Story': story_num}
            emotion = self.gen.get_emotion(sentence, filter_by=emotion_type)
            if emotion_type == 'max':
                record['Emotion'] = emotion['label']
                record['Score'] = emotion['score']
            else:
                for sorted_i in range(len(emotion)):
                    record[f'Emotion_{sorted_i+1}'] = emotion[sorted_i]['label']
                    record[f'Score_{sorted_i+1}'] = emotion[sorted_i]['score']
                record['Emotion'] = emotion[0]['label']
                record['Score'] = emotion[0]['score']
            record['e_review'] = ' '
            records.append(record)

        # Build and save once instead of re-concatenating and rewriting the
        # CSV for every line.
        df = pd.DataFrame(records)
        if records:
            os.makedirs(os.path.dirname(self.df_path), exist_ok=True)
            df.to_csv(self.df_path)
        return df


def display_logs(gen, container_guide, container_param, container_button):
    """Create a ``LogAnalyser`` for the given generator and containers and render it."""
    analyser = LogAnalyser(gen, container_guide, container_param, container_button)
    analyser.display_logs()


if __name__ == '__main__':
    # Run the project's launcher script when the module is executed directly.
    # The module-level imports only bring in `listdir` from os, so the module
    # itself has to be imported here for `os.system` to resolve.
    import os

    os.system('./run.sh')