File size: 9,337 Bytes
441ec1f
20684dc
 
441ec1f
 
 
 
 
20684dc
441ec1f
20684dc
441ec1f
 
 
 
bfa0c67
441ec1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfa0c67
 
 
441ec1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf902a7
441ec1f
bf902a7
 
 
 
 
441ec1f
bf902a7
441ec1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20684dc
441ec1f
20684dc
 
441ec1f
 
20684dc
441ec1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf902a7
 
441ec1f
 
 
 
 
bf902a7
441ec1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf902a7
 
441ec1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf902a7
441ec1f
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

# import nltk
import re
import sys
import time

import numpy as np
import pandas as pd
import printj
import streamlit as st
from transformers import pipeline  # , set_seed


class StoryGenerator:
    def __init__(self):
        # self.initialise_models()
        self.stats_df = pd.DataFrame(data=[], columns=[])
        self.stories = []
        self.data = []

    @staticmethod
    @st.cache(allow_output_mutation=True)
    def get_generator():
        """Build the ``text-generation`` pipeline backed by the ``gpt2`` model.

        The function is decorated with ``st.cache(allow_output_mutation=True)``.
        """
        text_generator = pipeline('text-generation', model='gpt2')
        return text_generator

    @staticmethod
    @st.cache(allow_output_mutation=True)
    def get_classifier():
        """Build the ``text-classification`` pipeline used for emotion labels.

        The pipeline is created with ``return_all_scores=True`` and the
        function is decorated with ``st.cache(allow_output_mutation=True)``.
        """
        model_name = "j-hartmann/emotion-english-distilroberta-base"
        emotion_classifier = pipeline(
            "text-classification", model=model_name, return_all_scores=True)
        return emotion_classifier

    def initialise_classifier_model(self):
        """Load only the emotion classifier and store it on ``self.classifier``."""
        emotion_classifier = self.get_classifier()
        self.classifier = emotion_classifier
        
    def initialise_models(self):
        """Load both pipelines onto the instance.

        Sets ``self.generator`` from ``get_generator`` first, then
        ``self.classifier`` from ``get_classifier``.
        """
        loaders = (
            ('generator', self.get_generator),
            ('classifier', self.get_classifier),
        )
        # Load and assign one at a time so the generator is already stored
        # before the classifier starts loading.
        for attribute_name, loader in loaders:
            setattr(self, attribute_name, loader())

    def reset():
        self.clear_stories()
        self.clear_stats()

    def clear_stories(self):
        self.data = []
        self.stories = []

    def clear_stats(self):
        self.stats_df = pd.DataFrame(data=[], columns=[])

    def get_emotion(self, text, filter_by='max'):
        emotions = self.classifier(text)
        if filter_by=='max':
            emotion = max(emotions[0], key=lambda x: x['score'])
            return emotion
        elif filter_by=='sorted':
            return sorted(emotions[0], key=lambda x: x['score'], reverse=True)

             
    @staticmethod
    def get_num_token(text):
        # return len(nltk.word_tokenize(text))
        return len(re.findall(r'\w+', text))

    @staticmethod
    def check_show_emotion(confidence_score, frequency, w):
        frequency_penalty = 1 - frequency
        probability_emote = w * confidence_score + (1-w) * frequency_penalty
        return probability_emote > np.random.random_sample()

    def story(self,
              story_till_now="Hello, I'm a language model,",
              num_generation=4,
              length=10):
        # last_length = 0

        for i in range(num_generation):
            last_length = len(story_till_now)
            genreate_robot_sentence = self.generator(story_till_now, max_length=self.get_num_token(story_till_now) +
                                                     length, num_return_sequences=1)
            story_till_now = genreate_robot_sentence[0]['generated_text']
            new_sentence = story_till_now[last_length:]
            emotion = self.get_emotion(new_sentence)
            # printj.yellow(f'Sentence {i}:')
            # story_to_print = f'{printj.ColorText.cyan(story_till_now[:last_length])}{printj.ColorText.green(story_till_now[last_length:])}\n'
            # print(story_to_print)
            # printj.purple(f'Emotion: {emotion}')
        return story_till_now, emotion

    def next_sentence(self,
                      story_till_now="Hello, I'm a language model,",
                      length=10):
        last_length = len(story_till_now)
        genreate_robot_sentence = self.generator(story_till_now, max_length=self.get_num_token(story_till_now) +
                                                 length, num_return_sequences=1)
        story_till_now = genreate_robot_sentence[0]['generated_text']
        new_sentence = story_till_now[last_length:]
        emotion = self.get_emotion(new_sentence)
        return story_till_now, emotion, new_sentence

    def auto_ist(self,
                 story_till_now="Hello, I'm a language model,",
                 num_generation=4,
                 length=20, reaction_weight=0.5):
        """Simulate alternating user/robot story turns and record statistics.

        Every round generates a "user" continuation followed by a "robot"
        continuation. For the user turn a random decision is made (via
        ``check_show_emotion``) on whether an emotional reaction is shown;
        ``neutral`` emotions never trigger a reaction.

        Args:
            story_till_now: Opening text of the story.
            num_generation: Number of user+robot rounds.
            length: Added to the regex word count of the current story to
                form the generator's ``max_length``.
            reaction_weight: Weight ``w`` passed to ``check_show_emotion``.

        Returns:
            ``(stats_df, story, story_data)``: a DataFrame with one row per
            generated sentence (both rows of round ``i`` are labelled
            ``idx_i``), the final story text, and a list of dicts with the
            sentence, turn, emotion label and confidence score, starting
            with the opening text.
        """
        def extend(text):
            # Echo the story so far, then let the generator continue it.
            printj.cyan(text)
            budget = self.get_num_token(text) + length
            outputs = self.generator(
                text, max_length=budget, num_return_sequences=1)
            continued = outputs[0]['generated_text']
            return continued, continued[len(text):]

        def entry(sentence, turn, detected):
            # One record of ``story_data``.
            return {
                'sentence': sentence,
                'turn': turn,
                'emotion': detected['label'],
                'confidence_score': detected['score'],
            }

        opening_emotion = self.get_emotion(story_till_now)  # first line emotion
        story_data = [entry(story_till_now, 'first', opening_emotion)]
        stats_df = pd.DataFrame(data=[], columns=[])
        num_reactions = 0
        reaction_frequency = 0

        for round_no in range(num_generation):
            row_label = [f'idx_{round_no}']

            # --- user turn ---
            story_till_now, user_text = extend(story_till_now)
            user_emotion = self.get_emotion(user_text)
            show_emotion_user = False
            if user_emotion['label'] != 'neutral':
                # Only refreshed on non-neutral turns; neutral turns record
                # the value left over from the last non-neutral one.
                reaction_frequency = num_reactions / (round_no + 1)
                show_emotion_user = self.check_show_emotion(
                    confidence_score=user_emotion['score'],
                    frequency=reaction_frequency,
                    w=reaction_weight)
            if show_emotion_user:
                num_reactions += 1

            story_data.append(entry(user_text, 'user', user_emotion))
            user_row = {
                'sentence_no': round_no,
                'turn': 'user',
                'sentence': user_text,
                'show_emotion': show_emotion_user,
                'emotion_label': user_emotion['label'],
                'emotion_score': user_emotion['score'],
                'num_reactions': num_reactions,
                'reaction_frequency': reaction_frequency,
                'reaction_weight': reaction_weight,
            }
            stats_df = pd.concat(
                [stats_df, pd.DataFrame(user_row, index=row_label)])

            # --- robot turn ---
            story_till_now, robot_text = extend(story_till_now)
            robot_emotion = self.get_emotion(robot_text)

            story_data.append(entry(robot_text, 'robot', robot_emotion))
            # Reaction-related columns do not apply to robot turns.
            robot_row = {
                'sentence_no': round_no,
                'turn': 'robot',
                'sentence': robot_text,
                'show_emotion': None,
                'emotion_label': robot_emotion['label'],
                'emotion_score': robot_emotion['score'],
                'num_reactions': None,
                'reaction_frequency': None,
                'reaction_weight': None,
            }
            stats_df = pd.concat(
                [stats_df, pd.DataFrame(robot_row, index=row_label)])

        return stats_df, story_till_now, story_data

    def get_stats(self,
                  story_till_now="Hello, I'm a language model,",
                  num_generation=4,
                  length=20, reaction_weight=-1, num_tests=2):
        """Run ``auto_ist`` ``num_tests`` times and accumulate the results.

        Each run's statistics get a ``story_id`` column (continuing after the
        ids already present in ``self.stats_df``) and are appended to
        ``self.stats_df``; the story text goes to ``self.stories`` and the
        per-sentence data to ``self.data``.

        Args:
            story_till_now: Opening text used for every run.
            num_generation: Rounds per run, forwarded to ``auto_ist``.
            length: Forwarded to ``auto_ist``.
            reaction_weight: Weight forwarded to ``auto_ist``. The value
                ``-1`` means "draw a fresh random weight, rounded to one
                decimal, for every run".
            num_tests: Number of stories to generate.
        """
        use_random_w = reaction_weight == -1
        # Continue numbering after the stories that are already recorded.
        try:
            num_rows = max(self.stats_df.story_id)+1
        except (AttributeError, ValueError):
            # AttributeError: no ``story_id`` column yet.
            # ValueError: the column exists but is empty.
            num_rows = 0
        for story_id in range(num_tests):
            if use_random_w:
                reaction_weight = np.round(np.random.random_sample(), 1)
            stats_df0, _story_till_now, story_data = self.auto_ist(
                story_till_now=story_till_now,
                num_generation=num_generation,
                length=length, reaction_weight=reaction_weight)
            stats_df0.insert(loc=0, column='story_id', value=story_id+num_rows)

            self.stats_df = pd.concat([self.stats_df, stats_df0])
            printj.yellow(f'story_id: {story_id}')
            printj.green(stats_df0)
            self.stories.append(_story_till_now)
            self.data.append(story_data)
        # Per-run index labels repeat, so replace them with a clean range.
        self.stats_df = self.stats_df.reset_index(drop=True)

    def save_stats(self, path='pandas_simple.xlsx'):
        """Write ``self.stats_df`` to an Excel workbook.

        Args:
            path: Destination file. The table is written to a sheet named
                ``IST`` using the ``xlsxwriter`` engine.
        """
        # ``ExcelWriter.save()`` no longer exists in pandas 2.x; the context
        # manager saves and closes the workbook when the block exits.
        with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
            self.stats_df.to_excel(writer, sheet_name='IST')