jh000107 committed on
Commit
ff0c789
1 Parent(s): 4d17192

adding GPT part

Browse files
Files changed (3)
  1. app.py +300 -372
  2. app_spring2023.ipynb +483 -0
  3. app_spring2023.py +396 -0
app.py CHANGED
@@ -1,396 +1,324 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # In[ ]:
5
-
6
-
7
- import numpy as np
8
- import tensorflow as tf
9
- import tensorflow_addons as tfa
10
- from tensorflow.keras import layers
11
- import transformers
12
- import sentencepiece as spm
13
- # show the versions of the imported packages
14
- print("Tensorflow version: ", tf.__version__)
15
- print("Tensorflow Addons version: ", tfa.__version__)
16
- print("Transformers version: ", transformers.__version__)
17
- print("Sentencepiece version: ", spm.__version__)
18
- print("Numpy version: ", np.__version__)
19
-
20
-
21
- # In[ ]:
22
-
23
-
24
- class MeanPool(tf.keras.layers.Layer):
25
- def call(self, inputs, mask=None):
26
- broadcast_mask = tf.expand_dims(tf.cast(mask, "float32"), -1)
27
- embedding_sum = tf.reduce_sum(inputs * broadcast_mask, axis=1)
28
- mask_sum = tf.reduce_sum(broadcast_mask, axis=1)
29
- mask_sum = tf.math.maximum(mask_sum, tf.constant([1e-9]))
30
- return embedding_sum / mask_sum
31
- class WeightsSumOne(tf.keras.constraints.Constraint):
32
- def __call__(self, w):
33
- return tf.nn.softmax(w, axis=0)
34
-
35
-
36
- # In[ ]:
37
-
38
-
39
- tokenizer = transformers.AutoTokenizer.from_pretrained("microsoft/deberta-v3-large"
40
- )
41
- tokenizer.save_pretrained('./tokenizer/')
42
-
43
- cfg = transformers.AutoConfig.from_pretrained("microsoft/deberta-v3-large", output_hidden_states=True)
44
- cfg.hidden_dropout_prob = 0
45
- cfg.attention_probs_dropout_prob = 0
46
- cfg.save_pretrained('./tokenizer/')
47
-
48
-
49
- # In[ ]:
50
-
51
-
52
- def deberta_encode(texts, tokenizer=tokenizer):
53
- input_ids = []
54
- attention_mask = []
55
-
56
- for text in texts:
57
- token = tokenizer(text,
58
- add_special_tokens=True,
59
- max_length=512,
60
- return_attention_mask=True,
61
- return_tensors="np",
62
- truncation=True,
63
- padding='max_length')
64
- input_ids.append(token['input_ids'][0])
65
- attention_mask.append(token['attention_mask'][0])
66
-
67
- return np.array(input_ids, dtype="int32"), np.array(attention_mask, dtype="int32")
68
-
69
-
70
- # In[ ]:
71
-
72
-
73
- MAX_LENGTH=512
74
- BATCH_SIZE=8
75
-
76
-
77
- # In[ ]:
78
-
79
-
80
- def get_model():
81
- input_ids = tf.keras.layers.Input(
82
- shape=(MAX_LENGTH,), dtype=tf.int32, name="input_ids"
83
- )
84
-
85
- attention_masks = tf.keras.layers.Input(
86
- shape=(MAX_LENGTH,), dtype=tf.int32, name="attention_masks"
87
- )
88
-
89
- deberta_model = transformers.TFAutoModel.from_pretrained("microsoft/deberta-v3-large", config=cfg)
90
-
91
-
92
- REINIT_LAYERS = 1
93
- normal_initializer = tf.keras.initializers.GlorotUniform()
94
- zeros_initializer = tf.keras.initializers.Zeros()
95
- ones_initializer = tf.keras.initializers.Ones()
96
-
97
- # print(f'\nRe-initializing encoder block:')
98
- for encoder_block in deberta_model.deberta.encoder.layer[-REINIT_LAYERS:]:
99
- # print(f'{encoder_block}')
100
- for layer in encoder_block.submodules:
101
- if isinstance(layer, tf.keras.layers.Dense):
102
- layer.kernel.assign(normal_initializer(shape=layer.kernel.shape, dtype=layer.kernel.dtype))
103
- if layer.bias is not None:
104
- layer.bias.assign(zeros_initializer(shape=layer.bias.shape, dtype=layer.bias.dtype))
105
-
106
- elif isinstance(layer, tf.keras.layers.LayerNormalization):
107
- layer.beta.assign(zeros_initializer(shape=layer.beta.shape, dtype=layer.beta.dtype))
108
- layer.gamma.assign(ones_initializer(shape=layer.gamma.shape, dtype=layer.gamma.dtype))
109
-
110
- deberta_output = deberta_model.deberta(
111
- input_ids, attention_mask=attention_masks
112
- )
113
- hidden_states = deberta_output.hidden_states
114
-
115
- #WeightedLayerPool + MeanPool of the last 4 hidden states
116
- stack_meanpool = tf.stack(
117
- [MeanPool()(hidden_s, mask=attention_masks) for hidden_s in hidden_states[-4:]],
118
- axis=2)
119
-
120
- weighted_layer_pool = layers.Dense(1,
121
- use_bias=False,
122
- kernel_constraint=WeightsSumOne())(stack_meanpool)
123
-
124
- weighted_layer_pool = tf.squeeze(weighted_layer_pool, axis=-1)
125
- output=layers.Dense(15,activation='linear')(weighted_layer_pool)
126
- #x = layers.Dense(6, activation='linear')(x)
127
-
128
- #output = layers.Rescaling(scale=4.0, offset=1.0)(x)
129
- model = tf.keras.Model(inputs=[input_ids, attention_masks], outputs=output)
130
-
131
- #Compile model with Layer-wise Learning Rate Decay
132
- layer_list = [deberta_model.deberta.embeddings] + list(deberta_model.deberta.encoder.layer)
133
- layer_list.reverse()
134
-
135
- INIT_LR = 1e-5
136
- LLRDR = 0.9
137
- LR_SCH_DECAY_STEPS = 1600
138
 
139
- lr_schedules = [tf.keras.optimizers.schedules.ExponentialDecay(
140
- initial_learning_rate=INIT_LR * LLRDR ** i,
141
- decay_steps=LR_SCH_DECAY_STEPS,
142
- decay_rate=0.3) for i in range(len(layer_list))]
143
- lr_schedule_head = tf.keras.optimizers.schedules.ExponentialDecay(
144
- initial_learning_rate=1e-4,
145
- decay_steps=LR_SCH_DECAY_STEPS,
146
- decay_rate=0.3)
147
 
148
- optimizers = [tf.keras.optimizers.Adam(learning_rate=lr_sch) for lr_sch in lr_schedules]
 
149
 
150
- optimizers_and_layers = [(tf.keras.optimizers.Adam(learning_rate=lr_schedule_head), model.layers[-4:])] +\
151
- list(zip(optimizers, layer_list))
152
 
153
- optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
154
 
155
- model.compile(optimizer=optimizer,
156
- loss='mse',
157
- metrics=[tf.keras.metrics.RootMeanSquaredError()],
158
- )
159
- return model
160
-
161
-
162
- # In[ ]:
163
-
164
-
165
- tf.keras.backend.clear_session()
166
- model = get_model()
167
- model.load_weights('./best_model_fold2.h5')
168
-
169
-
170
- # In[ ]:
171
-
172
-
173
-
174
-
175
-
176
- # In[ ]:
177
-
178
-
179
- # map the integer labels to their original string representation
180
- label_mapping = {
181
- 0: 'Greeting',
182
- 1: 'Curiosity',
183
- 2: 'Interest',
184
- 3: 'Obscene',
185
- 4: 'Annoyed',
186
- 5: 'Openness',
187
- 6: 'Anxious',
188
- 7: 'Acceptance',
189
- 8: 'Uninterested',
190
- 9: 'Informative',
191
- 10: 'Accusatory',
192
- 11: 'Denial',
193
- 12: 'Confused',
194
- 13: 'Disapproval',
195
- 14: 'Remorse'
196
- }
197
-
198
- #label_strings = [label_mapping[label] for label in labels]
199
-
200
- #print(label_strings)
201
-
202
 
203
- # In[ ]:
204
 
 
205
 
206
- def inference(texts):
207
- prediction = model.predict(deberta_encode([texts]))
208
- labels = np.argmax(prediction, axis=1)
209
- label_strings = [label_mapping[label] for label in labels]
210
- return label_strings[0]
211
 
212
-
213
- # # GPT
214
-
215
- # In[ ]:
216
-
217
-
218
- import openai
219
- import os
220
- import pandas as pd
221
- import gradio as gr
222
-
223
-
224
- # In[ ]:
225
-
226
-
227
- openai.organization = os.environ['org_id']
228
- openai.api_key = os.environ['openai_api']
229
- model_version = "gpt-3.5-turbo"
230
- model_token_limit = 10
231
- model_temperature = 0.1
232
-
233
-
234
- # In[ ]:
235
-
236
-
237
- def generatePrompt () :
238
- labels = ["Openness",
239
- "Anxious",
240
- "Confused",
241
- "Disapproval",
242
- "Remorse",
243
- "Uninterested",
244
- "Accusatory",
245
- "Annoyed",
246
- "Interest",
247
- "Curiosity",
248
- "Acceptance",
249
- "Obscene",
250
- "Denial",
251
- "Informative",
252
- "Greeting"]
253
-
254
- formatted_labels = ', '.join(labels[:-1]) + ', or ' + labels[-1] + '.'
255
-
256
- label_set = ["Openness", "Anxious", "Confused", "Disapproval", "Remorse", "Accusatory",
257
- "Denial", "Obscene", "Uninterested", "Annoyed", "Informative", "Greeting",
258
- "Interest", "Curiosity", "Acceptance"]
259
-
260
- formatted_labels = ', '.join(label_set[:-1]) + ', or ' + label_set[-1] + '.\n'
261
-
262
- # The basic task to assign GPT (in natural language)
263
- base_task = "Classify the following text messages into one of the following categories using one word: " + formatted_labels
264
- base_task += "Provide only a one word response. Use only the labels provided.\n"
265
-
266
- return base_task
267
-
268
-
269
- # In[ ]:
270
-
271
-
272
- def predict(message):
273
 
274
- prompt = [{"role": "user", "content": generatePrompt () + "Text: "+ message}]
275
 
276
- response = openai.ChatCompletion.create(
277
- model=model_version,
278
- temperature=model_temperature,
279
- max_tokens=model_token_limit,
280
- messages=prompt
281
- )
282
-
283
- return response["choices"][0]["message"]["content"]
284
-
285
-
286
- # # Update
287
-
288
- # In[ ]:
289
-
290
-
291
- model_version = "gpt-3.5-turbo"
292
- model_token_limit = 2000
293
- model_temperature = 0.1
294
 
 
295
 
296
- # In[ ]:
297
 
298
 
299
- def revision(message):
300
- base_prompt = "Here is a conversation between a Caller and a Volunteer. The Volunteer is trying to be as non-accusatory as possible but also wants to get as much information about the caller as possible. What should the volunteer say next in this exchange? Proved 3 possible responses."
301
 
302
- prompt = [{"role": "user", "content": base_prompt + message}]
303
 
304
- response = openai.ChatCompletion.create(
305
- model=model_version,
306
- temperature=model_temperature,
307
- max_tokens=model_token_limit,
308
- messages=prompt
309
- )
310
 
311
- return response["choices"][0]["message"]["content"]
312
-
313
-
314
- # In[ ]:
315
 
316
 
317
  import gradio as gr
318
 
319
- def combine(a):
320
- return a + "hello"
321
-
322
-
323
-
324
-
325
- with gr.Blocks() as demo:
326
- gr.Markdown("## DeBERTa Sentiment Analysis")
327
- gr.Markdown("This is a custom DeBERTa model architecture for sentiment analysis with 15 labels: Openness, Anxiety, Confusion, Disapproval, Remorse, Accusation, Denial, Obscenity, Disinterest, Annoyance, Information, Greeting, Interest, Curiosity, or Acceptance.<br />Please enter your sentence(s) in the input box below and click the Submit button. The model will then process the input and provide the sentiment in one of the labels.<br/>The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works.")
328
-
329
- txt = gr.Textbox(label="Input", lines=2)
330
- txt_1 = gr.Textbox(value="", label="Output")
331
  btn = gr.Button(value="Submit")
332
- btn.click(inference, inputs=txt, outputs= txt_1)
333
-
334
- demoExample = [
335
- "Hello, how are you?",
336
- "I am so happy to be here!",
337
- "i don't have time for u"
338
- ]
339
-
340
- gr.Markdown("## Text Examples")
341
- gr.Examples(
342
- demoExample,
343
- txt,
344
- txt_1,
345
- inference
346
- )
347
-
348
- with gr.Blocks() as gptdemo:
349
-
350
- gr.Markdown("## GPT Sentiment Analysis")
351
- gr.Markdown("This a custom GPT model for sentiment analysis with 15 labels: Openness, Anxiety, Confusion, Disapproval, Remorse, Accusation, Denial, Obscenity, Disinterest, Annoyance, Information, Greeting, Interest, Curiosity, or Acceptance.<br />Please enter your sentence(s) in the input box below and click the Submit button. The model will then process the input and provide the sentiment in one of the labels.<br />The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works.Please note that the input may be collected by service providers.")
352
- txt = gr.Textbox(label="Input", lines=2)
353
- txt_1 = gr.Textbox(value="", label="Output")
354
- btn = gr.Button(value="Submit")
355
- btn.click(predict, inputs=txt, outputs= txt_1)
356
-
357
- gptExample = [
358
- "Hello, how are you?",
359
- "Are you busy at the moment?",
360
- "I'm doing real good"
361
- ]
362
-
363
- gr.Markdown("## Text Examples")
364
- gr.Examples(
365
- gptExample,
366
- txt,
367
- txt_1,
368
- predict
369
- )
370
-
371
-
372
- with gr.Blocks() as revisiondemo:
373
- gr.Markdown("## Conversation Revision")
374
- gr.Markdown("This is a custom GPT model designed to generate possible response texts based on previous contexts. You can input a conversation between a caller and a volunteer, and the model will provide three possible responses based on the input. <br />The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works. Please note that the input may be collected by service providers.")
375
- txt = gr.Textbox(label="Input", lines=2)
376
- txt_1 = gr.Textbox(value="", label="Output",lines=4)
377
- btn = gr.Button(value="Submit")
378
- btn.click(revision, inputs=txt, outputs= txt_1)
379
-
380
- revisionExample = ["Caller: sup\nVolunteer: Hey, how's it going?\nCaller: not very well, actually\nVolunteer: What's the matter?\nCaller: it's my wife, don't worry about it"]
381
-
382
- with gr.Column():
383
- gr.Markdown("## Text Examples")
384
- gr.Examples(
385
- revisionExample,
386
- [txt],
387
- txt_1,
388
- revision
389
- )
390
-
391
-
392
-
393
-
394
- gr.TabbedInterface([demo, gptdemo,revisiondemo], ["Model", "GPT","Text Revision"]
395
- ).launch(inline=False)
396
 
 
 
1
+ import os
2
+ import openai
3
+ from openai import OpenAI
4
+ from dotenv import load_dotenv, find_dotenv
5
+
6
+ # %matplotlib inline  (IPython magic; invalid syntax in a plain .py script, so left commented out)
7
+ import re
8
+ import matplotlib.pyplot as plt
9
+
10
+ sample_input = \
11
+ """
12
+ Visitor: Heyyy
13
+ Visitor: How are you this evening
14
+ Agent: better now ;) call me
15
+ Visitor: I am at work for now, be off around 10pm
16
+ Visitor: Need some company
17
+ Visitor: Are you independent honey
18
+ Agent: well since you arent available at the moment ill just come out and say-these sites are bad news. \
19
+ did you know that most of the girls on here are here against their will? \
20
+ Most of them got dragged into this lifestyle by an abuser, \
21
+ oftentimes before they were of legal consenting age. isnt that sad?
22
+ Agent: we are with some guys who are trying to spread awareness of the realities of this "industry".
23
+ Agent: https://exoduscry.com/choice/
24
+ Visitor: Thanks
25
+ Agent: i encourage you to watch this video. it is jarring to think about how bad someone else's options must be to choose to be on these sites
26
+ Visitor: Ooohhh
27
+ Agent: selling their body to make ends meet or appease a pimp
28
+ Visitor: That's really awful
29
+ Agent: it is. you seem like the kind of guy who wouldnt wont to proliferate that kind of harmful lifestyle. am i right in thinking that?
30
+ Visitor: Well iam just looking for attention
31
+ Visitor: My marriage is not going well lol
32
+ Agent: i know that it is hard to find ourselves lonely and without much alternative to meet that perceived need but \
33
+ its humbling to think that our needs can force someone else into such a dark place
34
+ Agent: hey, thanks for sharing that my man. i know it can be hard
35
+ Agent: marraige is the most humbling of relationships, isnt it?
36
+ Visitor: She leaves with her friends n no time for me
37
+ Agent: ive been there my guy. i know that it is alot easier to numb that loneliness for sure
38
+ Visitor: I want to be faithful
39
+ Agent: does your wife know how you feel when she chooses her friends instead of you?
40
+ Visitor: I been drinking lately
41
+ Visitor: Yes, she takes pills
42
+ Agent: if so, i hope you are praying for her to realize the hurt she is causing and to seek change
43
+ Visitor: She had surgery 4 yes ago n it's been hard for her n her addiction on pills
44
+ Visitor: Yes for now i am looking for a female friend to talk n see what can we do for each other
45
+ Agent: that is hard my man. physical pain is a huge obstacle in life for sure so i hear you
46
+ Visitor: Well chat later. thanks
47
+ Agent: have you considered pursuing other men who can encourage you instead of looking for the easy way out?
48
+ Agent: what is your name my friend? i will be praying for you by name if you wouldnt mind sharing it
49
+ Agent: well, i gotta run. watch that video i sent and i will definitely be praying for you. \
50
+ I hope you pray for yourself and for your wife - God can definitely intervene and cause complete change in the situation if He wills it. \
51
+ He is good and He hears you. You are loved by Him, brother. Good night
52
+ """
53
+
54
+ sample_output = \
55
+ """
56
+ Visitor: Heyyy
57
+ [Greeting]
58
+ Visitor: How are you this evening
59
+ [Greeting]
60
+ Agent: better now ;) call me
61
+ [Openness]
62
+ Visitor: I am at work for now, be off around 10pm
63
+ [Interest]
64
+ Visitor: Need some company
65
+ [Interest]
66
+ Visitor: Are you independent honey
67
+ [Interest]
68
+ Agent: well since you arent available at the moment ill just come out and say-these sites are bad news. \
69
+ did you know that most of the girls on here are here against their will? \
70
+ Most of them got dragged into this lifestyle by an abuser, \
71
+ oftentimes before they were of legal consenting age. isnt that sad?
72
+ [Informative]
73
+ Agent: we are with some guys who are trying to spread awareness of the realities of this "industry".
74
+ [Informative]
75
+ Agent: https://exoduscry.com/choice/
76
+ [Informative]
77
+ Visitor: Thanks
78
+ [Acceptance]
79
+ Agent: i encourage you to watch this video. it is jarring to think about how bad someone else's options must be to choose to be on these sites
80
+ [Informative]
81
+ Visitor: Ooohhh
82
+ [Interest]
83
+ Agent: selling their body to make ends meet or appease a pimp
84
+ [Informative]
85
+ Visitor: That's really awful
86
+ [Remorse]
87
+ Agent: it is. you seem like the kind of guy who wouldnt wont to proliferate that kind of harmful lifestyle. am i right in thinking that?
88
+ [Accusatory]
89
+ Visitor: Well iam just looking for attention
90
+ [Anxious]
91
+ Visitor: My marriage is not going well lol
92
+ [Anxious]
93
+ Agent: i know that it is hard to find ourselves lonely and without much alternative to meet that perceived need but \
94
+ its humbling to think that our needs can force someone else into such a dark place
95
+ [Informative]
96
+ Agent: hey, thanks for sharing that my man. i know it can be hard
97
+ [Acceptance]
98
+ Agent: marraige is the most humbling of relationships, isnt it?
99
+ [Openness]
100
+ Visitor: She leaves with her friends n no time for me
101
+ [Annoyed]
102
+ Agent: ive been there my guy. i know that it is alot easier to numb that loneliness for sure
103
+ [Acceptance]
104
+ Visitor: I want to be faithful
105
+ [Acceptance]
106
+ Agent: does your wife know how you feel when she chooses her friends instead of you?
107
+ [Curiosity]
108
+ Visitor: I been drinking lately
109
+ [Anxious]
110
+ Visitor: Yes, she takes pills
111
+ [Anxious]
112
+ Agent: if so, i hope you are praying for her to realize the hurt she is causing and to seek change
113
+ [Interest]
114
+ Visitor: She had surgery 4 yes ago n it's been hard for her n her addiction on pills
115
+ [Anxious]
116
+ Visitor: Yes for now i am looking for a female friend to talk n see what can we do for each other
117
+ [Informative]
118
+ Agent: that is hard my man. physical pain is a huge obstacle in life for sure so i hear you
119
+ [Acceptance]
120
+ Visitor: Well chat later. thanks
121
+ [Openness]
122
+ Agent: have you considered pursuing other men who can encourage you instead of looking for the easy way out?
123
+ [Informative]
124
+ Agent: what is your name my friend? i will be praying for you by name if you wouldnt mind sharing it
125
+ [Openness]
126
+ Agent: well, i gotta run. watch that video i sent and i will definitely be praying for you. \
127
+ I hope you pray for yourself and for your wife - God can definitely intervene and cause complete change in the situation if He wills it. \
128
+ He is good and He hears you. You are loved by Him, brother. Good night
129
+ [Openness]
130
+
131
+ Sentiment Flow Analysis on the Visitor's side:
132
+
133
+ The Visitor begins the conversation with a friendly and casual tone, expressing a desire for company and showing interest in the Agent. \
134
+ However, as the Agent provides information about the harsh realities of the commercial sex industry, the Visitor's sentiment shifts to acceptance of the information \
135
+ and a sense of confusion and remorse about the situation.
136
+
137
+ The Visitor then reveals personal issues, indicating anxiety and seeking attention due to marital problems. \
138
+ The sentiment continues to be anxious as the Visitor discusses personal struggles with alcohol and his wife's pill addiction, \
139
+ showing a need for companionship and support.
140
+
141
+ Despite the heavy topics, the Visitor expresses a desire to remain faithful and shows interest in finding a friend, albeit with a hint of desperation. \
142
+ The Visitor openly takes the Agent's information and the conversation flows smoothly as both the Visitor and the Agent \
143
+ show openness toward each other.
144
+ """
145
+
146
+ def get_completion(conversation, model="gpt-4-1106-preview"):
147
+
148
+ prompt = f"""
149
+ The EPIK Project is about mobilizing male allies \
150
+ to disrupt the commercial sex market, \
151
+ equipping them to combat the roots of exploitation \
152
+ and encouraging them to collaborate effectively \
153
+ with the wider anti-trafficking movement. \
154
+ You are an adept expert conversation sentiment analyzer. \
155
+ Your job is to analyze the conversation and provide a report \
156
+ based on the sentiment flow of the conversation from the visitor's \
157
+ perspective. Visitor indicates the potential buyer, and Agent indicates the volunteer from EPIK. \
158
+ The conversation is going to be given in the format:
159
 
160
+ Visitor: <Visitor's message here>
161
+ Agent: <Agent's message here>
162
 
163
+ The actual conversation is delimited by triple backticks
164
+ ```{conversation}```
165
 
166
+ Here is the list of sentiment labels you should use delimited by square brackets. \
167
+ ["Openness", "Anxious", "Confused", "Disapproval", "Remorse", "Accusatory", \
168
+ "Denial", "Obscene", "Uninterested", "Annoyed", "Informative", "Greeting", \
169
+ "Interest", "Curiosity", "Acceptance"]
170
 
171
+ Your output should look like:
172
+ ```
173
+ Speaker: <Speaker's message here>
174
+ [sentiment label]
175
+ ...
176
+ Speaker: <Speaker's message here>
177
+ [sentiment label]
178
+ ```
179
 
180
+ where Speaker can either be Visitor or Agent. Then, you should write your report on the sentiment flow \
181
+ on the Visitor's side below.
182
 
183
+ Here is a sample input delimited by triple backticks
184
 
185
+ ```{sample_input}```
186
 
187
+ Here is a sample output that you should try to aim for, delimited by square brackets
188
 
189
+ [{sample_output}]
190
+ """
191
 
192
+ client = OpenAI()
193
 
194
+ messages = [{"role": "user", "content": prompt}]
195
+ response = client.chat.completions.create(
196
+ model=model,
197
+ messages=messages,
198
+ temperature=0, # this is the degree of randomness of the model's output
199
+ )
200
 
201
+ analysis = response.choices[0].message.content
202
 
203
+ def extract_conv_with_labels(analysis):
204
+ analysis = analysis.replace("\n", " ")
205
+ BETWEEN_BACKTICKS = "\\`\\`\\`(.*?)\\`\\`\\`"
206
+ match = re.search(BETWEEN_BACKTICKS, analysis)
207
+ if match:
208
+ conv_with_labels = match.group()[4:-4]
209
+ else:
210
+ return "OUTPUT IS IN WRONG FORMAT"
211
+
212
+ # just reformatting it for better format
213
+ conv_with_labels = conv_with_labels.split('] ')
214
+ temp = [utterance + ']' for utterance in conv_with_labels[:-1]]
215
+ conv_with_labels = temp + [conv_with_labels[-1]]
216
+ return conv_with_labels
217
+
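For a quick sanity check of the extraction step above, the same regex-and-reformat logic can be run on a toy string outside the app (the transcript and labels here are made up for illustration):

import re

# Toy stand-in for the model's analysis text: transcript between triple backticks.
toy = "Header text ``` Visitor: hi [Greeting] Agent: hello [Greeting] ``` report body"
match = re.search(r"```(.*?)```", toy)
extracted = match.group()[4:-4]                      # strip '``` ' and ' ```'
parts = extracted.split('] ')
parts = [p + ']' for p in parts[:-1]] + [parts[-1]]  # restore the ']' lost by split
print(parts)  # -> ['Visitor: hi [Greeting]', 'Agent: hello [Greeting]']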
218
+ grouped_sentiments = {
219
+ 'Acceptance': 3,
220
+ 'Openness': 3,
221
+ 'Interest': 2,
222
+ 'Curiosity': 2,
223
+ 'Informative': 1,
224
+ 'Greeting': 0,
225
+ 'None': 0,
226
+ 'Uninterested': -1,
227
+ 'Anxious': -2,
228
+ 'Confused': -2,
229
+ 'Annoyed': -2,
230
+ 'Remorse': -2,
231
+ 'Disapproval': -3,
232
+ 'Accusatory': -3,
233
+ 'Denial': -3,
234
+ 'Obscene': -3
235
+ }
236
+
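For illustration, this is how a labeled sequence maps onto the -3..3 scale above (a hypothetical label sequence, with grouped_sentiments in scope):

# Hypothetical label sequence -> the numeric series that sentiment_flow_plot draws
labels = ['Greeting', 'Interest', 'Accusatory', 'Acceptance']
print([grouped_sentiments[label] for label in labels])  # -> [0, 2, -3, 3]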
237
+
238
+ def sentiment_flow_plot(conv):
239
+ conv_with_labels = extract_conv_with_labels(conv)  # use the argument rather than the enclosing 'analysis'
240
+ num_utterances = len(conv_with_labels)
241
+
242
+ visitor_Y = [''] * num_utterances
243
+ agent_Y = [''] * num_utterances
244
+
245
+ for i in range(num_utterances):
246
+ utterance = conv_with_labels[i]
247
+ match = re.search(r'\[(.*?)\]$', utterance)
248
+ if match:
249
+ label = match.group(1)
250
+ else:
251
+ print("OUTPUT IS IN WRONG FORMAT")
252
+ break
253
+
254
+ if utterance.startswith('Visitor'):
255
+ visitor_Y[i] = label
256
+ if i == 0:
257
+ agent_Y[i] = 'None'
258
+ else:
259
+ agent_Y[i] = agent_Y[i-1]
260
+ elif utterance.startswith('Agent'):
261
+ agent_Y[i] = label
262
+ if i == 0:
263
+ visitor_Y[i] = 'None'
264
+ else:
265
+ visitor_Y[i] = visitor_Y[i-1]
266
+
267
+ X = range(1,num_utterances+1)
268
+ visitor_Y_converted = [grouped_sentiments[visitor_Y[i]] for i in range(num_utterances)]
269
+ agent_Y_converted = [grouped_sentiments[agent_Y[i]] for i in range(num_utterances)]
270
+
271
+ plt.style.use('seaborn')  # note: renamed to 'seaborn-v0_8' in matplotlib >= 3.6
272
+
273
+ fig, ax = plt.subplots()
274
+
275
+
276
+ ax.plot(X, visitor_Y_converted, label='Visitor', color='blue', marker='o')
277
+ ax.plot(X, agent_Y_converted, label='Agent', color='green', marker='o')
278
+
279
+ plt.yticks(ticks=[-3,-2,-1,0,1,2,3],
280
+ labels=['Disapproval/Accusatory/Denial/Obscene', 'Anxious/Confused/Annoyed/Remorse',
281
+ 'Uninterested', 'Greeting/None', 'Informative', 'Interest/Curiosity', 'Acceptance/Openness'])
282
+
283
+ for label in ax.get_yticklabels():
284
+ label.set_rotation(45)
285
 
286
+ plt.xlabel('Number of utterances')
287
+ plt.ylabel('Sentiments')
288
+ plt.title('Sentiment Flow Plot')
289
 
290
+ plt.close(fig)
 
291
 
292
+ return fig
293
 
294
+ fig = sentiment_flow_plot(analysis)
295
 
296
+ return response.choices[0].message.content, fig
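A minimal usage sketch, assuming a valid OPENAI_API_KEY in the environment and the sample_input defined above (kept commented out to avoid an accidental API call):

# report, fig = get_completion(sample_input)
# print(report)                      # labeled transcript plus the sentiment-flow write-up
# fig.savefig("sentiment_flow.png")  # the Matplotlib figure returned alongside it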
297
 
298
+ def set_key(key):
299
+ with open("_.env", "w") as file:
300
+ file.write(f"OPENAI_API_KEY={key}")
301
+
302
+ load_dotenv(find_dotenv("_.env"), override=True)
303
+ return
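A design note: set_key round-trips the key through a local _.env file so load_dotenv can export it. A file-free sketch that only affects the current process (os is already imported above):

# def set_key(key):
#     os.environ["OPENAI_API_KEY"] = key  # nothing persisted to disk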
304
 
305
  import gradio as gr
306
 
307
+ with gr.Blocks() as gpt_analysis:
308
+ gr.Markdown("## Conversation Analysis")
309
+ gr.Markdown(
310
+ "This is a custom GPT model designed to provide \
311
+ a report on the overall sentiment flow of the conversation from the \
312
+ visitor's perspective.<br />Enter a conversation in the input box below and click Submit to see how it works.")
313
+ api_key = gr.Textbox(label="Key", lines=1)
314
+ btn_key = gr.Button(value="Submit Key")
315
+ btn_key.click(set_key, inputs=api_key)
316
+ conversation = gr.Textbox(label="Input", lines=2)
317
  btn = gr.Button(value="Submit")
318
+ with gr.Row():
319
+ output_box = gr.Textbox(value="", label="Output",lines=4)
320
+ plot_box = gr.Plot(label="Analysis Plot")
321
+
322
+ btn.click(get_completion, inputs=conversation, outputs=[output_box, plot_box])
323
 
324
+ gr.TabbedInterface([gpt_analysis], ["GPT Analysis"]).launch(inline=False)
app_spring2023.ipynb ADDED
@@ -0,0 +1,483 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import numpy as np\n",
10
+ "import tensorflow as tf\n",
11
+ "import tensorflow_addons as tfa\n",
12
+ "from tensorflow.keras import layers\n",
13
+ "import transformers\n",
14
+ "import sentencepiece as spm\n",
15
+ "#show the version of the package imported with text instructions\\\n",
16
+ "print(\"Tensorflow version: \", tf.__version__)\n",
17
+ "print(\"Tensorflow Addons version: \", tfa.__version__)\n",
18
+ "print(\"Transformers version: \", transformers.__version__)\n",
19
+ "print(\"Sentencepiece version: \", spm.__version__)\n",
20
+ "print(\"Numpy version: \", np.__version__)"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "class MeanPool(tf.keras.layers.Layer):\n",
30
+ " def call(self, inputs, mask=None):\n",
31
+ " broadcast_mask = tf.expand_dims(tf.cast(mask, \"float32\"), -1)\n",
32
+ " embedding_sum = tf.reduce_sum(inputs * broadcast_mask, axis=1)\n",
33
+ " mask_sum = tf.reduce_sum(broadcast_mask, axis=1)\n",
34
+ " mask_sum = tf.math.maximum(mask_sum, tf.constant([1e-9]))\n",
35
+ " return embedding_sum / mask_sum\n",
36
+ "class WeightsSumOne(tf.keras.constraints.Constraint):\n",
37
+ " def __call__(self, w):\n",
38
+ " return tf.nn.softmax(w, axis=0)"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": null,
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "tokenizer = transformers.AutoTokenizer.from_pretrained(\"microsoft/deberta-v3-large\"\n",
48
+ ")\n",
49
+ "tokenizer.save_pretrained('./tokenizer/')\n",
50
+ "\n",
51
+ "cfg = transformers.AutoConfig.from_pretrained(\"microsoft/deberta-v3-large\", output_hidden_states=True)\n",
52
+ "cfg.hidden_dropout_prob = 0\n",
53
+ "cfg.attention_probs_dropout_prob = 0\n",
54
+ "cfg.save_pretrained('./tokenizer/')"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "def deberta_encode(texts, tokenizer=tokenizer):\n",
64
+ " input_ids = []\n",
65
+ " attention_mask = []\n",
66
+ " \n",
67
+ " for text in texts:\n",
68
+ " token = tokenizer(text, \n",
69
+ " add_special_tokens=True, \n",
70
+ " max_length=512, \n",
71
+ " return_attention_mask=True, \n",
72
+ " return_tensors=\"np\", \n",
73
+ " truncation=True, \n",
74
+ " padding='max_length')\n",
75
+ " input_ids.append(token['input_ids'][0])\n",
76
+ " attention_mask.append(token['attention_mask'][0])\n",
77
+ " \n",
78
+ " return np.array(input_ids, dtype=\"int32\"), np.array(attention_mask, dtype=\"int32\")"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": null,
84
+ "metadata": {},
85
+ "outputs": [],
86
+ "source": [
87
+ "MAX_LENGTH=512\n",
88
+ "BATCH_SIZE=8"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": null,
94
+ "metadata": {},
95
+ "outputs": [],
96
+ "source": [
97
+ "def get_model():\n",
98
+ " input_ids = tf.keras.layers.Input(\n",
99
+ " shape=(MAX_LENGTH,), dtype=tf.int32, name=\"input_ids\"\n",
100
+ " )\n",
101
+ " \n",
102
+ " attention_masks = tf.keras.layers.Input(\n",
103
+ " shape=(MAX_LENGTH,), dtype=tf.int32, name=\"attention_masks\"\n",
104
+ " )\n",
105
+ " \n",
106
+ " deberta_model = transformers.TFAutoModel.from_pretrained(\"microsoft/deberta-v3-large\", config=cfg)\n",
107
+ " \n",
108
+ " \n",
109
+ " REINIT_LAYERS = 1\n",
110
+ " normal_initializer = tf.keras.initializers.GlorotUniform()\n",
111
+ " zeros_initializer = tf.keras.initializers.Zeros()\n",
112
+ " ones_initializer = tf.keras.initializers.Ones()\n",
113
+ "\n",
114
+ "# print(f'\\nRe-initializing encoder block:')\n",
115
+ " for encoder_block in deberta_model.deberta.encoder.layer[-REINIT_LAYERS:]:\n",
116
+ "# print(f'{encoder_block}')\n",
117
+ " for layer in encoder_block.submodules:\n",
118
+ " if isinstance(layer, tf.keras.layers.Dense):\n",
119
+ " layer.kernel.assign(normal_initializer(shape=layer.kernel.shape, dtype=layer.kernel.dtype))\n",
120
+ " if layer.bias is not None:\n",
121
+ " layer.bias.assign(zeros_initializer(shape=layer.bias.shape, dtype=layer.bias.dtype))\n",
122
+ "\n",
123
+ " elif isinstance(layer, tf.keras.layers.LayerNormalization):\n",
124
+ " layer.beta.assign(zeros_initializer(shape=layer.beta.shape, dtype=layer.beta.dtype))\n",
125
+ " layer.gamma.assign(ones_initializer(shape=layer.gamma.shape, dtype=layer.gamma.dtype))\n",
126
+ "\n",
127
+ " deberta_output = deberta_model.deberta(\n",
128
+ " input_ids, attention_mask=attention_masks\n",
129
+ " )\n",
130
+ " hidden_states = deberta_output.hidden_states\n",
131
+ " \n",
132
+ " #WeightedLayerPool + MeanPool of the last 4 hidden states\n",
133
+ " stack_meanpool = tf.stack(\n",
134
+ " [MeanPool()(hidden_s, mask=attention_masks) for hidden_s in hidden_states[-4:]], \n",
135
+ " axis=2)\n",
136
+ " \n",
137
+ " weighted_layer_pool = layers.Dense(1,\n",
138
+ " use_bias=False,\n",
139
+ " kernel_constraint=WeightsSumOne())(stack_meanpool)\n",
140
+ " \n",
141
+ " weighted_layer_pool = tf.squeeze(weighted_layer_pool, axis=-1)\n",
142
+ " output=layers.Dense(15,activation='linear')(weighted_layer_pool)\n",
143
+ " #x = layers.Dense(6, activation='linear')(x)\n",
144
+ " \n",
145
+ " #output = layers.Rescaling(scale=4.0, offset=1.0)(x)\n",
146
+ " model = tf.keras.Model(inputs=[input_ids, attention_masks], outputs=output)\n",
147
+ " \n",
148
+ " #Compile model with Layer-wise Learning Rate Decay\n",
149
+ " layer_list = [deberta_model.deberta.embeddings] + list(deberta_model.deberta.encoder.layer)\n",
150
+ " layer_list.reverse()\n",
151
+ " \n",
152
+ " INIT_LR = 1e-5\n",
153
+ " LLRDR = 0.9\n",
154
+ " LR_SCH_DECAY_STEPS = 1600\n",
155
+ " \n",
156
+ " lr_schedules = [tf.keras.optimizers.schedules.ExponentialDecay(\n",
157
+ " initial_learning_rate=INIT_LR * LLRDR ** i, \n",
158
+ " decay_steps=LR_SCH_DECAY_STEPS, \n",
159
+ " decay_rate=0.3) for i in range(len(layer_list))]\n",
160
+ " lr_schedule_head = tf.keras.optimizers.schedules.ExponentialDecay(\n",
161
+ " initial_learning_rate=1e-4, \n",
162
+ " decay_steps=LR_SCH_DECAY_STEPS, \n",
163
+ " decay_rate=0.3)\n",
164
+ " \n",
165
+ " optimizers = [tf.keras.optimizers.Adam(learning_rate=lr_sch) for lr_sch in lr_schedules]\n",
166
+ " \n",
167
+ " optimizers_and_layers = [(tf.keras.optimizers.Adam(learning_rate=lr_schedule_head), model.layers[-4:])] +\\\n",
168
+ " list(zip(optimizers, layer_list))\n",
169
+ " \n",
170
+ " optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)\n",
171
+ " \n",
172
+ " model.compile(optimizer=optimizer,\n",
173
+ " loss='mse',\n",
174
+ " metrics=[tf.keras.metrics.RootMeanSquaredError()],\n",
175
+ " )\n",
176
+ " return model"
177
+ ]
178
+ },
179
+ {
180
+ "cell_type": "code",
181
+ "execution_count": null,
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": [
185
+ "tf.keras.backend.clear_session()\n",
186
+ "model = get_model()\n",
187
+ "model.load_weights('./best_model_fold2.h5')"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": null,
193
+ "metadata": {},
194
+ "outputs": [],
195
+ "source": []
196
+ },
197
+ {
198
+ "cell_type": "code",
199
+ "execution_count": null,
200
+ "metadata": {},
201
+ "outputs": [],
202
+ "source": [
203
+ "# map the integer labels to their original string representation\n",
204
+ "label_mapping = {\n",
205
+ " 0: 'Greeting',\n",
206
+ " 1: 'Curiosity',\n",
207
+ " 2: 'Interest',\n",
208
+ " 3: 'Obscene',\n",
209
+ " 4: 'Annoyed',\n",
210
+ " 5: 'Openness',\n",
211
+ " 6: 'Anxious',\n",
212
+ " 7: 'Acceptance',\n",
213
+ " 8: 'Uninterested',\n",
214
+ " 9: 'Informative',\n",
215
+ " 10: 'Accusatory',\n",
216
+ " 11: 'Denial',\n",
217
+ " 12: 'Confused',\n",
218
+ " 13: 'Disapproval',\n",
219
+ " 14: 'Remorse'\n",
220
+ "}\n",
221
+ "\n",
222
+ "#label_strings = [label_mapping[label] for label in labels]\n",
223
+ "\n",
224
+ "#print(label_strings)"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": null,
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": [
233
+ "def inference(texts):\n",
234
+ " prediction = model.predict(deberta_encode([texts]))\n",
235
+ " labels = np.argmax(prediction, axis=1)\n",
236
+ " label_strings = [label_mapping[label] for label in labels]\n",
237
+ " return label_strings[0]"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "markdown",
242
+ "metadata": {},
243
+ "source": [
244
+ "# GPT"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": null,
250
+ "metadata": {},
251
+ "outputs": [],
252
+ "source": [
253
+ "import openai\n",
254
+ "import os\n",
255
+ "import pandas as pd\n",
256
+ "import gradio as gr"
257
+ ]
258
+ },
259
+ {
260
+ "cell_type": "code",
261
+ "execution_count": null,
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": [
265
+ "openai.organization = os.environ['org_id']\n",
266
+ "openai.api_key = os.environ['openai_api']\n",
267
+ "model_version = \"gpt-3.5-turbo\"\n",
268
+ "model_token_limit = 10\n",
269
+ "model_temperature = 0.1\n"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": null,
275
+ "metadata": {},
276
+ "outputs": [],
277
+ "source": [
278
+ "def generatePrompt () :\n",
279
+ " labels = [\"Openness\", \n",
280
+ " \"Anxious\",\n",
281
+ " \"Confused\",\n",
282
+ " \"Disapproval\",\n",
283
+ " \"Remorse\",\n",
284
+ " \"Uninterested\",\n",
285
+ " \"Accusatory\",\n",
286
+ " \"Annoyed\",\n",
287
+ " \"Interest\",\n",
288
+ " \"Curiosity\",\n",
289
+ " \"Acceptance\",\n",
290
+ " \"Obscene\",\n",
291
+ " \"Denial\",\n",
292
+ " \"Informative\",\n",
293
+ " \"Greeting\"]\n",
294
+ "\n",
295
+ " formatted_labels = ', '.join(labels[:-1]) + ', or ' + labels[-1] + '.'\n",
296
+ "\n",
297
+ " label_set = [\"Openness\", \"Anxious\", \"Confused\", \"Disapproval\", \"Remorse\", \"Accusatory\",\n",
298
+ " \"Denial\", \"Obscene\", \"Uninterested\", \"Annoyed\", \"Informative\", \"Greeting\",\n",
299
+ " \"Interest\", \"Curiosity\", \"Acceptance\"]\n",
300
+ "\n",
301
+ " formatted_labels = ', '.join(label_set[:-1]) + ', or ' + label_set[-1] + '.\\n'\n",
302
+ "\n",
303
+ " # The basic task to assign GPT (in natural language)\n",
304
+ " base_task = \"Classify the following text messages into one of the following categories using one word: \" + formatted_labels\n",
305
+ " base_task += \"Provide only a one word response. Use only the labels provided.\\n\"\n",
306
+ "\n",
307
+ " return base_task"
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": null,
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": [
316
+ "def predict(message):\n",
317
+ " \n",
318
+ " prompt = [{\"role\": \"user\", \"content\": generatePrompt () + \"Text: \"+ message}]\n",
319
+ " \n",
320
+ " response = openai.ChatCompletion.create(\n",
321
+ " model=model_version,\n",
322
+ " temperature=model_temperature,\n",
323
+ " max_tokens=model_token_limit,\n",
324
+ " messages=prompt\n",
325
+ " )\n",
326
+ " \n",
327
+ " return response[\"choices\"][0][\"message\"][\"content\"]"
328
+ ]
329
+ },
330
+ {
331
+ "cell_type": "markdown",
332
+ "metadata": {},
333
+ "source": [
334
+ "# Update"
335
+ ]
336
+ },
337
+ {
338
+ "cell_type": "code",
339
+ "execution_count": null,
340
+ "metadata": {},
341
+ "outputs": [],
342
+ "source": [
343
+ "model_version = \"gpt-3.5-turbo\"\n",
344
+ "model_token_limit = 2000\n",
345
+ "model_temperature = 0.1"
346
+ ]
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "execution_count": null,
351
+ "metadata": {},
352
+ "outputs": [],
353
+ "source": [
354
+ "def revision(message):\n",
355
+ " base_prompt = \"Here is a conversation between a Caller and a Volunteer. The Volunteer is trying to be as non-accusatory as possible but also wants to get as much information about the caller as possible. What should the volunteer say next in this exchange? Proved 3 possible responses.\"\n",
356
+ "\n",
357
+ " prompt = [{\"role\": \"user\", \"content\": base_prompt + message}]\n",
358
+ " \n",
359
+ " response = openai.ChatCompletion.create(\n",
360
+ " model=model_version,\n",
361
+ " temperature=model_temperature,\n",
362
+ " max_tokens=model_token_limit,\n",
363
+ " messages=prompt\n",
364
+ " )\n",
365
+ "\n",
366
+ " return response[\"choices\"][0][\"message\"][\"content\"]"
367
+ ]
368
+ },
369
+ {
370
+ "cell_type": "code",
371
+ "execution_count": null,
372
+ "metadata": {},
373
+ "outputs": [],
374
+ "source": [
375
+ "import gradio as gr\n",
376
+ "\n",
377
+ "def combine(a):\n",
378
+ " return a + \"hello\"\n",
379
+ "\n",
380
+ "\n",
381
+ "\n",
382
+ "\n",
383
+ "with gr.Blocks() as demo:\n",
384
+ " gr.Markdown(\"## DeBERTa Sentiment Analysis\")\n",
385
+ " gr.Markdown(\"This is a custom DeBERTa model architecture for sentiment analysis with 15 labels: Openness, Anxiety, Confusion, Disapproval, Remorse, Accusation, Denial, Obscenity, Disinterest, Annoyance, Information, Greeting, Interest, Curiosity, or Acceptance.<br />Please enter your sentence(s) in the input box below and click the Submit button. The model will then process the input and provide the sentiment in one of the labels.<br/>The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works.\")\n",
386
+ "\n",
387
+ " txt = gr.Textbox(label=\"Input\", lines=2)\n",
388
+ " txt_1 = gr.Textbox(value=\"\", label=\"Output\")\n",
389
+ " btn = gr.Button(value=\"Submit\")\n",
390
+ " btn.click(inference, inputs=txt, outputs= txt_1)\n",
391
+ "\n",
392
+ " demoExample = [\n",
393
+ " \"Hello, how are you?\",\n",
394
+ " \"I am so happy to be here!\",\n",
395
+ " \"i don't have time for u\"\n",
396
+ " ]\n",
397
+ "\n",
398
+ " gr.Markdown(\"## Text Examples\")\n",
399
+ " gr.Examples(\n",
400
+ " demoExample,\n",
401
+ " txt,\n",
402
+ " txt_1,\n",
403
+ " inference\n",
404
+ " )\n",
405
+ "\n",
406
+ "with gr.Blocks() as gptdemo:\n",
407
+ "\n",
408
+ " gr.Markdown(\"## GPT Sentiment Analysis\")\n",
409
+ " gr.Markdown(\"This a custom GPT model for sentiment analysis with 15 labels: Openness, Anxiety, Confusion, Disapproval, Remorse, Accusation, Denial, Obscenity, Disinterest, Annoyance, Information, Greeting, Interest, Curiosity, or Acceptance.<br />Please enter your sentence(s) in the input box below and click the Submit button. The model will then process the input and provide the sentiment in one of the labels.<br />The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works.Please note that the input may be collected by service providers.\")\n",
410
+ " txt = gr.Textbox(label=\"Input\", lines=2)\n",
411
+ " txt_1 = gr.Textbox(value=\"\", label=\"Output\")\n",
412
+ " btn = gr.Button(value=\"Submit\")\n",
413
+ " btn.click(predict, inputs=txt, outputs= txt_1)\n",
414
+ "\n",
415
+ " gptExample = [\n",
416
+ " \"Hello, how are you?\",\n",
417
+ " \"Are you busy at the moment?\",\n",
418
+ " \"I'm doing real good\"\n",
419
+ " ]\n",
420
+ "\n",
421
+ " gr.Markdown(\"## Text Examples\")\n",
422
+ " gr.Examples(\n",
423
+ " gptExample,\n",
424
+ " txt,\n",
425
+ " txt_1,\n",
426
+ " predict\n",
427
+ " )\n",
428
+ "\n",
429
+ "\n",
430
+ "with gr.Blocks() as revisiondemo:\n",
431
+ " gr.Markdown(\"## Conversation Revision\")\n",
432
+ " gr.Markdown(\"This is a custom GPT model designed to generate possible response texts based on previous contexts. You can input a conversation between a caller and a volunteer, and the model will provide three possible responses based on the input. <br />The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works. Please note that the input may be collected by service providers.\")\n",
433
+ " txt = gr.Textbox(label=\"Input\", lines=2)\n",
434
+ " txt_1 = gr.Textbox(value=\"\", label=\"Output\",lines=4)\n",
435
+ " btn = gr.Button(value=\"Submit\")\n",
436
+ " btn.click(revision, inputs=txt, outputs= txt_1)\n",
437
+ "\n",
438
+ " revisionExample = [\"Caller: sup\\nVolunteer: Hey, how's it going?\\nCaller: not very well, actually\\nVolunteer: What's the matter?\\nCaller: it's my wife, don't worry about it\"]\n",
439
+ "\n",
440
+ " with gr.Column():\n",
441
+ " gr.Markdown(\"## Text Examples\")\n",
442
+ " gr.Examples(\n",
443
+ " revisionExample,\n",
444
+ " [txt],\n",
445
+ " txt_1,\n",
446
+ " revision\n",
447
+ " )\n",
448
+ "\n",
449
+ "\n",
450
+ "\n",
451
+ "\n",
452
+ "gr.TabbedInterface([demo, gptdemo,revisiondemo], [\"Model\", \"GPT\",\"Text Revision\"]\n",
453
+ ").launch(inline=False)"
454
+ ]
455
+ }
456
+ ],
457
+ "metadata": {
458
+ "kernelspec": {
459
+ "display_name": "Python 3",
460
+ "language": "python",
461
+ "name": "python3"
462
+ },
463
+ "language_info": {
464
+ "codemirror_mode": {
465
+ "name": "ipython",
466
+ "version": 3
467
+ },
468
+ "file_extension": ".py",
469
+ "mimetype": "text/x-python",
470
+ "name": "python",
471
+ "nbconvert_exporter": "python",
472
+ "pygments_lexer": "ipython3",
473
+ "version": "3.10.9"
474
+ },
475
+ "vscode": {
476
+ "interpreter": {
477
+ "hash": "76d9096663e4677afe736ff46b3dcdaff586dfdb471519f50b872333a086db78"
478
+ }
479
+ }
480
+ },
481
+ "nbformat": 4,
482
+ "nbformat_minor": 2
483
+ }
app_spring2023.py ADDED
@@ -0,0 +1,396 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[ ]:
5
+
6
+
7
+ import numpy as np
8
+ import tensorflow as tf
9
+ import tensorflow_addons as tfa
10
+ from tensorflow.keras import layers
11
+ import transformers
12
+ import sentencepiece as spm
13
+ # show the versions of the imported packages
14
+ print("Tensorflow version: ", tf.__version__)
15
+ print("Tensorflow Addons version: ", tfa.__version__)
16
+ print("Transformers version: ", transformers.__version__)
17
+ print("Sentencepiece version: ", spm.__version__)
18
+ print("Numpy version: ", np.__version__)
19
+
20
+
21
+ # In[ ]:
22
+
23
+
24
+ class MeanPool(tf.keras.layers.Layer):
25
+ def call(self, inputs, mask=None):
26
+ broadcast_mask = tf.expand_dims(tf.cast(mask, "float32"), -1)
27
+ embedding_sum = tf.reduce_sum(inputs * broadcast_mask, axis=1)
28
+ mask_sum = tf.reduce_sum(broadcast_mask, axis=1)
29
+ mask_sum = tf.math.maximum(mask_sum, tf.constant([1e-9]))
30
+ return embedding_sum / mask_sum
31
+ class WeightsSumOne(tf.keras.constraints.Constraint):
32
+ def __call__(self, w):
33
+ return tf.nn.softmax(w, axis=0)
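A toy check of MeanPool, assuming the imports above: only unmasked positions contribute to the mean, so padded tokens are ignored.

# Shapes: inputs (batch=1, seq=3, dim=2); the third position is padding.
x = tf.constant([[[1.0, 2.0], [3.0, 4.0], [100.0, 100.0]]])
m = tf.constant([[1, 1, 0]])
print(MeanPool()(x, mask=m))  # -> [[2. 3.]], the padded token is excluded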
34
+
35
+
36
+ # In[ ]:
37
+
38
+
39
+ tokenizer = transformers.AutoTokenizer.from_pretrained("microsoft/deberta-v3-large"
40
+ )
41
+ tokenizer.save_pretrained('./tokenizer/')
42
+
43
+ cfg = transformers.AutoConfig.from_pretrained("microsoft/deberta-v3-large", output_hidden_states=True)
44
+ cfg.hidden_dropout_prob = 0
45
+ cfg.attention_probs_dropout_prob = 0
46
+ cfg.save_pretrained('./tokenizer/')
47
+
48
+
49
+ # In[ ]:
50
+
51
+
52
+ def deberta_encode(texts, tokenizer=tokenizer):
53
+ input_ids = []
54
+ attention_mask = []
55
+
56
+ for text in texts:
57
+ token = tokenizer(text,
58
+ add_special_tokens=True,
59
+ max_length=512,
60
+ return_attention_mask=True,
61
+ return_tensors="np",
62
+ truncation=True,
63
+ padding='max_length')
64
+ input_ids.append(token['input_ids'][0])
65
+ attention_mask.append(token['attention_mask'][0])
66
+
67
+ return np.array(input_ids, dtype="int32"), np.array(attention_mask, dtype="int32")
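A quick shape check, assuming the tokenizer above loaded successfully: every input comes back padded or truncated to MAX_LENGTH.

ids, mask = deberta_encode(["Hello, how are you?"])
print(ids.shape, mask.shape)  # -> (1, 512) (1, 512)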
68
+
69
+
70
+ # In[ ]:
71
+
72
+
73
+ MAX_LENGTH=512
74
+ BATCH_SIZE=8
75
+
76
+
77
+ # In[ ]:
78
+
79
+
80
+ def get_model():
81
+ input_ids = tf.keras.layers.Input(
82
+ shape=(MAX_LENGTH,), dtype=tf.int32, name="input_ids"
83
+ )
84
+
85
+ attention_masks = tf.keras.layers.Input(
86
+ shape=(MAX_LENGTH,), dtype=tf.int32, name="attention_masks"
87
+ )
88
+
89
+ deberta_model = transformers.TFAutoModel.from_pretrained("microsoft/deberta-v3-large", config=cfg)
90
+
91
+
92
+ REINIT_LAYERS = 1
93
+ normal_initializer = tf.keras.initializers.GlorotUniform()
94
+ zeros_initializer = tf.keras.initializers.Zeros()
95
+ ones_initializer = tf.keras.initializers.Ones()
96
+
97
+ # print(f'\nRe-initializing encoder block:')
98
+ for encoder_block in deberta_model.deberta.encoder.layer[-REINIT_LAYERS:]:
99
+ # print(f'{encoder_block}')
100
+ for layer in encoder_block.submodules:
101
+ if isinstance(layer, tf.keras.layers.Dense):
102
+ layer.kernel.assign(normal_initializer(shape=layer.kernel.shape, dtype=layer.kernel.dtype))
103
+ if layer.bias is not None:
104
+ layer.bias.assign(zeros_initializer(shape=layer.bias.shape, dtype=layer.bias.dtype))
105
+
106
+ elif isinstance(layer, tf.keras.layers.LayerNormalization):
107
+ layer.beta.assign(zeros_initializer(shape=layer.beta.shape, dtype=layer.beta.dtype))
108
+ layer.gamma.assign(ones_initializer(shape=layer.gamma.shape, dtype=layer.gamma.dtype))
109
+
110
+ deberta_output = deberta_model.deberta(
111
+ input_ids, attention_mask=attention_masks
112
+ )
113
+ hidden_states = deberta_output.hidden_states
114
+
115
+ #WeightedLayerPool + MeanPool of the last 4 hidden states
116
+ stack_meanpool = tf.stack(
117
+ [MeanPool()(hidden_s, mask=attention_masks) for hidden_s in hidden_states[-4:]],
118
+ axis=2)
119
+
120
+ weighted_layer_pool = layers.Dense(1,
121
+ use_bias=False,
122
+ kernel_constraint=WeightsSumOne())(stack_meanpool)
123
+
124
+ weighted_layer_pool = tf.squeeze(weighted_layer_pool, axis=-1)
125
+ output=layers.Dense(15,activation='linear')(weighted_layer_pool)
126
+ #x = layers.Dense(6, activation='linear')(x)
127
+
128
+ #output = layers.Rescaling(scale=4.0, offset=1.0)(x)
129
+ model = tf.keras.Model(inputs=[input_ids, attention_masks], outputs=output)
130
+
131
+ #Compile model with Layer-wise Learning Rate Decay
132
+ layer_list = [deberta_model.deberta.embeddings] + list(deberta_model.deberta.encoder.layer)
133
+ layer_list.reverse()
134
+
135
+ INIT_LR = 1e-5
136
+ LLRDR = 0.9
137
+ LR_SCH_DECAY_STEPS = 1600
138
+
139
+ lr_schedules = [tf.keras.optimizers.schedules.ExponentialDecay(
140
+ initial_learning_rate=INIT_LR * LLRDR ** i,
141
+ decay_steps=LR_SCH_DECAY_STEPS,
142
+ decay_rate=0.3) for i in range(len(layer_list))]
143
+ lr_schedule_head = tf.keras.optimizers.schedules.ExponentialDecay(
144
+ initial_learning_rate=1e-4,
145
+ decay_steps=LR_SCH_DECAY_STEPS,
146
+ decay_rate=0.3)
147
+
148
+ optimizers = [tf.keras.optimizers.Adam(learning_rate=lr_sch) for lr_sch in lr_schedules]
149
+
150
+ optimizers_and_layers = [(tf.keras.optimizers.Adam(learning_rate=lr_schedule_head), model.layers[-4:])] +\
151
+ list(zip(optimizers, layer_list))
152
+
153
+ optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
154
+
155
+ model.compile(optimizer=optimizer,
156
+ loss='mse',
157
+ metrics=[tf.keras.metrics.RootMeanSquaredError()],
158
+ )
159
+ return model
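To make the layer-wise learning-rate decay above concrete: layer i, counted from the top of the encoder, starts at INIT_LR * LLRDR ** i, so deeper layers are updated more gently.

# Illustration of the per-layer initial learning rates used in get_model()
INIT_LR, LLRDR = 1e-5, 0.9
print([round(INIT_LR * LLRDR ** i, 8) for i in range(4)])
# -> [1e-05, 9e-06, 8.1e-06, 7.29e-06]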
160
+
161
+
162
+ # In[ ]:
163
+
164
+
165
+ tf.keras.backend.clear_session()
166
+ model = get_model()
167
+ model.load_weights('./best_model_fold2.h5')
168
+
169
+
170
+ # In[ ]:
171
+
172
+
173
+
174
+
175
+
176
+ # In[ ]:
177
+
178
+
179
+ # map the integer labels to their original string representation
180
+ label_mapping = {
181
+ 0: 'Greeting',
182
+ 1: 'Curiosity',
183
+ 2: 'Interest',
184
+ 3: 'Obscene',
185
+ 4: 'Annoyed',
186
+ 5: 'Openness',
187
+ 6: 'Anxious',
188
+ 7: 'Acceptance',
189
+ 8: 'Uninterested',
190
+ 9: 'Informative',
191
+ 10: 'Accusatory',
192
+ 11: 'Denial',
193
+ 12: 'Confused',
194
+ 13: 'Disapproval',
195
+ 14: 'Remorse'
196
+ }
197
+
198
+ #label_strings = [label_mapping[label] for label in labels]
199
+
200
+ #print(label_strings)
201
+
202
+
203
+ # In[ ]:
204
+
205
+
206
+ def inference(texts):
207
+ prediction = model.predict(deberta_encode([texts]))
208
+ labels = np.argmax(prediction, axis=1)
209
+ label_strings = [label_mapping[label] for label in labels]
210
+ return label_strings[0]
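Example call, assuming the weights loaded above (the returned label depends on the trained model, so the output shown is only indicative):

# print(inference("Hello, how are you?"))  # e.g. 'Greeting'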
211
+
212
+
213
+ # # GPT
214
+
215
+ # In[ ]:
216
+
217
+
218
+ import openai
219
+ import os
220
+ import pandas as pd
221
+ import gradio as gr
222
+
223
+
224
+ # In[ ]:
225
+
226
+
227
+ openai.organization = os.environ['org_id']
228
+ openai.api_key = os.environ['openai_api']
229
+ model_version = "gpt-3.5-turbo"
230
+ model_token_limit = 10
231
+ model_temperature = 0.1
232
+
233
+
234
+ # In[ ]:
235
+
236
+
237
+ def generatePrompt () :
238
+ labels = ["Openness",
239
+ "Anxious",
240
+ "Confused",
241
+ "Disapproval",
242
+ "Remorse",
243
+ "Uninterested",
244
+ "Accusatory",
245
+ "Annoyed",
246
+ "Interest",
247
+ "Curiosity",
248
+ "Acceptance",
249
+ "Obscene",
250
+ "Denial",
251
+ "Informative",
252
+ "Greeting"]
253
+
254
+ formatted_labels = ', '.join(labels[:-1]) + ', or ' + labels[-1] + '.'
255
+
256
+ label_set = ["Openness", "Anxious", "Confused", "Disapproval", "Remorse", "Accusatory",
257
+ "Denial", "Obscene", "Uninterested", "Annoyed", "Informative", "Greeting",
258
+ "Interest", "Curiosity", "Acceptance"]
259
+
260
+ formatted_labels = ', '.join(label_set[:-1]) + ', or ' + label_set[-1] + '.\n'
261
+
262
+ # The basic task to assign GPT (in natural language)
263
+ base_task = "Classify the following text messages into one of the following categories using one word: " + formatted_labels
264
+ base_task += "Provide only a one word response. Use only the labels provided.\n"
265
+
266
+ return base_task
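The assembled classification instruction can be inspected without any API call:

# print(generatePrompt())
# -> Classify the following text messages into one of the following categories using one word:
#    Openness, Anxious, ..., or Acceptance.
#    Provide only a one word response. Use only the labels provided.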
267
+
268
+
269
+ # In[ ]:
270
+
271
+
272
+ def predict(message):
273
+
274
+ prompt = [{"role": "user", "content": generatePrompt () + "Text: "+ message}]
275
+
276
+ response = openai.ChatCompletion.create(
277
+ model=model_version,
278
+ temperature=model_temperature,
279
+ max_tokens=model_token_limit,
280
+ messages=prompt
281
+ )
282
+
283
+ return response["choices"][0]["message"]["content"]
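Worth noting: openai.ChatCompletion is the pre-1.0 interface of the openai package; the rewritten app.py above uses the 1.x client instead. A sketch of the equivalent call with openai>=1.0:

# from openai import OpenAI
# client = OpenAI()
# response = client.chat.completions.create(
#     model=model_version,
#     temperature=model_temperature,
#     max_tokens=model_token_limit,
#     messages=prompt,
# )
# response.choices[0].message.content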
284
+
285
+
286
+ # # Update
287
+
288
+ # In[ ]:
289
+
290
+
291
+ model_version = "gpt-3.5-turbo"
292
+ model_token_limit = 2000
293
+ model_temperature = 0.1
294
+
295
+
296
+ # In[ ]:
297
+
298
+
299
+ def revision(message):
300
+ base_prompt = "Here is a conversation between a Caller and a Volunteer. The Volunteer is trying to be as non-accusatory as possible but also wants to get as much information about the caller as possible. What should the volunteer say next in this exchange? Proved 3 possible responses."
301
+
302
+ prompt = [{"role": "user", "content": base_prompt + message}]
303
+
304
+ response = openai.ChatCompletion.create(
305
+ model=model_version,
306
+ temperature=model_temperature,
307
+ max_tokens=model_token_limit,
308
+ messages=prompt
309
+ )
310
+
311
+ return response["choices"][0]["message"]["content"]
312
+
313
+
314
+ # In[ ]:
315
+
316
+
317
+ import gradio as gr
318
+
319
+ def combine(a):
320
+ return a + "hello"
321
+
322
+
323
+
324
+
325
+ with gr.Blocks() as demo:
326
+ gr.Markdown("## DeBERTa Sentiment Analysis")
327
+ gr.Markdown("This is a custom DeBERTa model architecture for sentiment analysis with 15 labels: Openness, Anxiety, Confusion, Disapproval, Remorse, Accusation, Denial, Obscenity, Disinterest, Annoyance, Information, Greeting, Interest, Curiosity, or Acceptance.<br />Please enter your sentence(s) in the input box below and click the Submit button. The model will then process the input and provide the sentiment in one of the labels.<br/>The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works.")
328
+
329
+ txt = gr.Textbox(label="Input", lines=2)
330
+ txt_1 = gr.Textbox(value="", label="Output")
331
+ btn = gr.Button(value="Submit")
332
+ btn.click(inference, inputs=txt, outputs= txt_1)
333
+
334
+ demoExample = [
335
+ "Hello, how are you?",
336
+ "I am so happy to be here!",
337
+ "i don't have time for u"
338
+ ]
339
+
340
+ gr.Markdown("## Text Examples")
341
+ gr.Examples(
342
+ demoExample,
343
+ txt,
344
+ txt_1,
345
+ inference
346
+ )
347
+
348
+ with gr.Blocks() as gptdemo:
349
+
350
+ gr.Markdown("## GPT Sentiment Analysis")
351
+ gr.Markdown("This a custom GPT model for sentiment analysis with 15 labels: Openness, Anxiety, Confusion, Disapproval, Remorse, Accusation, Denial, Obscenity, Disinterest, Annoyance, Information, Greeting, Interest, Curiosity, or Acceptance.<br />Please enter your sentence(s) in the input box below and click the Submit button. The model will then process the input and provide the sentiment in one of the labels.<br />The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works.Please note that the input may be collected by service providers.")
352
+ txt = gr.Textbox(label="Input", lines=2)
353
+ txt_1 = gr.Textbox(value="", label="Output")
354
+ btn = gr.Button(value="Submit")
355
+ btn.click(predict, inputs=txt, outputs= txt_1)
356
+
357
+ gptExample = [
358
+ "Hello, how are you?",
359
+ "Are you busy at the moment?",
360
+ "I'm doing real good"
361
+ ]
362
+
363
+ gr.Markdown("## Text Examples")
364
+ gr.Examples(
365
+ gptExample,
366
+ txt,
367
+ txt_1,
368
+ predict
369
+ )
370
+
371
+
372
+ with gr.Blocks() as revisiondemo:
373
+ gr.Markdown("## Conversation Revision")
374
+ gr.Markdown("This is a custom GPT model designed to generate possible response texts based on previous contexts. You can input a conversation between a caller and a volunteer, and the model will provide three possible responses based on the input. <br />The Test Example section below provides some input examples. Click on them and submit them to the model to see how it works. Please note that the input may be collected by service providers.")
375
+ txt = gr.Textbox(label="Input", lines=2)
376
+ txt_1 = gr.Textbox(value="", label="Output",lines=4)
377
+ btn = gr.Button(value="Submit")
378
+ btn.click(revision, inputs=txt, outputs= txt_1)
379
+
380
+ revisionExample = ["Caller: sup\nVolunteer: Hey, how's it going?\nCaller: not very well, actually\nVolunteer: What's the matter?\nCaller: it's my wife, don't worry about it"]
381
+
382
+ with gr.Column():
383
+ gr.Markdown("## Text Examples")
384
+ gr.Examples(
385
+ revisionExample,
386
+ [txt],
387
+ txt_1,
388
+ revision
389
+ )
390
+
391
+
392
+
393
+
394
+ gr.TabbedInterface([demo, gptdemo,revisiondemo], ["Model", "GPT","Text Revision"]
395
+ ).launch(inline=False)
396
+