dhruvsaxena11 committed
Commit
584ad1e
1 Parent(s): 7cf0b4d

Upload emotion_recognition_fine_tuning.py

Files changed (1): emotion_recognition_fine_tuning.py (+230, −0)
emotion_recognition_fine_tuning.py ADDED
# -*- coding: utf-8 -*-
"""Emotion Recognition_Fine Tuning

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1pZgt5n6943GB5oq_h43LjAYoA4yi-EST
"""
!pip install datasets

from datasets import load_dataset

dataset = load_dataset("dair-ai/emotion")

import pandas as pd

df_train = pd.DataFrame(dataset["train"])
df_train.head()
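# Added for orientation (not in the original): the DatasetDict summary lists
# the train/validation/test splits and their row counts.
print(dataset)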
# @title label

from matplotlib import pyplot as plt

df_train['label'].plot(kind='hist', bins=20, title='label')
# plt.gca().spines[['top', 'right']].set_visible(False)

df_test = pd.DataFrame(dataset["test"])
df_test.head()

# @title label

from matplotlib import pyplot as plt

df_test['label'].plot(kind='hist', bins=20, title='label')
plt.gca().spines[['top', 'right']].set_visible(False)
dataset.keys()

df_valid = pd.DataFrame(dataset["validation"])
df_valid.head()

df_train.info()

import numpy as np

train_label = np.array(df_train.label)
test_label = np.array(df_test.label)
valid_label = np.array(df_valid.label)

# df_train.text is a pandas Series; convert each split's text column to a list
train_text = df_train.text.tolist()
test_text = df_test.text.tolist()
valid_text = df_valid.text.tolist()
# @title Label Value Distribution

df_valid['label'].value_counts().plot(kind='bar')

print(df_train.shape)
print(df_test.shape)
print(df_valid.shape)

labels = {0: "sadness", 1: "joy", 2: "love", 3: "anger", 4: "fear", 5: "surprise"}
labels2 = {"sadness": 0, "joy": 1, "love": 2, "anger": 3, "fear": 4, "surprise": 5}
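# Equivalent construction from the dataset's own metadata (added sketch, not
# in the original); the ClassLabel feature carries the same names in the same
# order:
# label_names = dataset["train"].features["label"].names
# labels = dict(enumerate(label_names))
# labels2 = {name: i for i, name in enumerate(label_names)}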
from tensorflow.keras.utils import to_categorical

one_hot_train_labels = to_categorical(df_train.label)
one_hot_valid_labels = to_categorical(df_valid.label)
one_hot_train_labels

import tensorflow as tf

# to_categorical already returns shape (num_samples, 6) for the six emotion
# classes, so no reshape is needed; a reshape to (-1, 2) would scramble the
# one-hot rows. These one-hot labels are not consumed below: model.fit() is
# given the integer label arrays directly.
one_hot_train_labels
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
tokenized_data_train = tokenizer(train_text, return_tensors="np", padding=True)
tokenized_data_valid = tokenizer(valid_text, return_tensors="np", padding=True)

train_label.shape

# The tokenizer returns a BatchEncoding; convert it to a plain dict for Keras
tokenized_data_train = dict(tokenized_data_train)
tokenized_data_valid = dict(tokenized_data_valid)
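# Added note (assumption, not in the original): padding=True pads every
# example to the longest sequence in the split; for long inputs you would
# usually also cap the length, e.g.:
# tokenized_data_train = tokenizer(train_text, return_tensors="np",
#                                  padding=True, truncation=True, max_length=128)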
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
import tensorflow as tf

# Load pre-trained BERT with a fresh 6-way classification head
model = TFAutoModelForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased", num_labels=6, ignore_mismatched_sizes=True
)

# Freeze the BERT encoder so only the classification head is trained
model.layers[0].trainable = False

for layer in model.layers:
    print(layer)

model.layers
from transformers import AutoConfig

# Alternative starting point, left commented out: a model already tuned for
# emotion classification
# model = TFAutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")
# config = AutoConfig.from_pretrained("SamLowe/roberta-base-go_emotions")

# Number of classes in the downstream task
num_classes_downstream = 6

# Replace the classifier layer with a new head for this task. Since the model
# was already created with num_labels=6 this swap is redundant, and because
# the new head applies a softmax activation, the model's "logits" are already
# probabilities from here on.
# model.config.num_labels = num_classes_downstream
model.classifier = tf.keras.layers.Dense(num_classes_downstream, activation='softmax')

model.classifier

model.summary()
model.config.id2label = labels
model.config.label2id = labels2

# Logits are an attribute of a forward pass's output (model(inputs).logits),
# not of the model object itself, so there is no bare `model.logits` to
# inspect here.

# Compile with the optimizer; with no loss argument, the transformers Keras
# integration falls back to the model's built-in loss for this task
model.compile(optimizer="Adam", metrics=["accuracy"])
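# Sketch (assumption, not in the original): BERT fine-tuning is usually more
# stable with a small explicit learning rate than with the Adam default of 1e-3:
# from tensorflow.keras.optimizers import Adam
# model.compile(optimizer=Adam(learning_rate=2e-5), metrics=["accuracy"])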
# Train on the integer labels; the built-in loss handles sparse class indices
model.fit(tokenized_data_train, train_label, epochs=30,
          validation_data=(tokenized_data_valid, valid_label))
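# Optional sketch (assumption, not in the original): 30 epochs is a long run
# for a frozen encoder, and standard Keras early stopping can cut it short:
# early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=2,
#                                               restore_best_weights=True)
# model.fit(tokenized_data_train, train_label, epochs=30,
#           validation_data=(tokenized_data_valid, valid_label),
#           callbacks=[early_stop])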
val = tokenizer(["hello boys I am good"], padding=True, return_tensors="np")
import tensorflow as tf

# Predict with the fine-tuned in-memory model; convert the BatchEncoding to a
# plain dict so Keras matches each array to the right input by name
predictions = model.predict(dict(val))

# Extract the logits array (the first element of the prediction output)
logits = predictions[0]

# Convert to class probabilities with softmax
probabilities = tf.nn.softmax(logits)
print(probabilities.numpy())

model.save_pretrained("/content/sample_data/emotion_model_dhruv")
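# Added sketch (not in the original): saving the tokenizer alongside the model
# makes the directory self-contained for later reloading:
# tokenizer.save_pretrained("/content/sample_data/emotion_model_dhruv")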
from google.colab import files
import shutil

# files.download handles single files, not directories, so zip the saved
# model directory first
archive_path = shutil.make_archive('/content/sample_data/emotion_model_dhruv',
                                   'zip', '/content/sample_data/emotion_model_dhruv')

# Download the archive
files.download(archive_path)
import os
import shutil

# The copy below assumes Google Drive is already mounted, e.g.:
# from google.colab import drive; drive.mount('/content/drive')

# Directory to be copied
directory_to_copy = '/content/sample_data/emotion_model_dhruv'

# Destination directory in Google Drive
destination_directory = '/content/drive/My Drive/'

# Copy the saved model directory into Google Drive
shutil.copytree(directory_to_copy,
                os.path.join(destination_directory, os.path.basename(directory_to_copy)))
from transformers import TFBertForSequenceClassification

# Load the fine-tuned model back from the saved directory
loaded_model = TFBertForSequenceClassification.from_pretrained("/content/sample_data/emotion_model_dhruv")

my = dict(tokenizer(["hello boys I am good"], padding=True, return_tensors="np"))
my
# my['input_ids'], my['token_type_ids'], and my['attention_mask'] hold the
# input arrays. Passing the dict keeps each array matched to the right model
# input by name; a positional list risks swapping attention_mask and
# token_type_ids.
input_ids = my['input_ids']
token_type_ids = my['token_type_ids']
attention_mask = my['attention_mask']

# Make predictions with the loaded model (verbose=0 silences the progress bar)
predictions = loaded_model.predict(my, verbose=1)

# With return_tensors="np" these values are already NumPy arrays, so the
# conversions below are harmless no-ops
my['input_ids'] = np.array(my['input_ids'])
my['token_type_ids'] = np.array(my['token_type_ids'])
my['attention_mask'] = np.array(my['attention_mask'])
"""Our Application"""

from transformers import TFBertForSequenceClassification, AutoTokenizer
import numpy as np
import tensorflow as tf

# Load the fine-tuned model from Google Drive, plus the matching tokenizer
loaded_model = TFBertForSequenceClassification.from_pretrained("/content/drive/MyDrive/emotion_model_dhruv")
loaded_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

def predict_emotion(text):
    # Tokenize, run the model, and map each label to its softmax probability
    text_token = loaded_tokenizer(text, padding=True, return_tensors="np")
    outputs = loaded_model(text_token)
    probabilities = tf.nn.softmax(outputs.logits)
    final = probabilities.numpy().tolist()
    labels = ["sadness", "joy", "love", "anger", "fear", "surprise"]
    result_dict = {k: v for k, v in zip(labels, final[0])}
    return result_dict

predict_emotion("dhruv")
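# Hypothetical usage example (not in the original), with a more emotion-laden
# input:
# predict_emotion("i am feeling quite excited about this")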
my_labels = ["sadness", "joy", "love", "anger", "fear", "surprise"]

!pip install gradio

import gradio as gr

inputs = gr.Textbox(lines=1, label="Input Text")
outputs = gr.Label(num_top_classes=6)
interface = gr.Interface(fn=predict_emotion, inputs=inputs, outputs=outputs)
interface.launch()
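# Note (assumption, not in the original): in Colab a public share link is
# often handy:
# interface.launch(share=True)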