dhruvsaxena11 committed
Commit
2597bd8
1 Parent(s): 584ad1e

Update emotion_recognition_fine_tuning.py

Files changed (1):
  1. emotion_recognition_fine_tuning.py +2 -188
emotion_recognition_fine_tuning.py CHANGED
@@ -7,192 +7,6 @@ Original file is located at
      https://colab.research.google.com/drive/1pZgt5n6943GB5oq_h43LjAYoA4yi-EST
  """
  
- !pip install datasets
- 
- from datasets import load_dataset
- 
- dataset = load_dataset("dair-ai/emotion")
- 
- import pandas as pd
- df_train=pd.DataFrame(dataset["train"])
- df_train.head()
- 
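
# ---- editor's sketch, not part of the commit ----
# datasets.Dataset can convert to pandas directly, which avoids building each
# DataFrame by hand; a minimal assumed variant of the cells above:
from datasets import load_dataset

dataset = load_dataset("dair-ai/emotion")
df_train = dataset["train"].to_pandas()  # equivalent to pd.DataFrame(dataset["train"])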
- # @title label
- 
- from matplotlib import pyplot as plt
- df_train['label'].plot(kind='hist', bins=20, title='label')
- # plt.gca().spines[['top', 'right',]].set_visible(False)
- 
- df_test=pd.DataFrame(dataset["test"])
- df_test.head()
- 
- # @title label
- 
- from matplotlib import pyplot as plt
- df_test['label'].plot(kind='hist', bins=20, title='label')
- plt.gca().spines[['top', 'right',]].set_visible(False)
- 
- dataset.keys()
- 
- df_valid=pd.DataFrame(dataset["validation"])
- df_valid.head()
- 
- df_train.info()
- 
- import numpy as np
- 
- train_label=np.array(df_train.label)
- test_label=np.array(df_test.label)
- valid_label=np.array(df_valid.label)
- 
- # Assuming df_train.text is a Pandas Series
- train_text = df_train.text.tolist()
- test_text = df_test.text.tolist()
- valid_text = df_valid.text.tolist()
- 
- # @title Label Value Distribution
- 
- df_valid['label'].value_counts().plot(kind='bar')
- 
- print(df_train.shape)
- print(df_test.shape)
- print(df_valid.shape)
- 
- labels={0:"sadness",1:"joy",2:"love",3:"anger",4:"fear",5:"surprise"}
- labels2={"sadness":0,"joy":1,"love":2,"anger":3,"fear":4,"surprise":5}
- 
- from tensorflow.keras.utils import to_categorical
- one_hot_train_labels = to_categorical(df_train.label)
- one_hot_valid_labels = to_categorical(df_valid.label)
- one_hot_train_labels
- 
- import tensorflow as tf
- # Reshape the labels to match the model's output shape
- one_hot_train_labels = tf.reshape(one_hot_train_labels, (-1, 2))
- one_hot_valid_labels = tf.reshape(one_hot_valid_labels, (-1, 2))
- one_hot_train_labels
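
# ---- editor's sketch, not part of the commit ----
# The reshape above is a bug: to_categorical on the 6 emotion labels yields
# shape (N, 6), and tf.reshape(..., (-1, 2)) would scramble rows rather than
# match any 6-way output. If one-hot targets were wanted, they could be left
# exactly as produced:
from tensorflow.keras.utils import to_categorical

one_hot_train_labels = to_categorical(df_train.label, num_classes=6)  # (N, 6)
one_hot_valid_labels = to_categorical(df_valid.label, num_classes=6)
# (The script ultimately fits on the integer labels, so the one-hot arrays and
# the reshape are never used downstream.)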
- 
- from transformers import AutoTokenizer
- 
- tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
- tokenized_data_train = tokenizer(train_text, return_tensors="np", padding=True)
- tokenized_data_valid = tokenizer(valid_text, return_tensors="np", padding=True)
- 
- 
- 
- train_label.shape
- 
- # Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
- tokenized_data_train = dict(tokenized_data_train)
- tokenized_data_valid = dict(tokenized_data_valid)
- 
- # labels = np.array(dataset["label"])
- 
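
# ---- editor's sketch, not part of the commit ----
# padding=True alone pads to the longest sequence and never truncates; adding
# truncation keeps inputs inside BERT's 512-token limit. max_length=128 is an
# assumption, not a value from the original script:
tokenized_data_train = dict(tokenizer(train_text, return_tensors="np",
                                      padding=True, truncation=True, max_length=128))
tokenized_data_valid = dict(tokenizer(valid_text, return_tensors="np",
                                      padding=True, truncation=True, max_length=128))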
- from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
- import tensorflow as tf
- 
- # Load pre-trained BERT model
- model = TFAutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased",num_labels=6, ignore_mismatched_sizes=True)
- 
- model.layers[0].trainable=False
- 
- for layer in model.layers:
-     print(layer)
- 
- model.layers
- 
- from transformers import AutoConfig
- 
- # Load pre-trained BERT model
- # model = TFAutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")
- 
- # Load model configuration
- # config = AutoConfig.from_pretrained("SamLowe/roberta-base-go_emotions")
- 
- # Define the number of classes in your downstream task
- num_classes_downstream = 6 # Replace with the actual number of classes in your task
- 
- # Replace the classifier layer with a new one for your task
- # model.config.num_labels = num_classes_downstream
- model.classifier = tf.keras.layers.Dense(num_classes_downstream, activation='softmax')
- 
- # Continue with fine-tuning and model compilation
- 
- model.classifier
- 
- model.summary()
- 
- model.config.id2label=labels
- model.config.label2id=labels2
- 
- model.logits
- 
- # Compile the model with the optimizer and loss function
- model.compile(optimizer="Adam",metrics=["accuracy"])
- 
- # Assuming tokenized_data_train, train_label, tokenized_data_valid, and valid_label are defined
- model.fit(tokenized_data_train, train_label, epochs=30, validation_data=(tokenized_data_valid, valid_label))
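
# ---- editor's sketch, not part of the commit ----
# The compile above names no loss, so Keras falls back to the model's internal
# loss computation. An explicit assumed variant that matches the integer labels
# actually passed to fit() (learning rate and epoch count are assumptions):
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
model.fit(tokenized_data_train, train_label, epochs=3,
          validation_data=(tokenized_data_valid, valid_label))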
- 
- val=tokenizer(["hello boys I am good"],padding=True,return_tensors="np")
- 
- import tensorflow as tf
- 
- # Assuming 'val' is your input data
- predictions = loaded_model.predict(val)
- 
- # Extract logits from predictions
- logits = predictions[0] # Assuming logits are the first element in the tuple
- 
- # Apply softmax using tf.nn.softmax
- probabilities = tf.nn.softmax(logits)
- 
- # Now 'probabilities' contains the softmax probabilities
- print(probabilities.numpy())
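
# ---- editor's sketch, not part of the commit ----
# As ordered in the script, this cell runs before `loaded_model` is ever
# defined, so it would raise NameError; the freshly trained `model` is the
# natural object to query here:
val = dict(tokenizer(["hello boys I am good"], padding=True, return_tensors="np"))
logits = model.predict(val).logits            # the output object exposes .logits
probabilities = tf.nn.softmax(logits, axis=-1)
print(probabilities.numpy())                  # one 6-way distribution per input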
- 
- model.save_pretrained("/content/sample_data/emotion_model_dhruv")
- 
- from google.colab import files
- 
- # Specify the file path
- file_path = '/content/sample_data/emotion_model_dhruv' # Adjust the file path accordingly
- 
- # Download the file
- files.download(file_path)
- 
- import shutil
- 
- # Specify the directory to be copied
- directory_to_copy = '/content/sample_data/emotion_model_dhruv' # Adjust the directory path accordingly
- 
- # Specify the destination directory in Google Drive
- destination_directory = '/content/drive/My Drive/'
- 
- # Copy the directory to Google Drive
- shutil.copytree(directory_to_copy, os.path.join(destination_directory, os.path.basename(directory_to_copy)))
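
# ---- editor's sketch, not part of the commit ----
# The copy cell uses os.path without importing os, and files.download() expects
# a file, not a directory. An assumed runnable variant (Drive already mounted):
import os
import shutil

src = "/content/sample_data/emotion_model_dhruv"
dst = os.path.join("/content/drive/My Drive/", os.path.basename(src))
shutil.copytree(src, dst)  # copies the whole saved-model directory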
- 
- from transformers import TFBertForSequenceClassification
- 
- # Load the fine-tuned model from the saved directory
- loaded_model = TFBertForSequenceClassification.from_pretrained("/content/sample_data/emotion_model_dhruv")
- 
- my=dict(tokenizer(["hello boys I am good"],padding=True,return_tensors="np"))
- my
- 
- # Assuming my['input_ids'], my['token_type_ids'], and my['attention_mask'] are your input tensors
- input_ids = my['input_ids']
- token_type_ids = my['token_type_ids']
- attention_mask = my['attention_mask']
- 
- # Make predictions using the loaded model, setting verbose=0 or verbose=1
- # predictions = loaded_model.predict([input_ids, token_type_ids, attention_mask], verbose=0)
- # or
- predictions = loaded_model.predict([input_ids, token_type_ids, attention_mask], verbose=1)
- 
- my['input_ids'] = np.array(my['input_ids'])
- my['token_type_ids'] = np.array(my['token_type_ids'])
- my['attention_mask'] = np.array(my['attention_mask'])
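
# ---- editor's sketch, not part of the commit ----
# The list form feeds tensors positionally, and BERT's call signature is
# (input_ids, attention_mask, token_type_ids, ...), so the ordering above most
# likely swaps attention_mask and token_type_ids; passing the tokenizer dict
# matches inputs by name instead (and the np.array conversions after the
# predict call come too late to affect it):
my = dict(tokenizer(["hello boys I am good"], padding=True, return_tensors="np"))
predictions = loaded_model.predict(my, verbose=1)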
 
  """Our Application"""
  
@@ -203,7 +17,7 @@ import numpy as np
  import tensorflow as tf # Apply softmax using tf.nn.softmax
  
  # Load the fine-tuned model from the saved directory
- loaded_model = TFBertForSequenceClassification.from_pretrained("/content/drive/MyDrive/emotion_model_dhruv")
+ loaded_model = TFBertForSequenceClassification.from_pretrained("emotion_model_dhruv")
  loaded_tokenizer=AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
  
  def predict_emotion(text):
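
# ---- editor's sketch, not part of the commit ----
# The body of predict_emotion sits outside this hunk, so the following is a
# hypothetical reconstruction from the surrounding names (loaded_model,
# loaded_tokenizer, my_labels), not the author's actual function:
def predict_emotion(text):
    inputs = dict(loaded_tokenizer([text], padding=True, return_tensors="np"))
    logits = loaded_model.predict(inputs).logits
    probs = tf.nn.softmax(logits, axis=-1).numpy()[0]
    # gr.Label accepts a {label: confidence} mapping
    return {label: float(p) for label, p in zip(my_labels, probs)}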
@@ -226,5 +40,5 @@ my_labels=["sadness","joy","love","anger","fear","surprise"]
  import gradio as gr
  inputs = gr.Textbox(lines=1, label="Input Text")
  outputs = gr.Label(num_top_classes=6)
- interface = gr.Interface(fn=predict_emotion, inputs=inputs, outputs=outputs)
+ interface = gr.Interface(fn=predict_emotion, inputs=inputs, outputs=outputs,title="Emotion Recognition in Text - NLP")
  interface.launch()
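
# ---- editor's sketch, not part of the commit ----
# In Colab, launching with share=True additionally prints a temporary public
# URL for the demo; everything else mirrors the committed code:
interface = gr.Interface(fn=predict_emotion, inputs=inputs, outputs=outputs,
                         title="Emotion Recognition in Text - NLP")
interface.launch(share=True)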
 