Spaces: Runtime error
dhruvsaxena11 committed · Commit 2597bd8 · 1 parent: 584ad1e
Update emotion_recognition_fine_tuning.py
emotion_recognition_fine_tuning.py
CHANGED
@@ -7,192 +7,6 @@ Original file is located at
     https://colab.research.google.com/drive/1pZgt5n6943GB5oq_h43LjAYoA4yi-EST
 """

-!pip install datasets
-
-from datasets import load_dataset
-
-dataset = load_dataset("dair-ai/emotion")
-
-import pandas as pd
-df_train=pd.DataFrame(dataset["train"])
-df_train.head()
-
-# @title label
-
-from matplotlib import pyplot as plt
-df_train['label'].plot(kind='hist', bins=20, title='label')
-# plt.gca().spines[['top', 'right',]].set_visible(False)
-
-df_test=pd.DataFrame(dataset["test"])
-df_test.head()
-
-# @title label
-
-from matplotlib import pyplot as plt
-df_test['label'].plot(kind='hist', bins=20, title='label')
-plt.gca().spines[['top', 'right',]].set_visible(False)
-
-dataset.keys()
-
-df_valid=pd.DataFrame(dataset["validation"])
-df_valid.head()
-
-df_train.info()
-
-import numpy as np
-
-train_label=np.array(df_train.label)
-test_label=np.array(df_test.label)
-valid_label=np.array(df_valid.label)
-
-# Assuming df_train.text is a Pandas Series
-train_text = df_train.text.tolist()
-test_text = df_test.text.tolist()
-valid_text = df_valid.text.tolist()
-
-# @title Label Value Distribution
-
-df_valid['label'].value_counts().plot(kind='bar')
-
-print(df_train.shape)
-print(df_test.shape)
-print(df_valid.shape)
-
-labels={0:"sadness",1:"joy",2:"love",3:"anger",4:"fear",5:"surprise"}
-labels2={"sadness":0,"joy":1,"love":2,"anger":3,"fear":4,"surprise":5}
-
-from tensorflow.keras.utils import to_categorical
-one_hot_train_labels = to_categorical(df_train.label)
-one_hot_valid_labels = to_categorical(df_valid.label)
-one_hot_train_labels
-
-import tensorflow as tf
-# Reshape the labels to match the model's output shape
-one_hot_train_labels = tf.reshape(one_hot_train_labels, (-1, 2))
-one_hot_valid_labels = tf.reshape(one_hot_valid_labels, (-1, 2))
-one_hot_train_labels
-
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
-tokenized_data_train = tokenizer(train_text, return_tensors="np", padding=True)
-tokenized_data_valid = tokenizer(valid_text, return_tensors="np", padding=True)
-
-
-
-train_label.shape
-
-# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
-tokenized_data_train = dict(tokenized_data_train)
-tokenized_data_valid = dict(tokenized_data_valid)
-
-# labels = np.array(dataset["label"])
-
-from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
-import tensorflow as tf
-
-# Load pre-trained BERT model
-model = TFAutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased",num_labels=6, ignore_mismatched_sizes=True)
-
-
-
-model.layers[0].trainable=False
-
-for layer in model.layers:
-    print(layer)
-
-model.layers
-
-from transformers import AutoConfig
-
-# Load pre-trained BERT model
-# model = TFAutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")
-
-# Load model configuration
-# config = AutoConfig.from_pretrained("SamLowe/roberta-base-go_emotions")
-
-# Define the number of classes in your downstream task
-num_classes_downstream = 6 # Replace with the actual number of classes in your task
-
-# Replace the classifier layer with a new one for your task
-# model.config.num_labels = num_classes_downstream
-model.classifier = tf.keras.layers.Dense(num_classes_downstream, activation='softmax')
-
-# Continue with fine-tuning and model compilation
-
-model.classifier
-
-model.summary()
-
-model.config.id2label=labels
-model.config.label2id=labels2
-
-model.logits
-
-# Compile the model with the optimizer and loss function
-model.compile(optimizer="Adam",metrics=["accuracy"])
-
-# Assuming tokenized_data_train, train_label, tokenized_data_valid, and valid_label are defined
-model.fit(tokenized_data_train, train_label, epochs=30, validation_data=(tokenized_data_valid, valid_label))
-
-val=tokenizer(["hello boys I am good"],padding=True,return_tensors="np")
-
-import tensorflow as tf
-
-# Assuming 'val' is your input data
-predictions = loaded_model.predict(val)
-
-# Extract logits from predictions
-logits = predictions[0] # Assuming logits are the first element in the tuple
-
-# Apply softmax using tf.nn.softmax
-probabilities = tf.nn.softmax(logits)
-
-# Now 'probabilities' contains the softmax probabilities
-print(probabilities.numpy())
-
-model.save_pretrained("/content/sample_data/emotion_model_dhruv")
-
-from google.colab import files
-
-# Specify the file path
-file_path = '/content/sample_data/emotion_model_dhruv' # Adjust the file path accordingly
-
-# Download the file
-files.download(file_path)
-
-import shutil
-
-# Specify the directory to be copied
-directory_to_copy = '/content/sample_data/emotion_model_dhruv' # Adjust the directory path accordingly
-
-# Specify the destination directory in Google Drive
-destination_directory = '/content/drive/My Drive/'
-
-# Copy the directory to Google Drive
-shutil.copytree(directory_to_copy, os.path.join(destination_directory, os.path.basename(directory_to_copy)))
-
-from transformers import TFBertForSequenceClassification
-
-# Load the fine-tuned model from the saved directory
-loaded_model = TFBertForSequenceClassification.from_pretrained("/content/sample_data/emotion_model_dhruv")
-
-my=dict(tokenizer(["hello boys I am good"],padding=True,return_tensors="np"))
-my
-
-# Assuming my['input_ids'], my['token_type_ids'], and my['attention_mask'] are your input tensors
-input_ids = my['input_ids']
-token_type_ids = my['token_type_ids']
-attention_mask = my['attention_mask']
-
-# Make predictions using the loaded model, setting verbose=0 or verbose=1
-# predictions = loaded_model.predict([input_ids, token_type_ids, attention_mask], verbose=0)
-# or
-predictions = loaded_model.predict([input_ids, token_type_ids, attention_mask], verbose=1)
-
-my['input_ids'] = np.array(my['input_ids'])
-my['token_type_ids'] = np.array(my['token_type_ids'])
-my['attention_mask'] = np.array(my['attention_mask'])

 """Our Application"""

@@ -203,7 +17,7 @@ import numpy as np
 import tensorflow as tf # Apply softmax using tf.nn.softmax

 # Load the fine-tuned model from the saved directory
-loaded_model = TFBertForSequenceClassification.from_pretrained("
+loaded_model = TFBertForSequenceClassification.from_pretrained("emotion_model_dhruv")
 loaded_tokenizer=AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

 def predict_emotion(text):
@@ -226,5 +40,5 @@ my_labels=["sadness","joy","love","anger","fear","surprise"]
 import gradio as gr
 inputs = gr.Textbox(lines=1, label="Input Text")
 outputs = gr.Label(num_top_classes=6)
-interface = gr.Interface(fn=predict_emotion, inputs=inputs, outputs=outputs)
+interface = gr.Interface(fn=predict_emotion, inputs=inputs, outputs=outputs,title="Emotion Recognition in Text - NLP")
 interface.launch()
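The deleted hunk is the original Colab fine-tuning pipeline, which cannot run inside a Space: `!pip install` is notebook syntax, and every path under `/content/` exists only in a Colab VM. For reference, a minimal sketch of the same pipeline is shown below with its two label-handling problems corrected: the `tf.reshape(..., (-1, 2))` call mangles six-class one-hot vectors and is unnecessary, because a transformers TF model compiled without an explicit loss computes its own loss from plain integer labels, and `loaded_model.predict(val)` was called before `loaded_model` was defined. The learning rate and epoch count are illustrative assumptions, not values from this commit.

import numpy as np
import pandas as pd
import tensorflow as tf
from datasets import load_dataset
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# dair-ai/emotion: six classes, integer-labeled 0-5.
dataset = load_dataset("dair-ai/emotion")
df_train = pd.DataFrame(dataset["train"])
df_valid = pd.DataFrame(dataset["validation"])

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
# BatchEncoding -> dict so Keras accepts it as model input.
tokenized_train = dict(tokenizer(df_train.text.tolist(), return_tensors="np", padding=True))
tokenized_valid = dict(tokenizer(df_valid.text.tolist(), return_tensors="np", padding=True))

model = TFAutoModelForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased", num_labels=6
)
model.config.id2label = {0: "sadness", 1: "joy", 2: "love", 3: "anger", 4: "fear", 5: "surprise"}
model.config.label2id = {v: k for k, v in model.config.id2label.items()}

# No loss argument: the model falls back to its internal loss, so integer
# labels go straight to fit(); no one-hot encoding or reshape needed.
model.compile(optimizer=tf.keras.optimizers.Adam(3e-5), metrics=["accuracy"])  # assumed LR
model.fit(
    tokenized_train,
    np.array(df_train.label),
    validation_data=(tokenized_valid, np.array(df_valid.label)),
    epochs=3,  # assumed; the deleted cell used 30, which likely overfits
)
model.save_pretrained("emotion_model_dhruv")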
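The functional fix kept in the file is the `from_pretrained` path. The old call (truncated in the diff view) pointed under `/content/sample_data/`, which exists only in the Colab VM where the model was trained and saved, so loading it in the Space fails at startup, consistent with the page's "Runtime error" status; the new call resolves `emotion_model_dhruv` relative to the repository root. A minimal load-and-inspect sketch, assuming that directory was committed to the Space next to the script:

from transformers import AutoTokenizer, TFBertForSequenceClassification

# Relative path: resolved against the Space's working directory, so the
# model directory must ship with the repository (assumption).
loaded_model = TFBertForSequenceClassification.from_pretrained("emotion_model_dhruv")
loaded_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

# If id2label/label2id were set before save_pretrained(), as in the deleted
# cells, the six emotion names travel with the saved config.
print(loaded_model.config.id2label)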
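The hunks show only the signature `def predict_emotion(text):` and, in a hunk header, `my_labels=["sadness","joy","love","anger","fear","surprise"]`; the function body itself is elided from the diff. A plausible body is sketched below, under the assumption that it returns the `{label: probability}` dict that `gr.Label` renders; it is an illustration, not the code from the file.

import tensorflow as tf

my_labels = ["sadness", "joy", "love", "anger", "fear", "surprise"]

def predict_emotion(text):
    # Tokenize one string into input_ids / token_type_ids / attention_mask.
    inputs = dict(loaded_tokenizer([text], padding=True, return_tensors="np"))
    # Calling the TF model returns an output object; .logits are raw scores.
    logits = loaded_model(inputs).logits
    # Softmax turns the six logits into probabilities.
    probs = tf.nn.softmax(logits, axis=-1).numpy()[0]
    # gr.Label expects a {label: confidence} mapping.
    return {label: float(p) for label, p in zip(my_labels, probs)}

Wired into `gr.Interface(fn=predict_emotion, inputs=inputs, outputs=outputs, ...)`, each submitted sentence then yields a ranked list of the six emotions in the label widget.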