dhruvsaxena11 committed
Commit
584ad1e
1 Parent(s): 7cf0b4d

Upload emotion_recognition_fine_tuning.py

Files changed (1): emotion_recognition_fine_tuning.py (+230, −0)
emotion_recognition_fine_tuning.py ADDED
# -*- coding: utf-8 -*-
"""Emotion Recognition_Fine Tuning

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1pZgt5n6943GB5oq_h43LjAYoA4yi-EST
"""
!pip install datasets

from datasets import load_dataset

dataset = load_dataset("dair-ai/emotion")

import pandas as pd

df_train = pd.DataFrame(dataset["train"])
df_train.head()
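# Added for orientation (not in the original): the DatasetDict summary lists
# the train/validation/test splits and their row counts.
print(dataset)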
# @title label

from matplotlib import pyplot as plt

df_train['label'].plot(kind='hist', bins=20, title='label')
# plt.gca().spines[['top', 'right']].set_visible(False)

df_test = pd.DataFrame(dataset["test"])
df_test.head()

# @title label

from matplotlib import pyplot as plt

df_test['label'].plot(kind='hist', bins=20, title='label')
plt.gca().spines[['top', 'right']].set_visible(False)
dataset.keys()

df_valid = pd.DataFrame(dataset["validation"])
df_valid.head()

df_train.info()

import numpy as np

train_label = np.array(df_train.label)
test_label = np.array(df_test.label)
valid_label = np.array(df_valid.label)

# df_train.text is a pandas Series; convert each split's text column to a list
train_text = df_train.text.tolist()
test_text = df_test.text.tolist()
valid_text = df_valid.text.tolist()
# @title Label Value Distribution

df_valid['label'].value_counts().plot(kind='bar')

print(df_train.shape)
print(df_test.shape)
print(df_valid.shape)

labels = {0: "sadness", 1: "joy", 2: "love", 3: "anger", 4: "fear", 5: "surprise"}
labels2 = {"sadness": 0, "joy": 1, "love": 2, "anger": 3, "fear": 4, "surprise": 5}
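# Equivalent construction from the dataset's own metadata (added sketch, not
# in the original); the ClassLabel feature carries the same names in the same
# order:
# label_names = dataset["train"].features["label"].names
# labels = dict(enumerate(label_names))
# labels2 = {name: i for i, name in enumerate(label_names)}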
from tensorflow.keras.utils import to_categorical

one_hot_train_labels = to_categorical(df_train.label)
one_hot_valid_labels = to_categorical(df_valid.label)
one_hot_train_labels

import tensorflow as tf

# to_categorical already returns shape (num_samples, 6) for the six emotion
# classes, so no reshape is needed; a reshape to (-1, 2) would scramble the
# one-hot rows. These one-hot labels are not consumed below: model.fit() is
# given the integer label arrays directly.
one_hot_train_labels
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
tokenized_data_train = tokenizer(train_text, return_tensors="np", padding=True)
tokenized_data_valid = tokenizer(valid_text, return_tensors="np", padding=True)

train_label.shape

# The tokenizer returns a BatchEncoding; convert it to a plain dict for Keras
tokenized_data_train = dict(tokenized_data_train)
tokenized_data_valid = dict(tokenized_data_valid)
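# Added note (assumption, not in the original): padding=True pads every
# example to the longest sequence in the split; for long inputs you would
# usually also cap the length, e.g.:
# tokenized_data_train = tokenizer(train_text, return_tensors="np",
#                                  padding=True, truncation=True, max_length=128)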
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
import tensorflow as tf

# Load pre-trained BERT with a fresh 6-way classification head
model = TFAutoModelForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased", num_labels=6, ignore_mismatched_sizes=True
)

# Freeze the BERT encoder so only the classification head is trained
model.layers[0].trainable = False

for layer in model.layers:
    print(layer)

model.layers
from transformers import AutoConfig

# Alternative starting point, left commented out: a model already tuned for
# emotion classification
# model = TFAutoModelForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")
# config = AutoConfig.from_pretrained("SamLowe/roberta-base-go_emotions")

# Number of classes in the downstream task
num_classes_downstream = 6

# Replace the classifier layer with a new head for this task. Since the model
# was already created with num_labels=6 this swap is redundant, and because
# the new head applies a softmax activation, the model's "logits" are already
# probabilities from here on.
# model.config.num_labels = num_classes_downstream
model.classifier = tf.keras.layers.Dense(num_classes_downstream, activation='softmax')

model.classifier

model.summary()
model.config.id2label = labels
model.config.label2id = labels2

# Logits are an attribute of a forward pass's output (model(inputs).logits),
# not of the model object itself, so there is no bare `model.logits` to
# inspect here.

# Compile with the optimizer; with no loss argument, the transformers Keras
# integration falls back to the model's built-in loss for this task
model.compile(optimizer="Adam", metrics=["accuracy"])
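# Sketch (assumption, not in the original): BERT fine-tuning is usually more
# stable with a small explicit learning rate than with the Adam default of 1e-3:
# from tensorflow.keras.optimizers import Adam
# model.compile(optimizer=Adam(learning_rate=2e-5), metrics=["accuracy"])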
# Train on the integer labels; the built-in loss handles sparse class indices
model.fit(tokenized_data_train, train_label, epochs=30,
          validation_data=(tokenized_data_valid, valid_label))
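# Optional sketch (assumption, not in the original): 30 epochs is a long run
# for a frozen encoder, and standard Keras early stopping can cut it short:
# early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=2,
#                                               restore_best_weights=True)
# model.fit(tokenized_data_train, train_label, epochs=30,
#           validation_data=(tokenized_data_valid, valid_label),
#           callbacks=[early_stop])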
val = tokenizer(["hello boys I am good"], padding=True, return_tensors="np")
import tensorflow as tf

# Predict with the fine-tuned in-memory model; convert the BatchEncoding to a
# plain dict so Keras matches each array to the right input by name
predictions = model.predict(dict(val))

# Extract the logits array (the first element of the prediction output)
logits = predictions[0]

# Convert to class probabilities with softmax
probabilities = tf.nn.softmax(logits)
print(probabilities.numpy())

model.save_pretrained("/content/sample_data/emotion_model_dhruv")
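# Added sketch (not in the original): saving the tokenizer alongside the model
# makes the directory self-contained for later reloading:
# tokenizer.save_pretrained("/content/sample_data/emotion_model_dhruv")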
from google.colab import files
import shutil

# files.download handles single files, not directories, so zip the saved
# model directory first
archive_path = shutil.make_archive('/content/sample_data/emotion_model_dhruv',
                                   'zip', '/content/sample_data/emotion_model_dhruv')

# Download the archive
files.download(archive_path)
import os
import shutil

# The copy below assumes Google Drive is already mounted, e.g.:
# from google.colab import drive; drive.mount('/content/drive')

# Directory to be copied
directory_to_copy = '/content/sample_data/emotion_model_dhruv'

# Destination directory in Google Drive
destination_directory = '/content/drive/My Drive/'

# Copy the saved model directory into Google Drive
shutil.copytree(directory_to_copy,
                os.path.join(destination_directory, os.path.basename(directory_to_copy)))
from transformers import TFBertForSequenceClassification

# Load the fine-tuned model back from the saved directory
loaded_model = TFBertForSequenceClassification.from_pretrained("/content/sample_data/emotion_model_dhruv")

my = dict(tokenizer(["hello boys I am good"], padding=True, return_tensors="np"))
my
# my['input_ids'], my['token_type_ids'], and my['attention_mask'] hold the
# input arrays. Passing the dict keeps each array matched to the right model
# input by name; a positional list risks swapping attention_mask and
# token_type_ids.
input_ids = my['input_ids']
token_type_ids = my['token_type_ids']
attention_mask = my['attention_mask']

# Make predictions with the loaded model (verbose=0 silences the progress bar)
predictions = loaded_model.predict(my, verbose=1)

# With return_tensors="np" these values are already NumPy arrays, so the
# conversions below are harmless no-ops
my['input_ids'] = np.array(my['input_ids'])
my['token_type_ids'] = np.array(my['token_type_ids'])
my['attention_mask'] = np.array(my['attention_mask'])
"""Our Application"""

from transformers import TFBertForSequenceClassification, AutoTokenizer
import numpy as np
import tensorflow as tf

# Load the fine-tuned model from Google Drive, plus the matching tokenizer
loaded_model = TFBertForSequenceClassification.from_pretrained("/content/drive/MyDrive/emotion_model_dhruv")
loaded_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

def predict_emotion(text):
    # Tokenize, run the model, and map each label to its softmax probability
    text_token = loaded_tokenizer(text, padding=True, return_tensors="np")
    outputs = loaded_model(text_token)
    probabilities = tf.nn.softmax(outputs.logits)
    final = probabilities.numpy().tolist()
    labels = ["sadness", "joy", "love", "anger", "fear", "surprise"]
    result_dict = {k: v for k, v in zip(labels, final[0])}
    return result_dict

predict_emotion("dhruv")
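# Hypothetical usage example (not in the original), with a more emotion-laden
# input:
# predict_emotion("i am feeling quite excited about this")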
my_labels = ["sadness", "joy", "love", "anger", "fear", "surprise"]

!pip install gradio

import gradio as gr

inputs = gr.Textbox(lines=1, label="Input Text")
outputs = gr.Label(num_top_classes=6)
interface = gr.Interface(fn=predict_emotion, inputs=inputs, outputs=outputs)
interface.launch()
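# Note (assumption, not in the original): in Colab a public share link is
# often handy:
# interface.launch(share=True)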