davidlee1102 committed on
Commit
d35f33e
1 Parent(s): 7bef4b5

First commit

.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model/nlp_surrey_coursework_hunglenhat/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
constance_data.py ADDED
@@ -0,0 +1,11 @@
+ # The 28 emotion labels in training order; decode_cut_list below maps the
+ # model's 14 output classes back to indices in this list.
+ emotion_track_list = ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
+                       'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
+                       'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
+                       'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']
+
+ decode_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
+
+ decode_cut_list = [4, 6, 8, 10, 14, 15, 17, 18, 20, 21, 22, 25, 26, 27]
+ decode_cut_transformed_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
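For reference, decode_cut_list maps the 14 classes the model actually predicts back onto the 28-label emotion_track_list. A minimal sketch of the decoding, derived from the lists above:

    from constance_data import emotion_track_list, decode_cut_list

    # Model class 0 decodes to emotion_track_list[4], i.e. 'approval'.
    for model_class, label_index in enumerate(decode_cut_list):
        print(model_class, '->', emotion_track_list[label_index])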
emotion_model.py ADDED
@@ -0,0 +1,25 @@
+ import numpy as np
+ import tensorflow as tf
+ import tensorflow_addons as tfa
+
+ from constance_data import emotion_track_list, decode_cut_list
+ from pre_processing_data import preprocessing_data, pre_processing_data_2, text_transform
+
+
+ def emotion_predict(sentence: str):
+     lr = 1e-3
+     wd = 1e-4 * lr
+     # Reload the saved classifier; compile restores the AdamW optimizer used in training.
+     model = tf.keras.models.load_model("model/nlp_surrey_coursework_hunglenhat")
+     model.compile(loss='sparse_categorical_crossentropy',
+                   optimizer=tfa.optimizers.AdamW(learning_rate=lr, weight_decay=wd),
+                   metrics=['accuracy'])
+     # Try the POS-filtered pipeline first; if it yields nothing, fall back to
+     # the simpler stop-word pipeline on the original sentence.
+     processed = pre_processing_data_2(sentence)
+     if not processed:
+         processed = preprocessing_data(sentence)
+
+     prediction = model.predict(text_transform(processed))
+     index_max = np.argmax(prediction)
+     return emotion_track_list[decode_cut_list[index_max]]
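A minimal usage sketch, assuming the model/ directory from this commit is available locally; the input sentence is illustrative and the predicted label depends on the trained weights:

    from emotion_model import emotion_predict

    # Runs pre-processing, tokenization, prediction, and label decoding end to end.
    print(emotion_predict("I am really grateful for your help"))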
model/nlp_surrey_coursework_hunglenhat/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4f4c2e72bf0f471efafd4a4f43d81e20d41f1270ffe840f7dad866e71a223e0
+ size 53
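The three lines above are a standard Git LFS pointer stored in place of the binary: the spec version, the SHA-256 of the actual content, and its size in bytes. The same format applies to the pointer files below.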
model/nlp_surrey_coursework_hunglenhat/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45149d48f67806c16c962cfa8db094098ac9e49baf9001e1dc105d7cca8a5384
+ size 15056
model/nlp_surrey_coursework_hunglenhat/saved_model.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e48ebc2bab3ee1e455e5ca7dbd40c5b9887d425ec000a75b256b54b8ad5d7396
+ size 801857
model/nlp_surrey_coursework_hunglenhat/variables/variables.index ADDED
Binary file (2.38 kB)
model/tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a2b3fb1cc1be8d44ba122aa41103dcbee6c6137bd5f4f5f8e7ceecea0d00839
+ size 241110
pre_processing_data.py ADDED
@@ -0,0 +1,73 @@
+ import contractions
+ import nltk
+ import pickle
+ import spacy
+
+ from nltk.corpus import stopwords
+ from nltk.tokenize import RegexpTokenizer
+ from keras_preprocessing.sequence import pad_sequences
+
+ nltk.download('punkt')
+ nltk.download('wordnet')
+ nltk.download('omw-1.4')
+ nltk.download('stopwords')
+ nltk.download('averaged_perceptron_tagger')
+
+ # python -m spacy download en_core_web_sm
+ nlp = spacy.load("en_core_web_sm")
+
+ stop_words = set(stopwords.words('english'))
+
+
+ def text_transform(string_text):
+     # Encode the text with the tokenizer fitted at training time,
+     # padded/truncated to the model's input length of 50.
+     with open('model/tokenizer.pickle', 'rb') as handle:
+         loaded_tokenizer = pickle.load(handle)
+     sequences = loaded_tokenizer.texts_to_sequences([string_text])
+     padded_sequences = pad_sequences(sequences, maxlen=50, padding='post', truncating='post')
+     return padded_sequences
+
+
+ # Keep only verbs, adjectives, and adverbs, because the emotion of a sentence
+ # depends mainly on these parts of speech.
+ def get_main_words(string_text):
+     tokens = nltk.word_tokenize(string_text)
+     pos_tags = nltk.pos_tag(tokens)
+
+     kept_tags = {'JJR', 'VB', 'WP', 'WRB', 'NNS', 'JJS', 'JJ', 'RB', 'MD', 'VBZ', 'VBG', 'VBP'}
+     string_list = [token for token, tag in pos_tags if tag in kept_tags]
+
+     if string_list:
+         return ' '.join(string_list)
+     return None
+
+
+ # Complex pre-processing: lemmatize, expand contractions, then keep only the main words.
+ def pre_processing_data_2(string_text):
+     string_text = string_text.lower()
+     string_output = ' '.join([token.lemma_ for token in nlp(string_text)])
+     string_output = contractions.fix(string_output)
+
+     string_processed = get_main_words(string_output)
+     if string_processed:
+         tokenizer = RegexpTokenizer(r'\w+')
+         return " ".join(tokenizer.tokenize(string_processed))
+
+     # No main words found: fall back to stop-word removal over the full text.
+     tokenizer = RegexpTokenizer(r'\w+')
+     string_output = tokenizer.tokenize(string_output)
+     string_output = [w for w in string_output if w not in stop_words]
+     return " ".join(string_output)
+
+
+ # Simple pre-processing: lemmatize, expand contractions, and remove stop words.
+ def preprocessing_data(string_text):
+     string_text = string_text.lower()
+     string_output = ' '.join([token.lemma_ for token in nlp(string_text)])
+     string_output = contractions.fix(string_output)
+
+     tokenizer = RegexpTokenizer(r'\w+')
+     string_output = tokenizer.tokenize(string_output)
+     string_output = [w for w in string_output if w not in stop_words]
+     return " ".join(string_output)
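A small sketch contrasting the two pipelines on one input; the exact output depends on the spaCy lemmatizer and NLTK tagger, so the comments are illustrative only:

    from pre_processing_data import preprocessing_data, pre_processing_data_2

    text = "I can't believe how great this is!"
    print(preprocessing_data(text))     # lemmatized text with stop words removed
    print(pre_processing_data_2(text))  # only verb/adjective/adverb tokens, when any survive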