"""Gradio demo: classify a Thai command with a multi-head XLM-RoBERTa model.

The script loads a pretrained transformer, rebuilds the multi-output Keras
model, restores its weights from ``t1_m1.h5`` and serves ``predict`` through
a Gradio interface.
"""
import json

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K
from pythainlp import word_tokenize
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.layers import Input, Embedding, Conv1D, MaxPooling1D, Dense, Flatten, Concatenate, Dropout, Dot, Activation, Reshape, Permute, Multiply
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from transformers import TFAutoModel, AutoTokenizer

# Load the tokenizer and transformer model.
# Alternatives previously tried: bert-base-multilingual-cased,
# philschmid/tiny-bert-sst2-distilled.
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base", max_length=60)
transformer_model = TFAutoModel.from_pretrained("xlm-roberta-base")

# Fixed input length (in token ids) the Keras model is built for.
max_seq_length = 32

# Lookup tables turning the argmax of outputs 6/7/8 into env/hour/minute labels.
with open('tf_labels6.json', encoding='utf-8') as fh:
    env_decode = json.load(fh)

with open('tf_labels7.json', encoding='utf-8') as fh:
    hour_decode = json.load(fh)

with open('tf_labels8.json', encoding='utf-8') as fh:
    minute_decode = json.load(fh)

# Hidden-layer widths of the eight per-output Dense towers, in creation order.
_TOWER_UNITS = (
    (64, 32, 16),
    (64, 32, 16),
    (64, 32, 16),
    (64, 32, 16),
    (64, 32, 16),
    (512, 256, 128),
    (128, 64, 32),
    (256, 128, 64),
)

# (units, activation, layer name) of the eight output heads, in order.
_HEAD_SPECS = (
    (1, 'sigmoid', 'output1'),
    (1, 'sigmoid', 'output2'),
    (1, 'sigmoid', 'output3'),
    (1, 'sigmoid', 'output4'),
    (1, 'sigmoid', 'output5'),
    (119, 'softmax', 'output6'),
    (25, 'softmax', 'output7'),
    (61, 'softmax', 'output8'),
)


def _dense_tower(features, units):
    """Stack one ReLU Dense layer per entry of *units* on top of *features*."""
    x = features
    for width in units:
        x = Dense(width, activation='relu')(x)
    return x


def create_model():
    """Build the eight-head model, compile it and load weights from ``t1_m1.h5``.

    Returns:
        The compiled Keras ``Model`` taking ``(batch, max_seq_length)`` int32
        token ids and producing eight outputs (five sigmoid, three softmax).
    """
    inputs = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32)
    embedding_layer = transformer_model(inputs)[0]
    flatten_layer = Flatten()(embedding_layer)

    # NOTE: layers are created in the same order as the original script
    # (all towers first, then all heads). Auto-generated Dense names and, most
    # likely, topology-based .h5 weight loading depend on that order.
    towers = [_dense_tower(flatten_layer, units) for units in _TOWER_UNITS]
    outputs = [
        Dense(units, activation=activation, name=name)(tower)
        for (units, activation, name), tower in zip(_HEAD_SPECS, towers)
    ]

    # Freeze every encoder layer except the last one.
    for layer in transformer_model.roberta.encoder.layer[:-1]:
        layer.trainable = False

    model = Model(inputs=inputs, outputs=outputs)
    opt = keras.optimizers.Adam(learning_rate=3e-5)
    model.compile(
        loss=[
            'binary_crossentropy',
            'binary_crossentropy',
            'binary_crossentropy',
            'binary_crossentropy',
            'binary_crossentropy',
            'categorical_crossentropy',
            'categorical_crossentropy',
            'categorical_crossentropy',
        ],
        optimizer=opt,
        metrics=[tf.keras.metrics.BinaryAccuracy(), 'categorical_accuracy'],
    )
    model.load_weights("t1_m1.h5")
    return model


model = create_model()


def _decode_label(table, index):
    """Look up class *index* in a label table loaded from JSON.

    ``json.load`` yields string keys for JSON objects, so a dict table is
    indexed with ``str(index)``; a list table is indexed positionally.
    """
    index = int(index)
    if isinstance(table, dict):
        return table[str(index)]
    return table[index]


def _as_flag(probability):
    """Threshold a sigmoid output (scalar or 1-element array) at 0.5 -> 0/1."""
    return 1 if np.ravel(probability)[0] > 0.5 else 0


def predict(text):
    """Run the model on one piece of text and decode every output head.

    Args:
        text: Raw input string; it is word-tokenized with PyThaiNLP before
            being passed to the transformer tokenizer.

    Returns:
        A list with one dict per input text (always one here) holding the
        keys ``valid``, ``is_scene``, ``has_num``, ``turn_on_off``,
        ``device``, ``env``, ``hour`` and ``minute``.
    """
    test_texts = [text]
    split_thai_text = [word_tokenize(x) for x in test_texts]
    new_input_ids = tokenizer(
        split_thai_text,
        padding=True,
        truncation=True,
        return_tensors="tf",
        is_split_into_words=True,
    )["input_ids"]
    # Pad/truncate to the model's fixed length; value=1 is presumably the
    # XLM-R <pad> token id -- confirm against tokenizer.pad_token_id.
    test_padded_sequences = pad_sequences(
        new_input_ids,
        maxlen=max_seq_length,
        padding='post',
        truncating='post',
        value=1,
    )
    print(test_padded_sequences.shape)

    predicted_labels = model.predict(test_padded_sequences)

    output = []
    for i, sample_text in enumerate(test_texts):
        print(sample_text)
        valid = _as_flag(predicted_labels[0][i])
        is_scene = _as_flag(predicted_labels[1][i])
        has_num = _as_flag(predicted_labels[2][i])
        print(f'is_valid : {valid}')
        print(f'is_scene : {is_scene}')
        print(f'has_num : {has_num}')
        turn = _as_flag(predicted_labels[3][i])
        print(f'turn_on_off : {turn}')
        print(f'device : ไฟ')

        env_label = _decode_label(env_decode, np.argmax(predicted_labels[5][i]))
        hour_label = _decode_label(hour_decode, np.argmax(predicted_labels[6][i]))
        minute_label = _decode_label(minute_decode, np.argmax(predicted_labels[7][i]))
        print(f'env : {env_label}')
        print(f'hour : {hour_label}')
        print(f'minute : {minute_label}')
        print('----')

        output.append({
            'valid': valid,
            'is_scene': is_scene,
            'has_num': has_num,
            'turn_on_off': turn,
            'device': 'ไฟ',
            'env': env_label,
            # The original referenced undefined names `hour` / `minute` here.
            'hour': hour_label,
            'minute': minute_label,
        })
    return output


# `predict` returns a list of dicts, which the 'label' component cannot
# render; the 'json' component displays it as-is.
iface = gr.Interface(
    fn=predict,
    inputs='text',
    outputs='json',
    examples=[["Hello! My name is Omar"]],
)

iface.launch()