import gradio as gr
from todset import todset
import numpy as np
from keras.models import Model
from keras.saving import load_model
from keras.layers import *
from tensorflow.keras.optimizers import RMSprop
from keras.preprocessing.text import Tokenizer
import os
import hashlib
import keras
# Create the cache directory for trained models (won't crash if it already exists).
os.makedirs("cache", exist_ok=True)
def hash_str(data: str):
    return hashlib.md5(data.encode('utf-8')).hexdigest()
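# For example, hash_str("hello") returns the well-known MD5 digest
# "5d41402abc4b2a76b9719d911017c592"; the digest is used below as part of the cache file name.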
def train(message: str, epochs: int, learning_rate: float, emb_size: int, inp_len: int, data: str):
    if "→" not in data or "\n" not in data:
        return "Dataset example:\nquestion→answer\nquestion→answer\netc."
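    # Expected dataset format (inferred from the check above): one pair per line,
    # with "→" separating question from answer, e.g.
    #   hi→hello
    #   how are you→i am fine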
    dset, responses = todset(data)
    resps_len = len(responses)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(list(dset.keys()))
    vocab_size = len(tokenizer.word_index) + 1
    # Cache key: dataset hash plus the hyperparameters, so identical runs reuse the saved model.
    data_hash = hash_str(data) + str(epochs) + str(learning_rate) + str(emb_size) + str(inp_len) + ".keras"
    if data_hash in os.listdir("cache"):
        model = load_model("cache/" + data_hash)
    else:
        # Attention + convolutional feature extractor over the token embeddings.
        input_layer = Input(shape=(inp_len,))
        emb_layer = Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len)(input_layer)
        attn_layer = MultiHeadAttention(num_heads=4, key_dim=128)(emb_layer, emb_layer, emb_layer)
        noise_layer = GaussianNoise(0.1)(attn_layer)
        conv1_layer = Conv1D(64, 8, padding='same', activation='relu', strides=1)(noise_layer)
        conv2_layer = Conv1D(16, 4, padding='same', activation='relu', strides=1)(conv1_layer)
        conv3_layer = Conv1D(8, 2, padding='same', activation='relu', strides=1)(conv2_layer)
        # Flatten every intermediate branch and concatenate them into one feature vector.
        flatten_layer = Flatten()(conv3_layer)
        attn_flatten_layer = Flatten()(attn_layer)
        conv1_flatten_layer = Flatten()(conv1_layer)
        conv2_flatten_layer = Flatten()(conv2_layer)
        conv3_flatten_layer = Flatten()(conv3_layer)
        concat1_layer = Concatenate()([flatten_layer, attn_flatten_layer, conv1_flatten_layer, conv2_flatten_layer, conv3_flatten_layer])
        dense1_layer = Dense(512, activation="linear")(concat1_layer)
        prelu1_layer = PReLU()(dense1_layer)
        dropout_layer = Dropout(0.3)(prelu1_layer)
        dense2_layer = Dense(256, activation="tanh")(dropout_layer)
        dense3_layer = Dense(256, activation="relu")(dense2_layer)
        dense4_layer = Dense(100, activation="tanh")(dense3_layer)
        # Skip connections back to earlier branches before the final softmax over responses.
        concat2_layer = Concatenate()([dense4_layer, prelu1_layer, attn_flatten_layer, conv1_flatten_layer])
        output_layer = Dense(resps_len, activation="softmax")(concat2_layer)
        model = Model(inputs=input_layer, outputs=output_layer)
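        # Rough shape walk-through (assuming inp_len=16, emb_size=100):
        #   Input (16,) -> Embedding (16, 100) -> MultiHeadAttention (16, 100)
        #   -> Conv1D stack (16, 64) / (16, 16) / (16, 8) -> flattened branches -> softmax over resps_len classes.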
        # Build the training matrices: questions padded/truncated to inp_len, labels are response indices.
        X = []
        y = []
        for key in dset:
            tokens = tokenizer.texts_to_sequences([key,])[0]
            X.append(np.array((list(tokens) + [0,] * inp_len)[:inp_len]))
            y.append(dset[key])
        X = np.array(X)
        y = np.array(y)
        model.compile(optimizer=RMSprop(learning_rate=learning_rate), loss="sparse_categorical_crossentropy", metrics=["accuracy",])
        model.fit(X, y, epochs=int(epochs), batch_size=8, workers=4, use_multiprocessing=True)
        model.save(f"cache/{data_hash}")
    # Tokenize the incoming message the same way as the training questions and pick the most likely response.
    tokens = tokenizer.texts_to_sequences([message,])[0]
    prediction = model.predict(np.array([(list(tokens) + [0,] * inp_len)[:inp_len],]))[0]
    keras.backend.clear_session()
    return responses[np.argmax(prediction)]
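# A minimal sketch of calling train() directly, outside the Gradio UI (values are only illustrative):
#   reply = train("hi", epochs=16, learning_rate=0.001, emb_size=100, inp_len=16,
#                 data="hi→hello\nbye→goodbye")
#   print(reply)  # one of the answers from the dataset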
if __name__ == "__main__":
    iface = gr.Interface(fn=train,
                         inputs=["text",
                                 gr.Slider(1, 64, value=32, step=1, label="Epochs"),
                                 gr.Slider(0.00000001, 0.1, value=0.001, step=0.00000001, label="Learning rate"),
                                 gr.Slider(1, 256, value=100, step=1, label="Embedding size"),
                                 gr.Slider(1, 128, value=16, step=1, label="Input Length"),
                                 "text"],
                         outputs="text")
    iface.launch()