File size: 4,024 Bytes
d3bc923
 
7c9a8a2
5e22f32
 
 
5807bc7
9e6c667
5e22f32
ebc3421
09acac3
5e22f32
 
d3bc923
44188f8
 
76b74a3
44188f8
5e22f32
 
 
8d2e061
11dae3a
5e22f32
d3bc923
3ac7ed9
d3bc923
 
 
 
59c9077
2698c59
09acac3
5e22f32
 
 
 
 
 
 
 
 
 
 
7b5b35f
5e22f32
7b5b35f
5e22f32
 
 
 
 
 
 
 
f72cb30
5e22f32
 
 
 
 
 
 
 
769f501
5e22f32
 
 
 
8d2e061
5e22f32
5fc0978
2698c59
d3bc923
ba1fa51
09acac3
74c3b09
d3bc923
8d2e061
 
 
 
 
 
 
d3bc923
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
from todset import todset
import numpy as np
from keras.models import Model
from keras.saving import load_model
from keras.layers import *
from tensorflow.keras.optimizers import RMSprop
from keras.preprocessing.text import Tokenizer
import os
import hashlib
import keras

# Directory where trained models are stored between requests.
# exist_ok=True keeps a restart (or module reload) from failing when the
# directory is already there; plain os.mkdir raises FileExistsError then.
os.makedirs("cache", exist_ok=True)

# Module-level values. Note that train() takes `emb_size` and `inp_len` as
# parameters, which shadow these two names inside that function.
emb_size = 128
inp_len = 16
# Number of left-padding offsets (0 .. maxshift-1) generated per training
# question; read by train() when it builds the training set.
maxshift = 4

def hash_str(data: str):
    """Return the hexadecimal MD5 digest of *data* encoded as UTF-8."""
    hasher = hashlib.md5()
    hasher.update(data.encode('utf-8'))
    return hasher.hexdigest()

def train(message: str, epochs: int, learning_rate: float, emb_size: int, inp_len: int, data: str):
    if "→" not in data or "\n" not in data:
        return "Dataset example:\nquestion→answer\nquestion→answer\netc."
    dset, responses = todset(data)
    resps_len = len(responses)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(list(dset.keys()))
    
    vocab_size = len(tokenizer.word_index) + 1
    data_hash = hash_str(data)+".keras"
    if data_hash in os.listdir("cache"):
        model = load_model("cache/"+data_hash)
    else:
        input_layer = Input(shape=(inp_len,))
        emb_layer = Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len)(input_layer)
        attn_layer = MultiHeadAttention(num_heads=4, key_dim=128)(emb_layer, emb_layer, emb_layer)
        noise_layer = GaussianNoise(0.1)(attn_layer)
        conv1_layer = Conv1D(64, 8, padding='same', activation='relu', strides=1, input_shape=(64, 128))(noise_layer)
        conv2_layer = Conv1D(16, 4, padding='valid', activation='relu', strides=1)(conv1_layer)
        conv3_layer = Conv1D(8, 2, padding='valid', activation='relu', strides=1)(conv2_layer)
        flatten_layer = Flatten()(conv3_layer)
        attn_flatten_layer = Flatten()(attn_layer)
        conv1_flatten_layer = Flatten()(conv1_layer)
        conv2_flatten_layer = Flatten()(conv2_layer)
        conv3_flatten_layer = Flatten()(conv3_layer)
        concat1_layer = Concatenate()([flatten_layer, attn_flatten_layer, conv1_flatten_layer, conv2_flatten_layer, conv3_flatten_layer])
        dense1_layer = Dense(512, activation="linear")(concat1_layer)
        prelu1_layer = PReLU()(dense1_layer)
        dropout_layer = Dropout(0.3)(prelu1_layer)
        dense2_layer = Dense(256, activation="tanh")(dropout_layer)
        dense3_layer = Dense(256, activation="relu")(dense2_layer)
        dense4_layer = Dense(100, activation="tanh")(dense3_layer)
        concat2_layer = Concatenate()([dense4_layer, prelu1_layer, attn_flatten_layer, conv1_flatten_layer])
        dense4_layer = Dense(resps_len, activation="softmax")(concat2_layer)
        model = Model(inputs=input_layer, outputs=dense4_layer)
        
        X = []
        y = []
        
        for key in dset:
            for p in range(maxshift):
                tokens = tokenizer.texts_to_sequences([key,])[0]
                X.append(np.array(([0,]*p+list(tokens)+[0,]*inp_len)[:inp_len]))
                y.append(dset[key])
        
        X = np.array(X)
        y = np.array(y)
        
        model.compile(optimizer=RMSProp(learning_rate=learning_rate), loss="sparse_categorical_crossentropy", metrics=["accuracy",])
        
        model.fit(X, y, epochs=16, batch_size=8, workers=4, use_multiprocessing=True)
        model.save(f"cache/{data_hash}")
    tokens = tokenizer.texts_to_sequences([message,])[0]
    prediction = model.predict(np.array([(list(tokens)+[0,]*inp_len)[:inp_len],]))[0]
    keras.backend.clear_session()
    return responses[np.argmax(prediction)]

# Web UI: a message box, four hyperparameter sliders and a dataset box, in the
# same order as train()'s parameters.
# gr.Slider is used with explicit keywords: the legacy gr.inputs.Slider takes
# `step` as its third positional argument, so passing the default there
# together with step=... supplies `step` twice.
iface = gr.Interface(
    fn=train,
    inputs=[
        "text",
        gr.Slider(minimum=1, maximum=64, value=32, step=1, label="Epochs"),
        gr.Slider(minimum=0.00000001, maximum=0.1, value=0.001, step=0.00000001, label="Learning rate"),
        gr.Slider(minimum=1, maximum=256, value=100, step=1, label="Embedding size"),
        # Explicit default: without one the slider starts at its minimum (1),
        # which is too short for the model's padding='valid' convolutions.
        gr.Slider(minimum=1, maximum=128, value=16, step=1, label="Input Length"),
        "text",
    ],
    outputs="text",
)
iface.launch()