# hatespeech/app.py
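"""Gradio demo for German hate-speech detection.

Loads a Keras model fine-tuned from dbmdz/bert-base-german-cased,
re-compiles it with an AdamW optimizer and exposes a simple
text-in / text-out interface.
"""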
import transformers
import gradio as gr
import tensorflow as tf
from official.nlp import optimization # to create AdamW optimizer
MODEL_DIRECTORY = 'save/modelV1'
PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
MAX_SEQUENCE_LENGTH = 256
EPOCHS = 2
OPTIMIZER = 'adamw'
INIT_LR = 3e-5
LOSS = tf.keras.losses.BinaryCrossentropy(from_logits=False)
METRICS = tf.metrics.BinaryAccuracy()
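# The saved model is loaded with compile=False further below, so compile_model()
# re-attaches an AdamW optimizer schedule, the loss and the metric before use.
# steps_per_epoch is a fixed placeholder here; the learning-rate schedule only
# matters if the model were trained further, not for plain inference.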
def compile_model(model):
    steps_per_epoch = 10
    num_train_steps = steps_per_epoch * EPOCHS
    num_warmup_steps = int(0.1 * num_train_steps)
    optimizer = optimization.create_optimizer(
        init_lr=INIT_LR,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        optimizer_type=OPTIMIZER
    )
    model.compile(optimizer=optimizer, loss=LOSS, metrics=[METRICS])
    return model
# Load the fine-tuned weights without the optimizer state, then re-compile.
hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=False)
compile_model(hs_detection_model)
def encode(sentences):
    return TOKENIZER.batch_encode_plus(
        sentences,
        max_length=MAX_SEQUENCE_LENGTH,  # fixed length of the sequences
        add_special_tokens=True,         # add [CLS] and [SEP] tokens
        return_attention_mask=True,
        return_token_type_ids=False,     # not needed for this type of ML task
        padding='max_length',            # pad shorter sequences with 0 up to max_length
        truncation=True,                 # cut longer sequences down to max_length
        return_tensors='tf'
    )
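# encode() returns a BatchEncoding whose 'input_ids' and 'attention_mask'
# tensors have shape (batch_size, MAX_SEQUENCE_LENGTH); its values() are
# passed to the model in inference() below.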
def inference(sentence):
    print(sentence)
    encoded_sentence = encode([sentence])
    print(encoded_sentence)
    # The model outputs a probability in [0, 1] (the loss uses from_logits=False).
    prediction = hs_detection_model.predict(encoded_sentence.values())
    print(prediction)
    return prediction
iface = gr.Interface(fn=inference, inputs="text", outputs="text") #, live=True)
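# launch() starts a local web server (by default on port 7860); on Hugging Face
# Spaces it serves the hosted demo.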
iface.launch()