zeynepgulhan's picture
Create app.py
27c69e9 verified
raw
history blame
2.25 kB
import time
import gradio as gr
import numpy as np
import torch
# Load model directly
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Run on the first CUDA GPU when torch reports one; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
def get_model():
    """Load the gibberish-detection checkpoint and its tokenizer.

    The classifier is moved onto the module-level ``device`` and the total
    wall-clock loading time is printed.

    Returns:
        A ``(tokenizer, model)`` tuple -- note the tokenizer comes first.
    """
    checkpoint = "TURKCELL/gibberish-detection-model-tr"
    began = time.time()

    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(
        checkpoint,
        do_lower_case=True,
        use_fast=True,
    )
    model.to(device)

    elapsed = time.time() - began
    print(f'bert model loading time {elapsed}')
    return tokenizer, model
# Load the tokenizer and model once at import time; process_sentence_with_bert
# reads these module-level globals on every call.
tokenizer, model = get_model()
def get_result_for_one_sample(model, tokenizer, device, sample):
    """Classify a single text as ``'gibberish'`` or ``'real'``.

    Args:
        model: Callable sequence classifier; it is invoked with the tokenizer
            output unpacked as keyword arguments and must return an object
            exposing ``.logits``.
        tokenizer: Callable that encodes a list of strings and returns an
            object with a ``.to(device)`` method.
        device: Device the encoded batch is moved to before the forward pass.
        sample: The text to classify.

    Returns:
        ``'gibberish'`` when class index 1 has the highest logit, ``'real'``
        when class index 0 does.
    """
    labels = {
        1: 'gibberish',
        0: 'real',
    }
    # Inputs longer than 256 tokens are truncated by the tokenizer.
    encoded = tokenizer(
        [sample],
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors='pt',
    ).to(device)

    # Inference only: disable autograd so no graph is built for the forward
    # pass (the original tracked gradients and detached afterwards).
    with torch.no_grad():
        logits = model(**encoded).logits
        predicted = int(torch.argmax(logits, dim=1)[0])

    return labels[predicted]
def process_sentence_with_bert(sentence):
    """Classify *sentence* with the module-level model and print the timing.

    Delegates to ``get_result_for_one_sample`` using the global ``model``,
    ``tokenizer`` and ``device``.

    Returns:
        Whatever ``get_result_for_one_sample`` returns for *sentence*.
    """
    print('processing text with bert')
    began = time.time()
    label = get_result_for_one_sample(model, tokenizer, device, sentence)
    elapsed = time.time() - began
    print(f'bert processing time {elapsed}')
    return label
def classify_gibberish(sentence, ignore_words_file):
    """Gradio callback: return the classification result for *sentence*.

    Args:
        sentence: Text entered in the UI.
        ignore_words_file: Uploaded ignore-words file. Currently unused.

    Returns:
        The result of ``process_sentence_with_bert(sentence)``.
    """
    # TODO: ignore_words_file still needs to be processed; it is accepted
    # from the Gradio file upload but has no effect on the result yet.
    return process_sentence_with_bert(sentence)
# Gradio UI: a sentence box plus an ignore-words file upload feed
# classify_gibberish, whose return value is shown in a single text box.
iface = gr.Interface(
    fn=classify_gibberish,
    title="Simple Gibberish Text Detection For Turkish",
    description="""Simple gibberish text detection given text like
adsfdnsfnıunf
sasdlsöefls.""",
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter Sentence Here..."),
        gr.File(label="Upload Ignore Words File"),
    ],
    outputs=gr.Textbox(label="Gibberish Detection Result"),
)

# Start serving the interface.
iface.launch()