File size: 2,251 Bytes
27c69e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import time
import gradio as gr
import numpy as np
import torch
# Load model directly
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
def get_model():
    """Load the Turkish gibberish-detection classifier and its tokenizer.

    Both are obtained with ``from_pretrained`` from the
    ``TURKCELL/gibberish-detection-model-tr`` checkpoint. The model is moved
    to the module-level ``device`` and the elapsed loading time is printed.

    Returns:
        tuple: ``(tokenizer, model)``, in that order.
    """
    checkpoint = "TURKCELL/gibberish-detection-model-tr"
    began = time.time()

    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tokenizer_options = {"do_lower_case": True, "use_fast": True}
    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint, **tokenizer_options)
    classifier.to(device)

    elapsed = time.time() - began
    print(f'bert model loading time {elapsed}')
    return text_tokenizer, classifier
# Load the tokenizer and model once at module level; process_sentence_with_bert reads these globals.
tokenizer, model = get_model()
def get_result_for_one_sample(model, tokenizer, device, sample):
    """Classify a single text as ``'gibberish'`` or ``'real'``.

    Args:
        model: Callable classifier. It is invoked with the tokenized batch
            unpacked as keyword arguments and must return an object that
            exposes ``logits``.
        tokenizer: Callable tokenizer. Its result must support
            ``.to(device)`` and ``**`` unpacking.
        device: Device the tokenized batch is moved to before the forward
            pass (assumed to be the device the model lives on).
        sample: The text to classify.

    Returns:
        str: ``'gibberish'`` when class 1 has the highest logit, ``'real'``
        when class 0 does.
    """
    labels = {0: 'real', 1: 'gibberish'}
    batch = tokenizer(
        [sample],
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors='pt',
    ).to(device)

    # Inference only: disable autograd so no graph is recorded per request.
    with torch.no_grad():
        logits = model(**batch).logits

    # A single sample was tokenized, so row 0 holds the only prediction.
    # int() turns the index into a plain Python key for the label lookup.
    predicted_class = int(torch.argmax(logits.detach().cpu(), dim=1)[0])
    return labels[predicted_class]
def process_sentence_with_bert(sentence):
    """Classify ``sentence`` with the module-level model and print timing.

    Delegates to ``get_result_for_one_sample`` using the module-level
    ``model``, ``tokenizer`` and ``device``. Prints a start message before
    the call and the elapsed time after it.

    Returns:
        Whatever ``get_result_for_one_sample`` returns for ``sentence``.
    """
    print('processing text with bert')
    began = time.time()
    # Original note (translated): an implementation of this function must be provided.
    label = get_result_for_one_sample(
        model=model,
        tokenizer=tokenizer,
        device=device,
        sample=sentence,
    )
    elapsed = time.time() - began
    print(f'bert processing time {elapsed}')
    return label
def classify_gibberish(sentence, ignore_words_file):
    """Return the classification of ``sentence`` from ``process_sentence_with_bert``.

    Args:
        sentence: Text to classify.
        ignore_words_file: Accepted but not used by this function yet.

    Returns:
        The value returned by ``process_sentence_with_bert(sentence)``.
    """
    # TODO: ignore_words_file still needs to be processed (the original note
    # pointed to a Gradio file-upload example for how to do this).
    return process_sentence_with_bert(sentence)
# Widgets are created first, in the positional order classify_gibberish
# receives its arguments: the sentence text, then the uploaded file.
sentence_box = gr.Textbox(lines=2, placeholder="Enter Sentence Here...")
ignore_words_upload = gr.File(label="Upload Ignore Words File")
result_box = gr.Textbox(label="Gibberish Detection Result")

iface = gr.Interface(
    fn=classify_gibberish,
    inputs=[sentence_box, ignore_words_upload],
    outputs=result_box,
    title="Simple Gibberish Text Detection For Turkish",
    description="""Simple gibberish text detection given text like
adsfdnsfnıunf
sasdlsöefls.""",
)

# Launch the interface when this module is executed.
iface.launch()
|