txtClassifier / app.py
kmkarakaya's picture
Update app.py
ee19423
import numpy as np
import tensorflow as tf
import pickle
import string
import pandas as pd
import gradio as gr
# Stop-word table, read once at import time. header=None makes pandas treat
# the first line of the file as data instead of column names.
tr_stop_words = pd.read_csv(
    filepath_or_buffer="tr_stop_word.txt",
    header=None,
)
@tf.keras.utils.register_keras_serializable()
def custom_standardization(input_string):
    """Normalize raw text with TensorFlow string ops.

    The steps run in this order:

    1. lowercase the text (UTF-8 aware);
    2. replace ``*`` with a space;
    3. delete the literal phrase "devamını oku";
    4. delete ``<br />`` tags;
    5. delete runs of word characters that contain a digit;
    6. replace ASCII punctuation with a space;
    7. delete every stop word found in the first column of ``tr_stop_words``;
    8. collapse runs of spaces into a single space;
    9. map the Turkish letters ı, ö, ç, ş, ğ, ü to i, o, c, s, g, u.

    Args:
        input_string: The string tensor (or tensor-convertible value) to clean.

    Returns:
        The cleaned string tensor.
    """
    text = tf.strings.lower(input_string, encoding='utf-8')
    # Regex patterns are raw strings so Python does not try to interpret the
    # backslashes itself (non-raw "\*" / "\w" trigger invalid-escape warnings).
    text = tf.strings.regex_replace(text, r"\*", " ")
    text = tf.strings.regex_replace(text, "devamını oku", "")
    text = tf.strings.regex_replace(text, "<br />", "")
    text = tf.strings.regex_replace(text, r"\w*\d\w*", "")
    # NOTE(review): string.punctuation is interpolated unescaped, so its `\]`
    # pair presumably reads as an escaped `]` and a literal backslash is not in
    # the class. Left as is so the preprocessing stays exactly what it was.
    text = tf.strings.regex_replace(text, f"([{string.punctuation}])", " ")

    # Remove stop words. The text is padded with spaces so that a stop word at
    # the very start or end is still surrounded by spaces and can match.
    # This happens before the Turkish letters are folded to ASCII, so the stop
    # words are matched in their original spelling.
    # NOTE(review): each stop word is used as a regex pattern without escaping.
    text = ' ' + text + ' '
    for row in tr_stop_words.values:
        text = tf.strings.regex_replace(text, ' ' + row[0] + ' ', " ")
    text = tf.strings.regex_replace(text, " +", " ")

    # Fold Turkish-specific letters to their closest ASCII letters.
    turkish_to_ascii = (
        ("ı", "i"),
        ("ö", "o"),
        ("ç", "c"),
        ("ş", "s"),
        ("ğ", "g"),
        ("ü", "u"),
    )
    for turkish_char, ascii_char in turkish_to_ascii:
        text = tf.strings.regex_replace(text, turkish_char, ascii_char)
    return text
# Restore the saved Keras model stored under "end_to_end_model".
loaded_end_to_end_model = tf.keras.models.load_model("end_to_end_model")

# Load the lookup that classify() indexes with the predicted class position.
# The context manager closes the file handle as soon as unpickling finishes
# (it was previously left open for the lifetime of the process).
# NOTE: unpickling can execute arbitrary code; only use a trusted .pkl file.
with open("id_to_category.pkl", "rb") as pkl_file:
    id_to_category = pickle.load(pkl_file)
def classify(text):
    """Return the category looked up for ``text``.

    The text is passed to the loaded model as a one-element list, the position
    of the largest value in the (flattened) prediction array is taken, and that
    position is used as the key into ``id_to_category``.
    """
    scores = loaded_end_to_end_model.predict([text])
    best_index = np.argmax(scores)
    return id_to_category[best_index]
# Sample inputs offered in the Gradio UI.
examples = [
    'Dün aldığım samsung telefon bugün şarj tutmuyor',
    'THY Uçak biletimi değiştirmek için başvurdum. Kimse geri dönüş yapmadı!',
]

# Text-in / text-out interface around classify(); launch() starts the app.
iface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs="text",
    examples=examples,
)
iface.launch()