txtClassifier / app.py
kmkarakaya's picture
Update app.py
ee19423
raw
history blame contribute delete
No virus
1.92 kB
import numpy as np
import tensorflow as tf
import pickle
import string
import pandas as pd
import gradio as gr
# Stop-word table read without a header row; custom_standardization() uses the
# first column of every row as a word to strip from the text.
tr_stop_words = pd.read_csv("tr_stop_word.txt", header=None)
@tf.keras.utils.register_keras_serializable()
def custom_standardization(input_string):
    """Normalize raw text with a fixed chain of ``tf.strings`` operations.

    Steps, in order: lowercase (UTF-8 aware), replace ``*`` with a space,
    drop the literal phrase "devamını oku", drop ``<br />`` tags, drop every
    word that contains a digit, replace punctuation with a space, remove the
    stop words listed in the module-level ``tr_stop_words`` table, collapse
    runs of spaces, and map the Turkish letters ı/ö/ç/ş/ğ/ü to i/o/c/s/g/u.

    Args:
        input_string: String input accepted by ``tf.strings`` ops.

    Returns:
        The result of the last ``tf.strings.regex_replace`` call.
    """
    # NOTE(review): presumably the saved model was built with exactly this
    # preprocessing -- confirm before changing any pattern or the step order.
    text = tf.strings.lower(input_string, encoding='utf-8')
    # Raw strings keep the regex backslashes without relying on Python's
    # deprecated handling of unknown escape sequences ("\*", "\w", "\d").
    text = tf.strings.regex_replace(text, r"\*", " ")
    text = tf.strings.regex_replace(text, "devamını oku", "")
    text = tf.strings.regex_replace(text, "<br />", "")
    text = tf.strings.regex_replace(text, r"\w*\d\w*", "")
    # string.punctuation is interpolated unescaped into the character class.
    # NOTE(review): its backslash appears to escape the following "]", so a
    # literal backslash is probably not matched -- verify if that matters.
    text = tf.strings.regex_replace(text, f"([{string.punctuation}])", " ")

    # Remove stop words. Pad both ends so that a stop word at the very start
    # or end of the text is also surrounded by spaces and can match.
    text = ' ' + text + ' '
    for row in tr_stop_words.values:
        stop_word = row[0]
        # NOTE(review): the word is used as a regex without escaping, and two
        # adjacent occurrences share one space so only the first is removed in
        # a single pass -- left as is to keep the output unchanged.
        text = tf.strings.regex_replace(text, ' ' + stop_word + ' ', " ")
    text = tf.strings.regex_replace(text, " +", " ")

    # Replace Turkish-specific letters with their ASCII look-alikes.
    turkish_to_ascii = (
        ("ı", "i"),
        ("ö", "o"),
        ("ç", "c"),
        ("ş", "s"),
        ("ğ", "g"),
        ("ü", "u"),
    )
    for turkish_char, ascii_char in turkish_to_ascii:
        text = tf.strings.regex_replace(text, turkish_char, ascii_char)
    return text
# Restore the saved Keras model; classify() passes the user's text straight to
# its predict() method.
loaded_end_to_end_model = tf.keras.models.load_model("end_to_end_model")
# Lookup that classify() indexes with the arg-max of the model output.
# The context manager closes the file handle once the mapping is loaded.
# NOTE: pickle.load can execute arbitrary code, so id_to_category.pkl must
# come from a trusted source.
with open("id_to_category.pkl", "rb") as pkl_file:
    id_to_category = pickle.load(pkl_file)
def classify(text):
    """Return the category predicted for a single piece of text.

    The text is wrapped in a one-element list and passed to
    ``loaded_end_to_end_model.predict``; the position of the largest value in
    the returned array is then looked up in ``id_to_category``.

    Args:
        text: The input text to classify.

    Returns:
        The ``id_to_category`` entry for the highest-scoring position.
    """
    scores = loaded_end_to_end_model.predict([text])
    best_index = np.argmax(scores)
    return id_to_category[best_index]
# Sample inputs passed to the interface as its `examples`.
examples = [
    'Dün aldığım samsung telefon bugün şarj tutmuyor',
    'THY Uçak biletimi değiştirmek için başvurdum. Kimse geri dönüş yapmadı!',
]

# Text in, text out: the value returned by classify() is shown as the output.
iface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs="text",
    examples=examples,
)
iface.launch()