Gulzd committed on
Commit
2735196
·
1 Parent(s): 0afc3cd

create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from simpletransformers.classification import ClassificationModel
4
+ from sklearn.model_selection import train_test_split
5
+ import matplotlib.pyplot as plt
6
+ from collections import Counter
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ import re
10
+ import string
11
+ import gradio as gr
12
+
13
+
14
# Make sure the NLTK stop-word corpus is available locally before reading it.
nltk.download('stopwords')

# Turkish stop words that are dropped from every comment during preprocessing.
stop_words_list = stopwords.words('turkish')

# Stray single ASCII letters ('a'..'z') that are treated as noise tokens
# and removed during preprocessing.
false_text = list(string.ascii_lowercase)
17
+
18
+
19
# Translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)
# Any single digit.
_DIGIT_RE = re.compile(r'\d')
# A character immediately followed by one or more copies of itself.
_REPEATED_CHAR_RE = re.compile(r'(.)\1+')


def preprocess_text(text, stop_words=None, noise_tokens=None):
    """Normalize a raw comment into a cleaned, space-separated string.

    Steps, in order:
      1. lower-case the text and replace newlines with spaces,
      2. delete digits and ASCII punctuation,
      3. drop stop words and noise tokens,
      4. shorten every run of a repeated character to exactly two,
      5. drop tokens that are a single character long.

    Args:
        text: The raw comment. ``None`` or an empty string yields ``""``.
        stop_words: Optional iterable of words to drop. Defaults to the
            module-level ``stop_words_list``.
        noise_tokens: Optional iterable of extra tokens to drop. Defaults
            to the module-level ``false_text``.

    Returns:
        The cleaned text with tokens joined by single spaces.
    """
    if not text:
        return ""

    # The module-level defaults are resolved lazily so that callers can
    # supply their own vocabularies instead.
    if stop_words is None:
        stop_words = stop_words_list
    if noise_tokens is None:
        noise_tokens = false_text
    # One set lookup per token instead of scanning two lists.
    excluded = set(stop_words) | set(noise_tokens)

    # NOTE: str.lower() is locale-independent, so Turkish dotted/dotless
    # 'I' is not given any special treatment here.
    text = text.lower().replace("\n", " ")
    text = _DIGIT_RE.sub("", text)
    text = text.translate(_PUNCTUATION_TABLE)

    # Stop words are filtered BEFORE repeated characters are collapsed,
    # so an elongated spelling of a stop word is kept.
    words = (word for word in text.split() if word not in excluded)
    words = (_REPEATED_CHAR_RE.sub(r'\1\1', word) for word in words)
    return " ".join(word for word in words if len(word) > 1)
42
+
43
+
44
# Directory (or model identifier) the fine-tuned BERT classifier is loaded from.
_MODEL_PATH = "bert_model"
# Lazily created classifier, shared by all predict() calls.
_model = None


def _get_model():
    """Return the shared classifier, loading it on first use."""
    global _model
    if _model is None:
        _model = ClassificationModel('bert', _MODEL_PATH, use_cuda=False)
    return _model


def predict(texts):
    """Classify each text and return the matching label names.

    The model is loaded once and reused; previously it was re-created on
    every call, which made each prediction pay the full model-load cost.

    Args:
        texts: An iterable of strings to classify. A single string is
            treated as a one-element list.

    Returns:
        A list with one label per input text, as produced by
        ``result_predict``. An empty input yields an empty list without
        loading the model.
    """
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)
    if not texts:
        return []

    predictions, _ = _get_model().predict(texts)
    return [result_predict(prediction) for prediction in predictions]
49
+
50
# Class id produced by the classifier -> human-readable label.
_LABEL_BY_CLASS_ID = {
    0: 'INSULT',
    1: 'RACIST',
    2: 'SEXIST',
    3: 'PROFANITY',
    4: 'OTHER',
}


def result_predict(num):
    """Translate a numeric class id into its label name.

    Args:
        num: The class id (a plain ``int`` or any hashable value that
            compares and hashes equal to one, such as a NumPy integer).

    Returns:
        The label string for ``num``, or ``None`` when the id is unknown.
    """
    return _LABEL_BY_CLASS_ID.get(num)
61
+
62
def gradio_comment(comment):
    """Classify the comment typed into the Gradio text box.

    Earlier this handler ignored ``comment``, classified a hard-coded
    sample sentence, and returned nothing, so the interface never showed
    a result for the user's input.

    Args:
        comment: The raw text entered by the user.

    Returns:
        The predicted label name for the comment, or an empty string when
        the comment is empty or only whitespace.
    """
    if not comment or not comment.strip():
        return ''

    # NOTE(review): the comment is cleaned with preprocess_text before
    # classification on the assumption that the model was trained on text
    # cleaned the same way -- confirm against the training pipeline.
    cleaned = preprocess_text(comment)
    label = predict([cleaned])[0]
    # The text output component expects a string; an unknown class id
    # comes back as None, so fall back to an empty string.
    return label if label is not None else ''
68
+
69
+
70
# Single text box in, predicted label out.
GradioGUI = gr.Interface(
    fn=gradio_comment,
    inputs='text',
    outputs='text',
    title='Aşağılayıcı Yorum Tespiti',
    css='''span{text-transform: uppercase} p{text-align: center}''',
)

# Start the web server only when this file is run as a script, so that
# importing the module (e.g. for testing) does not launch the UI.
if __name__ == "__main__":
    GradioGUI.launch()