Upload 2 files
- pages/1_🗒️_Tahmin.py +24 -20
- pages/2_⏱️_Canlı İzle.py +18 -13
pages/1_🗒️_Tahmin.py
CHANGED
@@ -1,24 +1,24 @@
 import json
+
+import numpy as np
 import requests
 import streamlit as st
-from
+from simpletransformers.classification import MultiLabelClassificationModel
 import clean
+from simpletransformers.config.model_args import ClassificationArgs
 
 st.set_page_config(page_title='Teknofest 2023 Türkçe Doğal Dil İşleme Yarışması ', page_icon=':bar_chart:', layout='wide')
 st.title('🗒️ Aşağılayıcı Söylem Tespit Uygulaması')
 
-
-tokenizer=AutoTokenizer.from_pretrained("BRAIN-TR/insult-bert-uncased")
+target_names = ['is_offensive', 'insult', 'other', 'profanity', 'racist', 'sexist']
 
-
+model_args = ClassificationArgs(
+    use_multiprocessing=True,
+    use_multiprocessing_for_evaluation=True,
+    no_cache= False
+)
 
-label_dict = {
-    'LABEL_0': 'insult',
-    'LABEL_1': 'other',
-    'LABEL_2': 'profanity',
-    'LABEL_3': 'racist',
-    'LABEL_4': 'sexist'
-}
+bert_model = MultiLabelClassificationModel('bert','BRAIN-TR/acikhack_multilabel_insult_uncased', use_cuda=False, args=model_args)
 
 with open('style.css')as f:
     st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html = True)
@@ -28,8 +28,14 @@ st.subheader('Söylem Tespiti')
 def text_input_screen():
     def predict(text):
         cleaned_text = clean.CLEANING(text, True, False, True).clean()
-
-
+        output_arr = bert_model.predict([cleaned_text])
+
+        def decode_one_hot(input, target_names):
+            return input[0][0], target_names[np.argmax(input[0][1:]) + 1], np.max(output_arr[1][0][1:])
+
+        io, category, score = decode_one_hot(output_arr[0], target_names)
+
+        return io, category, score
 
     def callback():
         st.session_state.predict_input_text
@@ -56,20 +62,18 @@ def text_input_screen():
 
         feedback_file.write(f"{st.session_state.predict_input_text}|{text_data}\n{file_data}")
 
-    def load_feedback(
+    def load_feedback(p_value, p_score):
         feedback_data = []
         feedback_data.append("Bildirim Gönder")
-        p_value = label_dict[predict_value[0]['label']]
-        p_score = predict_value[0]['score']
 
         for item in ["profanity", "racist", "sexist", "insult", "other"]:
             feedback_data.append(f"{item} ")
         return feedback_data
 
-
+    is_offansive, categories, score = predict(text_input)
 
-    p_value =
-    p_score =
+    p_value = categories
+    p_score = score
 
     class_desc = {"profanity": "İçerikte küfür ifadesi bulunmatadır. [profanity]",
                   "racist": "İçerik ırkı ifadeler içermektedir. [racist]",
@@ -83,7 +87,7 @@ def text_input_screen():
     else:
         st.info(f"Bu metin için bir tahminde buladım. Size daha iyi hizmet vermek için bana metnin sınıfını söyler misiniz?")
 
-    feedback = st.selectbox("Geri Bildirim Gönder", load_feedback(
+    feedback = st.selectbox("Geri Bildirim Gönder", load_feedback(categories, score), key="colour",label_visibility="collapsed", on_change=save_feedback)
 
     if not feedback in ["Bildirim Gönder"]:
        st.success(f"Geri bildiriminiz alındı, teşekkür ederiz.")
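For reference, the decoding step that both pages now use can be sketched on its own. This is a minimal illustration, not code from the commit: it assumes the simpletransformers convention that MultiLabelClassificationModel.predict returns a (predictions, raw_outputs) pair, with thresholded 0/1 flags per label in predictions and raw per-label scores in raw_outputs, and it passes both values explicitly instead of closing over output_arr as the diff does. The example values are invented.

import numpy as np

# Label order used by both pages; index 0 is the binary is_offensive flag,
# the remaining slots are the categories that compete with each other.
target_names = ['is_offensive', 'insult', 'other', 'profanity', 'racist', 'sexist']

def decode_one_hot(predictions, raw_outputs, target_names):
    """Map one prediction row back to (is_offensive, category, score)."""
    flags = predictions[0]    # thresholded 0/1 flags for the first input text
    scores = raw_outputs[0]   # raw per-label scores for the first input text
    is_offensive = flags[0]
    category = target_names[np.argmax(flags[1:]) + 1]  # skip the is_offensive slot
    score = float(np.max(scores[1:]))
    return is_offensive, category, score

# Invented example of what a single-text predict() call might return:
predictions = [[1, 1, 0, 0, 0, 0]]
raw_outputs = [[0.97, 0.91, 0.03, 0.12, 0.02, 0.04]]
print(decode_one_hot(predictions, raw_outputs, target_names))  # (1, 'insult', 0.91)

Note that the category comes from the thresholded flags while the score is the largest raw category output, so when several category labels fire the reported score may belong to a different label than the reported category; the sketch keeps that behaviour to stay faithful to the diff.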
pages/2_⏱️_Canlı İzle.py
CHANGED
@@ -2,30 +2,30 @@ import os.path
 import json
 import time
 
+import numpy as np
 import requests
 import streamlit as st
 import pandas as pd
 import tweepy
 from matplotlib import pyplot as plt
-from
+from simpletransformers.classification import MultiLabelClassificationModel
 import clean
+from simpletransformers.config.model_args import ClassificationArgs
 
 
 st.set_page_config(page_title='Fees - Cross Chain Monitoring', page_icon=':bar_chart:', layout='wide')
 st.title('⏱️ Aşağılayıcı Tweet Tespiti')
 
-
-tokenizer=AutoTokenizer.from_pretrained("BRAIN-TR/insult-bert-uncased")
+target_names = ['is_offensive', 'insult', 'other', 'profanity', 'racist', 'sexist']
 
-
+model_args = ClassificationArgs(
+    use_multiprocessing=True,
+    use_multiprocessing_for_evaluation=True,
+    no_cache= False
+)
+
+bert_model = MultiLabelClassificationModel('bert','BRAIN-TR/acikhack_multilabel_insult_uncased', use_cuda=False, args=model_args)
 
-label_dict = {
-    'LABEL_0': 'insult',
-    'LABEL_1': 'other',
-    'LABEL_2': 'profanity',
-    'LABEL_3': 'racist',
-    'LABEL_4': 'sexist'
-}
 
 with open('style.css')as f:
     st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html = True)
@@ -45,8 +45,13 @@ def tweets_df(results):
 
 def predict(text):
     cleaned_text = clean.CLEANING(text, True, False,True).clean()
-
-
+    output_arr = bert_model.predict([cleaned_text])
+    def decode_one_hot(input, target_names):
+        return input[0][0], target_names[np.argmax(input[0][1:]) + 1], np.max(output_arr[1][0][1:])
+
+    io, category, score = decode_one_hot(output_arr[0], target_names)
+
+    predict_label = category
     return predict_label
 def has_it_tweet(df, column, tweet_id):
     source_data = df[(df[column] == tweet_id)]
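Both pages construct the same MultiLabelClassificationModel at import time, so every page run loads the weights again. A possible variation, not part of this commit, is to wrap the loading in Streamlit's st.cache_resource so a single instance is reused across reruns and pages; load_insult_model below is a hypothetical helper name.

import streamlit as st
from simpletransformers.classification import MultiLabelClassificationModel
from simpletransformers.config.model_args import ClassificationArgs

@st.cache_resource  # load the model once per server process and reuse it
def load_insult_model():
    model_args = ClassificationArgs(
        use_multiprocessing=True,
        use_multiprocessing_for_evaluation=True,
        no_cache=False,
    )
    return MultiLabelClassificationModel(
        'bert',
        'BRAIN-TR/acikhack_multilabel_insult_uncased',
        use_cuda=False,
        args=model_args,
    )

bert_model = load_insult_model()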