BRAIN-TR committed on
Commit
14d898d
1 Parent(s): 9edd97c

Upload 2 files

Browse files
pages/1_🗒️_Tahmin.py CHANGED
@@ -1,24 +1,24 @@
1
  import json
 
 
2
  import requests
3
  import streamlit as st
4
- from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
5
  import clean
 
6
 
7
  st.set_page_config(page_title='Teknofest 2023 Türkçe Doğal Dil İşleme Yarışması ', page_icon=':bar_chart:', layout='wide')
8
  st.title('🗒️ Aşağılayıcı Söylem Tespit Uygulaması')
9
 
10
- model=AutoModelForSequenceClassification.from_pretrained("BRAIN-TR/insult-bert-uncased")
11
- tokenizer=AutoTokenizer.from_pretrained("BRAIN-TR/insult-bert-uncased")
12
 
13
- nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
 
 
 
 
14
 
15
- label_dict = {
16
- 'LABEL_0': 'insult',
17
- 'LABEL_1': 'other',
18
- 'LABEL_2': 'profanity',
19
- 'LABEL_3': 'racist',
20
- 'LABEL_4': 'sexist'
21
- }
22
 
23
  with open('style.css')as f:
24
  st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html = True)
@@ -28,8 +28,14 @@ st.subheader('Söylem Tespiti')
28
  def text_input_screen():
29
  def predict(text):
30
  cleaned_text = clean.CLEANING(text, True, False, True).clean()
31
- out = nlp(cleaned_text)
32
- return out
 
 
 
 
 
 
33
 
34
  def callback():
35
  st.session_state.predict_input_text
@@ -56,20 +62,18 @@ def text_input_screen():
56
 
57
  feedback_file.write(f"{st.session_state.predict_input_text}|{text_data}\n{file_data}")
58
 
59
- def load_feedback(value):
60
  feedback_data = []
61
  feedback_data.append("Bildirim Gönder")
62
- p_value = label_dict[predict_value[0]['label']]
63
- p_score = predict_value[0]['score']
64
 
65
  for item in ["profanity", "racist", "sexist", "insult", "other"]:
66
  feedback_data.append(f"{item} ")
67
  return feedback_data
68
 
69
- predict_value = predict(text_input)
70
 
71
- p_value = label_dict[predict_value[0]['label']]
72
- p_score = predict_value[0]['score']
73
 
74
  class_desc = {"profanity": "İçerikte küfür ifadesi bulunmatadır. [profanity]",
75
  "racist": "İçerik ırkı ifadeler içermektedir. [racist]",
@@ -83,7 +87,7 @@ def text_input_screen():
83
  else:
84
  st.info(f"Bu metin için bir tahminde buladım. Size daha iyi hizmet vermek için bana metnin sınıfını söyler misiniz?")
85
 
86
- feedback = st.selectbox("Geri Bildirim Gönder", load_feedback(predict_value), key="colour",label_visibility="collapsed", on_change=save_feedback)
87
 
88
  if not feedback in ["Bildirim Gönder"]:
89
  st.success(f"Geri bildiriminiz alındı, teşekkür ederiz.")
 
1
  import json
2
+
3
+ import numpy as np
4
  import requests
5
  import streamlit as st
6
+ from simpletransformers.classification import MultiLabelClassificationModel
7
  import clean
8
+ from simpletransformers.config.model_args import ClassificationArgs
9
 
10
  st.set_page_config(page_title='Teknofest 2023 Türkçe Doğal Dil İşleme Yarışması ', page_icon=':bar_chart:', layout='wide')
11
  st.title('🗒️ Aşağılayıcı Söylem Tespit Uygulaması')
12
 
13
+ target_names = ['is_offensive', 'insult', 'other', 'profanity', 'racist', 'sexist']
 
14
 
15
+ model_args = ClassificationArgs(
16
+ use_multiprocessing=True,
17
+ use_multiprocessing_for_evaluation=True,
18
+ no_cache= False
19
+ )
20
 
21
+ bert_model = MultiLabelClassificationModel('bert','BRAIN-TR/acikhack_multilabel_insult_uncased', use_cuda=False, args=model_args)
 
 
 
 
 
 
22
 
23
  with open('style.css')as f:
24
  st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html = True)
 
28
  def text_input_screen():
29
  def predict(text):
30
  cleaned_text = clean.CLEANING(text, True, False, True).clean()
31
+ output_arr = bert_model.predict([cleaned_text])
32
+
33
+ def decode_one_hot(input, target_names):
34
+ return input[0][0], target_names[np.argmax(input[0][1:]) + 1], np.max(output_arr[1][0][1:])
35
+
36
+ io, category, score = decode_one_hot(output_arr[0], target_names)
37
+
38
+ return io, category, score
39
 
40
  def callback():
41
  st.session_state.predict_input_text
 
62
 
63
  feedback_file.write(f"{st.session_state.predict_input_text}|{text_data}\n{file_data}")
64
 
65
+ def load_feedback(p_value, p_score):
66
  feedback_data = []
67
  feedback_data.append("Bildirim Gönder")
 
 
68
 
69
  for item in ["profanity", "racist", "sexist", "insult", "other"]:
70
  feedback_data.append(f"{item} ")
71
  return feedback_data
72
 
73
+ is_offansive, categories, score = predict(text_input)
74
 
75
+ p_value = categories
76
+ p_score = score
77
 
78
  class_desc = {"profanity": "İçerikte küfür ifadesi bulunmatadır. [profanity]",
79
  "racist": "İçerik ırkı ifadeler içermektedir. [racist]",
 
87
  else:
88
  st.info(f"Bu metin için bir tahminde buladım. Size daha iyi hizmet vermek için bana metnin sınıfını söyler misiniz?")
89
 
90
+ feedback = st.selectbox("Geri Bildirim Gönder", load_feedback(categories, score), key="colour",label_visibility="collapsed", on_change=save_feedback)
91
 
92
  if not feedback in ["Bildirim Gönder"]:
93
  st.success(f"Geri bildiriminiz alındı, teşekkür ederiz.")
pages/2_⏱️_Canlı İzle.py CHANGED
@@ -2,30 +2,30 @@ import os.path
2
  import json
3
  import time
4
 
 
5
  import requests
6
  import streamlit as st
7
  import pandas as pd
8
  import tweepy
9
  from matplotlib import pyplot as plt
10
- from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
11
  import clean
 
12
 
13
 
14
  st.set_page_config(page_title='Fees - Cross Chain Monitoring', page_icon=':bar_chart:', layout='wide')
15
  st.title('⏱️ Aşağılayıcı Tweet Tespiti')
16
 
17
- model=AutoModelForSequenceClassification.from_pretrained("BRAIN-TR/insult-bert-uncased")
18
- tokenizer=AutoTokenizer.from_pretrained("BRAIN-TR/insult-bert-uncased")
19
 
20
- nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
 
 
 
 
 
 
21
 
22
- label_dict = {
23
- 'LABEL_0': 'insult',
24
- 'LABEL_1': 'other',
25
- 'LABEL_2': 'profanity',
26
- 'LABEL_3': 'racist',
27
- 'LABEL_4': 'sexist'
28
- }
29
 
30
  with open('style.css')as f:
31
  st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html = True)
@@ -45,8 +45,13 @@ def tweets_df(results):
45
 
46
  def predict(text):
47
  cleaned_text = clean.CLEANING(text, True, False,True).clean()
48
- out = nlp(cleaned_text)
49
- predict_label = label_dict[out[0]['label']]
 
 
 
 
 
50
  return predict_label
51
  def has_it_tweet(df, column, tweet_id):
52
  source_data = df[(df[column] == tweet_id)]
 
2
  import json
3
  import time
4
 
5
+ import numpy as np
6
  import requests
7
  import streamlit as st
8
  import pandas as pd
9
  import tweepy
10
  from matplotlib import pyplot as plt
11
+ from simpletransformers.classification import MultiLabelClassificationModel
12
  import clean
13
+ from simpletransformers.config.model_args import ClassificationArgs
14
 
15
 
16
  st.set_page_config(page_title='Fees - Cross Chain Monitoring', page_icon=':bar_chart:', layout='wide')
17
  st.title('⏱️ Aşağılayıcı Tweet Tespiti')
18
 
19
+ target_names = ['is_offensive', 'insult', 'other', 'profanity', 'racist', 'sexist']
 
20
 
21
+ model_args = ClassificationArgs(
22
+ use_multiprocessing=True,
23
+ use_multiprocessing_for_evaluation=True,
24
+ no_cache= False
25
+ )
26
+
27
+ bert_model = MultiLabelClassificationModel('bert','BRAIN-TR/acikhack_multilabel_insult_uncased', use_cuda=False, args=model_args)
28
 
 
 
 
 
 
 
 
29
 
30
  with open('style.css')as f:
31
  st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html = True)
 
45
 
46
  def predict(text):
47
  cleaned_text = clean.CLEANING(text, True, False,True).clean()
48
+ output_arr = bert_model.predict([cleaned_text])
49
+ def decode_one_hot(input, target_names):
50
+ return input[0][0], target_names[np.argmax(input[0][1:]) + 1], np.max(output_arr[1][0][1:])
51
+
52
+ io, category, score = decode_one_hot(output_arr[0], target_names)
53
+
54
+ predict_label = category
55
  return predict_label
56
  def has_it_tweet(df, column, tweet_id):
57
  source_data = df[(df[column] == tweet_id)]