TRACES committed on
Commit
4bd2beb
1 Parent(s): e45a1b7

Add untrue information detector

Browse files
Files changed (1) hide show
  1. main.py +32 -7
main.py CHANGED
@@ -11,12 +11,18 @@ from sklearn.feature_extraction.text import TfidfVectorizer
11
  def load_models():
12
  st.session_state.loaded = True
13
 
14
- with open("models/tfidf_vectorizer_svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl", "rb") as f:
15
- st.session_state.tfidf_vectorizer = pickle.load(f)
 
 
 
16
 
17
  with open('models/svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
18
  st.session_state.gpt_detector = pickle.load(f)
19
 
 
 
 
20
  st.session_state.bert = pipeline(task="text-classification",
21
  model=BertForSequenceClassification.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN'], num_labels=2),
22
  tokenizer=AutoTokenizer.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN']))
@@ -38,9 +44,15 @@ def switch_lang(lang):
38
  if 'lang' not in st.session_state:
39
  st.session_state.lang = 'bg'
40
 
41
- if 'gpt_detector_result' not in st.session_state and 'bert_result' not in st.session_state:
 
 
42
  st.session_state.gpt_detector_result = ''
43
  st.session_state.gpt_detector_probability = [1, 0]
 
 
 
 
44
  st.session_state.bert_result = [{'label': '', 'score': 1}]
45
 
46
  content = load_content()
@@ -78,10 +90,14 @@ if st.session_state.agree:
78
  content['text_placeholder'][st.session_state.lang]).strip('\n')
79
 
80
  if st.button(content['analyze_button'][st.session_state.lang]):
81
- user_tfidf = st.session_state.tfidf_vectorizer.transform([user_input])
82
-
83
- st.session_state.gpt_detector_result = st.session_state.gpt_detector.predict(user_tfidf)[0]
84
- st.session_state.gpt_detector_probability = st.session_state.gpt_detector.predict_proba(user_tfidf)[0]
 
 
 
 
85
  st.session_state.bert_result = st.session_state.bert(user_input)
86
 
87
  if st.session_state.gpt_detector_result == 1:
@@ -92,6 +108,15 @@ if st.session_state.agree:
92
  st.success(content['gpt_getect_no'][st.session_state.lang] +
93
  str(round(st.session_state.gpt_detector_probability[0] * 100, 2)) +
94
  content['gpt_no_proba'][st.session_state.lang], icon="✅")
 
 
 
 
 
 
 
 
 
95
 
96
  if st.session_state.bert_result[0]['label'] == 'LABEL_1':
97
  st.warning(content['bert_yes_1'][st.session_state.lang] +
 
11
  def load_models():
12
  st.session_state.loaded = True
13
 
14
+ with open('models/tfidf_vectorizer_svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
15
+ st.session_state.tfidf_vectorizer_disinformation = pickle.load(f)
16
+
17
+ with open('models/tfidf_vectorizer_untrue_inform_detection_tfidf_bg_0.96_F1_score', 'rb') as f:
18
+ st.session_state.tfidf_vectorizer_untrue_inf = pickle.load(f)
19
 
20
  with open('models/svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
21
  st.session_state.gpt_detector = pickle.load(f)
22
 
23
+ with open('models/SVM_model_untrue_inform_detection_tfidf_bg_0.96_F1_score.pkl', 'rb') as f:
24
+ st.session_state.untrue_detector = pickle.load(f)
25
+
26
  st.session_state.bert = pipeline(task="text-classification",
27
  model=BertForSequenceClassification.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN'], num_labels=2),
28
  tokenizer=AutoTokenizer.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN']))
 
44
  if 'lang' not in st.session_state:
45
  st.session_state.lang = 'bg'
46
 
47
+ if 'gpt_detector_result' not in st.session_state \
48
+ and 'untrue_detector_result' not in st.session_state \
49
+ and 'bert_result' not in st.session_state:
50
  st.session_state.gpt_detector_result = ''
51
  st.session_state.gpt_detector_probability = [1, 0]
52
+
53
+ st.session_state.untrue_detector_result = ''
54
+ st.session_state.untrue_detector_probability = [1, 0]
55
+
56
  st.session_state.bert_result = [{'label': '', 'score': 1}]
57
 
58
  content = load_content()
 
90
  content['text_placeholder'][st.session_state.lang]).strip('\n')
91
 
92
  if st.button(content['analyze_button'][st.session_state.lang]):
93
+ user_tfidf_disinformation = st.session_state.tfidf_vectorizer_disinformation.transform([user_input])
94
+ st.session_state.gpt_detector_result = st.session_state.gpt_detector.predict(user_tfidf_disinformation)[0]
95
+ st.session_state.gpt_detector_probability = st.session_state.gpt_detector.predict_proba(user_tfidf_disinformation)[0]
96
+
97
+ user_tfidf_untrue_inf = st.session_state.tfidf_vectorizer_untrue_inf.transform([user_input])
98
+ st.session_state.untrue_detector_result = st.session_state.untrue_detector.predict(user_tfidf_untrue_inf)[0]
99
+ st.session_state.untrue_detector_probability = st.session_state.untrue_detector.predict_proba(user_tfidf_untrue_inf)[0]
100
+
101
  st.session_state.bert_result = st.session_state.bert(user_input)
102
 
103
  if st.session_state.gpt_detector_result == 1:
 
108
  st.success(content['gpt_getect_no'][st.session_state.lang] +
109
  str(round(st.session_state.gpt_detector_probability[0] * 100, 2)) +
110
  content['gpt_no_proba'][st.session_state.lang], icon="✅")
111
+
112
+ if st.session_state.untrue_detector_result == 1:
113
+ st.warning(content['untrue_getect_yes'][st.session_state.lang] +
114
+ str(round(st.session_state.untrue_detector_probability[1] * 100, 2)) +
115
+ content['untrue_yes_proba'][st.session_state.lang], icon="⚠️")
116
+ else:
117
+ st.success(content['untrue_getect_no'][st.session_state.lang] +
118
+ str(round(st.session_state.untrue_detector_probability[0] * 100, 2)) +
119
+ content['untrue_no_proba'][st.session_state.lang], icon="✅")
120
 
121
  if st.session_state.bert_result[0]['label'] == 'LABEL_1':
122
  st.warning(content['bert_yes_1'][st.session_state.lang] +