Add untrue information detector
main.py
CHANGED
@@ -11,12 +11,18 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 def load_models():
     st.session_state.loaded = True
 
-    with open(
-    st.session_state.
+    with open('models/tfidf_vectorizer_svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
+        st.session_state.tfidf_vectorizer_disinformation = pickle.load(f)
+
+    with open('models/tfidf_vectorizer_untrue_inform_detection_tfidf_bg_0.96_F1_score', 'rb') as f:
+        st.session_state.tfidf_vectorizer_untrue_inf = pickle.load(f)
 
     with open('models/svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
         st.session_state.gpt_detector = pickle.load(f)
 
+    with open('models/SVM_model_untrue_inform_detection_tfidf_bg_0.96_F1_score.pkl', 'rb') as f:
+        st.session_state.untrue_detector = pickle.load(f)
+
     st.session_state.bert = pipeline(task="text-classification",
                                      model=BertForSequenceClassification.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN'], num_labels=2),
                                      tokenizer=AutoTokenizer.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN']))
@@ -38,9 +44,15 @@ def switch_lang(lang):
 if 'lang' not in st.session_state:
     st.session_state.lang = 'bg'
 
-if 'gpt_detector_result' not in st.session_state:
+if 'gpt_detector_result' not in st.session_state \
+        and 'untrue_detector_result' not in st.session_state \
+        and 'bert_result' not in st.session_state:
     st.session_state.gpt_detector_result = ''
     st.session_state.gpt_detector_probability = [1, 0]
+
+    st.session_state.untrue_detector_result = ''
+    st.session_state.untrue_detector_probability = [1, 0]
+
     st.session_state.bert_result = [{'label': '', 'score': 1}]
 
 content = load_content()
@@ -78,10 +90,14 @@ if st.session_state.agree:
                               content['text_placeholder'][st.session_state.lang]).strip('\n')
 
     if st.button(content['analyze_button'][st.session_state.lang]):
-
-
-        st.session_state.
-
+        user_tfidf_disinformation = st.session_state.tfidf_vectorizer_disinformation.transform([user_input])
+        st.session_state.gpt_detector_result = st.session_state.gpt_detector.predict(user_tfidf_disinformation)[0]
+        st.session_state.gpt_detector_probability = st.session_state.gpt_detector.predict_proba(user_tfidf_disinformation)[0]
+
+        user_tfidf_untrue_inf = st.session_state.tfidf_vectorizer_untrue_inf.transform([user_input])
+        st.session_state.untrue_detector_result = st.session_state.untrue_detector.predict(user_tfidf_untrue_inf)[0]
+        st.session_state.untrue_detector_probability = st.session_state.untrue_detector.predict_proba(user_tfidf_untrue_inf)[0]
+
         st.session_state.bert_result = st.session_state.bert(user_input)
 
     if st.session_state.gpt_detector_result == 1:
@@ -92,6 +108,15 @@ if st.session_state.agree:
         st.success(content['gpt_getect_no'][st.session_state.lang] +
                    str(round(st.session_state.gpt_detector_probability[0] * 100, 2)) +
                    content['gpt_no_proba'][st.session_state.lang], icon="✅")
+
+    if st.session_state.untrue_detector_result == 1:
+        st.warning(content['untrue_getect_yes'][st.session_state.lang] +
+                   str(round(st.session_state.untrue_detector_probability[1] * 100, 2)) +
+                   content['untrue_yes_proba'][st.session_state.lang], icon="⚠️")
+    else:
+        st.success(content['untrue_getect_no'][st.session_state.lang] +
+                   str(round(st.session_state.untrue_detector_probability[0] * 100, 2)) +
+                   content['untrue_no_proba'][st.session_state.lang], icon="✅")
 
     if st.session_state.bert_result[0]['label'] == 'LABEL_1':
         st.warning(content['bert_yes_1'][st.session_state.lang] +
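For readers following along, the new detector is wired the same way as the existing GPT detector: the raw input is vectorized with the pickled TF-IDF vectorizer and then passed to the pickled SVM via predict / predict_proba. Below is a minimal standalone sketch of that path outside Streamlit, assuming the two artifacts named in this diff are present under models/ and that the SVM was fitted with probability estimates enabled; the sample text and variable names are illustrative only, not part of the commit.

```python
# Minimal sketch of the untrue-information detector path added in this commit,
# run outside Streamlit. Assumes the two pickled artifacts referenced in the
# diff exist locally and were saved with a compatible scikit-learn version.
import pickle

with open('models/tfidf_vectorizer_untrue_inform_detection_tfidf_bg_0.96_F1_score', 'rb') as f:
    tfidf_vectorizer = pickle.load(f)          # fitted TfidfVectorizer

with open('models/SVM_model_untrue_inform_detection_tfidf_bg_0.96_F1_score.pkl', 'rb') as f:
    untrue_detector = pickle.load(f)           # SVM assumed trained with probability=True

text = "Примерен текст за проверка."           # illustrative Bulgarian input
features = tfidf_vectorizer.transform([text])  # sparse TF-IDF row, same vocabulary as training

label = untrue_detector.predict(features)[0]                # 1 = untrue information (app's convention)
probabilities = untrue_detector.predict_proba(features)[0]  # column order follows classes_

if label == 1:
    print(f"Untrue information: {probabilities[1] * 100:.2f}%")
else:
    print(f"No untrue information detected: {probabilities[0] * 100:.2f}%")
```

Note that predict_proba columns follow untrue_detector.classes_, so indexing [1] for the "untrue" probability and [0] otherwise matches the app's code only when the classes are [0, 1].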