Add untrue information detector
main.py
CHANGED
@@ -11,12 +11,18 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 def load_models():
     st.session_state.loaded = True
 
-    with open(
-    st.session_state.
+    with open('models/tfidf_vectorizer_svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
+        st.session_state.tfidf_vectorizer_disinformation = pickle.load(f)
+
+    with open('models/tfidf_vectorizer_untrue_inform_detection_tfidf_bg_0.96_F1_score', 'rb') as f:
+        st.session_state.tfidf_vectorizer_untrue_inf = pickle.load(f)
 
     with open('models/svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
         st.session_state.gpt_detector = pickle.load(f)
 
+    with open('models/SVM_model_untrue_inform_detection_tfidf_bg_0.96_F1_score.pkl', 'rb') as f:
+        st.session_state.untrue_detector = pickle.load(f)
+
     st.session_state.bert = pipeline(task="text-classification",
                                      model=BertForSequenceClassification.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN'], num_labels=2),
                                      tokenizer=AutoTokenizer.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN']))
@@ -38,9 +44,15 @@ def switch_lang(lang):
 if 'lang' not in st.session_state:
     st.session_state.lang = 'bg'
 
-if 'gpt_detector_result' not in st.session_state:
+if 'gpt_detector_result' not in st.session_state \
+        and 'untrue_detector_result' not in st.session_state \
+        and 'bert_result' not in st.session_state:
     st.session_state.gpt_detector_result = ''
     st.session_state.gpt_detector_probability = [1, 0]
+
+    st.session_state.untrue_detector_result = ''
+    st.session_state.untrue_detector_probability = [1, 0]
+
     st.session_state.bert_result = [{'label': '', 'score': 1}]
 
 content = load_content()
@@ -78,10 +90,14 @@ if st.session_state.agree:
                               content['text_placeholder'][st.session_state.lang]).strip('\n')
 
     if st.button(content['analyze_button'][st.session_state.lang]):
-
-
-        st.session_state.
-
+        user_tfidf_disinformation = st.session_state.tfidf_vectorizer_disinformation.transform([user_input])
+        st.session_state.gpt_detector_result = st.session_state.gpt_detector.predict(user_tfidf_disinformation)[0]
+        st.session_state.gpt_detector_probability = st.session_state.gpt_detector.predict_proba(user_tfidf_disinformation)[0]
+
+        user_tfidf_untrue_inf = st.session_state.tfidf_vectorizer_untrue_inf.transform([user_input])
+        st.session_state.untrue_detector_result = st.session_state.untrue_detector.predict(user_tfidf_untrue_inf)[0]
+        st.session_state.untrue_detector_probability = st.session_state.untrue_detector.predict_proba(user_tfidf_untrue_inf)[0]
+
         st.session_state.bert_result = st.session_state.bert(user_input)
 
     if st.session_state.gpt_detector_result == 1:
@@ -92,6 +108,15 @@ if st.session_state.agree:
         st.success(content['gpt_getect_no'][st.session_state.lang] +
                    str(round(st.session_state.gpt_detector_probability[0] * 100, 2)) +
                    content['gpt_no_proba'][st.session_state.lang], icon="✅")
+
+    if st.session_state.untrue_detector_result == 1:
+        st.warning(content['untrue_getect_yes'][st.session_state.lang] +
+                   str(round(st.session_state.untrue_detector_probability[1] * 100, 2)) +
+                   content['untrue_yes_proba'][st.session_state.lang], icon="⚠️")
+    else:
+        st.success(content['untrue_getect_no'][st.session_state.lang] +
+                   str(round(st.session_state.untrue_detector_probability[0] * 100, 2)) +
+                   content['untrue_no_proba'][st.session_state.lang], icon="✅")
 
     if st.session_state.bert_result[0]['label'] == 'LABEL_1':
         st.warning(content['bert_yes_1'][st.session_state.lang] +
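For readers following along, the new detector is wired the same way as the existing GPT detector: the raw input is vectorized with the pickled TF-IDF vectorizer and then passed to the pickled SVM via predict / predict_proba. Below is a minimal standalone sketch of that path outside Streamlit, assuming the two artifacts named in this diff are present under models/ and that the SVM was fitted with probability estimates enabled; the sample text and variable names are illustrative only, not part of the commit.

```python
# Minimal sketch of the untrue-information detector path added in this commit,
# run outside Streamlit. Assumes the two pickled artifacts referenced in the
# diff exist locally and were saved with a compatible scikit-learn version.
import pickle

with open('models/tfidf_vectorizer_untrue_inform_detection_tfidf_bg_0.96_F1_score', 'rb') as f:
    tfidf_vectorizer = pickle.load(f)          # fitted TfidfVectorizer

with open('models/SVM_model_untrue_inform_detection_tfidf_bg_0.96_F1_score.pkl', 'rb') as f:
    untrue_detector = pickle.load(f)           # SVM assumed trained with probability=True

text = "Примерен текст за проверка."           # illustrative Bulgarian input
features = tfidf_vectorizer.transform([text])  # sparse TF-IDF row, same vocabulary as training

label = untrue_detector.predict(features)[0]                # 1 = untrue information (app's convention)
probabilities = untrue_detector.predict_proba(features)[0]  # column order follows classes_

if label == 1:
    print(f"Untrue information: {probabilities[1] * 100:.2f}%")
else:
    print(f"No untrue information detected: {probabilities[0] * 100:.2f}%")
```

Note that predict_proba columns follow untrue_detector.classes_, so indexing [1] for the "untrue" probability and [0] otherwise matches the app's code only when the classes are [0, 1].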