File size: 4,616 Bytes
ea0ff68
 
 
 
 
 
 
 
 
 
 
 
 
8c3253e
ea0ff68
 
8c3253e
ea0ff68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import os

import streamlit as st
import pickle

from transformers import AutoTokenizer, BertForSequenceClassification, pipeline
from sklearn.feature_extraction.text import TfidfVectorizer


def load_models():
    """Load every model the app needs and cache them in ``st.session_state``.

    Stores three objects on the session state:

    * ``tfidf_vectorizer`` - unpickled from the TF-IDF vectorizer file,
    * ``gpt_detector`` - unpickled from the SVM model file,
    * ``bert`` - a ``text-classification`` pipeline built from the
      ``TRACES/private-bert`` checkpoint.

    ``st.session_state.loaded`` is set to ``True`` only after all three objects
    were stored, so a failed load is retried on the next script run instead of
    leaving the session flagged as loaded with models missing.

    Raises:
        OSError: if one of the pickle files cannot be opened.
        KeyError: if the ``ACCESS_TOKEN`` environment variable is not set.
    """
    # SECURITY: pickle.load executes arbitrary code from the file. These are
    # files shipped under models/; never point these paths at untrusted data.
    with open("models/tfidf_vectorizer_svm_model_gpt_chatgpt_detection_tfidf_bg_0.88F1_score.pkl", "rb") as f:
        st.session_state.tfidf_vectorizer = pickle.load(f)

    with open('models/svm_model_gpt_chatgpt_detection_tfidf_bg_0.88F1_score.pkl', 'rb') as f:
        st.session_state.gpt_detector = pickle.load(f)

    # The checkpoint is fetched with an access token; read it once for both calls.
    # NOTE(review): newer transformers releases prefer `token=` over
    # `use_auth_token=` - confirm against the pinned transformers version.
    access_token = os.environ['ACCESS_TOKEN']
    bert_model = BertForSequenceClassification.from_pretrained(
        "TRACES/private-bert", use_auth_token=access_token, num_labels=2)
    bert_tokenizer = AutoTokenizer.from_pretrained(
        "TRACES/private-bert", use_auth_token=access_token)
    st.session_state.bert = pipeline(task="text-classification",
                                     model=bert_model,
                                     tokenizer=bert_tokenizer)

    # Mark the session as ready only once everything above succeeded.
    st.session_state.loaded = True


def load_content():
    """Return the parsed contents of ``resource/page_content.json``.

    The file is read as UTF-8 text and decoded as JSON; the decoded document
    is returned unchanged.
    """
    with open('resource/page_content.json', encoding='utf8') as source:
        raw_text = source.read()
    return json.loads(raw_text)


def switch_lang(lang):
    """Store the requested UI language code in the session state.

    ``'bg'`` is stored as ``'bg'``; every other value is stored as ``'en'``.
    Nothing is written when the session state has no ``lang`` entry yet.
    """
    if 'lang' not in st.session_state:
        return
    st.session_state.lang = 'bg' if lang == 'bg' else 'en'


# A session without a language choice starts with 'bg'.
if not ('lang' in st.session_state):
    st.session_state['lang'] = 'bg'

# Seed placeholder results (only when neither result key exists yet) so the
# result widgets further down can render before any analysis has been run.
if not ('gpt_detector_result' in st.session_state or 'bert_result' in st.session_state):
    st.session_state['gpt_detector_result'] = ''
    st.session_state['gpt_detector_probability'] = [1, 0]
    st.session_state['bert_result'] = [{'label': '', 'score': 1}]

# Page texts are re-read on every script run.
content = load_content()

# load_models() sets the 'loaded' flag, so it is skipped once the flag is present.
if not ('loaded' in st.session_state):
    load_models()

#######################################################################################################################

# Resolve the UI language once per script run. The EN/BG buttons below change it
# through on_click callbacks, so the new value is already in the session state
# when the script body is executed again.
lang = st.session_state.lang

st.title(content['title'][lang])

# Two narrow columns hold the language buttons; the third, wide column stays empty.
col1, col2, col3 = st.columns([1, 1, 10])
with col1:
    st.button(
        label='EN',
        key='en',
        on_click=switch_lang,
        args=['en']
    )
with col2:
    st.button(
        label='BG',
        key='bg',
        on_click=switch_lang,
        args=['bg']
    )

# The tool is shown only after the disclaimer has been accepted in this session.
if 'agree' not in st.session_state:
    st.session_state.agree = False

if st.session_state.agree:
    tab_tool, tab_terms = st.tabs([content['tab_tool'][lang], content['tab_terms'][lang]])

    with tab_tool:
        user_input = st.text_area(content['textbox_title'][lang],
                                  content['text_placeholder'][lang]).strip('\n')

        if st.button(content['analyze_button'][lang]):
            user_tfidf = st.session_state.tfidf_vectorizer.transform([user_input])

            st.session_state.gpt_detector_result = st.session_state.gpt_detector.predict(user_tfidf)[0]
            st.session_state.gpt_detector_probability = st.session_state.gpt_detector.predict_proba(user_tfidf)[0]
            # truncation=True keeps inputs longer than the model's maximum
            # sequence length from raising inside the pipeline.
            st.session_state.bert_result = st.session_state.bert(user_input, truncation=True)

        # NOTE(review): before the first analysis the placeholder session values
        # take the `else` branches below and render with 100% - confirm intended.
        if st.session_state.gpt_detector_result == 1:
            st.warning(content['gpt_getect_yes'][lang] +
                       str(round(st.session_state.gpt_detector_probability[1] * 100, 2)) +
                       content['gpt_yes_proba'][lang], icon="⚠️")
        else:
            st.success(content['gpt_getect_no'][lang] +
                       str(round(st.session_state.gpt_detector_probability[0] * 100, 2)) +
                       content['gpt_no_proba'][lang], icon="✅")

        # Only the first entry of the pipeline output is displayed.
        bert_top = st.session_state.bert_result[0]
        if bert_top['label'] == 'LABEL_1':
            st.warning(content['bert_yes_1'][lang] +
                       str(round(bert_top['score'] * 100, 2)) +
                       content['bert_yes_2'][lang], icon="⚠️")
        else:
            st.success(content['bert_no_1'][lang] +
                       str(round(bert_top['score'] * 100, 2)) +
                       content['bert_no_2'][lang], icon="✅")

        st.info(content['disinformation_definition'][lang], icon="ℹ️")

    with tab_terms:
        st.write(content['disclaimer'][lang])

else:
    st.write(content['disclaimer_title'][lang])
    st.write(content['disclaimer'][lang])
    if st.button(content['disclaimer_agree_text'][lang]):
        st.session_state.agree = True
        # st.experimental_rerun is deprecated in favour of st.rerun and is not
        # available in newer Streamlit releases; prefer the new name and fall
        # back to the old one on installs that predate it.
        rerun = getattr(st, 'rerun', None) or st.experimental_rerun
        rerun()