File size: 1,935 Bytes
8c933a6
 
 
32ba95a
8c933a6
 
 
 
a6b1837
8c933a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import streamlit as st
import torch
from transformers import RobertaTokenizer
from model import RobertaClass
from text_preprocessing import preprocess_text

# Load the fine-tuned RoBERTa classifier weights onto the CPU.
# NOTE: torch.load unpickles the checkpoint, so 'trained_model.bin' must come
# from a trusted source.
model = RobertaClass()
model.load_state_dict(torch.load('trained_model.bin',
                                 map_location=torch.device('cpu')))
# Switch to evaluation mode. torch.no_grad() only turns off gradient tracking;
# it does not disable train-only behaviour such as dropout, which would
# otherwise make predictions vary from run to run.
model.eval()

# Load the tokenizer matching the 'roberta-base' checkpoint
tokenizer = RobertaTokenizer.from_pretrained(
    'roberta-base', truncation=True, do_lower_case=True)

# Define the user interface: a title, a text box and a submit button
st.title('ChatGPT detector')
text_input = st.text_input('Enter text to classify:', '')
submit_button = st.button('Classify')

# Define prediction function
def predict(text):
    """Classify ``text`` and report the confidence of the decision.

    The text is run through ``preprocess_text``, tokenized, and fed to the
    module-level ``model``. The sigmoid of the model output is treated as the
    probability of class 1.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is 1 when the sigmoid
        output is at least 0.5 and 0 otherwise; ``confidence`` is the
        probability assigned to the returned label.
    """
    cleaned = preprocess_text(text)

    encoded = tokenizer(
        cleaned,
        return_tensors='pt',
        padding=True,
        truncation=True,
    )
    # The model is called with ids and mask only; drop segment ids if present.
    encoded.pop('token_type_ids', None)

    # Inference only, so gradient tracking is not needed.
    with torch.no_grad():
        logits = model(
            input_ids=encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
        )

    # .item() requires the model output to be a single-element tensor.
    positive_prob = torch.sigmoid(logits).item()

    if positive_prob >= 0.5:
        return 1, positive_prob
    # Class 0 was chosen: its probability is the complement.
    return 0, 1 - positive_prob


# Handle user interaction: classify the entered text when the button is clicked
if submit_button:
    if not text_input.strip():
        # An empty or whitespace-only box has nothing to classify; running the
        # model on it would still print a confident-looking verdict.
        st.warning('Please enter some text to classify.')
    else:
        predicted_label, predicted_prob = predict(text_input)
        # Binary classification: the list index matches the label from predict()
        labels = ['written by a human', 'generated by ChatGPT']
        predicted_category = labels[predicted_label]
        # Show the confidence as a percentage with two decimals
        predicted_prob_percentage = round(predicted_prob * 100, 2)
        st.write(
            f"This text was {predicted_category} ({predicted_prob_percentage} % confident)")