File size: 2,236 Bytes
aad3e6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43d75a0
93853e6
 
aad3e6a
43d75a0
aad3e6a
 
824836c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
import numpy as np
import pandas as pd
import re
import shap

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TextClassificationPipeline,
)

# Hugging Face Hub id of the fine-tuned checkpoint; the tokenizer and the
# model weights are both loaded from this same repository.
_MODEL_ID = "chinhon/fake_tweet_detect"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(_MODEL_ID)

# Pipeline that tokenizes raw strings and runs them through the classifier.
tweet_detector = TextClassificationPipeline(model=model, tokenizer=tokenizer)

# tweak the extent of text cleaning as you wish
def clean_text(text):
    """Normalise raw tweet text before it is handed to the classifier.

    The steps run in this order: remove URLs, turn newlines into spaces,
    replace every ``'t`` with " not", remove @mentions, remove standalone
    numbers, drop every character that is not a word character, whitespace
    or ``#``, and finally collapse runs of spaces.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned text, with no leading or trailing whitespace.
    """
    text = re.sub(r"http\S+", "", text)  # remove URLs
    text = re.sub(r"\n", " ", text)
    # Literal replacement of "'t": "can't" -> "can not" (and, as a side
    # effect, "don't" -> "don not").
    text = re.sub(r"\'t", " not", text)
    # Remove @name. The mention may be terminated by whitespace OR by the end
    # of the text; requiring whitespace left a trailing mention in place, so
    # "hello @user" ended up as "hello user".
    text = re.sub(r"@\S*(?:\s|$)", " ", text)
    # Remove standalone numbers, plus a number at the very end together with
    # the non-word characters in front of it. (The former first alternative
    # "$\d+\W+" started with an end-of-string anchor and could never match.)
    text = re.sub(r"\b\d+\b|\W+\d+$", " ", text)
    text = re.sub(r"[^\w\s\#]", "", text)  # remove special characters except hashtags
    # Collapse multiple spaces into one and trim both ends.
    text = re.sub(" +", " ", text.strip(" ")).strip()
    return text

def tweet_detect(text):
    """Classify a single tweet as fake or real.

    The text is cleaned with ``clean_text`` and passed to ``tweet_detector``
    as a one-element batch.

    Args:
        text: Raw tweet text.

    Returns:
        "Fake Tweet" when the only predicted label is ``LABEL_1``,
        "Real Tweet" when it is ``LABEL_0``, and ``None`` otherwise.
    """
    results = tweet_detector([clean_text(text)])
    labels = [item.get("label") for item in results]

    # Map the model's raw label to the user-facing verdict.
    verdict_by_label = (
        ("LABEL_1", "Fake Tweet"),
        ("LABEL_0", "Real Tweet"),
    )
    for raw_label, verdict in verdict_by_label:
        if labels == [raw_label]:
            return verdict
    return None

# Define the Gradio interface.
# User-facing copy is kept in named constants so the Interface call stays readable.
_UI_TITLE = "Detect Fake Tweets"
_UI_DESCRIPTION = (
    "Enter a tweet and see if a Distilbert model can identify if it was written by state-backed trolls. "
    "DISCLAIMER: While the model was fine tuned on 100k real and troll tweets, and achieved high accuracy in my tests, "
    "its performance drops significantly against the day-to-day barrage of content on Twitter. "
    "As such, this app is intended as an example for understanding the limits of AI/ML in highly complex problems "
    "like fake media detection, and not as a final arbiter of whether someone's tweet is real or not."
)
_UI_ARTICLE = "Details of the fine tuning and tests are in this Medium post: https://bit.ly/3tueP36"

# NOTE(review): `type="auto"`, `interpretation="shap"` and `enable_queue=True`
# look specific to the Gradio release this app was written for -- confirm
# against the pinned Gradio version before upgrading.
tweet_input = gr.Textbox(lines=10, label="Paste tweet text here [English Only]")
prediction_output = gr.Label(type="auto", label="Prediction")

gradio_ui = gr.Interface(
    fn=tweet_detect,
    title=_UI_TITLE,
    description=_UI_DESCRIPTION,
    inputs=tweet_input,
    outputs=prediction_output,
    interpretation="shap",
    article=_UI_ARTICLE,
)

gradio_ui.launch(enable_queue=True)