Spaces:

yuchuantian
/

AIGC_text_detector

Running

File size: 9,410 Bytes

import gradio as gr
import torch
from transformers.models.bert import BertForSequenceClassification, BertTokenizer
from transformers.models.roberta import RobertaForSequenceClassification, RobertaTokenizer

# NOTE: gradients are not disabled globally; predict_func wraps each
# inference call in torch.no_grad() instead.
# torch.set_grad_enabled(False)
print('Loading Models from HuggingFace...')
# All six checkpoints are loaded eagerly at import time, before the UI is built.

# English detector v3 (RoBERTa sequence classifier). Its tokenizer is the one
# passed to every English predict_* helper in this file.
name_en = "yuchuantian/AIGC_detector_env3"
model_en = RobertaForSequenceClassification.from_pretrained(name_en)
tokenizer_en = RobertaTokenizer.from_pretrained(name_en)

# English v3 "short" variant; no separate tokenizer is loaded for it.
name_en3 = "yuchuantian/AIGC_detector_env3short"
model_en3 = RobertaForSequenceClassification.from_pretrained(name_en3)

# English v2 checkpoint; no separate tokenizer is loaded for it.
name_en5 = "yuchuantian/AIGC_detector_env2"
model_en5 = RobertaForSequenceClassification.from_pretrained(name_en5)

# Chinese detector v3 (BERT sequence classifier). Its tokenizer is the one
# passed to every Chinese predict_* helper in this file.
name_zh = "yuchuantian/AIGC_detector_zhv3"
model_zh = BertForSequenceClassification.from_pretrained(name_zh)
tokenizer_zh = BertTokenizer.from_pretrained(name_zh)

# Chinese v3 "short" variant; no separate tokenizer is loaded for it.
name_zh4 = "yuchuantian/AIGC_detector_zhv3short"
model_zh4 = BertForSequenceClassification.from_pretrained(name_zh4)

# Chinese v2 checkpoint; no separate tokenizer is loaded for it.
name_zh6 = "yuchuantian/AIGC_detector_zhv2"
model_zh6 = BertForSequenceClassification.from_pretrained(name_zh6)

print('Model Loading from HuggingFace Complete!')


def predict_func(text: str, tokenizer, model, max_length: int = 512):
    """Classify one text and report the winning class with its probability.

    Args:
        text: Raw input string to classify.
        tokenizer: Callable invoked as
            ``tokenizer(text, return_tensors='pt', max_length=..., truncation=True)``;
            it must return a mapping of tensors.
        model: Callable invoked as ``model(**inputs)``; it must return an
            object whose ``logits`` tensor holds the per-class scores of the
            single input in its first row.
        max_length: Token limit handed to the tokenizer; longer inputs are
            truncated. Defaults to 512, the value that used to be hard-coded.

    Returns:
        A dict with ``"label"`` (int index of the most probable class) and
        ``"score"`` (float softmax probability of that class).
    """
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors='pt', max_length=max_length, truncation=True)
        # Follow the model onto whatever device it lives on, so the helper
        # keeps working if a model is moved off the CPU. Objects without a
        # ``device`` attribute are called with the inputs unchanged.
        device = getattr(model, 'device', None)
        if device is not None:
            inputs = {key: value.to(device) for key, value in inputs.items()}
        outputs = model(**inputs)
        # Softmax over the class axis of the first (only) sample; .cpu() is a
        # no-op on CPU tensors and required before .numpy() otherwise.
        scores = outputs.logits[0].softmax(0).cpu().numpy()
        result = {"label": scores.argmax().item(), "score": scores.max().item()}
    return result


def predict_en(text):
    """Run the English v3 detector on ``text``.

    Returns a ``(label, score)`` pair where the label is 'Human' or 'AI'
    and the score is the probability reported by ``predict_func``.
    """
    prediction = predict_func(text, tokenizer_en, model_en)
    label_names = ('Human', 'AI')  # position matches the predicted class index
    return label_names[prediction['label']], prediction['score']

def predict_en3(text):
    """Run the English v3-short detector on ``text``.

    Uses the shared English tokenizer. Returns a ``(label, score)`` pair
    where the label is 'Human' or 'AI' and the score is the probability
    reported by ``predict_func``.
    """
    prediction = predict_func(text, tokenizer_en, model_en3)
    label_names = ('Human', 'AI')  # position matches the predicted class index
    return label_names[prediction['label']], prediction['score']

def predict_en5(text):
    """Run the English v2 detector on ``text``.

    Uses the shared English tokenizer. Returns a ``(label, score)`` pair
    where the label is 'Human' or 'AI' and the score is the probability
    reported by ``predict_func``.
    """
    prediction = predict_func(text, tokenizer_en, model_en5)
    label_names = ('Human', 'AI')  # position matches the predicted class index
    return label_names[prediction['label']], prediction['score']


def predict_zh(text):
    """Run the Chinese v3 detector on ``text``.

    Returns a ``(label, score)`` pair where the label is one of the two
    display strings below and the score is the probability reported by
    ``predict_func``.
    """
    prediction = predict_func(text, tokenizer_zh, model_zh)
    label_names = ('人类', 'AI')  # position matches the predicted class index
    return label_names[prediction['label']], prediction['score']

def predict_zh4(text):
    """Run the Chinese v3-short detector on ``text``.

    Uses the shared Chinese tokenizer. Returns a ``(label, score)`` pair
    where the label is one of the two display strings below and the score
    is the probability reported by ``predict_func``.
    """
    prediction = predict_func(text, tokenizer_zh, model_zh4)
    label_names = ('人类', 'AI')  # position matches the predicted class index
    return label_names[prediction['label']], prediction['score']

def predict_zh6(text):
    """Run the Chinese v2 detector on ``text``.

    Uses the shared Chinese tokenizer. Returns a ``(label, score)`` pair
    where the label is one of the two display strings below and the score
    is the probability reported by ``predict_func``.
    """
    prediction = predict_func(text, tokenizer_zh, model_zh6)
    label_names = ('人类', 'AI')  # position matches the predicted class index
    return label_names[prediction['label']], prediction['score']


# Startup smoke test: push one English and one Chinese sample through the
# v3 detectors and print the resulting (label, score) tuples before the UI
# is built.
print(predict_en('Peking University is one of the best universities in the world.'))

print(predict_zh('很高兴认识你！'))


# Build the Gradio page. Components appear in the order they are created here:
# header text, three Chinese tabs, three English tabs, then the visitor map.
with gr.Blocks() as demo:

    # Page header: title, news, project / paper links, links to the six
    # checkpoints, and the acknowledgement.
    gr.Markdown("""
                ## AIGC Detector 大模型AI文本检测器
                
                **News**

                6/25/2025: The detectors are updated! Latest LLMs and reasoning models are now covered! 🔥🔥🔥

                3/25/2025: Our AIGC Detector demo is available! 🔥🔥🔥

                This app is a demo of our AIGC Detector. If you are interested in our project, please keep tuned at our [GitHub](https://github.com/YuchuanTian/AIGC_text_detector) !

                本app是我们AIGC检测器工作的DEMO。如果您对我们的工作感兴趣，欢迎在[Github主页](https://github.com/YuchuanTian/AIGC_text_detector)上持续关注我们的工作！

                [Paper Link 论文链接](https://arxiv.org/abs/2305.18149)

                The loadable versions are as follows 可加载的检测器版本如下：
                English: [En-v3](https://huggingface.co/yuchuantian/AIGC_detector_env3) / [En-v3-short](https://huggingface.co/yuchuantian/AIGC_detector_env3short) / [En_v2](https://huggingface.co/yuchuantian/AIGC_detector_env2)
                Chinese:  [Zh-v3](https://huggingface.co/yuchuantian/AIGC_detector_zhv3) /  [Zh-v3-short](https://huggingface.co/yuchuantian/AIGC_detector_zhv3short) / [Zh_v2](https://huggingface.co/yuchuantian/AIGC_detector_zhv2)

                Acknowledgement 致谢
                We sincerely thank [Hello-SimpleAI](https://huggingface.co/spaces/Hello-SimpleAI/chatgpt-detector-single) for their code.
                """)

    # Every tab below has the same layout: a disclaimer, a pre-filled input
    # textbox, a trigger button, and two output textboxes (label, probability).
    # Numbering: even suffixes (2/4/6) are the Chinese tabs, odd suffixes
    # (1/3/5) are the English tabs; the buttons are wired after all tabs exist.

    # Chinese v3 tab (handled by predict_zh).
    with gr.Tab("中文-V3"):
        gr.Markdown("""
                    注意: 本检测器提供的结果仅供参考，应谨慎作为事实依据。
                    """)
        t2 = gr.Textbox(lines=5, label='文本',value="北京大学建立于1898年7月3日，初名京师大学堂，辛亥革命后于1912年改为北京大学。1938年更名为国立西南联合大学。1946年10月在北平复员。1952年成为以文理学科为主的综合性大学。")
        button2 = gr.Button("🚀 检测!")
        label2 = gr.Textbox(lines=1, label='预测结果')
        score2 = gr.Textbox(lines=1, label='模型概率')

    
    # Chinese v3-short tab (handled by predict_zh4).
    with gr.Tab("中文-V3-短文本"):
        gr.Markdown("""
                    注意: 本检测器提供的结果仅供参考，应谨慎作为事实依据。
                    """)
        t4 = gr.Textbox(lines=5, label='文本',value="北京大学建立于1898年7月3日，初名京师大学堂，辛亥革命后于1912年改为北京大学。1938年更名为国立西南联合大学。1946年10月在北平复员。1952年成为以文理学科为主的综合性大学。")
        button4 = gr.Button("🚀 检测!")
        label4 = gr.Textbox(lines=1, label='预测结果')
        score4 = gr.Textbox(lines=1, label='模型概率')

    # Chinese v2 tab (handled by predict_zh6).
    with gr.Tab("中文-V2"):
        gr.Markdown("""
                    注意: 本检测器提供的结果仅供参考，应谨慎作为事实依据。
                    """)
        t6 = gr.Textbox(lines=5, label='文本',value="北京大学建立于1898年7月3日，初名京师大学堂，辛亥革命后于1912年改为北京大学。1938年更名为国立西南联合大学。1946年10月在北平复员。1952年成为以文理学科为主的综合性大学。")
        button6 = gr.Button("🚀 检测!")
        label6 = gr.Textbox(lines=1, label='预测结果')
        score6 = gr.Textbox(lines=1, label='模型概率')


    # English v3 tab (handled by predict_en).
    with gr.Tab("English-V3"):
        gr.Markdown("""
                    Note: The results are for reference only; they could not be used as factual evidence.
                    """)
        t1 = gr.Textbox(lines=5, label='Text',value="Originated as the Imperial University of Peking in 1898, Peking University was China's first national comprehensive university and the supreme education authority at the time. Since the founding of the People's Republic of China in 1949, it has developed into a comprehensive university with fundamental education and research in both humanities and science. The reform and opening-up of China in 1978 has ushered in a new era for the University unseen in history.")
        button1 = gr.Button("🚀 Predict!")
        label1 = gr.Textbox(lines=1, label='Predicted Label')
        score1 = gr.Textbox(lines=1, label='Probability')

    # English v3-short tab (handled by predict_en3).
    with gr.Tab("English-V3-Short"):
        gr.Markdown("""
                    Note: The results are for reference only; they could not be used as factual evidence.
                    """)
        t3 = gr.Textbox(lines=5, label='Text',value="Originated as the Imperial University of Peking in 1898, Peking University was China's first national comprehensive university and the supreme education authority at the time. Since the founding of the People's Republic of China in 1949, it has developed into a comprehensive university with fundamental education and research in both humanities and science. The reform and opening-up of China in 1978 has ushered in a new era for the University unseen in history.")
        button3 = gr.Button("🚀 Predict!")
        label3 = gr.Textbox(lines=1, label='Predicted Label')
        score3 = gr.Textbox(lines=1, label='Probability')

    # English v2 tab (handled by predict_en5).
    with gr.Tab("English-V2"):
        gr.Markdown("""
                    Note: The results are for reference only; they could not be used as factual evidence.
                    """)
        t5 = gr.Textbox(lines=5, label='Text',value="Originated as the Imperial University of Peking in 1898, Peking University was China's first national comprehensive university and the supreme education authority at the time. Since the founding of the People's Republic of China in 1949, it has developed into a comprehensive university with fundamental education and research in both humanities and science. The reform and opening-up of China in 1978 has ushered in a new era for the University unseen in history.")
        button5 = gr.Button("🚀 Predict!")
        label5 = gr.Textbox(lines=1, label='Predicted Label')
        score5 = gr.Textbox(lines=1, label='Probability')

    # Wire each button to its predictor: the tab's textbox is the single input
    # and the returned (label, score) pair fills the tab's two output boxes.
    button1.click(predict_en, inputs=[t1], outputs=[label1,score1])
    button2.click(predict_zh, inputs=[t2], outputs=[label2,score2])
    button3.click(predict_en3, inputs=[t3], outputs=[label3,score3])
    button4.click(predict_zh4, inputs=[t4], outputs=[label4,score4])
    button5.click(predict_en5, inputs=[t5], outputs=[label5,score5])
    button6.click(predict_zh6, inputs=[t6], outputs=[label6,score6])

    # Page Count: embeds a ClustrMaps visitor-tracker image link at the bottom
    # of the page.
    gr.Markdown("""
                <center><a href='https://clustrmaps.com/site/1bsdc'  title='Visit tracker'><img src='//clustrmaps.com/map_v2.png?cl=080808&w=a&t=tt&d=NXQdnwxvIm27veMbB5F7oHNID09nhSvkBRZ_Aji9eIA&co=ffffff&ct=808080'/></a></center>
                """)

# Start serving the app with Gradio's default launch settings.
demo.launch()