# Gradio demo that labels a URL as benign or phishing using a Hugging Face
# text-classification model.
import gradio as gr
from transformers import pipeline, set_seed
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Seed first so everything that follows runs with a fixed seed.
set_seed(42)

# Checkpoint and label scheme. The reverse mapping and the label count are
# derived from id2label so the three values cannot drift apart.
checkpoint = "bgspaditya/distilbert-phish"
id2label = {0: "benign", 1: "phishing"}
label2id = {name: idx for idx, name in id2label.items()}
num_labels = len(id2label)

# Load the tokenizer and the classification model from the checkpoint.
# force_download=True asks the loader to re-download instead of reusing
# a previously cached copy.
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
    use_fast=True,
    force_download=True,
)
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
    force_download=True,
)

# Prediction entry point used by the Gradio interface.

# Shared pipeline instance; created on the first prediction and reused after
# that, because the model and tokenizer it wraps never change.
_url_classifier = None


def _get_url_classifier():
    """Return the shared text-classification pipeline, building it on first use."""
    global _url_classifier
    if _url_classifier is None:
        # If two requests race here the pipeline may be built twice; both
        # instances wrap the same model/tokenizer, so the result is the same.
        _url_classifier = pipeline(
            task='text-classification', model=model, tokenizer=tokenizer
        )
    return _url_classifier


def predict(url: str) -> str:
    """Classify a URL and return the predicted label.

    Args:
        url: The URL text to classify.

    Returns:
        The ``'label'`` field of the first result produced by the
        text-classification pipeline (label names come from the model's
        ``id2label`` mapping).
    """
    result = _get_url_classifier()(url)
    return result[0]['label']

# Gradio UI: a single URL textbox feeds predict(), and the returned label is
# shown in a Label component.
gradio_app = gr.Interface(
    fn=predict,
    title="Phishing URL Detection",
    inputs=gr.Textbox(label="Enter URL"),
    outputs=gr.Label(label="Result"),
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    gradio_app.launch()