File size: 2,820 Bytes
b11db75
5f0c212
 
0ebd7c5
5f0c212
 
 
a39dfac
5f0c212
a5d668d
0ebd7c5
5f0c212
 
66019c8
0ebd7c5
66019c8
5f0c212
66019c8
 
5f0c212
66019c8
 
 
5f0c212
66019c8
 
 
 
5f0c212
0ebd7c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c9cbd6
 
0ebd7c5
 
 
 
4c9cbd6
 
0ebd7c5
 
 
4c9cbd6
0ebd7c5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import streamlit as st
from PIL import Image
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import torch

# Model construction and caching for the OCR pipeline and phishing classifier
def _build_models():
    """Instantiate the OCR pipeline, the phishing classifier and its tokenizer.

    Returns:
        tuple: ``(image_pipeline, phishing_model, phishing_tokenizer)``.
    """
    image_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-printed")
    phishing_model = AutoModelForSequenceClassification.from_pretrained(
        "kithangw/phishing_link_detection", num_labels=2
    )
    # NOTE(review): the tokenizer is loaded from "bert-base-uncased" rather than
    # from the classifier's own repository — confirm it matches the vocabulary
    # the classifier was fine-tuned with.
    phishing_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    return image_pipeline, phishing_model, phishing_tokenizer


def load_models():
    """Return the OCR pipeline, phishing classifier and tokenizer.

    Streamlit re-executes the whole script on every widget interaction, so
    building the models directly here would reload all weights on each click.
    The construction is therefore routed through ``st.cache_resource`` so the
    objects are built once and reused on later reruns.

    Returns:
        tuple: ``(image_pipeline, phishing_model, phishing_tokenizer)``.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is None:
        # Older Streamlit releases have no cache_resource; fall back to an
        # uncached load rather than failing.
        return _build_models()
    return cache_resource(_build_models)()

# Classify one URL string with the phishing model and describe the outcome
def check_phishing(phishing_model, phishing_tokenizer, url_for_recognize):
    """Classify ``url_for_recognize`` and return a human-readable verdict.

    Args:
        phishing_model: Callable accepting the tokenizer output as keyword
            arguments and returning an object with a ``logits`` tensor.
        phishing_tokenizer: Callable that encodes the URL into model inputs.
        url_for_recognize: The URL text to classify.

    Returns:
        str: A sentence naming the predicted label ('Not Phishing' or
        'Phishing') and its softmax probability, formatted to two decimals.
    """
    encoded = phishing_tokenizer(
        url_for_recognize,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors='pt',
    )

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        model_output = phishing_model(**encoded)

    class_probs = torch.softmax(model_output.logits, dim=-1)
    best_index = class_probs.argmax(dim=-1).item()
    best_prob = class_probs[0, best_index].item()

    # Index 0 is the benign class, index 1 the phishing class.
    label_names = ('Not Phishing', 'Phishing')
    verdict = label_names[best_index]
    return f"The URL '{url_for_recognize}' is classified as '{verdict}' with a probability of {best_prob:.2f}."

def main():
    """Render the Streamlit page: upload an image, OCR the URL, classify it.

    The recognized (and user-editable) URL is kept in
    ``st.session_state['verified_url']`` so it is still available on the rerun
    triggered by the 'Detect Phishing' button. The entry is removed whenever
    no image is uploaded or image processing fails, so a URL left over from an
    earlier upload is never classified by mistake.
    """
    # Load models
    image_pipeline, phishing_model, phishing_tokenizer = load_models()

    # Streamlit interface
    st.title("Phishing URL Detection from Image")

    # File uploader to scan the image
    uploaded_image = st.file_uploader("Upload an image of the URL", type=["png", "jpg", "jpeg"])

    if uploaded_image is None:
        # The image was removed (or never provided): forget any earlier URL.
        if 'verified_url' in st.session_state:
            del st.session_state['verified_url']
    else:
        try:
            # Opening is inside the try so a corrupt/unsupported file is
            # reported to the user instead of crashing the app.
            image = Image.open(uploaded_image)
            st.image(image, caption='Uploaded URL Image', use_column_width=True)

            # Process the image with the OCR pipeline; URLs contain no spaces
            # and are compared lower-cased.
            ocr_result = image_pipeline(image)[0]['generated_text'].replace(" ", "").lower()
            # Let the user correct the OCR output, then store it for the
            # button-triggered rerun. Surrounding whitespace is not part of a URL.
            st.session_state['verified_url'] = st.text_input("Recognized URL", ocr_result).strip()
        except Exception as e:
            # Do not keep a URL from a previous, successful upload around.
            if 'verified_url' in st.session_state:
                del st.session_state['verified_url']
            st.error(f"An error occurred during image processing: {e}")

    if st.button('Detect Phishing'):
        # Check for 'verified_url' in session state instead of local variable
        if 'verified_url' in st.session_state and st.session_state['verified_url']:
            result = check_phishing(phishing_model, phishing_tokenizer, st.session_state['verified_url'])
            st.write(result)
        else:
            st.error("Please upload an image to detect the URL and check for phishing.")

# Script entry point: start the app only when this module is run as the main
# script, not when it is imported by another module.
if __name__ == "__main__":
    main()