# Spaces: Sleeping  (HuggingFace Spaces page-status header from the scrape; not code)
import gradio as gr
import pickle
import pandas as pd
from urlextract import URLExtract

import URLFeatureExtraction

# NOTE(security): pickle.load executes arbitrary code from the file being
# loaded — only ship model files from a trusted source with this app.
with open("models/spam-clf.pkl", "rb") as f:
    model = pickle.load(f)   # email-text spam classifier
with open("models/xgb.dat", "rb") as f:
    model2 = pickle.load(f)  # URL phishing model (XGBoost, per filename — confirm)

# Accumulates one row per URL examined; surfaced in the "Insights" output.
df = pd.DataFrame(columns=['URL', 'Phishy?'])
def isPhishing(link):
    """Classify a single URL with the phishing model.

    Side effect: appends a row ('URL', 'Phishy?') to the module-level
    insights DataFrame ``df``.

    Returns the raw model prediction for the link — truthy means
    phishing ("UnSafe"), falsy means "Safe".
    """
    global df
    features = URLFeatureExtraction.featureExtraction(link)
    prediction = model2.predict([features])
    verdict = "UnSafe" if prediction[0] else "Safe"
    # DataFrame._append is a private API that was removed in pandas 2.x;
    # pd.concat is the supported way to append a row.
    row = pd.DataFrame([{'URL': link, 'Phishy?': verdict}])
    df = pd.concat([df, row], ignore_index=True)
    return prediction[0]
def isSpam(Email):
    """Run the text spam classifier over the raw email body.

    Returns "Spam" for a truthy prediction, otherwise "Not Spam".
    """
    label = model.predict([Email])[0]
    return "Spam" if label else "Not Spam"
def check_URL(Email):
    """Extract every URL from the email text and classify each one.

    Returns -1 when the email contains no URLs (sentinel for the caller),
    otherwise the number of URLs the model flags as phishing.
    """
    extractor = URLExtract()
    urls = extractor.find_urls(Email)
    if not urls:
        # No links at all — caller treats this as "nothing to judge".
        return -1
    # isPhishing returns truthy (1) for phishing links, so the sum counts
    # the unsafe ones. (The original's `else: bad_urls = 0` branch was
    # unreachable after the guard above and has been removed.)
    n_urls = len(urls)
    bad_urls = sum(isPhishing(url) for url in urls)
    print("Out of {} urls {} are legitimate.".format(n_urls, n_urls - bad_urls))
    return bad_urls
def check_Mail(Email):
    """Full pipeline for one email: spam verdict, phishing-link count,
    overall risk label, and the accumulated URL insights table.

    Returns [spam_label, n_bad_urls, risk_label, df] matching the four
    Gradio output widgets.
    """
    bad_urls = check_URL(Email)
    # -1 means the mail carried no links at all.
    if bad_urls == -1:
        return [isSpam(Email), 0, 'Safe', df]
    # Risky when feature extraction raised its flag or any link was bad.
    risky = URLFeatureExtraction.flag or bad_urls
    label = 'Risky' if risky else 'Safe'
    return [isSpam(Email), bad_urls, label, df]
# Gradio UI: one email textbox in, four result widgets out (wired to the
# four elements of check_Mail's return list, in order).
iface = gr.Interface(
    fn=check_Mail,
    inputs=gr.Textbox(lines=6, placeholder="Enter or paste email here", label="Email"),
    outputs=[
        gr.Textbox(label="Spam or Not"),
        gr.Textbox(label="Phishing Links Detected"),
        gr.Textbox(label="Consider this Mail as"),
        gr.Dataframe(label="Insights", interactive=False),
    ],
)

iface.launch()