Spaces:

akshatsanghvi
/

spam-email-detection

Sleeping

spam-email-detection / app.py

Update file

ddaa8f4 5 months ago

1.78 kB

	import gradio as gr
	import pickle
	import pandas as pd
	from urlextract import URLExtract
	import URLFeatureExtraction

	with open("models/spam-clf.pkl", "rb") as f:
	model = pickle.load(f)

	with open("models/xgb.dat", "rb") as f:
	model2 = pickle.load(f)

	df = pd.DataFrame(columns=['URL', 'Phishy?'])

	def isPhishing(link):
	global df

	features = URLFeatureExtraction.featureExtraction(link)
	print(features)
	prediction = model2.predict([features])
	print(prediction[0])

	df = df._append({'URL': link, 'Phishy?': "UnSafe" if prediction[0] else "Safe"}, ignore_index=True)
	return prediction[0]

	def isSpam(Email):
	out = model.predict([Email])
	return "Spam" if out[0] else "Not Spam"

	def check_URL(Email):
	extractor = URLExtract()
	urls = extractor.find_urls(Email)
	n_urls = len(urls)

	if not urls: return -1
	if urls:
	bad_urls = sum([isPhishing(url) for url in urls])
	else:
	bad_urls = 0
	print("Out of {} urls {} are legitimate.".format(n_urls, n_urls - bad_urls))

	return bad_urls

	def check_Mail(Email):
	bad_urls = check_URL(Email)
	if bad_urls==-1:
	return [isSpam(Email), 0, 'Safe', df]

	if URLFeatureExtraction.flag or bad_urls:
	return [isSpam(Email), bad_urls, 'Risky', df]

	return [isSpam(Email), bad_urls, 'Safe', df]

	iface = gr.Interface(
	fn=check_Mail,
	inputs=gr.Textbox(lines=6, placeholder="Enter or paste email here", label="Email"),
	outputs=[
	gr.Textbox(label="Spam or Not"),
	gr.Textbox(label="Phishing Links Detected"),
	gr.Textbox(label="Consider this Mail as"),
	gr.Dataframe(label="Insights", interactive=False)
	]
	)
	iface.launch()