Spaces:
Sleeping
Sleeping
Upload Inference.py
Browse files- Inference.py +38 -0
Inference.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from NoCodeTextClassifier.preprocessing import *
|
| 2 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import joblib
|
| 6 |
+
|
| 7 |
+
# Locations and settings used by the inference script.
TRAIN_PATH = Path("./ML Engineer/train.csv")
MODEL_PATH = 'email_detection_model.pkl'
CURRENCY_SYMBOLS = r'[\$\£\€\¥\₹\¢\₽\₩\₪]'
MAX_FEATURES = 10000


def prediction_label(raw_prediction):
    """Return the display label for one raw classifier output.

    Args:
        raw_prediction: A single value taken from the model's ``predict``
            result; it is converted with ``int()`` before comparison.

    Returns:
        ``"spam"`` when the value equals 1 after conversion, otherwise
        ``"not_spam"``.
    """
    return "spam" if int(raw_prediction) == 1 else "not_spam"


def fit_vectorizer(train_df, text_cleaner):
    """Fit a TF-IDF vectorizer on the cleaned ``email`` column of *train_df*.

    A ``clean_text`` column is added to *train_df* as a side effect.

    Args:
        train_df: DataFrame with an ``email`` column of raw e-mail text.
        text_cleaner: Object exposing ``clean_text(text)``.

    Returns:
        The fitted ``TfidfVectorizer``.
    """
    train_df['clean_text'] = train_df['email'].apply(text_cleaner.clean_text)
    vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)
    vectorizer.fit(train_df['clean_text'])
    return vectorizer


def main():
    """Prompt for an e-mail, classify it, and print the cleaned text and label."""
    # Input the email
    text = input("Enter the Email: \n")

    # Load the training data and fit the TfidfVectorizer on it.
    # NOTE(review): the vectorizer is refit from the CSV on every run;
    # presumably it has to match the one used when the model was trained --
    # confirm, or persist the fitted vectorizer next to the model instead.
    train_df = pd.read_csv(TRAIN_PATH)
    text_cleaner = TextCleaner(CURRENCY_SYMBOLS)
    vectorizer = fit_vectorizer(train_df, text_cleaner)

    # Clean the input email
    clean_text = str(text_cleaner.clean_text(text))
    print(f"\nThe clean text is : {clean_text}")

    # Vectorize the clean email (transform expects an iterable of documents)
    features = vectorizer.transform([clean_text])

    # Load the model from the file.
    # SECURITY: joblib.load unpickles arbitrary objects; only load model
    # files that come from a trusted source.
    loaded_model = joblib.load(MODEL_PATH)

    # Predict and print the label for the single input email
    prediction = prediction_label(loaded_model.predict(features)[0])
    print(f"\nThe prediction is : {prediction}")


if __name__ == "__main__":
    main()
|