Commit
•
9c98bd9
1
Parent(s):
0a31d37
Uploading necessary files
Browse files
- app.py +41 -0
- log_model_final.pkl +3 -0
- pre_processing.py +60 -0
- templates/index.html +92 -0
- vectorizer.pkl +3 -0
app.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import necessary libraries
|
2 |
+
import joblib
|
3 |
+
import numpy as np
|
4 |
+
from flask import Flask, render_template, request, jsonify
|
5 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
6 |
+
from sklearn.linear_model import LogisticRegression
|
7 |
+
from pre_processing import preprocess_text
|
8 |
+
|
9 |
+
app = Flask(__name__)

# File names of the serialized artifacts, relative to the application root
# (the directory that contains this module).
model_path = "log_model_final.pkl"
vectorizer_path = "vectorizer.pkl"

# Load the pre-trained model and the vectorizer once, at import time.
# app.open_resource() resolves names against app.root_path and opens them in
# binary mode, so loading does not depend on the current working directory.
# NOTE: joblib artifacts are pickles; only load files from a trusted source.
with app.open_resource(model_path) as model_file:
    model = joblib.load(model_file)
with app.open_resource(vectorizer_path) as vectorizer_file:
    vectorizer = joblib.load(vectorizer_file)
|
18 |
+
|
19 |
+
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
|
22 |
+
|
23 |
+
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the text posted to ``/predict`` and return the label as JSON.

    Expects a JSON body of the form ``{"input": "<text>"}``. The text is
    vectorized with the module-level ``vectorizer`` and passed to ``model``.

    Returns:
        ``{"prediction": <label>}`` on success. ``{"error": <message>}`` with
        HTTP 400 when the body is not a JSON object holding a string
        ``input``, or with HTTP 500 when vectorizing or predicting fails.
    """
    # silent=True yields None for a malformed body instead of raising, so the
    # client always receives a consistent JSON error.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict) or not isinstance(payload.get('input'), str):
        return jsonify({'error': "Request body must be JSON with a string 'input' field."}), 400

    input_text = payload['input']
    app.logger.debug("input: %s", input_text)

    # NOTE(review): preprocess_text is imported by this module but is not
    # applied here; confirm whether the vectorizer was fitted on preprocessed
    # text before adding it.
    try:
        test_data = vectorizer.transform([input_text])
        y_pred = model.predict(test_data)
    except Exception as e:
        # Top-level request boundary: log the traceback and report the failure.
        app.logger.exception("Prediction failed")
        return jsonify({'error': str(e)}), 500

    app.logger.debug("prediction: %s", y_pred)

    # The predicted label may be a NumPy scalar (e.g. numpy.int64), which
    # jsonify cannot serialize; .item() converts it to the built-in equivalent.
    label = y_pred[0]
    if hasattr(label, 'item'):
        label = label.item()

    # Return the prediction as JSON
    return jsonify({'prediction': label})
|
38 |
+
|
39 |
+
|
40 |
+
if __name__ == '__main__':
    import os

    # The Werkzeug debugger lets anyone who can reach the page execute code,
    # so debug mode is opt-in via FLASK_DEBUG=1 instead of always on.
    app.run(port=5000, debug=os.environ.get('FLASK_DEBUG') == '1')
|
log_model_final.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba33944b346a7546c899dd218b63a6703c0f5f1e7be0e1fc52d0d24b776d7ae9
|
3 |
+
size 1093696
|
pre_processing.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import necessary libraries
|
2 |
+
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
import re
|
6 |
+
import string
|
7 |
+
|
8 |
+
# stop words that are dropped during basic pre-processing
|
9 |
+
|
10 |
+
# Lower-case tokens that are discarded during pre-processing.
stopwords = {
    'br', 'the', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves',
    'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself',
    'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers',
    'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their',
    'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that',
    "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be',
    'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did',
    'doing', 'a', 'an', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
    'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
    'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to',
    'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again',
    'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
    'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some',
    'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't',
    'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd',
    'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn',
    "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't",
    'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn',
    "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't",
    'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won',
    "won't", 'wouldn', "wouldn't",
}
|
25 |
+
|
26 |
+
def decontracted(phrase):
    """Expand common English contractions in ``phrase`` and return the result.

    The substitutions are applied one after another in the order listed, so
    the two irregular forms are rewritten before the generic ``n't`` rule
    sees them.
    """
    expansions = (
        # specific
        (r"won't", "will not"),
        (r"can\'t", "can not"),
        # general
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'s", " is"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
    )
    for pattern, replacement in expansions:
        phrase = re.sub(pattern, replacement, phrase)
    return phrase
|
41 |
+
|
42 |
+
|
43 |
+
def preprocess_text(text):
    """Normalize raw text into a space-separated string of cleaned tokens.

    Steps, in order: lowercase; replace newlines and digits with spaces; drop
    ``<...>`` tags, ``[...]`` spans and URLs; strip ASCII punctuation; remove
    every ``xx``; drop stop words; collapse any run of a repeated character
    to two; discard tokens shorter than two characters.

    Args:
        text: The raw input string.

    Returns:
        The remaining tokens joined by single spaces (possibly empty).
    """
    text = text.lower()
    text = re.sub(r"\n", " ", text)
    text = re.sub(r"\d", " ", text)
    text = re.sub(r"<.*?>+", "", text)
    text = re.sub(r"\[.*?\]", "", text)
    # The previous pattern ("https?://S+|www.\.\S+") used a literal "S" and a
    # misplaced ".", so it never matched ordinary URLs.
    text = re.sub(r"https?://\S+|www\.\S+", "", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r'xx', '', text)
    # NOTE(review): apostrophes were already removed with the punctuation, so
    # decontracted() cannot match anything at this point. Moving it earlier
    # changes the produced tokens; confirm against the trained vectorizer
    # before reordering.
    text = decontracted(text)
    words = text.split()
    words = [word for word in words if word not in stopwords]
    # Collapse runs of the same character ("soooo" -> "soo").
    words = [re.sub(r"(.)\1{1,}", r"\1\1", word) for word in words]
    words = [word.strip() for word in words if len(word.strip()) > 1]
    text = " ".join(words)
    return text
|
59 |
+
|
60 |
+
|
templates/index.html
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- templates/index.html -->
|
2 |
+
<!DOCTYPE html>
|
3 |
+
<html lang="en">
|
4 |
+
<head>
|
5 |
+
<meta charset="UTF-8">
|
6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
7 |
+
<title>Logistic Regression Predictor</title>
|
8 |
+
<style>
|
9 |
+
body {
|
10 |
+
font-family: 'Arial', sans-serif;
|
11 |
+
margin: 20px;
|
12 |
+
text-align: center;
|
13 |
+
}
|
14 |
+
|
15 |
+
h1 {
|
16 |
+
color: #333;
|
17 |
+
}
|
18 |
+
|
19 |
+
form {
|
20 |
+
margin-top: 20px;
|
21 |
+
display: flex;
|
22 |
+
flex-direction: column;
|
23 |
+
align-items: center;
|
24 |
+
}
|
25 |
+
|
26 |
+
label {
|
27 |
+
margin-bottom: 10px;
|
28 |
+
}
|
29 |
+
|
30 |
+
input {
|
31 |
+
padding: 8px;
|
32 |
+
margin-bottom: 10px;
|
33 |
+
width: 300px;
|
34 |
+
box-sizing: border-box;
|
35 |
+
}
|
36 |
+
|
37 |
+
button {
|
38 |
+
padding: 10px;
|
39 |
+
background-color: #4CAF50;
|
40 |
+
color: white;
|
41 |
+
border: none;
|
42 |
+
cursor: pointer;
|
43 |
+
font-size: 16px;
|
44 |
+
}
|
45 |
+
|
46 |
+
button:hover {
|
47 |
+
background-color: #45a049;
|
48 |
+
}
|
49 |
+
|
50 |
+
#predictionResult {
|
51 |
+
margin-top: 20px;
|
52 |
+
font-size: 18px;
|
53 |
+
}
|
54 |
+
</style>
|
55 |
+
</head>
|
56 |
+
<body>
|
57 |
+
<h1>Logistic Regression Predictor</h1>
|
58 |
+
|
59 |
+
<form id="predictionForm">
|
60 |
+
<label for="inputText">Input Text:</label>
|
61 |
+
<input type="text" id="inputText" name="inputText" required>
|
62 |
+
<button type="button" onclick="predict()">Submit</button>
|
63 |
+
</form>
|
64 |
+
|
65 |
+
<p id="predictionResult"></p>
|
66 |
+
|
67 |
+
<script>
|
68 |
+
/**
 * Send the text in #inputText to the Flask /predict endpoint and show the
 * returned prediction, or an error message, in #predictionResult.
 */
function predict() {
    var inputText = document.getElementById('inputText').value;
    var resultElement = document.getElementById('predictionResult');

    // Make a POST request to the Flask API
    fetch('/predict', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            'input': inputText,
        }),
    })
    .then(response => response.json())
    .then(data => {
        // The API reports failures as {error: "..."}; without this check the
        // page would display "Prediction: undefined".
        if (data.error !== undefined) {
            throw new Error(data.error);
        }
        resultElement.innerText = 'Prediction: ' + data.prediction;
    })
    .catch(error => {
        console.error('Error:', error);
        resultElement.innerText = 'Error occurred. Please try again.';
    });
}
|
90 |
+
</script>
|
91 |
+
</body>
|
92 |
+
</html>
|
vectorizer.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:280f67b47eb1ae3e7386047de966646e9515468bed9eb62dc0fdada533961c22
|
3 |
+
size 4573659
|