|
import pandas as pd |
|
from flask import Flask, request, jsonify |
|
|
|
from sklearn.compose import ColumnTransformer |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.impute import SimpleImputer |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.pipeline import Pipeline |
|
from sklearn.preprocessing import LabelEncoder, StandardScaler |
|
|
|
|
|
# --- Data loading and train/test preparation -------------------------------
# Read the full table from disk; 'PlacedOrNot' is the column the model
# learns to predict, every other column is kept as a candidate feature.
data = pd.read_csv('dataset.csv')

TARGET_COLUMN = 'PlacedOrNot'
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

# Replace each listed column with integer codes produced by a LabelEncoder.
# The encoding is fitted on the whole table, i.e. before the split below.
# NOTE(review): the fitted encoder is overwritten on each iteration and never
# stored, while the /predict handler feeds request values to the model
# without encoding them. That is only consistent if the column already holds
# the same integer codes in the CSV -- confirm against the dataset.
categorical_features = ['HistoryOfBacklogs']
for feature in categorical_features:
    encoder = LabelEncoder()
    encoder.fit(X[feature])
    X[feature] = encoder.transform(X[feature])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
|
|
|
|
|
# --- Model definition, training and evaluation ------------------------------
# Column groups consumed by the preprocessing step.
numerical_features = ['Internships', 'CGPA']
categorical_features = ['HistoryOfBacklogs']

# Numerical columns are standardised; the categorical column only has its
# missing values filled with the most frequent value.
numerical_transformer = StandardScaler()
categorical_transformer = SimpleImputer(strategy='most_frequent')

# Only the columns named above are routed to a transformer. No `remainder`
# is given, so ColumnTransformer's documented default ('drop') applies and
# any other column of X does not reach the classifier.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ],
)

# Preprocessing and the random forest are chained so that the same
# transformations run at fit time and at prediction time.
classifier = RandomForestClassifier(random_state=42)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier),
    ],
)

# Train on the training split, then report the score on the held-out split.
pipeline.fit(X_train, y_train)

accuracy = pipeline.score(X_test, y_test)
print('Accuracy:', accuracy)
|
|
|
|
|
|
|
# Flask application that exposes the trained pipeline over HTTP.
app = Flask(__name__)


@app.route('/predict', methods=['POST'])
def predict():
    """Predict the target for a single record sent as a JSON object.

    The request body must be a JSON object that contains at least the
    columns listed in the module-level ``numerical_features`` and
    ``categorical_features`` lists. The object is turned into a one-row
    DataFrame and passed to the module-level ``pipeline``.

    Returns:
        On success, a JSON response ``{"prediction": <label>}``.
        On invalid input, a ``(response, 400)`` tuple whose JSON body has an
        ``"error"`` message (and ``"missing"`` for absent fields).
    """
    payload = request.get_json()

    # A JSON array, scalar or null cannot be mapped onto named feature
    # columns, so reject anything that is not an object.
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    # Report absent features explicitly instead of letting the pipeline fail
    # with an unhandled exception.
    required = [*numerical_features, *categorical_features]
    missing = [name for name in required if name not in payload]
    if missing:
        return jsonify({'error': 'Missing required fields.',
                        'missing': missing}), 400

    try:
        # index=[0] lets pandas build a single row from scalar values.
        input_data = pd.DataFrame(payload, index=[0])
        predictions = pipeline.predict(input_data)
    except ValueError:
        # Raised for values that cannot be shaped into one row or converted
        # to the numeric form the model expects.
        return jsonify({'error': 'Feature values could not be processed.'}), 400

    # The pipeline returns a NumPy array; NumPy scalars such as numpy.int64
    # are not JSON serializable, so convert to the native Python value.
    label = predictions[0]
    if hasattr(label, 'item'):
        label = label.item()

    return jsonify({'prediction': label})
|
|
|
|
|
if __name__ == '__main__':
    import os

    # Debug mode turns on the Werkzeug interactive debugger, which lets
    # anyone who can reach the server execute arbitrary code. It is therefore
    # opt-in via the FLASK_DEBUG environment variable rather than always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    app.run(debug=debug_enabled)
|
|