# rajan9089's picture
# app.py
# 3816915 verified
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
# ------------------- Load Data -------------------
# The dataset is read from a CSV file looked up relative to the working directory.
df = pd.read_csv("Balanced_Placement_Data.csv")

# Input columns used by every model.
features = [
    'ssc_percentage', 'hsc_percentage', 'undergrad_degree', 'Graduate_degree_percentage',
    'emp_test_percentage', 'Internship_Experience_Months', 'Certifications_Count',
    'Technical_Skills_Score', 'Soft_Skills_Score', 'Hackathons_Participated',
    'Resume_Score', 'Online_Course_Count', 'Social_Media_Presence',
]
# Label columns predicted together.
target_columns = ['Placement_Status', 'Domain_of_Interest']

X = df[features]
y = df[target_columns]

categorical_features = ['undergrad_degree']
# Derive the numeric columns while keeping the order of `features`. Going
# through set() would give an order that varies between interpreter runs
# (string hash randomization), which reorders the transformed columns and can
# change the fitted seeded models from one run to the next.
numerical_features = [col for col in features if col not in categorical_features]

# Scale numeric columns; one-hot encode the degree, dropping the first category.
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical_features),
    ("cat", OneHotEncoder(drop="first"), categorical_features),
])

# Candidate classifiers, keyed by the display name reported to the user.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "SVM": SVC(probability=True),
}
# ------------------- Train Models -------------------
def train_models():
    """Fit every candidate model and score it on a held-out split.

    Reads the module-level ``X``, ``y``, ``models`` and ``preprocessor``.
    The data is split 80/20 with a fixed seed, and each classifier is wrapped
    in ``MultiOutputClassifier`` so both target columns are predicted.

    Returns:
        dict: maps each model name to a dict with the keys
        ``"Placement Accuracy"``, ``"Domain Accuracy"`` (accuracy on the test
        split) and ``"Model"`` (the fitted pipeline).
    """
    # Local import: keeps this function self-contained with respect to the
    # file's import block.
    from sklearn.base import clone

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Look prediction columns up by target name rather than by hard-coded
    # position, so the scores stay correct if the target order changes.
    target_position = {col: idx for idx, col in enumerate(y_test.columns)}
    placement_col = target_position["Placement_Status"]
    domain_col = target_position["Domain_of_Interest"]

    results = {}
    for name, clf in models.items():
        pipe = Pipeline([
            # Pipeline fits the step objects it is handed in place. Cloning
            # gives each stored pipeline its own preprocessor instead of all
            # of them sharing (and refitting) one ColumnTransformer instance.
            ("preprocessor", clone(preprocessor)),
            ("classifier", MultiOutputClassifier(clf)),
        ])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)

        results[name] = {
            "Placement Accuracy": accuracy_score(
                y_test["Placement_Status"], y_pred[:, placement_col]
            ),
            "Domain Accuracy": accuracy_score(
                y_test["Domain_of_Interest"], y_pred[:, domain_col]
            ),
            "Model": pipe,
        }
    return results
# Train all candidates, then keep the pipeline whose two accuracies add up to
# the highest total (max() keeps the first model on a tie).
results = train_models()


def _total_accuracy(model_name):
    """Return placement accuracy plus domain accuracy for *model_name*."""
    scores = results[model_name]
    return scores["Placement Accuracy"] + scores["Domain Accuracy"]


best_model_name = max(results, key=_total_accuracy)
best_model = results[best_model_name]["Model"]
# ------------------- Prediction Function -------------------
def predict_placement_and_domain(
    ssc_percentage, hsc_percentage, undergrad_degree, Graduate_degree_percentage,
    emp_test_percentage, Internship_Experience_Months, Certifications_Count,
    Technical_Skills_Score, Soft_Skills_Score, Hackathons_Participated,
    Resume_Score, Online_Course_Count, Social_Media_Presence
):
    """Predict placement status and domain of interest for one candidate.

    Each parameter is the value of the feature column with the same name.
    The values are assembled into a one-row DataFrame and passed to the
    module-level ``best_model``.

    Returns:
        dict: ``"Placement Status"`` and ``"Domain of Interest"`` (the two
        predicted labels) and ``"Best Model"`` (the value of
        ``best_model_name``).

    Raises:
        gr.Error: if any field is empty, so the UI shows which inputs are
            missing instead of a failure from inside the model pipeline.
    """
    user_input = {
        "ssc_percentage": ssc_percentage,
        "hsc_percentage": hsc_percentage,
        "undergrad_degree": undergrad_degree,
        "Graduate_degree_percentage": Graduate_degree_percentage,
        "emp_test_percentage": emp_test_percentage,
        "Internship_Experience_Months": Internship_Experience_Months,
        "Certifications_Count": Certifications_Count,
        "Technical_Skills_Score": Technical_Skills_Score,
        "Soft_Skills_Score": Soft_Skills_Score,
        "Hackathons_Participated": Hackathons_Participated,
        "Resume_Score": Resume_Score,
        "Online_Course_Count": Online_Course_Count,
        "Social_Media_Presence": Social_Media_Presence,
    }

    # A cleared number box or an unselected dropdown arrives as None; reject
    # it here with a readable message rather than letting the pipeline fail.
    missing = [
        field for field, value in user_input.items()
        if value is None or (isinstance(value, str) and not value.strip())
    ]
    if missing:
        raise gr.Error("Please provide a value for: " + ", ".join(missing))

    def as_builtin(value):
        # NumPy scalars expose .item(); convert them to plain Python values
        # so the result serializes cleanly in the JSON output component.
        return value.item() if hasattr(value, "item") else value

    input_df = pd.DataFrame([user_input])
    placement, domain = best_model.predict(input_df)[0]
    return {
        "Placement Status": as_builtin(placement),
        "Domain of Interest": as_builtin(domain),
        "Best Model": best_model_name,
    }
# ------------------- Gradio UI -------------------
# Degree options are taken from the dataset. Missing values are dropped and
# the rest converted to plain Python values so every option is selectable.
_degree_choices = df['undergrad_degree'].dropna().unique().tolist()

with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Placement & Domain Predictor")
    with gr.Row():
        # Left column: one input per feature, then the submit button.
        with gr.Column():
            ssc_percentage = gr.Number(label="SSC Percentage", value=70)
            hsc_percentage = gr.Number(label="HSC Percentage", value=65)
            undergrad_degree = gr.Dropdown(
                choices=_degree_choices,
                # Preselect an option so an untouched form does not submit
                # None for the degree.
                value=_degree_choices[0] if _degree_choices else None,
                label="Undergrad Degree",
            )
            Graduate_degree_percentage = gr.Number(label="Graduate Degree %", value=60)
            emp_test_percentage = gr.Number(label="Employment Test %", value=50)
            Internship_Experience_Months = gr.Number(label="Internship Months", value=0)
            Certifications_Count = gr.Number(label="Certifications Count", value=1)
            Technical_Skills_Score = gr.Number(label="Technical Skills Score", value=60)
            Soft_Skills_Score = gr.Number(label="Soft Skills Score", value=60)
            Hackathons_Participated = gr.Number(label="Hackathons Participated", value=1)
            Resume_Score = gr.Number(label="Resume Score", value=50)
            Online_Course_Count = gr.Number(label="Online Course Count", value=2)
            Social_Media_Presence = gr.Number(label="Social Media Presence (0/1)", value=1)
            btn = gr.Button("Predict")
        # Right column: the prediction rendered as JSON.
        with gr.Column():
            output = gr.JSON(label="Prediction Result")

    # The input order must match the parameter order of
    # predict_placement_and_domain, which receives the values positionally.
    btn.click(
        predict_placement_and_domain,
        inputs=[
            ssc_percentage, hsc_percentage, undergrad_degree, Graduate_degree_percentage,
            emp_test_percentage, Internship_Experience_Months, Certifications_Count,
            Technical_Skills_Score, Soft_Skills_Score, Hackathons_Participated,
            Resume_Score, Online_Course_Count, Social_Media_Presence,
        ],
        outputs=output,
    )

demo.launch()