Spaces:

rajan9089
/

Placement_Prediction_And_Job_Classification

Sleeping

App Files Files Community

Placement_Prediction_And_Job_Classification / app.py

rajan9089

app.py

3816915 verified 4 months ago

raw

history blame contribute delete

5.81 kB

	import gradio as gr
	import pandas as pd
	import numpy as np

	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	from sklearn.multioutput import MultiOutputClassifier
	from sklearn.metrics import accuracy_score
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.svm import SVC

	# ------------------- Load Data -------------------
	# Column names are declared first so the schema the app relies on is visible
	# before any I/O happens.
	features = [
	    "ssc_percentage",
	    "hsc_percentage",
	    "undergrad_degree",
	    "Graduate_degree_percentage",
	    "emp_test_percentage",
	    "Internship_Experience_Months",
	    "Certifications_Count",
	    "Technical_Skills_Score",
	    "Soft_Skills_Score",
	    "Hackathons_Participated",
	    "Resume_Score",
	    "Online_Course_Count",
	    "Social_Media_Presence",
	]
	# Both targets are predicted jointly by a multi-output classifier.
	target_columns = ["Placement_Status", "Domain_of_Interest"]

	# The CSV path is relative, so it is resolved against the current working
	# directory of the process.
	df = pd.read_csv("Balanced_Placement_Data.csv")

	# Model inputs (X) and the two label columns (y).
	X, y = df[features], df[target_columns]

	# The only non-numeric input; everything else in `features` is scaled.
	categorical_features = ["undergrad_degree"]

	# Keep the numeric columns in the same order as `features`. A set difference
	# would yield an arbitrary order that can change between interpreter runs
	# (string hashing is randomised), which in turn changes the column order of
	# the transformed matrix and makes seeded models non-reproducible.
	numerical_features = [
	    name for name in features if name not in categorical_features
	]

	# Standardise numeric columns and one-hot encode the degree. `drop="first"`
	# removes one indicator column per categorical feature.
	preprocessor = ColumnTransformer([
	    ("num", StandardScaler(), numerical_features),
	    ("cat", OneHotEncoder(drop="first"), categorical_features),
	])

	# Candidate estimators keyed by the display name reported to the user.
	# Insertion order matters: it is the order in which candidates are trained.
	models = {
	    # Tree-based candidates are seeded.
	    "Random Forest": RandomForestClassifier(random_state=42),
	    "Decision Tree": DecisionTreeClassifier(random_state=42),
	    # Distance-based candidate with library defaults.
	    "KNN": KNeighborsClassifier(),
	    # Iteration cap raised to 1000.
	    "Logistic Regression": LogisticRegression(max_iter=1000),
	    # NOTE(review): probability=True enables probability estimates; nothing
	    # in the visible code calls predict_proba -- confirm it is still needed.
	    "SVM": SVC(probability=True),
	}

	# ------------------- Train Models -------------------
	def train_models():
	    """Fit every candidate in ``models`` and score it on a held-out split.

	    The module-level ``X`` / ``y`` are split 80/20 with a fixed seed. Each
	    candidate is wrapped in a ``MultiOutputClassifier`` behind the shared
	    ``preprocessor`` and evaluated separately on the two target columns.

	    Returns:
	        dict: maps each model name to a dict with the keys
	        ``"Placement Accuracy"``, ``"Domain Accuracy"`` and ``"Model"``
	        (the fitted pipeline).
	    """
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.2, random_state=42
	    )

	    def _fit_and_score(estimator):
	        # One pipeline per candidate: preprocessing followed by a
	        # multi-output wrapper around the estimator.
	        pipeline = Pipeline(steps=[
	            ("preprocessor", preprocessor),
	            ("classifier", MultiOutputClassifier(estimator)),
	        ])
	        pipeline.fit(X_train, y_train)
	        predicted = pipeline.predict(X_test)

	        # Column 0 of the prediction is the placement label, column 1 the
	        # domain label (same order as the columns of ``y``).
	        return {
	            "Placement Accuracy": accuracy_score(
	                y_test["Placement_Status"], predicted[:, 0]
	            ),
	            "Domain Accuracy": accuracy_score(
	                y_test["Domain_of_Interest"], predicted[:, 1]
	            ),
	            "Model": pipeline,
	        }

	    return {
	        label: _fit_and_score(estimator)
	        for label, estimator in models.items()
	    }

	# Train all candidates once at start-up.
	results = train_models()

	# Rank candidates by the sum of their two accuracies; on a tie the candidate
	# that appears first in `results` wins.
	_combined_accuracy = {
	    model_name: scores["Placement Accuracy"] + scores["Domain Accuracy"]
	    for model_name, scores in results.items()
	}
	best_model_name = max(_combined_accuracy, key=_combined_accuracy.get)
	best_model = results[best_model_name]["Model"]

	# ------------------- Prediction Function -------------------
	def predict_placement_and_domain(
	    ssc_percentage, hsc_percentage, undergrad_degree, Graduate_degree_percentage,
	    emp_test_percentage, Internship_Experience_Months, Certifications_Count,
	    Technical_Skills_Score, Soft_Skills_Score, Hackathons_Participated,
	    Resume_Score, Online_Course_Count, Social_Media_Presence
	):
	    """Predict placement status and domain of interest for one candidate.

	    The thirteen arguments correspond one-to-one to the training feature
	    columns and are assembled into a single-row DataFrame for ``best_model``.

	    Returns:
	        dict: ``"Placement Status"`` and ``"Domain of Interest"`` (the two
	        predicted labels as built-in Python values) and ``"Best Model"``
	        (the name of the model that produced them).

	    Raises:
	        gr.Error: if any input is ``None``; the message lists the missing
	            fields so the UI can show it to the user.
	    """
	    user_input = {
	        "ssc_percentage": ssc_percentage,
	        "hsc_percentage": hsc_percentage,
	        "undergrad_degree": undergrad_degree,
	        "Graduate_degree_percentage": Graduate_degree_percentage,
	        "emp_test_percentage": emp_test_percentage,
	        "Internship_Experience_Months": Internship_Experience_Months,
	        "Certifications_Count": Certifications_Count,
	        "Technical_Skills_Score": Technical_Skills_Score,
	        "Soft_Skills_Score": Soft_Skills_Score,
	        "Hackathons_Participated": Hackathons_Participated,
	        "Resume_Score": Resume_Score,
	        "Online_Course_Count": Online_Course_Count,
	        "Social_Media_Presence": Social_Media_Presence,
	    }

	    # An empty input (for example a dropdown with nothing selected) reaches
	    # this function as None. Fail early with a readable message instead of
	    # letting the pipeline raise an opaque error.
	    missing = [field for field, value in user_input.items() if value is None]
	    if missing:
	        raise gr.Error("Please provide a value for: " + ", ".join(missing))

	    def _to_builtin(value):
	        # NumPy scalars are not always JSON-serialisable; unwrap them.
	        return value.item() if isinstance(value, np.generic) else value

	    input_df = pd.DataFrame([user_input])
	    # predict() returns one row per input row with one column per target.
	    placement, domain = best_model.predict(input_df)[0]

	    return {
	        "Placement Status": _to_builtin(placement),
	        "Domain of Interest": _to_builtin(domain),
	        "Best Model": best_model_name,
	    }

	# ------------------- Gradio UI -------------------
	# Degrees offered in the dropdown are exactly those present in the data set.
	_degree_choices = list(df['undergrad_degree'].unique())

	with gr.Blocks() as demo:
	    gr.Markdown("# 🎯 Placement & Domain Predictor")

	    with gr.Row():
	        # Left column: one input widget per model feature, plus the button.
	        with gr.Column():
	            ssc_percentage = gr.Number(label="SSC Percentage", value=70)
	            hsc_percentage = gr.Number(label="HSC Percentage", value=65)
	            # Pre-select the first degree explicitly. Depending on the Gradio
	            # version, a dropdown without a value may start unselected, and
	            # the first click would then send None to the prediction function.
	            undergrad_degree = gr.Dropdown(
	                choices=_degree_choices,
	                value=_degree_choices[0] if _degree_choices else None,
	                label="Undergrad Degree",
	            )
	            Graduate_degree_percentage = gr.Number(label="Graduate Degree %", value=60)
	            emp_test_percentage = gr.Number(label="Employment Test %", value=50)
	            Internship_Experience_Months = gr.Number(label="Internship Months", value=0)
	            Certifications_Count = gr.Number(label="Certifications Count", value=1)
	            Technical_Skills_Score = gr.Number(label="Technical Skills Score", value=60)
	            Soft_Skills_Score = gr.Number(label="Soft Skills Score", value=60)
	            Hackathons_Participated = gr.Number(label="Hackathons Participated", value=1)
	            Resume_Score = gr.Number(label="Resume Score", value=50)
	            Online_Course_Count = gr.Number(label="Online Course Count", value=2)
	            Social_Media_Presence = gr.Number(label="Social Media Presence (0/1)", value=1)

	            btn = gr.Button("Predict")

	        # Right column: the prediction dict rendered as JSON.
	        with gr.Column():
	            output = gr.JSON(label="Prediction Result")

	    # The input order must match the positional parameters of
	    # predict_placement_and_domain.
	    btn.click(
	        predict_placement_and_domain,
	        inputs=[ssc_percentage, hsc_percentage, undergrad_degree, Graduate_degree_percentage,
	                emp_test_percentage, Internship_Experience_Months, Certifications_Count,
	                Technical_Skills_Score, Soft_Skills_Score, Hackathons_Participated,
	                Resume_Score, Online_Course_Count, Social_Media_Presence],
	        outputs=output
	    )

	demo.launch()