"""Train a student-placement classifier and serve it through a Streamlit form.

The script reads ``dataset.csv``, fits a preprocessing + random-forest
pipeline on an 80/20 train/test split, prints the hold-out accuracy, saves
the fitted pipeline to ``student_placement_model.joblib`` and then renders a
Streamlit form that predicts placement for the values the user enters.
"""

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Column names used by both the training step and the prediction form, kept in
# one place so the two cannot drift apart.
TARGET_COLUMN = 'PlacedOrNot'
NUMERIC_FEATURES = ['internships', 'cgpa', 'history_of_backlogs']
CATEGORICAL_FEATURES = ['gender', 'stream']
FEATURE_COLUMNS = NUMERIC_FEATURES + CATEGORICAL_FEATURES

# NOTE(review): Streamlit re-executes the script on each widget interaction,
# so the load/train/save steps below run again every time. Consider caching
# them once the targeted Streamlit version (and its caching API) is known.

# Load the CSV data
data = pd.read_csv('dataset.csv')

# Split the data into features and target variable. Only the columns the
# preprocessor consumes are kept, so the frame seen at fit time has the same
# schema as the single-row frame built from the form below.
X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMN]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the numeric columns and one-hot encode the categorical ones.
# handle_unknown='ignore': the form's option labels are hard-coded and may not
# match the category values present in dataset.csv; with the default ('error')
# such a value raises at predict time, whereas 'ignore' encodes it as all zeros.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), NUMERIC_FEATURES),
        ('cat', OneHotEncoder(handle_unknown='ignore'), CATEGORICAL_FEATURES),
    ]
)

# Create the pipeline with Random Forest classifier
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])

# Fit the pipeline to the training data
pipeline.fit(X_train, y_train)

# Make predictions on the test data
y_pred = pipeline.predict(X_test)

# Calculate accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

# Persist the fitted pipeline (preprocessing + classifier) to disk.
joblib.dump(pipeline, 'student_placement_model.joblib')

# Streamlit API for serving the model
st.title('Student Job Placement Prediction')

# Input form for user to enter features
st.markdown('Please enter the following information:')
internships = st.number_input('Number of Internships', min_value=0, max_value=10)
cgpa = st.number_input('CGPA', min_value=0.0, max_value=10.0)
history_of_backlogs = st.number_input(
    'History of Backlogs', min_value=0, max_value=10
)
gender = st.selectbox('Gender', ('Male', 'Female'))
stream = st.selectbox('Stream', ('Engineering', 'Science', 'Commerce'))

submit = st.button('Submit')

# Make prediction on user input when 'Submit' button is clicked
if submit:
    # Create a single-row dataframe with the user input, using the same
    # column names and order the pipeline was fitted on.
    user_data = pd.DataFrame(
        [[internships, cgpa, history_of_backlogs, gender, stream]],
        columns=FEATURE_COLUMNS,
    )

    # Make prediction using the pipeline
    prediction = pipeline.predict(user_data)

    # Display prediction (label 1 is treated as "placed")
    if prediction[0] == 1:
        st.success('Congratulations! The student is likely to be placed.')
    else:
        st.warning('Sorry, the student is unlikely to be placed.')