File size: 1,021 Bytes
ab66d4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# scripts/generate_data.py
import numpy as np
import pandas as pd
import os

def generate_dataset(n_samples=1000, *, seed=42,
                     output_path='../data/sample_dataset.csv'):
    """Generate a synthetic student-performance dataset and save it as CSV.

    Each row has two numeric features drawn from normal distributions, two
    categorical features drawn uniformly, and a score computed as
    ``50 + 2 * StudyHours + 1.5 * TuitionHours + noise`` where the noise is
    normal with mean 0 and standard deviation 5. The categorical columns do
    not enter the score formula.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the random generator. The default (42) reproduces the
            values this function has always produced. ``None`` seeds from
            OS entropy, giving a different dataset on each call.
        output_path: Destination CSV path. A relative path is resolved
            against the current working directory; missing parent
            directories are created.

    Returns:
        A ``pandas.DataFrame`` with columns ``StudyHours``, ``TuitionHours``,
        ``ParentalEducation``, ``SchoolType`` and ``FinalExamScore``.
    """
    # A private RandomState produces the same stream as
    # ``np.random.seed(seed)`` followed by ``np.random.*`` calls, but does not
    # reseed NumPy's process-wide generator behind the caller's back.
    rng = np.random.RandomState(seed)

    # The order of these draws determines the generated values; keep it.
    study_hours = rng.normal(10, 2, n_samples)
    tuition_hours = rng.normal(5, 1, n_samples)
    parental_education = rng.choice(['High', 'Medium', 'Low'], n_samples)
    school_type = rng.choice(['Public', 'Private'], n_samples)
    noise = rng.normal(0, 5, n_samples)
    exam_score = 50 + 2 * study_hours + 1.5 * tuition_hours + noise

    df = pd.DataFrame({
        'StudyHours': study_hours,
        'TuitionHours': tuition_hours,
        'ParentalEducation': parental_education,
        'SchoolType': school_type,
        'FinalExamScore': exam_score,
    })

    # Ensure the target directory exists. A bare filename has no directory
    # component, and os.makedirs('') would raise, so skip it in that case.
    output_path = os.fspath(output_path)
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_path, index=False)
    return df

if __name__ == "__main__":
    # Run as a script: build the dataset with default settings, then tell
    # the user where the CSV was written.
    generate_dataset()
    saved_notice = "Dataset generated and saved to ../data/sample_dataset.csv"
    print(saved_notice)