elifsara committed
Commit 547d645
1 Parent(s): 4eeba2b

Upload 3 files

Files changed (3)
  1. app.py +89 -0
  2. income_evaluation.csv +0 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,89 @@
+ import streamlit as st
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.ensemble import GradientBoostingClassifier
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ from sklearn.impute import SimpleImputer
+ from sklearn.pipeline import Pipeline
+ from sklearn.compose import ColumnTransformer
+
+
+ @st.cache_data
+ def load_data():
+     # Load the dataset
+     df = pd.read_csv('income_evaluation.csv')
+     return df
+
+ df = load_data()
+
+ # Column names keep the leading space from the CSV header (e.g. ' income')
+ x = df.drop([' fnlwgt', ' income'], axis=1)
+ y = df[' income']
+
+ numeric_features = x.select_dtypes(include=['int64']).columns
+ categorical_features = x.select_dtypes(include=['object']).columns
+
+ # Get the unique values in the categorical columns
+ #categorical_values = [x[column].unique() for column in categorical_features]
+
+ # Use SimpleImputer to fill missing values in the categorical columns
+ #imputer_cat = SimpleImputer(strategy='most_frequent')
+ #df[categorical_features] = imputer_cat.fit_transform(df[categorical_features])
+
+ # Scale the numeric columns with StandardScaler
+ #scaler = StandardScaler()
+ #df[numeric_features] = scaler.fit_transform(df[numeric_features])
+
+ # One-hot encode the categorical columns and standardize the numeric columns
+ preprocessor = ColumnTransformer(
+     transformers=[
+         ('cat', OneHotEncoder(), categorical_features),
+         ('num', StandardScaler(), numeric_features)
+     ])
+
+ # Build the pipeline: preprocessing followed by a gradient boosting classifier
+ pipeline = Pipeline(steps=[
+     ('preprocessor', preprocessor),
+     ('classifier', GradientBoostingClassifier())
+ ])
+
+ #x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2, random_state=42)
+ # Train the model on the full dataset
+ pipeline.fit(x, y)
+
+ # App title and input widgets
+ st.title("Income Classification App with Gradient Boosting")
+ age = st.number_input("Age", min_value=0)
+ workclass = st.selectbox("Work class", df[' workclass'].unique())
+ education = st.selectbox("Education", df[' education'].unique())
+ education_num = st.number_input("Education level", min_value=0)
+ marital_status = st.selectbox("Marital status", df[' marital-status'].unique())
+ occupation = st.selectbox('Occupation', df[' occupation'].unique())
+ relationship = st.selectbox('Relationship', df[' relationship'].unique())
+ race = st.selectbox('Race', df[' race'].unique())
+ sex = st.selectbox('Sex', df[' sex'].unique())
+ capital_gain = st.number_input("Capital gain", min_value=0)
+ capital_loss = st.number_input("Capital loss", min_value=0)
+ hours_per_week = st.number_input("Hours per week", min_value=0)
+ native_country = st.selectbox('Native country', df[' native-country'].unique())
+
+
+ # Assemble the user input into a single-row DataFrame with the training column names
+ new_data = pd.DataFrame({
+     'age': [age],
+     ' workclass': [workclass],
+     ' education': [education],
+     ' education-num': [education_num],
+     ' marital-status': [marital_status],
+     ' occupation': [occupation],
+     ' relationship': [relationship],
+     ' race': [race],
+     ' sex': [sex],
+     ' capital-gain': [capital_gain],
+     ' capital-loss': [capital_loss],
+     ' hours-per-week': [hours_per_week],
+     ' native-country': [native_country]
+ })
+
+ if st.button('Predict'):
+     predictions = pipeline.predict(new_data)
+     st.write("Income prediction:", predictions[0])
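
Note: app.py imports train_test_split but leaves the hold-out split commented out, so the model is fit on the full dataset without any evaluation. Below is a minimal offline sketch of that evaluation step, assuming the same income_evaluation.csv; it rebuilds an equivalent preprocessing-plus-GradientBoosting pipeline. The accuracy_score import and the handle_unknown='ignore' option are additions for illustration and are not part of this commit.

# Offline evaluation sketch (not part of the commit): score the pipeline on a 20% hold-out split.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

df = pd.read_csv('income_evaluation.csv')
x = df.drop([' fnlwgt', ' income'], axis=1)
y = df[' income']

numeric_features = x.select_dtypes(include=['int64']).columns
categorical_features = x.select_dtypes(include=['object']).columns

pipeline = Pipeline(steps=[
    ('preprocessor', ColumnTransformer(transformers=[
        # handle_unknown='ignore' keeps the encoder from failing if a rare
        # category appears only in the test split (an assumption, not in app.py)
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', StandardScaler(), numeric_features),
    ])),
    ('classifier', GradientBoostingClassifier()),
])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
pipeline.fit(x_train, y_train)
print("Held-out accuracy:", accuracy_score(y_test, pipeline.predict(x_test)))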
income_evaluation.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ streamlit
+ pandas
+ scikit-learn
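
Usage note: scikit-learn (not "sklearn") is the installable package name behind the sklearn imports in app.py. Assuming a standard local setup, the dependencies can be installed with pip install -r requirements.txt and the app started with streamlit run app.py, with income_evaluation.csv in the same directory.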