Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- app.py +89 -0
- income_evaluation.csv +0 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from sklearn.model_selection import train_test_split
|
4 |
+
from sklearn.ensemble import GradientBoostingClassifier
|
5 |
+
from sklearn.preprocessing import LabelEncoder
|
6 |
+
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
7 |
+
from sklearn.impute import SimpleImputer
|
8 |
+
from sklearn.pipeline import Pipeline
|
9 |
+
from sklearn.compose import ColumnTransformer
|
10 |
+
|
11 |
+
|
12 |
+
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the income-evaluation dataset from disk.

    The result is cached by Streamlit so the CSV is not re-read on every
    script rerun.  ``st.cache_data`` is used because the legacy ``st.cache``
    decorator has been deprecated since Streamlit 1.18.

    Returns:
        The contents of ``income_evaluation.csv`` (resolved relative to the
        working directory) as a DataFrame.
    """
    return pd.read_csv('income_evaluation.csv')
|
17 |
+
|
18 |
+
# Fetch the dataset (load_data is cached, so this is cheap on reruns).
df = load_data()

# Model inputs: everything except the ' fnlwgt' column and the ' income'
# label.  Note the leading space in the column labels.
x = df.drop(columns=[' fnlwgt', ' income'])

# Model target: the ' income' column.
y = df[' income']

# Partition the input columns by dtype so each group can be routed to its
# own preprocessing step: int64 columns are treated as numeric, object
# columns as categorical.
numeric_features = x.select_dtypes(include='int64').columns
categorical_features = x.select_dtypes(include='object').columns
|
25 |
+
|
26 |
+
# Kategorik sütunlardaki benzersiz değerleri al
|
27 |
+
#categorical_values = [x[column].unique() for column in categorical_features]
|
28 |
+
|
29 |
+
# Kategorik sütunlarda eksik değerleri doldurmak için SimpleImputer kullanın
|
30 |
+
#imputer_cat = SimpleImputer(strategy='most_frequent') #TypeError: 'tuple' object is not callable
|
31 |
+
#df[categorical_features] = imputer_cat.fit_transform(df[categorical_features])
|
32 |
+
|
33 |
+
# Sayısal sütunlarda eksik değerleri doldurmak ve ölçeklendirmek için StandardScaler kullanın
|
34 |
+
#scaler = StandardScaler()
|
35 |
+
#df[numeric_features] = scaler.fit_transform(df[numeric_features])
|
36 |
+
|
37 |
+
@st.cache_resource
def _train_pipeline(_features, _target, _categorical_cols, _numeric_cols):
    """Build and fit the preprocessing + gradient-boosting pipeline.

    Streamlit re-executes the whole script on every widget interaction, so
    fitting at module level would retrain the model each time the user
    touches an input.  Caching the fitted pipeline as a resource lets the
    expensive ``fit`` run once and be reused on later reruns.

    Every parameter name starts with an underscore so that Streamlit does
    not hash the arguments; the training data comes from a fixed CSV, so a
    single cache entry is sufficient.

    Args:
        _features: DataFrame of model inputs.
        _target: Series of labels aligned with ``_features``.
        _categorical_cols: Column labels to one-hot encode.
        _numeric_cols: Column labels to standardise.

    Returns:
        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
        ``'classifier'``.
    """
    column_prep = ColumnTransformer(
        transformers=[
            # One-hot encode the categorical columns.
            ('cat', OneHotEncoder(), _categorical_cols),
            # Standardise the numeric columns.
            ('num', StandardScaler(), _numeric_cols),
        ])

    model = Pipeline(steps=[
        ('preprocessor', column_prep),
        # Gradient-boosted trees do the classification.
        ('classifier', GradientBoostingClassifier()),
    ])

    # Train on the full dataset (no hold-out split is made here).
    model.fit(_features, _target)
    return model


pipeline = _train_pipeline(x, y, categorical_features, numeric_features)

# Keep the module-level ``preprocessor`` name available; it is the fitted
# transformer that lives inside the pipeline.
preprocessor = pipeline.named_steps['preprocessor']
|
53 |
+
|
54 |
+
def _non_negative_input(label):
    """Render a numeric input for ``label`` with a lower bound of 0."""
    return st.number_input(label, min_value=0)


def _choice_from_column(label, column):
    """Render a drop-down for ``label`` listing the distinct values of ``df[column]``."""
    return st.selectbox(label, df[column].unique())


# Application title.
st.title("Gradient Boosting ile Gelir Sınıflandırma Uygulaması")

# Input form.  Widgets are created in the same order they appear on the
# page; the drop-down options are taken from the values present in the data.
age = _non_negative_input("Yaş")
workclass = _choice_from_column("Çalışma Sınıfı", ' workclass')
education = _choice_from_column("Eğitim", ' education')
education_num = _non_negative_input("Eğitim seviyesi")
marital_status = _choice_from_column("Medeni durumu", ' marital-status')
occupation = _choice_from_column('Pozisyonu', ' occupation')
relationship = _choice_from_column('ilişki durumu', ' relationship')
race = _choice_from_column('Milliyeti', ' race')
sex = _choice_from_column('Cinsiyet', ' sex')
capital_gain = _non_negative_input("Sermaye kazancı")
capital_loss = _non_negative_input("Sermaye Kaybı")
hours_per_week = _non_negative_input("Haftalık Çalışma Saati")
native_country = _choice_from_column('Doğum Yeri', ' native-country')
|
69 |
+
|
70 |
+
|
71 |
+
# Map each model input column to the value currently entered in the form.
# NOTE(review): the labels (leading spaces included, except 'age') presumably
# mirror the CSV header exactly - verify against the data file.
_form_values = {
    'age': age,
    ' workclass': workclass,
    ' education': education,
    ' education-num': education_num,
    ' marital-status': marital_status,
    ' occupation': occupation,
    ' relationship': relationship,
    ' race': race,
    ' sex': sex,
    ' capital-gain': capital_gain,
    ' capital-loss': capital_loss,
    ' hours-per-week': hours_per_week,
    ' native-country': native_country,
}

# One-row frame holding the user's input, ready to be passed to the pipeline.
new_data = pd.DataFrame(
    {column: [value] for column, value in _form_values.items()}
)
|
86 |
+
|
87 |
+
# Run the model only on the rerun triggered by a click on the button, and
# show whatever the pipeline returns for the single input row.
predict_clicked = st.button('Predict')
if predict_clicked:
    st.write("Income Prediction:", pipeline.predict(new_data))
|
income_evaluation.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
scikit-learn
|