# -*- coding: utf-8 -*-
"""Final_project_of_Credit_Card_Fraud_Detection(1).ipynb
Automatically generated by Colaboratory.
Original file is located at https://colab.research.google.com/drive/1PSHcV_bp0wcT0Kl_f2n5QwtlOZj3M5BV """
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Read the transaction dataset from the hard-coded /content path.
data = pd.read_csv('/content/data4.csv')

# Quick inspection. The bare expressions are evaluated for interactive
# (notebook) display; data.info() prints its summary itself.
data.head()
data.shape
data.isna().sum().sum()  # total number of missing cells in the frame
data.columns
data.info()

# Remove the 'Unnamed: 0', 'nameOrig' and 'nameDest' columns.
data = data.drop(columns=['Unnamed: 0', 'nameOrig', 'nameDest'])
data.shape

# Number of rows per isFraud value.
data['isFraud'].value_counts()
# Pie chart of the isFraud class shares.
# value_counts() orders its result by frequency, so the fixed labels below
# assume the non-fraud rows are the majority class -- TODO confirm.
plt.pie(
    data['isFraud'].value_counts(),
    labels=['Not_Fraud', 'Fraud'],
    autopct='%0.2f%%',
)
plt.show()

# Row counts per transaction type, split by the isFraud flag.
sns.countplot(data=data, x="type", hue="isFraud")
plt.show()

# Row counts per isFraud value, split by transaction type.
plt.figure(figsize=(6, 8))
sns.countplot(data=data, x="isFraud", hue="type")
plt.show()
data.tail()
data['type'].value_counts()

# Integer code assigned to each transaction type label.
dict1 = {'CASH_OUT': 0, 'TRANSFER': 1, 'PAYMENT': 2, 'CASH_IN': 3, 'DEBIT': 4}

# Series.map() converts every value that is not a key of the dict into NaN
# without any warning, so fail loudly if the column holds a label the
# mapping does not cover (this also catches running the encoding twice).
_unmapped_types = [
    label for label in data['type'].dropna().unique() if label not in dict1
]
if _unmapped_types:
    raise ValueError(
        "Values in the 'type' column missing from dict1: {}".format(_unmapped_types)
    )

# Replace the text labels with their integer codes.
data['type'] = data['type'].map(dict1)
data.head()
# Feature matrix: every remaining column except the target.
X = data.drop('isFraud', axis=1)
X

# Target vector.
y = data['isFraud']
y

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; random_state=0 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0
)

# Report the shape of each split, one per line.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

# Fit the scaler on the training split only, then apply the same fitted
# transform to the test split.
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

# Bare expressions kept for interactive (notebook) display.
X_train_sc
X_test_sc
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Logistic regression with default settings, trained on the scaled
# training features (fit() returns the estimator itself).
model1 = LogisticRegression().fit(X_train_sc, y_train)

# Predict the scaled test split and print the per-class metrics.
y_pred1 = model1.predict(X_test_sc)
print(classification_report(y_test, y_pred1))
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings on the scaled training features.
model2 = GaussianNB().fit(X_train_sc, y_train)

# Predict the scaled test split and print the per-class metrics.
y_pred2 = model2.predict(X_test_sc)
print(classification_report(y_test, y_pred2))
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default settings on the scaled
# training features.
model3 = KNeighborsClassifier().fit(X_train_sc, y_train)

# Predict the scaled test split and print the per-class metrics.
y_pred3 = model3.predict(X_test_sc)
print(classification_report(y_test, y_pred3))
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default settings, trained on the scaled training features.
model4 = DecisionTreeClassifier()
model4.fit(X_train_sc, y_train)

# Predict the scaled test split and print the per-class metrics.
y_pred4 = model4.predict(X_test_sc)
print(classification_report(y_test, y_pred4))

from sklearn import tree

# Draw the fitted tree; filled=True colours the nodes.
plt.figure(figsize=(10, 10))
tree.plot_tree(model4, filled=True)
plt.show()
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

# Random forest with default settings on the scaled training features.
model5 = RandomForestClassifier().fit(X_train_sc, y_train)
y_pred5 = model5.predict(X_test_sc)
print(classification_report(y_test, y_pred5))

# AdaBoost with default settings on the same scaled training features.
model6 = AdaBoostClassifier().fit(X_train_sc, y_train)
y_pred6 = model6.predict(X_test_sc)
print(classification_report(y_test, y_pred6))
# Spot-check two hand-written transactions with the random forest.
# model5 was fitted on standardized features (X_train_sc), so raw values must
# pass through the already-fitted scaler `sc` before prediction; feeding them
# unscaled puts them on a different scale from the training data.
# Each sample is wrapped in a DataFrame carrying X's column names, matching
# the frame the scaler was fitted on.
# NOTE(review): the values are assumed to follow X's column order -- confirm.
model5.predict(
    sc.transform(
        pd.DataFrame(
            [[239, 2, 5178.72, 400705.00, 395526.28, 0.00, 0.00]],
            columns=X.columns,
        )
    )
)
model5.predict(
    sc.transform(
        pd.DataFrame(
            [[369, 0, 89596.79, 89596.79, 0.0, 0.00, 89596.79]],
            columns=X.columns,
        )
    )
)