Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	File size: 5,046 Bytes
			
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Streamlit app: exploratory charts and Isolation Forest anomaly detection for
# an uploaded transactions CSV.
#
# Every chart is guarded by a check for the columns it needs, so a CSV that
# lacks some of them still renders the remaining sections. The anomaly model
# is fitted on three fixed feature columns and then used to score a single
# transaction typed in by the user.
st.title("Advanced Transaction Anomaly Detection")

# File uploader
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file:
    # Load the data from the uploaded buffer itself rather than from a path on
    # the server, so the app analyses whatever the user actually provided.
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()  # halt this script run; nothing below can work without data

    st.subheader("Dataset Preview")
    st.write(data.head())

    # Data Overview
    st.subheader("Dataset Overview")
    st.write("Missing Values:")
    st.write(data.isnull().sum())
    st.write("Descriptive Statistics:")
    st.write(data.describe())

    # Visualization 1: Histogram of Transaction Amount
    if 'Transaction_Amount' in data.columns:
        st.subheader("Transaction Amount Distribution")
        fig_amount = px.histogram(data, x='Transaction_Amount', nbins=30, title="Transaction Amount Distribution")
        st.plotly_chart(fig_amount)

    # Visualization 2: Box Plot of Transaction Amount by Account Type
    if 'Account_Type' in data.columns and 'Transaction_Amount' in data.columns:
        st.subheader("Box Plot: Transaction Amount by Account Type")
        fig_box = px.box(data, x='Account_Type', y='Transaction_Amount', title="Transaction Amount by Account Type")
        st.plotly_chart(fig_box)

    # Bar chart of transactions by day of the week. It only needs the
    # 'Day_of_Week' column, so it is checked independently of the box plot.
    if 'Day_of_Week' in data.columns:
        fig_day_of_week = px.bar(data, x='Day_of_Week', title='Count of Transactions by Day of the Week')
        st.plotly_chart(fig_day_of_week)

    # Visualization 3: Correlation Heatmap (Plotly)
    st.subheader("Correlation Heatmap")
    # 'number' selects every numeric dtype (int32, float32, ...), not only the
    # 64-bit ones.
    numeric_cols = data.select_dtypes(include='number')
    if not numeric_cols.empty:
        corr_matrix = numeric_cols.corr()
        fig_heatmap = go.Figure(data=go.Heatmap(
            z=corr_matrix.values,
            x=corr_matrix.columns,
            y=corr_matrix.columns,
            colorscale='Viridis',
            hoverongaps=False,
        ))
        fig_heatmap.update_layout(title="Correlation Heatmap", xaxis_title="Features", yaxis_title="Features")
        st.plotly_chart(fig_heatmap)

    # Visualization 4: Scatter Plot (Age vs Average Transaction Amount)
    if 'Age' in data.columns and 'Average_Transaction_Amount' in data.columns:
        st.subheader("Scatter Plot: Age vs Average Transaction Amount")
        scatter_kwargs = {
            'x': 'Age',
            'y': 'Average_Transaction_Amount',
            'title': 'Average Transaction Amount vs. Age',
        }
        # Colour by account type only when that column is available.
        if 'Account_Type' in data.columns:
            scatter_kwargs['color'] = 'Account_Type'
        try:
            fig_scatter = px.scatter(data, trendline='ols', **scatter_kwargs)
        except ImportError:
            # The OLS trendline needs the optional statsmodels package; draw
            # the plain scatter plot when it is not installed.
            fig_scatter = px.scatter(data, **scatter_kwargs)
        st.plotly_chart(fig_scatter)

    # Anomaly Detection with Isolation Forest
    st.subheader("Anomaly Detection")
    features = ['Transaction_Amount', 'Average_Transaction_Amount', 'Frequency_of_Transactions']

    # Ensure all required features are in the dataset
    if all(feature in data.columns for feature in features):
        X = data[features]

        # Train Isolation Forest
        st.write("Training Isolation Forest model...")
        model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
        model.fit(X)

        # The model labels outliers as -1 and inliers as 1; store a 1/0 flag
        # (1 = anomaly) instead.
        data['anomaly'] = (model.predict(X) == -1).astype(int)

        # Display Results
        st.write("Anomaly Detection Results:")
        st.write(data[['anomaly']].value_counts())

        # Visualization: Anomalies vs Normal Transactions
        st.subheader("Anomalies vs Normal Transactions")
        fig_anomalies = px.histogram(data, x='anomaly', title="Anomalies vs Normal Transactions",
                                     labels={'anomaly': 'Anomaly (1) vs Normal (0)'})
        st.plotly_chart(fig_anomalies)

        # User Input for Prediction
        st.subheader("Predict Anomaly for a New Transaction")
        user_inputs = {
            feature: st.number_input(f"Enter the value for '{feature}':", value=0.0)
            for feature in features
        }

        # One-row DataFrame whose columns are in the same order as the
        # training features.
        user_df = pd.DataFrame([user_inputs])

        # Predict anomalies using the model
        user_anomaly_pred = model.predict(user_df)
        if user_anomaly_pred[0] == -1:
            st.error("Anomaly detected: This transaction is flagged as an anomaly.")
        else:
            st.success("No anomaly detected: This transaction is normal.")
    else:
        st.error("Required features for anomaly detection are missing in the dataset.")
 |