Spaces:

sohail-shaikh-s07
/

TESTING-FRAUD

Sleeping

App Files Files Community

sohail-shaikh-s07 committed on Mar 14

Commit

84337b0

verified ·

1 Parent(s): 32cd5bb

Upload 3 files

Browse files

Files changed (3) hide show

pages/fraud.py +333 -0
pages/home.py +71 -0
pages/project_details.py +91 -0

pages/fraud.py ADDED Viewed

	@@ -0,0 +1,333 @@

+# pages/fraud.py
+import streamlit as st
+import pandas as pd
+import joblib
+import plotly.express as px
+import plotly.graph_objects as go
+import base64
+# Page title, followed by a full-width banner image fetched from Unsplash.
+HEADER_IMAGE_URL = "https://images.unsplash.com/photo-1611974789855-9c2a0a7236a3?ixlib=rb-1.2.1&auto=format&fit=crop&w=1950&q=80"
+st.title("Fraud Detection")
+st.image(HEADER_IMAGE_URL, use_column_width=True)
+# Load pre-trained model
+@st.cache_resource
+def load_model():
+    """Return the model deserialized from ``model.pkl``.
+
+    The path is relative, so it is resolved against the process working
+    directory. ``st.cache_resource`` keeps the loaded object so the file is
+    not re-read on every script rerun.
+    """
+    # NOTE(review): joblib.load unpickles the file, which can execute code;
+    # only ship a model.pkl from a trusted source.
+    with open("model.pkl", "rb") as model_file:
+        return joblib.load(model_file)
+model = load_model()
+# Labels stored in the "prediction_label" column by the input section of this
+# page; the charts key their colours on these exact strings.
+_FRAUD_LABEL = "Fraud Transactions"
+_NOT_FRAUD_LABEL = "Not Fraud Transactions"
+_LABEL_COLORS = {_FRAUD_LABEL: "red", _NOT_FRAUD_LABEL: "blue"}
+# Columns needed to compute balance deltas for the "Balance Impact" tab.
+_BALANCE_COLUMNS = (
+    "oldbalanceOrg",
+    "newbalanceOrig",
+    "oldbalanceDest",
+    "newbalanceDest",
+)
+def _render_fraud_distribution(data):
+    """Draw the fraud/non-fraud donut chart and report the fraud percentage."""
+    st.subheader("Fraud vs. Non-Fraud Distribution")
+    fraud_counts = data["prediction_label"].value_counts().reset_index()
+    fraud_counts.columns = ["Category", "Count"]
+    fig_pie = px.pie(
+        fraud_counts,
+        values="Count",
+        names="Category",
+        title="Distribution of Fraud vs Non-Fraud Transactions",
+        color_discrete_sequence=px.colors.sequential.RdBu,
+        hole=0.3,
+    )
+    st.plotly_chart(fig_pie, use_container_width=True)
+    total = fraud_counts["Count"].sum()
+    fraud_count = fraud_counts.loc[
+        fraud_counts["Category"] == _FRAUD_LABEL, "Count"
+    ].sum()
+    # An empty frame gives total == 0; report 0% instead of dividing by zero.
+    fraud_percent = round(float(fraud_count) / float(total) * 100, 2) if total else 0.0
+    st.info(f"Percentage of fraudulent transactions: {fraud_percent}%")
+def _render_transaction_types(data):
+    """Draw per-type fraud counts and a table of per-type label percentages."""
+    st.subheader("Transaction Types Analysis")
+    type_fraud = pd.crosstab(data["type"], data["prediction_label"])
+    fig_bar = go.Figure()
+    for label in type_fraud.columns:
+        fig_bar.add_trace(
+            go.Bar(
+                x=type_fraud.index,
+                y=type_fraud[label],
+                name=label,
+                marker_color="red" if label == _FRAUD_LABEL else "blue",
+            )
+        )
+    fig_bar.update_layout(
+        title="Fraud Distribution by Transaction Type",
+        xaxis_title="Transaction Type",
+        yaxis_title="Count",
+        barmode="group",
+    )
+    st.plotly_chart(fig_bar, use_container_width=True)
+    # Row totals are the same for every label column, so compute them once.
+    row_totals = type_fraud.sum(axis=1)
+    type_fraud_pct = type_fraud.div(row_totals, axis=0).mul(100).round(2)
+    type_fraud_pct.columns = [f"{label} %" for label in type_fraud_pct.columns]
+    # The crosstab index is named "type", so renaming a column called "index"
+    # after reset_index() never matched; name the index before resetting it.
+    type_fraud_pct.index.name = "Transaction Type"
+    st.dataframe(type_fraud_pct.reset_index())
+def _render_amount_analysis(data):
+    """Draw the amount histogram and per-label summary statistics."""
+    st.subheader("Transaction Amount Analysis")
+    fig_hist = px.histogram(
+        data,
+        x="amount",
+        color="prediction_label",
+        marginal="box",
+        nbins=50,
+        opacity=0.7,
+        title="Distribution of Transaction Amounts",
+        color_discrete_map=_LABEL_COLORS,
+    )
+    fig_hist.update_layout(xaxis_title="Amount", yaxis_title="Count")
+    st.plotly_chart(fig_hist, use_container_width=True)
+    st.subheader("Amount Statistics by Fraud Status")
+    st.dataframe(data.groupby("prediction_label")["amount"].describe())
+def _render_balance_impact(data):
+    """Draw box plots of origin/destination balance changes per label."""
+    st.subheader("Balance Impact Analysis")
+    missing = [name for name in _BALANCE_COLUMNS if name not in data.columns]
+    if missing:
+        # Mirror the time tab: explain what is missing instead of raising.
+        st.write(
+            "Balance data is not available for balance impact analysis "
+            f"(missing: {', '.join(missing)})."
+        )
+        return
+    # Build the deltas in a separate frame so the caller's DataFrame is not
+    # modified as a side effect of plotting.
+    balance_df = pd.DataFrame(
+        {
+            "prediction_label": data["prediction_label"],
+            "Origin Account": data["newbalanceOrig"] - data["oldbalanceOrg"],
+            "Destination Account": data["newbalanceDest"] - data["oldbalanceDest"],
+        }
+    ).melt(
+        id_vars=["prediction_label"],
+        var_name="Account",
+        value_name="Balance Change",
+    )
+    fig_box = px.box(
+        balance_df,
+        x="Account",
+        y="Balance Change",
+        color="prediction_label",
+        title="Balance Changes in Origin vs Destination Accounts",
+        color_discrete_map=_LABEL_COLORS,
+    )
+    st.plotly_chart(fig_box, use_container_width=True)
+def _render_time_patterns(data):
+    """Draw transaction counts per step and a mean-prediction heatmap."""
+    st.subheader("Time Patterns")
+    if "step" not in data.columns:
+        st.write("Time step data is not available for time pattern analysis.")
+        return
+    step_counts = (
+        data.groupby(["step", "prediction_label"]).size().reset_index(name="count")
+    )
+    fig_line = px.line(
+        step_counts,
+        x="step",
+        y="count",
+        color="prediction_label",
+        title="Transaction Frequency Over Time",
+        color_discrete_map=_LABEL_COLORS,
+    )
+    fig_line.update_layout(
+        xaxis_title="Time Step", yaxis_title="Number of Transactions"
+    )
+    st.plotly_chart(fig_line, use_container_width=True)
+    # A heatmap over a single step would be one row, so it is skipped.
+    if data["step"].nunique() > 1:
+        # Mean of the numeric "prediction" column per (step, type) cell;
+        # combinations with no transactions are shown as 0.
+        pivot_data = pd.pivot_table(
+            data,
+            values="prediction",
+            index="step",
+            columns="type",
+            aggfunc="mean",
+        ).fillna(0)
+        fig_heatmap = px.imshow(
+            pivot_data,
+            title="Fraud Probability Heatmap by Transaction Type and Time",
+            color_continuous_scale="Reds",
+            labels=dict(
+                x="Transaction Type", y="Time Step", color="Fraud Probability"
+            ),
+        )
+        st.plotly_chart(fig_heatmap, use_container_width=True)
+# Function to visualize predictions
+def visualize_predictions(data):
+    """Render five tabs of charts summarising the predictions in ``data``.
+
+    Args:
+        data: DataFrame that contains ``prediction_label``, ``type`` and
+            ``amount``. ``prediction`` is read for the heatmap, ``step`` is
+            optional (the time tab shows a notice without it), and the four
+            balance columns are optional (the balance tab shows a notice
+            without them). The frame is not modified.
+
+    Returns:
+        None. All output is written to the Streamlit page.
+    """
+    tab_renderers = [
+        ("Fraud Distribution", _render_fraud_distribution),
+        ("Transaction Types", _render_transaction_types),
+        ("Amount Analysis", _render_amount_analysis),
+        ("Balance Impact", _render_balance_impact),
+        ("Time Patterns", _render_time_patterns),
+    ]
+    tabs = st.tabs([title for title, _ in tab_renderers])
+    for tab, (_, render) in zip(tabs, tab_renderers):
+        with tab:
+            render(data)
+# Function to add color formatting to the DataFrame
+def color_fraud(val):
+    """Return the CSS background rule for one ``prediction_label`` cell.
+
+    Cells equal to "Fraud Transactions" are red; every other value is green.
+    """
+    if val == "Fraud Transactions":
+        return "background-color: red"
+    return "background-color: green"
+# Function to create a download link for the CSV file
+def get_csv_download_link(df):
+    """Return an HTML anchor that downloads ``df`` as ``fraud_predictions.csv``.
+
+    The frame is serialized without its index and embedded in the link as a
+    base64 data URI, so no file is written on the server.
+    """
+    # NOTE(review): "file/csv" is not a registered media type ("text/csv" is);
+    # the download attribute is what triggers the save dialog.
+    encoded = base64.b64encode(df.to_csv(index=False).encode()).decode()
+    return (
+        f'<a href="data:file/csv;base64,{encoded}" '
+        'download="fraud_predictions.csv">Download CSV File</a>'
+    )
+# Transaction Data Input section
+st.header("Transaction Data Input")
+st.write("Choose to upload a CSV file or manually input transaction data.")
+# Maps the model's numeric class to the label the charts and cell styling
+# match on; shared by both input paths so the strings cannot drift apart.
+_PREDICTION_LABELS = {1: "Fraud Transactions", 0: "Not Fraud Transactions"}
+# Input columns shown next to the label in the results table.
+_DISPLAY_COLUMNS = ["type", "nameOrig", "nameDest"]
+def _predict_and_show(frame):
+    """Add ``prediction``/``prediction_label`` to ``frame`` and show the table.
+
+    ``frame`` is modified in place so callers can reuse the added columns
+    (download link, charts, headline result).
+    """
+    frame["prediction"] = model.predict(frame)
+    frame["prediction_label"] = frame["prediction"].map(_PREDICTION_LABELS)
+    st.write("Predictions:")
+    styler = frame[_DISPLAY_COLUMNS + ["prediction_label"]].style
+    # Styler.applymap was renamed to Styler.map in pandas 2.1; use the new
+    # name when present and fall back for older installations.
+    cellwise = getattr(styler, "map", None) or styler.applymap
+    st.dataframe(cellwise(color_fraud, subset=["prediction_label"]))
+# Option to choose upload or manual input
+option = st.radio("Select input method:", ("Upload CSV", "Manual Input"))
+if option == "Upload CSV":
+    # Option to upload a CSV file
+    file_upload = st.file_uploader("Upload CSV", type=["csv"])
+    if file_upload is not None:
+        data = pd.read_csv(file_upload)
+        st.write("Uploaded Data Preview:")
+        st.write(data.head())
+        # The results table indexes these columns directly; report a readable
+        # error instead of letting a KeyError traceback reach the user.
+        missing = [name for name in _DISPLAY_COLUMNS if name not in data.columns]
+        if missing:
+            st.error(
+                f"Uploaded CSV is missing required column(s): {', '.join(missing)}"
+            )
+        elif st.button("Submit CSV"):
+            # Predict using the uploaded CSV data
+            _predict_and_show(data)
+            # Add a download link for the predicted CSV
+            st.markdown(get_csv_download_link(data), unsafe_allow_html=True)
+            # Visualizations for CSV data
+            st.header("Visualization of Prediction Results")
+            visualize_predictions(data)
+elif option == "Manual Input":
+    st.write("Manually input data:")
+    # Manual input of data
+    step = st.number_input("Step", min_value=0)
+    # Named txn_type so the builtin ``type`` is not shadowed at module level.
+    txn_type = st.selectbox(
+        "Type", ["TRANSFER", "PAYMENT", "DEBIT", "CASH_OUT", "CASH_IN"]
+    )
+    amount = st.number_input("Amount", min_value=0.0)
+    nameOrig = st.text_input("Origin Account Name")
+    oldbalanceOrg = st.number_input("Old Balance (Origin)", min_value=0.0)
+    newbalanceOrig = st.number_input("New Balance (Origin)", min_value=0.0)
+    nameDest = st.text_input("Destination Account Name")
+    oldbalanceDest = st.number_input("Old Balance (Destination)", min_value=0.0)
+    newbalanceDest = st.number_input("New Balance (Destination)", min_value=0.0)
+    isFlaggedFraud = st.selectbox("Is Flagged Fraud?", [0, 1])
+    if st.button("Submit"):
+        # Create a one-row DataFrame from manual input
+        manual_data = pd.DataFrame(
+            {
+                "step": [step],
+                "type": [txn_type],
+                "amount": [amount],
+                "nameOrig": [nameOrig],
+                "oldbalanceOrg": [oldbalanceOrg],
+                "newbalanceOrig": [newbalanceOrig],
+                "nameDest": [nameDest],
+                "oldbalanceDest": [oldbalanceDest],
+                "newbalanceDest": [newbalanceDest],
+                "isFlaggedFraud": [isFlaggedFraud],
+            }
+        )
+        st.write("Manual Input Data:")
+        st.write(manual_data)
+        # Predict using the manually input data
+        _predict_and_show(manual_data)
+        # For manual input, we'll just show the prediction result
+        st.header("Prediction Result")
+        result = manual_data["prediction_label"].iloc[0]
+        result_color = "red" if result == "Fraud Transactions" else "green"
+        st.markdown(
+            f"<h2 style='text-align: center; color: {result_color};'>{result}</h2>",
+            unsafe_allow_html=True,
+        )
+# Footer: horizontal rule followed by the copyright line.
+st.markdown("---")
+FOOTER_TEXT = "\n© 2024 Financial Fraud Detection System. All rights reserved.\n"
+st.write(FOOTER_TEXT)

pages/home.py ADDED Viewed

	@@ -0,0 +1,71 @@

+# pages/home.py
+# Landing page. Everything below is static copy rendered top to bottom with
+# Streamlit calls; the page reads no input and loads no model.
+import streamlit as st
+# Page title
+st.title("Welcome to the Financial Fraud Detection System")
+# Header with an image
+# Same Unsplash URL as the banner on pages/fraud.py.
+# NOTE(review): use_column_width may be deprecated in newer Streamlit releases — confirm against the pinned version.
+st.image("https://images.unsplash.com/photo-1611974789855-9c2a0a7236a3?ixlib=rb-1.2.1&auto=format&fit=crop&w=1950&q=80", use_column_width=True)
+# Introduction section
+st.header("Introduction")
+st.write("""
+In the digital age, financial fraud has become a significant concern for individuals, businesses, and financial institutions.
+With the increasing volume of online transactions, the need for robust fraud detection systems has never been more critical.
+Our **Financial Fraud Detection System** leverages advanced machine learning techniques to identify and prevent fraudulent activities in real-time.
+""")
+# Key features section
+# Three equal-width columns, one feature blurb per column.
+# NOTE(review): the literals inside the `with` blocks carry the block's 4-space
+# indent; presumably Streamlit dedents Markdown text before rendering — confirm.
+st.header("Key Features")
+col1, col2, col3 = st.columns(3)
+with col1:
+    st.subheader("Real-Time Detection")
+    st.write("""
+    Our system processes transactions in real-time, providing instant fraud detection and alerting.
+    This ensures that fraudulent activities are identified and mitigated as soon as they occur.
+    """)
+with col2:
+    st.subheader("High Accuracy")
+    # NOTE(review): the "over 95%" figure is hard-coded copy, not computed from
+    # the deployed model — keep it in sync with the real evaluation results.
+    st.write("""
+    Utilizing state-of-the-art machine learning algorithms, our system achieves an accuracy rate of over 95%,
+    minimizing false positives and ensuring reliable fraud detection.
+    """)
+with col3:
+    st.subheader("User-Friendly Interface")
+    st.write("""
+    The system features an intuitive web interface built with Streamlit, allowing users to easily upload transaction data,
+    view fraud predictions, and analyze results with detailed visualizations.
+    """)
+# How it works section
+st.header("How It Works")
+st.write("""
+Our Financial Fraud Detection System is built on the **XGBoost** algorithm, a powerful machine learning model known for its efficiency and accuracy in handling tabular data.
+The system processes both historical and real-time transaction data, identifying patterns and anomalies that indicate fraudulent behavior.
+""")
+# Steps in the process
+# Rendered as a Markdown numbered list.
+st.subheader("Process Overview")
+st.write("""
+1. **Data Collection**: Transaction data is collected from various sources, including banks, e-commerce platforms, and payment gateways.
+2. **Data Preprocessing**: The data is cleaned, normalized, and transformed to ensure it is suitable for analysis.
+3. **Model Training**: The XGBoost model is trained on a large dataset of labeled transactions, learning to distinguish between legitimate and fraudulent activities.
+4. **Real-Time Detection**: The trained model is deployed to analyze incoming transactions in real-time, flagging potential fraud for further investigation.
+5. **Visualization & Reporting**: Users can view detailed reports and visualizations of fraud predictions, enabling informed decision-making.
+""")
+# Call to action
+# "Fraud Detection" is the title set by pages/fraud.py.
+st.header("Get Started")
+st.write("""
+Ready to experience the power of our Financial Fraud Detection System?
+Navigate to the **Fraud Detection** page to upload your transaction data and start detecting fraud today!
+""")
+# Footer
+st.markdown("---")
+st.write("""
+© 2024 Financial Fraud Detection System. All rights reserved.
+""")

pages/project_details.py ADDED Viewed

	@@ -0,0 +1,91 @@

+# pages/project_details.py
+# Static "about the project" page. Each section is a header, a one-line
+# lead-in via st.write, and a Markdown list via st.markdown; the page reads no
+# input and loads no model.
+import streamlit as st
+# Page title
+st.title("Project Details")
+# Header with an image
+# NOTE(review): use_column_width may be deprecated in newer Streamlit releases — confirm against the pinned version.
+st.image("https://images.unsplash.com/photo-1454165804606-c3d57bc86b40?ixlib=rb-1.2.1&auto=format&fit=crop&w=1950&q=80", use_column_width=True)
+# Introduction section
+st.header("Introduction")
+st.write("""
+The **Financial Fraud Detection System** is an advanced solution designed to identify and prevent fraudulent transactions in real-time.
+With the increasing volume of online transactions, the need for a robust and scalable fraud detection system has become critical.
+Our project leverages state-of-the-art machine learning techniques to provide accurate and efficient fraud detection, helping financial institutions and businesses minimize losses and enhance security.
+""")
+# Objectives section
+# NOTE(review): the "over 95%" accuracy target is hard-coded copy; nothing on
+# this page computes it.
+st.header("Project Objectives")
+st.write("""
+The primary objectives of the Financial Fraud Detection System are:
+""")
+st.markdown("""
+- **Real-Time Fraud Detection**: Detect fraudulent transactions as they occur, enabling immediate intervention.
+- **High Accuracy**: Achieve a fraud detection accuracy rate of over 95% to minimize false positives and false negatives.
+- **Scalability**: Handle large volumes of transactions efficiently, ensuring the system can scale with growing demand.
+- **User-Friendly Interface**: Provide an intuitive and easy-to-use interface for financial analysts and decision-makers.
+- **Continuous Learning**: Enable the system to adapt to new fraud patterns by continuously retraining the model with new data.
+""")
+# Methodology section
+# Rendered as a Markdown numbered list.
+st.header("Methodology")
+st.write("""
+Our methodology for developing the Financial Fraud Detection System involves the following steps:
+""")
+st.markdown("""
+1. **Data Collection**: Gather transaction data from various sources, including banks, e-commerce platforms, and payment gateways.
+2. **Data Preprocessing**: Clean, normalize, and transform the data to ensure it is suitable for analysis.
+3. **Feature Engineering**: Extract relevant features from the transaction data, such as transaction amount, frequency, and user behavior.
+4. **Model Training**: Train the XGBoost machine learning model on a labeled dataset of transactions to distinguish between legitimate and fraudulent activities.
+5. **Model Evaluation**: Evaluate the model's performance using metrics such as accuracy, precision, recall, and F1-score.
+6. **Deployment**: Deploy the trained model in a production environment, enabling real-time fraud detection.
+7. **Monitoring & Retraining**: Continuously monitor the system's performance and retrain the model with new data to adapt to evolving fraud patterns.
+""")
+# Technology Stack section
+# NOTE(review): pages/fraud.py in this commit imports only streamlit, pandas,
+# joblib, plotly and base64; the other libraries listed here are presumably
+# used by training code outside this commit — confirm.
+st.header("Technology Stack")
+st.write("""
+The Financial Fraud Detection System is built using the following technologies:
+""")
+st.markdown("""
+- **Programming Language**: Python
+- **Machine Learning Framework**: Scikit-learn, XGBoost
+- **Data Processing**: Pandas, NumPy
+- **Visualization**: Matplotlib, Seaborn, Plotly
+- **Web Interface**: Streamlit
+- **Model Serialization**: Joblib
+- **Version Control**: Git
+""")
+# Key Features section
+# NOTE(review): pages/fraud.py in this commit shows only the class label from
+# model.predict; per-transaction probability scores and explanations are not
+# produced there — confirm these features exist before advertising them.
+st.header("Key Features")
+st.write("""
+The Financial Fraud Detection System offers the following key features:
+""")
+st.markdown("""
+- **Real-Time Processing**: Analyze transactions in real-time to detect fraud as it happens.
+- **Batch Processing**: Upload and analyze bulk transaction data in CSV format.
+- **Interactive Dashboard**: Visualize fraud detection results with interactive charts and graphs.
+- **Fraud Probability Scores**: Provide a fraud risk score for each transaction, helping analysts prioritize investigations.
+- **Decision Explainability**: Offer insights into why a transaction was flagged as fraudulent, enhancing transparency.
+- **Scalable Architecture**: Designed to handle high volumes of transactions without performance degradation.
+""")
+# Future Enhancements section
+st.header("Future Enhancements")
+st.write("""
+We are continuously working to improve the Financial Fraud Detection System. Some of the planned enhancements include:
+""")
+st.markdown("""
+- **Integration with Banking Systems**: Enable seamless integration with existing banking and payment systems for live fraud detection.
+- **Advanced Feature Engineering**: Incorporate additional features such as behavioral analytics and device tracking to improve detection accuracy.
+- **Automated Model Retraining**: Implement an automated pipeline for retraining the model with new data to adapt to evolving fraud patterns.
+- **Mobile-Friendly Interface**: Develop a mobile-friendly version of the web interface for on-the-go fraud detection monitoring.
+""")
+# Footer
+st.markdown("---")
+st.write("""
+© 2024 Financial Fraud Detection System. All rights reserved.
+""")