File size: 3,416 Bytes
e2ad056
 
 
7e8ba8a
1d86238
7e8ba8a
 
0b32019
e2ad056
28828a1
 
 
 
 
 
 
 
 
 
 
e2ad056
 
 
 
 
 
 
 
7e8ba8a
 
 
 
 
 
 
 
 
 
 
 
efdb24b
7e8ba8a
efdb24b
28828a1
7e8ba8a
 
7c8dbb9
 
 
 
 
 
 
 
 
 
 
 
 
e2ad056
 
 
 
efdb24b
 
 
 
1d86238
efdb24b
7c8dbb9
 
 
 
 
 
 
 
 
e2ad056
eb1751e
 
cb88ded
 
973d230
450284e
e2ad056
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
import pandas as pd
import os
import shutil
from omnibin import generate_binary_classification_report, ColorScheme

# Directory that receives the generated report output
RESULTS_DIR = "/tmp/results"

# Lookup table from color scheme names (as strings) to ColorScheme members
COLOR_SCHEME_MAP = {
    scheme_name: getattr(ColorScheme, scheme_name)
    for scheme_name in ("DEFAULT", "MONOCHROME", "VIBRANT")
}

def process_csv(csv_file, n_bootstrap=1000, dpi=72, color_scheme="DEFAULT"):
    """Generate a binary classification report from an uploaded CSV file.

    Args:
        csv_file: The uploaded CSV. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing its path as ``.name``.
        n_bootstrap: Number of bootstrap iterations forwarded to the report
            generator. Coerced to ``int`` before use.
        dpi: Plot resolution forwarded to the report generator. Coerced to
            ``int`` before use.
        color_scheme: A key of ``COLOR_SCHEME_MAP``.

    Returns:
        A tuple of seven items: the value returned by
        ``generate_binary_classification_report`` followed by six plot image
        paths located under ``RESULTS_DIR/plots``.

    Raises:
        ValueError: If no file was supplied, or the CSV lacks the 'y_true'
            or 'y_pred' column.
        KeyError: If ``color_scheme`` is not a key of ``COLOR_SCHEME_MAP``.
    """
    # Validate the input before touching the shared results directory so a
    # bad upload never wipes a previously generated report.
    if csv_file is None:
        raise ValueError("No CSV file was provided")

    # Accept both a plain path and a file wrapper exposing the path as `.name`.
    if isinstance(csv_file, (str, os.PathLike)):
        csv_path = csv_file
    else:
        csv_path = csv_file.name

    # Read the CSV file
    df = pd.read_csv(csv_path)

    # Check if required columns exist
    required_columns = ['y_true', 'y_pred']
    if any(col not in df.columns for col in required_columns):
        raise ValueError("CSV file must contain 'y_true' and 'y_pred' columns")

    # Convert string color scheme to enum
    color_scheme_enum = COLOR_SCHEME_MAP[color_scheme]

    # Numeric UI widgets may deliver floats (e.g. 1000.0); both settings are
    # whole-number quantities, so normalise them here.
    n_bootstrap = int(n_bootstrap)
    dpi = int(dpi)

    # Start from an empty results directory so stale output is never returned
    if os.path.exists(RESULTS_DIR):
        shutil.rmtree(RESULTS_DIR)
    os.makedirs(RESULTS_DIR, exist_ok=True)

    # Generate the report
    report_path = generate_binary_classification_report(
        y_true=df['y_true'].values,
        y_scores=df['y_pred'].values,
        output_path=os.path.join(RESULTS_DIR, "classification_report.pdf"),
        n_bootstrap=n_bootstrap,
        random_seed=42,
        dpi=dpi,
        color_scheme=color_scheme_enum
    )

    # Paths to the individual plots, in the order the UI outputs expect them.
    # NOTE(review): presumably the report generator writes these files into
    # RESULTS_DIR/plots; this function only builds the paths -- confirm.
    plots_dir = os.path.join(RESULTS_DIR, "plots")
    plot_paths = {
        "ROC and PR Curves": os.path.join(plots_dir, "roc_pr.png"),
        "Metrics vs Threshold": os.path.join(plots_dir, "metrics_threshold.png"),
        "Confusion Matrix": os.path.join(plots_dir, "confusion_matrix.png"),
        "Calibration Plot": os.path.join(plots_dir, "calibration.png"),
        "Prediction Distribution": os.path.join(plots_dir, "prediction_distribution.png"),
        "Metrics Summary": os.path.join(plots_dir, "metrics_summary.png")
    }

    # Return both the PDF and the plot images
    return report_path, *plot_paths.values()

# Create the Gradio interface.
# Inputs are passed positionally to process_csv (file, bootstrap count, DPI,
# color scheme); outputs receive its seven return values in order.
iface = gr.Interface(
    fn=process_csv,
    inputs=[
        gr.File(label="Upload CSV file with 'y_true' and 'y_pred' columns"),
        # precision=0 makes the Number components deliver integers rather
        # than floats, since both settings are whole-number quantities.
        gr.Number(label="Number of Bootstrap Iterations", value=1000, minimum=100, maximum=10000, precision=0),
        gr.Number(label="DPI", value=72, minimum=50, maximum=300, precision=0),
        # Choices come from the lookup table so the UI cannot offer a scheme
        # that process_csv is unable to resolve.
        gr.Dropdown(label="Color Scheme", choices=list(COLOR_SCHEME_MAP), value="DEFAULT")
    ],
    outputs=[
        gr.File(label="Classification Report PDF"),
        gr.Image(label="ROC and PR Curves"),
        gr.Image(label="Metrics vs Threshold"),
        gr.Image(label="Confusion Matrix"),
        gr.Image(label="Calibration Plot"),
        gr.Image(label="Prediction Distribution"),
        gr.Image(label="Metrics Summary")
    ],
    title="Binary Classification Report Generator",
    description="Upload a CSV file containing 'y_true' and 'y_pred' columns to generate a binary classification report.\n\n"
                "'y_true': reference standard (0s or 1s).\n\n"
                "'y_pred': model prediction (continuous value between 0 and 1).\n\n"
                "This application takes approximately 35 seconds to generate the report.\n",

    examples=[["scores.csv", 1000, 72, "DEFAULT"]],
    cache_examples=False
)

if __name__ == "__main__":
    iface.launch()