File size: 4,659 Bytes
ab46b5d
58e2c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab46b5d
58e2c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab46b5d
58e2c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab46b5d
 
58e2c34
ab46b5d
58e2c34
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import gradio as gr
import pandas as pd
import os
import tempfile
import chardet

def detect_encoding(file_path):
    """
    Guess the character encoding of a file with chardet

    Args:
        file_path: Path of the file to inspect

    Returns:
        The 'encoding' entry of chardet's detection result
    """
    # The whole file is read as raw bytes and handed to chardet in one call
    with open(file_path, 'rb') as handle:
        raw_bytes = handle.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']

def _read_csv_with_fallback(path, detected_encoding):
    """
    Read one CSV file, trying the detected encoding first and then fallbacks

    Args:
        path: Path of the CSV file to read
        detected_encoding: Encoding name reported by the detector, or None

    Returns:
        The parsed DataFrame, or None if every candidate encoding failed to decode
    """
    import codecs  # local import so the file-level import block stays untouched

    # A detector can report an encoding name Python has no codec for. Passing
    # such a name to pandas raises LookupError and would skip the fallbacks,
    # so treat it like "nothing detected" (pandas then defaults to UTF-8).
    if detected_encoding is not None:
        try:
            codecs.lookup(detected_encoding)
        except LookupError:
            detected_encoding = None

    # latin1 maps every byte to a character, so in practice it is the last
    # candidate that can be reached.
    candidates = (detected_encoding, 'cp949', 'euc-kr', 'latin1', 'ISO-8859-1')
    for enc in candidates:
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeDecodeError:
            continue
    return None


def merge_csv_files(files):
    """
    Function to merge multiple CSV files into one

    Rows of all files are concatenated in upload order and written to a new
    temporary CSV file encoded as UTF-8 with BOM.

    Args:
        files: List of uploaded CSV files; each item is either a path string
            or an object exposing the path as its `name` attribute

    Returns:
        Tuple of (path to the merged CSV file, status message); the path is
        None whenever the merge did not succeed
    """
    if not files:
        return None, "No files were uploaded. Please select CSV files to merge."

    if len(files) > 30:
        return None, "Maximum 30 files can be merged at once."

    try:
        # Read all files into DataFrame list
        dataframes = []
        for file in files:
            # Accept both plain path strings and upload objects carrying `.name`
            path = getattr(file, 'name', file)
            df = _read_csv_with_fallback(path, detect_encoding(path))
            if df is None:
                return None, f"Could not determine encoding for '{os.path.basename(path)}'."
            dataframes.append(df)

        # Merge all DataFrames (files is non-empty here, so the list is too)
        merged_df = pd.concat(dataframes, ignore_index=True)

        # Reserve a temporary file name; delete=False keeps the file on disk
        # after the handle is closed so it can be written to and then returned.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
            output_path = tmp.name

        try:
            # Save merged data in Excel-compatible format (UTF-8 with BOM)
            merged_df.to_csv(output_path, index=False, encoding='utf-8-sig')
        except Exception:
            # Do not leave an orphaned temp file behind when writing fails
            if os.path.exists(output_path):
                os.remove(output_path)
            raise

        return output_path, f"Successfully merged {len(files)} files. Please open with UTF-8 encoding in Excel."

    except Exception as e:
        # UI boundary: report any failure as a status message instead of raising
        return None, f"Error occurred: {str(e)}"

# Declare the Gradio UI. Components appear on the page in the order they are
# created below: header, instructions + upload panel, merge button,
# status/download row, styling, footer. The button is wired to
# merge_csv_files at the end of the block.
with gr.Blocks(title="CSVFusion") as app:
    # Page header
    gr.Markdown(
        """
        # 📊 CSVFusion: Intelligent File Merger
        
        *Seamlessly combine multiple CSV files into one unified dataset*
        
        ---
        """
    )
    
    # Top row: usage notes on the left, upload panel on the right
    # (scale=2 vs scale=3 sets the relative width of the two columns)
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("""
            ### How to use CSVFusion:
            1. Upload up to 30 CSV files using the panel on the right
            2. Click the "Merge Files" button
            3. Download your consolidated CSV file
            
            ### Features:
            - Automatic encoding detection
            - Handles various CSV formats
            - Excel-compatible output (UTF-8)
            - Preserves all data columns
            """)
        
        with gr.Column(scale=3):
            # The 30-file cap in the label is not set on this component;
            # merge_csv_files rejects larger uploads itself.
            # elem_id is the hook for the "#file_upload" CSS rules further down.
            input_files = gr.File(
                file_count="multiple",
                label="Upload CSV Files (Max 30)",
                file_types=[".csv"],
                elem_id="file_upload"
            )
    
    # Action button that triggers the merge
    with gr.Row():
        merge_button = gr.Button("Merge Files", variant="primary", size="lg")
    
    # Result row: status message on the left, merged file download on the right
    with gr.Row():
        with gr.Column():
            status = gr.Textbox(label="Status", placeholder="Ready to merge your files...")
        with gr.Column():
            output_file = gr.File(label="Download Merged CSV")
    
    # Custom CSS injected as a <style> element inside an HTML component.
    # It styles the page container, the upload panel (#file_upload) and the
    # footer (.footer).
    # NOTE(review): gr.Blocks also accepts a css argument; confirm whether
    # that would be the preferred place for these rules.
    gr.HTML("""
    <style>
    .gradio-container {
        background: linear-gradient(to right, #f9f9f9, #ffffff);
        border-radius: 12px;
    }
    #file_upload {
        border: 2px dashed #3498db;
        border-radius: 8px;
        padding: 20px;
        transition: all 0.3s;
    }
    #file_upload:hover {
        border-color: #2980b9;
        box-shadow: 0 0 10px rgba(52, 152, 219, 0.3);
    }
    .footer {
        text-align: center;
        margin-top: 30px;
        color: #7f8c8d;
        font-size: 0.9em;
    }
    </style>
    """)
    
    # Footer; the "footer" class is styled by the CSS above
    gr.HTML("""
    <div class="footer">
        <p>CSVFusion © 2025 - A powerful tool for data professionals</p>
    </div>
    """)
    
    # merge_csv_files returns (merged file path, status message), so the
    # outputs are listed in that same order: download component first,
    # status textbox second.
    merge_button.click(
        fn=merge_csv_files,
        inputs=[input_files],
        outputs=[output_file, status]
    )

# Launch the app only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    app.launch()