File size: 4,659 Bytes
ab46b5d 58e2c34 ab46b5d 58e2c34 ab46b5d 58e2c34 ab46b5d 58e2c34 ab46b5d 58e2c34 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import gradio as gr
import pandas as pd
import os
import tempfile
import chardet
def detect_encoding(file_path):
    """
    Guess the text encoding of a file.

    The file is opened in binary mode, its full contents are handed to
    ``chardet.detect``, and the ``'encoding'`` entry of the detection
    result is returned unchanged.

    Args:
        file_path: Path of the file to inspect.
    Returns:
        The value chardet reports under the ``'encoding'`` key.
    """
    with open(file_path, 'rb') as handle:
        raw_bytes = handle.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']
def _resolve_upload_path(upload):
    """Return the filesystem path for one uploaded item (path or object with ``.name``)."""
    # Path objects also expose ``.name`` (the basename only), so they must be
    # converted with fspath before the attribute lookup below.
    if isinstance(upload, os.PathLike):
        return os.fspath(upload)
    # Upload wrappers carry the path in ``.name``; plain strings are the path.
    return getattr(upload, "name", upload)


def _read_csv_with_fallback(path, detected_encoding):
    """Read one CSV with the detected encoding, then fallbacks; None if none can decode it."""
    try:
        return pd.read_csv(path, encoding=detected_encoding)
    except (UnicodeDecodeError, LookupError):
        # LookupError: the detector may report a codec name this Python does
        # not provide; treat that like a failed decode and try the fallbacks.
        pass

    for candidate in ('cp949', 'euc-kr', 'latin1', 'ISO-8859-1'):
        try:
            return pd.read_csv(path, encoding=candidate)
        except UnicodeDecodeError:
            continue
    return None


def merge_csv_files(files):
    """
    Merge multiple CSV files into one.

    Each file is read with the encoding reported by ``detect_encoding``;
    if that fails to decode, a fixed list of fallback encodings is tried.
    The frames are concatenated row-wise with a fresh index and written to
    a new temporary ``.csv`` file encoded as UTF-8 with BOM.

    Args:
        files: List of uploaded CSV files. Each item may be a path
            (``str`` / ``os.PathLike``) or an object whose ``name``
            attribute holds the path. At most 30 items are accepted.
    Returns:
        A ``(path, message)`` tuple: the path to the merged CSV file and a
        status message. On any failure the path is ``None`` and the
        message describes the problem; exceptions are not propagated.
    """
    if not files:
        return None, "No files were uploaded. Please select CSV files to merge."
    if len(files) > 30:
        return None, "Maximum 30 files can be merged at once."

    try:
        # Read all files into a DataFrame list
        dataframes = []
        for file in files:
            path = _resolve_upload_path(file)
            df = _read_csv_with_fallback(path, detect_encoding(path))
            if df is None:
                return None, f"Could not determine encoding for '{os.path.basename(path)}'."
            dataframes.append(df)

        # ``files`` is non-empty and every iteration appended a frame, so
        # there is always something to concatenate here.
        merged_df = pd.concat(dataframes, ignore_index=True)

        # Reserve a temporary file name; delete=False keeps the file on disk
        # so it can be handed back to the caller for download.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
            output_path = tmp.name

        # Write after the handle is closed: some platforms cannot reopen a
        # NamedTemporaryFile by name while it is still open.
        # Save merged data in Excel-compatible format (UTF-8 with BOM)
        merged_df.to_csv(output_path, index=False, encoding='utf-8-sig')
        return output_path, f"Successfully merged {len(files)} files. Please open with UTF-8 encoding in Excel."
    except Exception as e:
        # UI boundary: report any failure as a status message instead of
        # raising into the caller.
        return None, f"Error occurred: {str(e)}"
# Static text and markup for the CSVFusion page, kept apart from the layout code.
_TITLE_MARKDOWN = """
# 📊 CSVFusion: Intelligent File Merger
*Seamlessly combine multiple CSV files into one unified dataset*
---
"""

_USAGE_MARKDOWN = """
### How to use CSVFusion:
1. Upload up to 30 CSV files using the panel on the right
2. Click the "Merge Files" button
3. Download your consolidated CSV file
### Features:
- Automatic encoding detection
- Handles various CSV formats
- Excel-compatible output (UTF-8)
- Preserves all data columns
"""

# Custom CSS for better visual appeal, injected as an HTML <style> element.
_CUSTOM_CSS_HTML = """
<style>
.gradio-container {
background: linear-gradient(to right, #f9f9f9, #ffffff);
border-radius: 12px;
}
#file_upload {
border: 2px dashed #3498db;
border-radius: 8px;
padding: 20px;
transition: all 0.3s;
}
#file_upload:hover {
border-color: #2980b9;
box-shadow: 0 0 10px rgba(52, 152, 219, 0.3);
}
.footer {
text-align: center;
margin-top: 30px;
color: #7f8c8d;
font-size: 0.9em;
}
</style>
"""

_FOOTER_HTML = """
<div class="footer">
<p>CSVFusion © 2025 - A powerful tool for data professionals</p>
</div>
"""

# Assemble the Gradio interface: title, instructions beside the upload
# panel, the merge button, then the status and download outputs.
with gr.Blocks(title="CSVFusion") as app:
    gr.Markdown(_TITLE_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown(_USAGE_MARKDOWN)
        with gr.Column(scale=3):
            # elem_id ties this component to the #file_upload CSS rules.
            input_files = gr.File(
                label="Upload CSV Files (Max 30)",
                file_count="multiple",
                file_types=[".csv"],
                elem_id="file_upload",
            )

    with gr.Row():
        merge_button = gr.Button("Merge Files", variant="primary", size="lg")

    with gr.Row():
        with gr.Column():
            status = gr.Textbox(
                label="Status",
                placeholder="Ready to merge your files...",
            )
        with gr.Column():
            output_file = gr.File(label="Download Merged CSV")

    # Styling and footer markup
    gr.HTML(_CUSTOM_CSS_HTML)
    gr.HTML(_FOOTER_HTML)

    # merge_csv_files returns (path, message), matching this output order.
    merge_button.click(
        fn=merge_csv_files,
        inputs=[input_files],
        outputs=[output_file, status],
    )
# Run the app
if __name__ == "__main__":
    # Launch the Gradio app only when this file is executed as a script,
    # not when it is imported as a module.
    app.launch()