File size: 10,705 Bytes
ab46b5d
58e2c34
8de05ba
4e21102
58e2c34
4e21102
 
 
 
 
 
 
ab46b5d
4e21102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b6be65
4e21102
 
8de05ba
0b6be65
8de05ba
 
0b6be65
 
8de05ba
0b6be65
8de05ba
0b6be65
8de05ba
0b6be65
8de05ba
0b6be65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8de05ba
0b6be65
 
8de05ba
0b6be65
 
 
 
 
 
 
 
 
 
 
 
8de05ba
0b6be65
 
 
 
 
8de05ba
0b6be65
 
 
8de05ba
 
0b6be65
8de05ba
0b6be65
 
8de05ba
0b6be65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8de05ba
0b6be65
 
 
 
 
8de05ba
4e21102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8de05ba
 
0b6be65
 
 
 
 
 
58e2c34
 
0b6be65
 
 
 
 
 
4e21102
 
0b6be65
 
 
 
 
4e21102
0b6be65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58e2c34
0b6be65
7a50f12
 
 
 
 
 
 
 
 
4e21102
 
 
 
 
7a50f12
 
0b6be65
 
 
 
 
 
 
 
 
 
58e2c34
0b6be65
58e2c34
0b6be65
 
 
 
 
 
 
 
4e21102
0b6be65
 
ab46b5d
4e21102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
import os
import tempfile
from io import BytesIO

import chardet
import gradio as gr
import pandas as pd

def detect_encoding(file_bytes):
    """Guess the text encoding of ``file_bytes`` using chardet.

    Only the first 10,000 bytes are inspected so detection stays fast on
    large uploads.

    Args:
        file_bytes: Raw bytes of the file whose encoding should be guessed.

    Returns:
        The encoding name reported by chardet, or ``"utf-8"`` when the input
        is empty, chardet reports no encoding, or chardet reports plain ASCII.
    """
    sample = file_bytes[:10000]
    if not sample:
        # Nothing to analyse; chardet would only report "no encoding".
        return "utf-8"

    detected = chardet.detect(sample)["encoding"]
    # ASCII is a strict subset of UTF-8. Reporting "ascii" for an all-ASCII
    # sample would make decoding fail as soon as the rest of the file contains
    # a non-ASCII character, so the superset is returned instead.
    if not detected or detected.lower() == "ascii":
        return "utf-8"
    return detected

def _read_upload(input_file):
    """Return ``(file_name, file_bytes)`` for an upload given as a file object or a path."""
    try:
        # File-like object: exposes read() and name.
        file_bytes = input_file.read()
        file_name = input_file.name
    except AttributeError:
        # No read()/name attribute, so treat the value as a filesystem path.
        file_name = input_file
        with open(file_name, "rb") as f:
            file_bytes = f.read()
    return file_name, file_bytes


def _read_csv_with_fallback(file_bytes, encoding):
    """Parse CSV bytes with ``encoding``; on decode failure try common single-byte encodings.

    Returns ``(dataframe, encoding_used)``, or ``(None, encoding)`` when every
    attempt failed to decode.
    """
    try:
        return pd.read_csv(BytesIO(file_bytes), encoding=encoding), encoding
    except (UnicodeDecodeError, LookupError):
        # LookupError: the detector may name a codec Python does not provide.
        pass

    for candidate in ("latin1", "iso-8859-1", "cp1252"):
        try:
            return pd.read_csv(BytesIO(file_bytes), encoding=candidate), candidate
        except UnicodeDecodeError:
            continue
    return None, encoding


def _new_output_path(base_name):
    """Return a path named ``base_name`` inside a fresh temporary directory.

    A per-call directory keeps simultaneous conversions from overwriting each
    other's result while preserving the download file name.
    """
    return os.path.join(tempfile.mkdtemp(prefix="dataformat_"), base_name)


def convert_file(input_file, conversion_type, encoding_option):
    """Convert an uploaded CSV file to Parquet, or a Parquet file to CSV.

    Args:
        input_file: The upload, either a file-like object (with ``read()`` and
            ``name``) or a path string. ``None`` means nothing was uploaded.
        conversion_type: ``"CSV to Parquet"`` or ``"Parquet to CSV"``.
        encoding_option: ``"Auto-detect"`` or an explicit encoding name. For
            CSV input it is the encoding used to read the file; for CSV output
            it is the encoding written (``"Auto-detect"`` means UTF-8 there,
            because there is no text input to detect from).

    Returns:
        A tuple ``(output_path, message)``. On success ``output_path`` is the
        converted file and ``message`` holds a summary plus a preview of the
        first 10 rows. On failure ``output_path`` is ``None`` and ``message``
        describes the problem; exceptions are reported this way rather than
        raised so the UI can display them.
    """
    if input_file is None:
        return None, "Please upload a file."

    try:
        file_name, file_bytes = _read_upload(input_file)
        file_extension = file_name.lower().split('.')[-1]

        if conversion_type == "CSV to Parquet":
            if file_extension != "csv":
                return None, "For CSV to Parquet conversion, please upload a CSV file."

            # Detection is only meaningful for text input, so it happens here
            # rather than before the conversion type is known.
            if encoding_option == "Auto-detect":
                encoding = detect_encoding(file_bytes) or "utf-8"
            else:
                encoding = encoding_option

            df, encoding = _read_csv_with_fallback(file_bytes, encoding)
            if df is None:
                return None, f"Failed to decode the CSV file. Auto-detected encoding was '{encoding}'. Please try selecting a specific encoding."

            output_file = _new_output_path("output.parquet")
            df.to_parquet(output_file, index=False)
            converted_format = "Parquet"

        elif conversion_type == "Parquet to CSV":
            if file_extension != "parquet":
                return None, "For Parquet to CSV conversion, please upload a Parquet file."

            # Parquet is binary: guessing a text encoding from its bytes would
            # yield an arbitrary codec for the output, so default to UTF-8.
            encoding = "utf-8" if encoding_option == "Auto-detect" else encoding_option

            df = pd.read_parquet(BytesIO(file_bytes))
            output_file = _new_output_path("output.csv")
            df.to_csv(output_file, index=False, encoding=encoding)
            converted_format = "CSV"

        else:
            return None, "Invalid conversion type selected."

        # Generate a preview of the top 10 rows
        preview = df.head(10).to_string(index=False)
        info_message = (
            f"Input file: {file_name}\n"
            f"Converted file format: {converted_format}\n"
            f"Encoding used: {encoding}\n"
            f"Total rows: {len(df)}\n"
            f"Total columns: {len(df.columns)}\n\n"
            f"Preview (Top 10 Rows):\n{preview}"
        )
        return output_file, info_message

    except Exception as e:
        # UI boundary: surface any failure as a message instead of a traceback.
        return None, f"Error during conversion: {str(e)}"

# Stylesheet for the whole interface; it is passed to gr.Blocks(css=...) below.
# The class selectors are attached to components either through elem_classes
# (conversion-radio, convert-button, preview-box) or through class attributes
# in the raw HTML snippets (header-icon, divider, instruction-box,
# instruction-step, info-tag, footer, file-box).
# NOTE(review): .error-message and .success-message are not referenced
# anywhere in this file as far as visible here — confirm whether they are
# still needed.
custom_css = """
body {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    font-family: 'Poppins', 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

.gradio-container {
    max-width: 950px;
    margin: 40px auto;
    padding: 30px;
    background-color: #ffffff;
    border-radius: 16px;
    box-shadow: 0 10px 25px rgba(0,0,0,0.1);
}

h1 {
    color: #3a4149;
    font-size: 2.5rem;
    text-align: center;
    margin-bottom: 5px;
    font-weight: 600;
}

h2 {
    color: #5a6570;
    font-size: 1.2rem;
    text-align: center;
    margin-bottom: 25px;
    font-weight: 400;
}

.header-icon {
    font-size: 3rem;
    text-align: center;
    margin-bottom: 10px;
    color: #4285f4;
}

.instruction-box {
    background-color: #f8f9fa;
    border-left: 4px solid #4285f4;
    padding: 15px;
    margin-bottom: 25px;
    border-radius: 6px;
}

.instruction-step {
    margin: 8px 0;
    padding-left: 10px;
}

.file-box {
    border: 2px dashed #ddd;
    border-radius: 12px;
    padding: 20px;
    transition: all 0.3s ease;
}

.file-box:hover {
    border-color: #4285f4;
    box-shadow: 0 5px 15px rgba(66, 133, 244, 0.15);
}

.conversion-radio label {
    padding: 10px 15px;
    margin: 5px;
    border-radius: 8px;
    border: 1px solid #eaeaea;
    transition: all 0.2s ease;
}

.conversion-radio input:checked + label {
    background-color: #e8f0fe;
    border-color: #4285f4;
    color: #4285f4;
}

.convert-button {
    background: linear-gradient(to right, #4285f4, #34a853) !important;
    color: white !important;
    border: none !important;
    padding: 12px 25px !important;
    font-size: 16px !important;
    font-weight: 500 !important;
    border-radius: 30px !important;
    cursor: pointer;
    margin: 20px auto !important;
    display: block !important;
    box-shadow: 0 4px 12px rgba(66, 133, 244, 0.25) !important;
}

.convert-button:hover {
    box-shadow: 0 6px 16px rgba(66, 133, 244, 0.4) !important;
    transform: translateY(-2px);
}

.footer {
    text-align: center;
    margin-top: 30px;
    color: #70757a;
    font-size: 0.9rem;
}

.preview-box {
    background-color: #f8f9fa;
    border-radius: 8px;
    padding: 15px;
    font-family: monospace;
    white-space: pre-wrap;
    max-height: 400px;
    overflow-y: auto;
}

.info-tag {
    display: inline-block;
    background-color: #e8f0fe;
    color: #4285f4;
    padding: 4px 10px;
    border-radius: 20px;
    font-size: 0.85rem;
    margin-right: 8px;
    margin-bottom: 8px;
}

.divider {
    height: 1px;
    background: linear-gradient(to right, transparent, #ddd, transparent);
    margin: 25px 0;
}

.error-message {
    color: #d93025;
    background-color: #fce8e6;
    padding: 10px;
    border-radius: 8px;
    margin-top: 10px;
    font-size: 0.9rem;
}

.success-message {
    color: #188038;
    background-color: #e6f4ea;
    padding: 10px;
    border-radius: 8px;
    margin-top: 10px;
    font-size: 0.9rem;
}
"""

# Build the two-column interface: static help text on the left, the upload and
# conversion controls plus results on the right.
with gr.Blocks(css=custom_css, title="DataFormat Converter") as demo:
    gr.HTML('<div class="header-icon">📊</div>')
    gr.Markdown("# DataFormat Converter")
    gr.Markdown("## Seamlessly convert between CSV and Parquet formats with just a few clicks")
    
    gr.HTML('<div class="divider"></div>')
    
    with gr.Row():
        # Left column: usage instructions and background information.
        with gr.Column():
            gr.HTML("""
            <div class="instruction-box">
                <h3>How It Works</h3>
                <div class="instruction-step">1. Upload your CSV or Parquet file</div>
                <div class="instruction-step">2. Select the conversion direction</div>
                <div class="instruction-step">3. Choose encoding (or leave as auto-detect)</div>
                <div class="instruction-step">4. Click "Convert" and download your transformed file</div>
            </div>
            
            <div class="info-section">
                <div class="info-tag">Fast Conversion</div>
                <div class="info-tag">Data Preview</div>
                <div class="info-tag">Multi-Encoding Support</div>
                <div class="info-tag">Maintains Structure</div>
            </div>
            """)
            
            gr.HTML("""
            <div style="margin-top: 25px;">
                <h3>Why Convert?</h3>
                <p>Parquet files offer significant advantages for data storage and analysis:</p>
                <ul>
                    <li>Smaller file size (up to 87% reduction)</li>
                    <li>Faster query performance</li>
                    <li>Column-oriented storage</li>
                    <li>Better compression</li>
                </ul>
                <p>CSV files are useful for:</p>
                <ul>
                    <li>Universal compatibility</li>
                    <li>Human readability</li>
                    <li>Simple integration with many tools</li>
                </ul>
            </div>
            """)
    
        # Right column: inputs, action button and outputs.
        with gr.Column():
            # A real layout container carries the .file-box styling. Separate
            # gr.HTML('<div ...>') / gr.HTML('</div>') calls cannot wrap the
            # components between them, because every gr.HTML is rendered as
            # its own independent element.
            with gr.Group(elem_classes=["file-box"]):
                input_file = gr.File(label="Upload Your File")
                conversion_type = gr.Radio(
                    choices=["CSV to Parquet", "Parquet to CSV"],
                    label="Select Conversion Type",
                    value="CSV to Parquet",
                    elem_classes=["conversion-radio"]
                )
                encoding_option = gr.Dropdown(
                    choices=["Auto-detect", "utf-8", "latin1", "iso-8859-1", "cp1252", "utf-16"],
                    value="Auto-detect",
                    label="Select CSV Encoding"
                )
                convert_button = gr.Button("Convert Now", elem_classes=["convert-button"])
            
            with gr.Accordion("Conversion Results", open=False):
                output_file = gr.File(label="Download Converted File")
                
            with gr.Accordion("Data Preview", open=True):
                preview = gr.Textbox(
                    label="File Information and Preview", 
                    lines=15,
                    elem_classes=["preview-box"]
                )
    
    gr.HTML('<div class="divider"></div>')
    
    gr.HTML("""
    <div class="footer">
        <p>DataFormat Converter © 2025 | Built with Gradio | An efficient tool for data professionals</p>
    </div>
    """)
    
    # Run the conversion and show both the downloadable file and the summary.
    convert_button.click(
        fn=convert_file, 
        inputs=[input_file, conversion_type, encoding_option], 
        outputs=[output_file, preview]
    )

    def update_encoding_visibility(conversion_type):
        """Show the encoding dropdown only while "CSV to Parquet" is selected."""
        return gr.update(visible=(conversion_type == "CSV to Parquet"))
    
    # Re-evaluate the dropdown's visibility whenever the direction changes.
    conversion_type.change(
        fn=update_encoding_visibility,
        inputs=conversion_type,
        outputs=encoding_option
    )

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()