| import gradio as gr |
| import pandas as pd |
| from io import BytesIO |
| import chardet |
|
|
def detect_encoding(file_bytes):
    """Guess the text encoding of *file_bytes* using chardet.

    Only the first 10,000 bytes are sampled.

    Args:
        file_bytes: Raw bytes of the file to inspect.

    Returns:
        The encoding name reported by chardet, or ``"utf-8"`` when chardet
        reports no encoding or reports plain ASCII.
    """
    detected = chardet.detect(file_bytes[:10000])['encoding']
    # chardet reports None when it cannot decide (e.g. empty input), and an
    # "ascii" verdict only describes the sampled prefix. UTF-8 decodes ASCII
    # identically, so it is the safer answer for the whole file in both cases.
    if not detected or detected.lower() == "ascii":
        return "utf-8"
    return detected
|
|
def _read_upload(input_file):
    """Return ``(file_name, file_bytes)`` for an uploaded file.

    Accepts either a file-like object exposing ``read()`` and ``name`` or a
    filesystem path.
    """
    try:
        file_bytes = input_file.read()
        file_name = input_file.name
    except AttributeError:
        # Not a file-like object: treat the value as a path on disk.
        file_name = input_file
        with open(file_name, "rb") as handle:
            file_bytes = handle.read()
    return file_name, file_bytes


def _read_csv_with_fallback(file_bytes, encoding):
    """Parse CSV bytes, retrying with common encodings on decode failure.

    Returns ``(df, encoding_used)``, or ``(None, encoding)`` when no
    candidate encoding could decode the data.
    """
    # Strict codecs go first; latin1 maps every byte value and therefore
    # always decodes, so it is only useful as the last resort.
    candidates = [encoding]
    candidates += [enc for enc in ("utf-8", "cp1252", "latin1") if enc != encoding]
    for candidate in candidates:
        try:
            frame = pd.read_csv(BytesIO(file_bytes), encoding=candidate)
        except (UnicodeError, LookupError):
            # Wrong codec for this data, or a codec name Python does not know.
            continue
        return frame, candidate
    return None, encoding


def _normalize_text_columns(df):
    """Cast object columns to ``str`` and map missing-value markers to None."""
    for col in df.columns:
        if df[col].dtype == "object":
            as_text = df[col].astype(str)
            # astype(str) turns missing values into the literal "nan"/"None";
            # map those (and empty strings) back to real nulls.
            df[col] = as_text.replace({"nan": None, "None": None, "": None})
    return df


def _make_output_path(file_name):
    """Return a path for *file_name* inside a fresh temporary directory."""
    import os
    import tempfile

    # A new directory per call keeps concurrent conversions from overwriting
    # each other's output while preserving the download file name.
    # NOTE(review): these directories are not removed here; cleanup is left
    # to the host's temp-file policy.
    return os.path.join(tempfile.mkdtemp(prefix="dataformat_"), file_name)


def convert_file(input_file, conversion_type, encoding_option):
    """Convert an uploaded file between CSV and Parquet.

    Args:
        input_file: The upload, either a file-like object with ``read()`` and
            ``name`` or a filesystem path; ``None`` when nothing was uploaded.
        conversion_type: ``"CSV to Parquet"`` or ``"Parquet to CSV"``.
        encoding_option: ``"Auto-detect"`` or an explicit codec name. For CSV
            input it is the encoding to read with; for CSV output it is the
            encoding to write with (``"Auto-detect"`` writes UTF-8).

    Returns:
        A ``(output_path, message)`` tuple. On success ``output_path`` is the
        converted file and ``message`` holds a summary plus a preview of the
        first 10 rows. On failure ``output_path`` is ``None`` and ``message``
        describes the problem; exceptions are reported this way rather than
        raised.
    """
    try:
        if input_file is None:
            return None, "Please upload a file."

        file_name, file_bytes = _read_upload(input_file)
        file_extension = file_name.lower().split('.')[-1]

        if conversion_type == "CSV to Parquet":
            if file_extension != "csv":
                return None, "For CSV to Parquet conversion, please upload a CSV file."

            if encoding_option == "Auto-detect":
                # Guard against a detector that reports no encoding at all.
                requested = detect_encoding(file_bytes) or "utf-8"
            else:
                requested = encoding_option

            df, encoding = _read_csv_with_fallback(file_bytes, requested)
            if df is None:
                return None, (
                    f"Failed to decode the CSV file with encoding '{requested}' "
                    "or any fallback encoding. Please try selecting a specific encoding."
                )

            df = _normalize_text_columns(df)
            output_file = _make_output_path("output.parquet")
            df.to_parquet(output_file, index=False)
            converted_format = "Parquet"

        elif conversion_type == "Parquet to CSV":
            if file_extension != "parquet":
                return None, "For Parquet to CSV conversion, please upload a Parquet file."

            # Parquet is a binary format, so sniffing its bytes for a text
            # encoding is meaningless; write UTF-8 unless one was chosen.
            encoding = "utf-8" if encoding_option == "Auto-detect" else encoding_option

            df = pd.read_parquet(BytesIO(file_bytes))
            output_file = _make_output_path("output.csv")
            df.to_csv(output_file, index=False, encoding=encoding)
            converted_format = "CSV"

        else:
            return None, "Invalid conversion type selected."

        preview = df.head(10).to_string(index=False)
        info_message = (
            f"Input file: {file_name}\n"
            f"Converted file format: {converted_format}\n"
            f"Encoding used: {encoding}\n"
            f"Total rows: {len(df)}\n"
            f"Total columns: {len(df.columns)}\n\n"
            f"Preview (Top 10 Rows):\n{preview}"
        )
        return output_file, info_message

    except Exception as e:
        # UI boundary: report the failure in the preview box instead of
        # letting the exception escape to the framework.
        return None, f"Error during conversion: {str(e)}"
|
|
| |
# Page stylesheet, handed to gr.Blocks(css=custom_css). The class selectors
# are attached to components through elem_classes and through class
# attributes in inline gr.HTML markup.
custom_css = """
body {
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
font-family: 'Poppins', 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.gradio-container {
max-width: 950px;
margin: 40px auto;
padding: 30px;
background-color: #ffffff;
border-radius: 16px;
box-shadow: 0 10px 25px rgba(0,0,0,0.1);
}
h1 {
color: #3a4149;
font-size: 2.5rem;
text-align: center;
margin-bottom: 5px;
font-weight: 600;
}
h2 {
color: #5a6570;
font-size: 1.2rem;
text-align: center;
margin-bottom: 25px;
font-weight: 400;
}
.header-icon {
font-size: 3rem;
text-align: center;
margin-bottom: 10px;
color: #4285f4;
}
.instruction-box {
background-color: #f8f9fa;
border-left: 4px solid #4285f4;
padding: 15px;
margin-bottom: 25px;
border-radius: 6px;
}
.instruction-step {
margin: 8px 0;
padding-left: 10px;
}
.file-box {
border: 2px dashed #ddd;
border-radius: 12px;
padding: 20px;
transition: all 0.3s ease;
}
.file-box:hover {
border-color: #4285f4;
box-shadow: 0 5px 15px rgba(66, 133, 244, 0.15);
}
.conversion-radio label {
padding: 10px 15px;
margin: 5px;
border-radius: 8px;
border: 1px solid #eaeaea;
transition: all 0.2s ease;
}
.conversion-radio input:checked + label {
background-color: #e8f0fe;
border-color: #4285f4;
color: #4285f4;
}
.convert-button {
background: linear-gradient(to right, #4285f4, #34a853) !important;
color: white !important;
border: none !important;
padding: 12px 25px !important;
font-size: 16px !important;
font-weight: 500 !important;
border-radius: 30px !important;
cursor: pointer;
margin: 20px auto !important;
display: block !important;
box-shadow: 0 4px 12px rgba(66, 133, 244, 0.25) !important;
}
.convert-button:hover {
box-shadow: 0 6px 16px rgba(66, 133, 244, 0.4) !important;
transform: translateY(-2px);
}
.footer {
text-align: center;
margin-top: 30px;
color: #70757a;
font-size: 0.9rem;
}
.preview-box {
background-color: #f8f9fa;
border-radius: 8px;
padding: 15px;
font-family: monospace;
white-space: pre-wrap;
max-height: 400px;
overflow-y: auto;
}
.info-tag {
display: inline-block;
background-color: #e8f0fe;
color: #4285f4;
padding: 4px 10px;
border-radius: 20px;
font-size: 0.85rem;
margin-right: 8px;
margin-bottom: 8px;
}
.divider {
height: 1px;
background: linear-gradient(to right, transparent, #ddd, transparent);
margin: 25px 0;
}
.error-message {
color: #d93025;
background-color: #fce8e6;
padding: 10px;
border-radius: 8px;
margin-top: 10px;
font-size: 0.9rem;
}
.success-message {
color: #188038;
background-color: #e6f4ea;
padding: 10px;
border-radius: 8px;
margin-top: 10px;
font-size: 0.9rem;
}
"""
|
|
# Page layout and event wiring for the converter UI.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation was lost. Confirm that the two accordions belong below
# the two-column row rather than inside the right-hand column.
with gr.Blocks(css=custom_css, title="DataFormat Converter") as demo:
    gr.HTML('<div class="header-icon">📊</div>')
    gr.Markdown("# DataFormat Converter")
    gr.Markdown("## Seamlessly convert between CSV and Parquet formats with just a few clicks")

    gr.HTML('<div class="divider"></div>')

    with gr.Row():
        # Left column: static instructions and background information.
        with gr.Column():
            gr.HTML("""
            <div class="instruction-box">
                <h3>How It Works</h3>
                <div class="instruction-step">1. Upload your CSV or Parquet file</div>
                <div class="instruction-step">2. Select the conversion direction</div>
                <div class="instruction-step">3. Choose encoding (or leave as auto-detect)</div>
                <div class="instruction-step">4. Click "Convert" and download your transformed file</div>
            </div>

            <div class="info-section">
                <div class="info-tag">Fast Conversion</div>
                <div class="info-tag">Data Preview</div>
                <div class="info-tag">Multi-Encoding Support</div>
                <div class="info-tag">Maintains Structure</div>
            </div>
            """)

            gr.HTML("""
            <div style="margin-top: 25px;">
                <h3>Why Convert?</h3>
                <p>Parquet files offer significant advantages for data storage and analysis:</p>
                <ul>
                    <li>Smaller file size (up to 87% reduction)</li>
                    <li>Faster query performance</li>
                    <li>Column-oriented storage</li>
                    <li>Better compression</li>
                </ul>
                <p>CSV files are useful for:</p>
                <ul>
                    <li>Universal compatibility</li>
                    <li>Human readability</li>
                    <li>Simple integration with many tools</li>
                </ul>
            </div>
            """)

        # Right column: the conversion form. The "file-box" class is set on
        # the column itself because an opening gr.HTML('<div ...>') and a
        # closing gr.HTML('</div>') are separate components and cannot wrap
        # the components declared between them.
        with gr.Column(elem_classes=["file-box"]):
            input_file = gr.File(label="Upload Your File")
            conversion_type = gr.Radio(
                choices=["CSV to Parquet", "Parquet to CSV"],
                label="Select Conversion Type",
                value="CSV to Parquet",
                elem_classes=["conversion-radio"],
            )
            encoding_option = gr.Dropdown(
                choices=["Auto-detect", "utf-8", "latin1", "iso-8859-1", "cp1252", "utf-16"],
                value="Auto-detect",
                label="Select CSV Encoding",
            )
            convert_button = gr.Button("Convert Now", elem_classes=["convert-button"])

    with gr.Accordion("Conversion Results", open=False):
        output_file = gr.File(label="Download Converted File")

    with gr.Accordion("Data Preview", open=True):
        preview = gr.Textbox(
            label="File Information and Preview",
            lines=15,
            elem_classes=["preview-box"],
        )

    gr.HTML('<div class="divider"></div>')

    gr.HTML("""
    <div class="footer">
        <p>DataFormat Converter © 2025 | Built with Gradio | An efficient tool for data professionals</p>
    </div>
    """)

    # Run the conversion and show the downloadable file plus the summary.
    convert_button.click(
        fn=convert_file,
        inputs=[input_file, conversion_type, encoding_option],
        outputs=[output_file, preview],
    )

    def update_encoding_visibility(selected_type):
        """Show the encoding dropdown only for CSV to Parquet conversion."""
        return gr.update(visible=selected_type == "CSV to Parquet")

    conversion_type.change(
        fn=update_encoding_visibility,
        inputs=conversion_type,
        outputs=encoding_option,
    )
|
|
# Launch the Gradio app only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()