"""Gradio app that converts uploaded files between CSV and Parquet."""

import os
import tempfile
from io import BytesIO

import chardet
import gradio as gr
import pandas as pd

# Encoding used when detection yields nothing and for CSV output in
# "Auto-detect" mode (there is nothing meaningful to detect in Parquet bytes).
DEFAULT_ENCODING = "utf-8"

# Encodings tried, in order, when the requested one cannot decode a CSV.
# latin1 maps every possible byte, so it has to be last: anything listed
# after it could never be reached.
FALLBACK_ENCODINGS = ("utf-8", "cp1252", "latin1")


def detect_encoding(file_bytes):
    """Return chardet's encoding guess for *file_bytes* (may be ``None``).

    Only the first 10000 bytes are inspected to keep detection fast.
    """
    result = chardet.detect(file_bytes[:10000])
    return result['encoding']


def _read_upload(input_file):
    """Return ``(file_name, file_bytes)`` for an upload.

    The upload is first treated as a file-like object exposing ``read()``
    and ``name``; if either attribute is missing it is treated as a path.
    """
    try:
        file_bytes = input_file.read()
        file_name = input_file.name
    except AttributeError:
        file_name = input_file
        with open(file_name, "rb") as f:
            file_bytes = f.read()
    return file_name, file_bytes


def _read_csv(file_bytes, encoding):
    """Parse CSV bytes, trying *encoding* first and then the fallbacks.

    Returns ``(df, encoding_used)``, or ``(None, encoding)`` when no
    candidate could decode the data.
    """
    candidates = [encoding]
    candidates.extend(enc for enc in FALLBACK_ENCODINGS if enc != encoding)
    for candidate in candidates:
        try:
            return pd.read_csv(BytesIO(file_bytes), encoding=candidate), candidate
        except (UnicodeError, LookupError):
            # UnicodeError: the bytes are not valid in this encoding
            # (covers UnicodeDecodeError and BOM-less UTF-16 streams).
            # LookupError: the codec name is unknown to Python.
            continue
    return None, encoding


def _output_path(file_name, suffix):
    """Return a path for the converted file inside a fresh temp directory.

    A directory per conversion keeps simultaneous requests from overwriting
    each other's output, which a fixed file name in the working directory
    would allow.
    """
    stem = os.path.splitext(os.path.basename(file_name))[0] or "output"
    return os.path.join(tempfile.mkdtemp(prefix="converted_"), stem + suffix)


def convert_file(input_file, conversion_type, encoding_option):
    """Convert an uploaded file between CSV and Parquet.

    Args:
        input_file: The upload, either a file-like object with ``read()``
            and ``name`` or a path string; ``None`` when nothing was uploaded.
        conversion_type: ``"CSV to Parquet"`` or ``"Parquet to CSV"``.
        encoding_option: ``"Auto-detect"`` or an explicit codec name. For CSV
            input it selects how the file is decoded; for CSV output it
            selects how the file is written (UTF-8 under ``"Auto-detect"``).

    Returns:
        ``(output_path, message)``. On success ``message`` summarises the
        conversion and previews the first 10 rows; on any failure
        ``output_path`` is ``None`` and ``message`` describes the problem.
    """
    try:
        if input_file is None:
            return None, "Please upload a file."

        file_name, file_bytes = _read_upload(input_file)
        file_extension = os.path.splitext(file_name)[1].lower().lstrip(".")
        auto_detect = encoding_option == "Auto-detect"

        if conversion_type == "CSV to Parquet":
            if file_extension != "csv":
                return None, "For CSV to Parquet conversion, please upload a CSV file."

            requested = detect_encoding(file_bytes) if auto_detect else encoding_option
            # chardet reports None when it cannot make a guess.
            requested = requested or DEFAULT_ENCODING

            df, encoding = _read_csv(file_bytes, requested)
            if df is None:
                return None, f"Failed to decode the CSV file. Auto-detected encoding was '{encoding}'. Please try selecting a specific encoding."

            output_file = _output_path(file_name, ".parquet")
            df.to_parquet(output_file, index=False)
            converted_format = "Parquet"

        elif conversion_type == "Parquet to CSV":
            if file_extension != "parquet":
                return None, "For Parquet to CSV conversion, please upload a Parquet file."

            df = pd.read_parquet(BytesIO(file_bytes))
            # Parquet is binary, so running charset detection over it would
            # only produce a meaningless output encoding.
            encoding = DEFAULT_ENCODING if auto_detect else encoding_option
            output_file = _output_path(file_name, ".csv")
            df.to_csv(output_file, index=False, encoding=encoding)
            converted_format = "CSV"

        else:
            return None, "Invalid conversion type selected."

        # Generate a preview of the top 10 rows
        preview = df.head(10).to_string(index=False)
        info_message = (
            f"Input file: {file_name}\n"
            f"Converted file format: {converted_format}\n"
            f"Encoding used: {encoding}\n"
            f"Total rows: {len(df)}\n"
            f"Total columns: {len(df.columns)}\n\n"
            f"Preview (Top 10 Rows):\n{preview}"
        )
        return output_file, info_message

    except Exception as e:
        # UI boundary: report every failure in the preview box instead of
        # letting the exception reach Gradio.
        return None, f"Error during conversion: {str(e)}"


def update_encoding_visibility(conversion_type):
    """Show the encoding dropdown only for the CSV to Parquet direction."""
    return gr.update(visible=conversion_type == "CSV to Parquet")


# Enhanced custom CSS for a more visually appealing interface.
# Split into adjacent literals for readability; the resulting string is
# reproduced character for character, including its whitespace.
custom_css = (
    " body { background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);"
    " font-family: 'Poppins', 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }"
    " .gradio-container { max-width: 950px; margin: 40px auto; padding: 30px;"
    " background-color: #ffffff; border-radius: 16px;"
    " box-shadow: 0 10px 25px rgba(0,0,0,0.1); }"
    " h1 { color: #3a4149; font-size: 2.5rem; text-align: center;"
    " margin-bottom: 5px; font-weight: 600; }"
    " h2 { color: #5a6570; font-size: 1.2rem; text-align: center;"
    " margin-bottom: 25px; font-weight: 400; }"
    " .header-icon { font-size: 3rem; text-align: center;"
    " margin-bottom: 10px; color: #4285f4; }"
    " .instruction-box { background-color: #f8f9fa;"
    " border-left: 4px solid #4285f4; padding: 15px;"
    " margin-bottom: 25px; border-radius: 6px; }"
    " .instruction-step { margin: 8px 0; padding-left: 10px; }"
    " .file-box { border: 2px dashed #ddd; border-radius: 12px;"
    " padding: 20px; transition: all 0.3s ease; }"
    " .file-box:hover { border-color: #4285f4;"
    " box-shadow: 0 5px 15px rgba(66, 133, 244, 0.15); }"
    " .conversion-radio label { \n"
    "padding: 10px 15px; margin: 5px; border-radius: 8px;"
    " border: 1px solid #eaeaea; transition: all 0.2s ease; }"
    " .conversion-radio input:checked + label { background-color: #e8f0fe;"
    " border-color: #4285f4; color: #4285f4; }"
    " .convert-button {"
    " background: linear-gradient(to right, #4285f4, #34a853) !important;"
    " color: white !important; border: none !important;"
    " padding: 12px 25px !important; font-size: 16px !important;"
    " font-weight: 500 !important; border-radius: 30px !important;"
    " cursor: pointer; margin: 20px auto !important;"
    " display: block !important;"
    " box-shadow: 0 4px 12px rgba(66, 133, 244, 0.25) !important; }"
    " .convert-button:hover {"
    " box-shadow: 0 6px 16px rgba(66, 133, 244, 0.4) !important;"
    " transform: translateY(-2px); }"
    " .footer { text-align: center; margin-top: 30px;"
    " color: #70757a; font-size: 0.9rem; }"
    " .preview-box { background-color: #f8f9fa; border-radius: 8px;"
    " padding: 15px; font-family: monospace; white-space: pre-wrap;"
    " max-height: 400px; overflow-y: auto; }"
    " .info-tag { display: inline-block; background-color: #e8f0fe;"
    " color: #4285f4; padding: 4px 10px; border-radius: 20px;"
    " font-size: 0.85rem; margin-right: 8px; margin-bottom: 8px; }"
    " .divider { height: 1px;"
    " background: linear-gradient(to right, transparent, #ddd, transparent);"
    " margin: 25px 0; }"
    " .error-message { color: #d93025; background-color: #fce8e6;"
    " padding: 10px; border-radius: 8px; margin-top: 10px;"
    " font-size: 0.9rem; }"
    " .success-message { color: #188038; background-color: #e6f4ea;"
    " padding: 10px; border-radius: 8px; margin-top: 10px;"
    " font-size: 0.9rem; }"
    " "
)

# NOTE(review): the gr.HTML fragments below contain text only, although the
# stylesheet defines classes such as .header-icon, .instruction-box,
# .file-box, .divider and .footer. The markup looks like it was stripped
# somewhere upstream -- restore it from the original source if available.
# The nesting of the layout was reconstructed from the statement order;
# confirm where the two accordions are meant to sit.
with gr.Blocks(css=custom_css, title="DataFormat Converter") as demo:
    gr.HTML("\n📊\n")
    gr.Markdown("# DataFormat Converter")
    gr.Markdown("## Seamlessly convert between CSV and Parquet formats with just a few clicks")
    gr.HTML("\n")

    with gr.Row():
        with gr.Column():
            gr.HTML("""

How It Works

1. Upload your CSV or Parquet file
2. Select the conversion direction
3. Choose encoding (or leave as auto-detect)
4. Click "Convert" and download your transformed file
Fast Conversion
Data Preview
Multi-Encoding Support
Maintains Structure
""")
            gr.HTML("""

Why Convert?

Parquet files offer significant advantages for data storage and analysis:

CSV files are useful for:

""")

        with gr.Column():
            # Replace gr.Box with a div using gr.HTML for the file-box styling
            gr.HTML("\n")
            input_file = gr.File(label="Upload Your File")
            conversion_type = gr.Radio(
                choices=["CSV to Parquet", "Parquet to CSV"],
                label="Select Conversion Type",
                value="CSV to Parquet",
                elem_classes=["conversion-radio"],
            )
            encoding_option = gr.Dropdown(
                choices=["Auto-detect", "utf-8", "latin1", "iso-8859-1", "cp1252", "utf-16"],
                value="Auto-detect",
                label="Select CSV Encoding",
            )
            convert_button = gr.Button("Convert Now", elem_classes=["convert-button"])
            gr.HTML("\n")  # Close the file-box div

    with gr.Accordion("Conversion Results", open=False):
        output_file = gr.File(label="Download Converted File")

    with gr.Accordion("Data Preview", open=True):
        preview = gr.Textbox(
            label="File Information and Preview",
            lines=15,
            elem_classes=["preview-box"],
        )

    gr.HTML("\n")
    gr.HTML(" ")

    convert_button.click(
        fn=convert_file,
        inputs=[input_file, conversion_type, encoding_option],
        outputs=[output_file, preview],
    )

    # Show/hide the encoding dropdown whenever the conversion type changes.
    conversion_type.change(
        fn=update_encoding_visibility,
        inputs=conversion_type,
        outputs=encoding_option,
    )

if __name__ == "__main__":
    demo.launch()