{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae0e83db",
   "metadata": {},
   "outputs": [],
   "source": [
    "import io\n",
    "\n",
    "import dspy\n",
    "import duckdb\n",
    "import pandas as pd\n",
    "\n",
    "# NOTE(review): `contents` and `sheet_name` are not defined anywhere in this\n",
    "# notebook, so this cell fails on a fresh kernel until they are defined before\n",
    "# this line. Presumably `contents` holds the raw bytes of an Excel file -- confirm.\n",
    "# `sheet_name` must select a single sheet (str or int); with None or a list,\n",
    "# read_excel returns a dict of frames and the cleaning below would fail.\n",
    "excel = pd.read_excel(io.BytesIO(contents), sheet_name=sheet_name)\n",
    "\n",
    "# Preprocessing steps (non-mutating, so re-running the cell is idempotent and\n",
    "# the raw frame in `excel` stays available for inspection)\n",
    "excel_df = (\n",
    "    excel\n",
    "    # 1. Drop rows, then columns, that are entirely empty\n",
    "    .dropna(how='all')\n",
    "    .dropna(how='all', axis=1)\n",
    "    # 2. Clean column names: strip surrounding whitespace. Labels go through\n",
    "    #    str() first because `.columns.str.strip()` turns non-string headers\n",
    "    #    (e.g. a numeric header) into NaN, or raises when no header is a string.\n",
    "    .rename(columns=lambda col: str(col).strip())\n",
    ")\n",
    "\n",
    "# 3. Serialise the cleaned frame to CSV text in memory.\n",
    "#    No `encoding` argument is passed: it only applies when pandas writes bytes,\n",
    "#    and `csv_buffer` is a text buffer, so 'utf-8-sig' never took effect here.\n",
    "csv_buffer = io.StringIO()\n",
    "excel_df.to_csv(csv_buffer, index=False)\n",
    "csv_buffer.seek(0)  # rewind so read_csv starts at the beginning\n",
    "\n",
    "# Read the processed CSV back into a dataframe\n",
    "new_df = pd.read_csv(csv_buffer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bcef79e3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}