{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae0e83db",
   "metadata": {},
   "outputs": [],
   "source": [
    "import io\n",
    "\n",
    "import dspy\n",
    "import duckdb\n",
    "import pandas as pd\n",
    "\n",
    "# Load an Excel sheet, drop empty rows/columns, tidy the header, and round-trip\n",
    "# the result through an in-memory CSV to obtain `new_df`.\n",
    "#\n",
    "# NOTE(review): `contents` and `sheet_name` are not defined anywhere in this\n",
    "# notebook. `contents` is wrapped in io.BytesIO, so it is presumably the raw\n",
    "# bytes of the workbook -- TODO define both in a config cell above this one.\n",
    "excel = pd.read_excel(io.BytesIO(contents), sheet_name=sheet_name)\n",
    "\n",
    "# Preprocessing steps\n",
    "# 1. Drop empty rows and columns. A new frame is built so the raw `excel`\n",
    "#    frame is left untouched (the original code referenced `excel_df` without\n",
    "#    ever defining it).\n",
    "excel_df = (\n",
    "    excel\n",
    "    .dropna(how='all')          # Remove empty rows\n",
    "    .dropna(how='all', axis=1)  # Remove empty columns\n",
    ")\n",
    "\n",
    "# 2. Clean column names\n",
    "excel_df.columns = excel_df.columns.str.strip()  # Remove extra spaces\n",
    "\n",
    "# 3. Convert Excel data to CSV in an in-memory text buffer\n",
    "# NOTE(review): pandas documents `encoding` as unsupported when the target is\n",
    "# a non-binary file object such as StringIO, so 'utf-8-sig' presumably has no\n",
    "# effect here -- confirm, and write to a binary buffer/file if a BOM is needed.\n",
    "csv_buffer = io.StringIO()\n",
    "excel_df.to_csv(csv_buffer, index=False, encoding='utf-8-sig')\n",
    "csv_buffer.seek(0)  # Rewind so read_csv starts at the beginning of the buffer\n",
    "\n",
    "# Read the processed CSV back into a dataframe\n",
    "new_df = pd.read_csv(csv_buffer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bcef79e3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}