{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Audio task notebook template\n",
    "## Loading the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastapi import APIRouter\n",
    "from datetime import datetime\n",
    "from datasets import load_dataset\n",
    "import librosa\n",
    "from sklearn.metrics import accuracy_score\n",
    "import random\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sys\n",
    "import json\n",
    "sys.path.append('../tasks')\n",
    "\n",
    "from utils.evaluation import AudioEvaluationRequest\n",
    "from utils.emissions import tracker, clean_emissions_data, get_space_info\n",
    "\n",
    "\n",
    "# Class name -> integer label id\n",
    "LABEL_MAPPING = {\"chainsaw\": 0, \"environment\": 1}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Log in to the Hugging Face Hub before the dataset is loaded below\n",
    "# NOTE(review): presumably only required when the dataset is gated/private - confirm\n",
    "from huggingface_hub import login\n",
    "login()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading the datasets and splitting them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation request carrying the dataset name, test size and split seed\n",
    "request = AudioEvaluationRequest()\n",
    "\n",
    "# Load the dataset named in the request\n",
    "dataset = load_dataset(request.dataset_name)\n",
    "\n",
    "# Hold out part of the train split for evaluation\n",
    "train_test = dataset[\"train\"].train_test_split(\n",
    "    test_size=request.test_size,\n",
    "    seed=request.test_seed,\n",
    ")\n",
    "test_dataset = train_test[\"test\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep handles on the dataset's own train and test splits\n",
    "train = dataset[\"train\"]\n",
    "test = dataset[\"test\"]\n",
    "\n",
    "# Materialise the train split as a DataFrame for exploration\n",
    "train_df = pd.DataFrame(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flatten the nested \"audio\" records into one DataFrame column per field\n",
    "for field in (\"path\", \"array\", \"sampling_rate\"):\n",
    "    train_df[field] = train_df[\"audio\"].apply(lambda x, key=field: x[key])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sampling rate that every clip is converted to\n",
    "target_sr = 12000\n",
    "\n",
    "\n",
    "def resample_audio(array, orig_sr, target_sr):\n",
    "    \"\"\"Return `array` as a NumPy array at `target_sr`.\n",
    "\n",
    "    The input is first copied into a NumPy array. When `orig_sr` already\n",
    "    equals `target_sr` that copy is returned unchanged; otherwise it is\n",
    "    passed through `librosa.resample`.\n",
    "    \"\"\"\n",
    "    samples = np.array(array)\n",
    "    if orig_sr == target_sr:\n",
    "        return samples\n",
    "    return librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)\n",
    "\n",
    "# Resample every clip to target_sr.\n",
    "# The source rate is read from the untouched \"audio\" column rather than from\n",
    "# train_df[\"sampling_rate\"], because that column is overwritten with target_sr\n",
    "# below. Reading it back on a re-run of this cell would make every row look\n",
    "# already resampled and silently store the original-rate audio instead.\n",
    "train_df[\"resampled_array\"] = train_df.apply(\n",
    "    lambda row: resample_audio(row[\"array\"], row[\"audio\"][\"sampling_rate\"], target_sr),\n",
    "    axis=1,\n",
    ")\n",
    "\n",
    "# The sampling_rate column now describes resampled_array\n",
    "train_df[\"sampling_rate\"] = target_sr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics of the sampling-rate column\n",
    "train_df[\"sampling_rate\"].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random Baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start tracking emissions: start the tracker, then open a task named\n",
    "# \"inference\" (it is closed later with tracker.stop_task())\n",
    "tracker.start()\n",
    "tracker.start_task(\"inference\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#--------------------------------------------------------------------------------------------\n",
    "# YOUR MODEL INFERENCE CODE HERE\n",
    "# Replace the random baseline below with your model inference. Keep it inside this\n",
    "# inference pass, where energy consumption and emissions are tracked.\n",
    "#--------------------------------------------------------------------------------------------\n",
    "\n",
    "# Ground-truth labels of the held-out split\n",
    "true_labels = test_dataset[\"label\"]\n",
    "\n",
    "# Random predictions (placeholder for actual model inference). A dedicated, seeded\n",
    "# generator keeps the baseline reproducible across runs without touching the\n",
    "# global `random` state.\n",
    "# NOTE(review): assumes request.test_seed is a valid seed (e.g. an int) - confirm\n",
    "rng = random.Random(request.test_seed)\n",
    "predictions = [rng.randint(0, 1) for _ in range(len(true_labels))]\n",
    "\n",
    "# Preview only the first few predictions instead of dumping the whole list\n",
    "predictions[:10]\n",
    "\n",
    "#--------------------------------------------------------------------------------------------\n",
    "# YOUR MODEL INFERENCE STOPS HERE\n",
    "#--------------------------------------------------------------------------------------------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Close the tracked task and keep what the tracker returns for it;\n",
    "# the results cell below reads .energy_consumed and .emissions from it\n",
    "emissions_data = tracker.stop_task()\n",
    "emissions_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Share of predictions that match the ground-truth labels\n",
    "accuracy = accuracy_score(y_true=true_labels, y_pred=predictions)\n",
    "accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assemble the results payload\n",
    "results = dict(\n",
    "    submission_timestamp=datetime.now().isoformat(),\n",
    "    accuracy=float(accuracy),\n",
    "    # Tracker values are scaled by 1000 for the *_wh / *_gco2eq fields\n",
    "    energy_consumed_wh=emissions_data.energy_consumed * 1000,\n",
    "    emissions_gco2eq=emissions_data.emissions * 1000,\n",
    "    emissions_data=clean_emissions_data(emissions_data),\n",
    "    dataset_config=dict(\n",
    "        dataset_name=request.dataset_name,\n",
    "        test_size=request.test_size,\n",
    "        test_seed=request.test_seed,\n",
    "    ),\n",
    ")\n",
    "\n",
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}