# Audio task notebook template
## Loading the necessary libraries

In [31]:
from fastapi import APIRouter
from datetime import datetime
from datasets import load_dataset
import librosa
from sklearn.metrics import accuracy_score
import random
import pandas as pd
import numpy as np
import sys
import json
sys.path.append('../tasks')

from utils.evaluation import AudioEvaluationRequest
from utils.emissions import tracker, clean_emissions_data, get_space_info


# Integer id assigned to each class name
LABEL_MAPPING = dict(chainsaw=0, environment=1)

In [None]:
# NOTE(review): this import lives outside the top import cell, which hides the
# huggingface_hub dependency from readers -- consider moving it up.
from huggingface_hub import login
# Authenticate this session with the Hugging Face Hub.
# Presumably prompts for an access token since none is passed -- confirm against
# the huggingface_hub documentation.
login()

## Loading the datasets and splitting them

In [2]:
# Evaluation settings (dataset name, test split size and seed) are read from this request object
request = AudioEvaluationRequest()

# Fetch the dataset named by the request
dataset = load_dataset(request.dataset_name)

# Carve a held-out test set out of the "train" split, using the request's size and seed
train_test = dataset["train"].train_test_split(
    test_size=request.test_size,
    seed=request.test_seed,
)
test_dataset = train_test["test"]

## Analysis

In [None]:
# Keep a handle on each of the two splits accessed from the loaded dataset
train = dataset["train"]
test = dataset["test"]

# Materialise the training split as a pandas DataFrame for exploration
train_df = pd.DataFrame(data=train)

In [24]:
# Flatten the nested "audio" records into one top-level column per field.
# `key=field` binds the current field name at lambda-definition time.
for field in ("path", "array", "sampling_rate"):
    train_df[field] = train_df["audio"].apply(lambda audio, key=field: audio[key])

In [None]:
# Sampling rate that every clip is resampled to
target_sr = 12_000

def resample_audio(array, orig_sr, target_sr):
    """Return ``array`` as a NumPy array sampled at ``target_sr``.

    Args:
        array: Audio samples; any array-like accepted by ``np.array``.
        orig_sr: Sampling rate the samples currently have.
        target_sr: Sampling rate to convert the samples to.

    Returns:
        A NumPy array: a copy of the input when both rates already match,
        otherwise the output of ``librosa.resample``.
    """
    samples = np.array(array)  # np.array copies, so the caller's data is left untouched
    if orig_sr == target_sr:
        # Rates already agree: nothing to resample
        return samples
    return librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)

# Resample every clip to target_sr.
# The original rate is read from the untouched "audio" records rather than from
# the "sampling_rate" column: that column is overwritten just below, so reading
# it here would make a second run of this cell see matching rates, skip
# resampling and silently store the raw arrays as "resampled".
train_df["resampled_array"] = train_df["audio"].apply(
    lambda audio: resample_audio(audio["array"], audio["sampling_rate"], target_sr)
)

# Update the sampling rate column to reflect the target rate
train_df["sampling_rate"] = target_sr


In [None]:
# Summary statistics of the sampling-rate column
train_df["sampling_rate"].describe()

## Random Baseline

In [8]:
# Start tracking emissions
# `tracker` is the object imported from utils.emissions; it is started here,
# in its own cell, before the inference cell runs.
tracker.start()
# Open a task named "inference"; a later cell calls tracker.stop_task() and
# keeps the value it returns.
tracker.start_task("inference")

In [None]:

#--------------------------------------------------------------------------------------------
# YOUR MODEL INFERENCE CODE HERE
# Replace the random baseline below with your model inference. Keep it inside this
# inference pass, where energy consumption and emissions are tracked.
#--------------------------------------------------------------------------------------------

# Make random predictions (placeholder for actual model inference)
true_labels = test_dataset["label"]

# Dedicated, seeded generator: the baseline is reproducible on "Restart & Run All"
# and the global `random` state is left alone.
# NOTE(review): assumes request.test_seed is a valid seed (e.g. an int) -- confirm.
rng = random.Random(request.test_seed)

# Draw from the label ids declared in LABEL_MAPPING instead of a hard-coded 0/1 range
label_ids = sorted(LABEL_MAPPING.values())
predictions = [rng.choice(label_ids) for _ in true_labels]

# Show a short preview instead of dumping every prediction into the cell output
predictions[:10]

#--------------------------------------------------------------------------------------------
# YOUR MODEL INFERENCE STOPS HERE
#--------------------------------------------------------------------------------------------

In [None]:
# Stop tracking emissions
# Presumably ends the "inference" task opened earlier with tracker.start_task -- confirm.
# The returned object is kept because the results cell reads its
# .energy_consumed and .emissions attributes.
emissions_data = tracker.stop_task()
# Display the returned object as the cell output
emissions_data

In [None]:
# Score the predictions against the ground-truth labels, then display the score
accuracy = accuracy_score(y_true=true_labels, y_pred=predictions)
accuracy

In [None]:
# Assemble the results payload, then display it
results = dict(
    submission_timestamp=datetime.now().isoformat(),
    accuracy=float(accuracy),
    # Both tracker figures are scaled by 1000.
    # NOTE(review): presumably kWh -> Wh and kg -> g, matching the key names -- confirm.
    energy_consumed_wh=emissions_data.energy_consumed * 1000,
    emissions_gco2eq=emissions_data.emissions * 1000,
    emissions_data=clean_emissions_data(emissions_data),
    # Echo the request settings that determined the evaluation split
    dataset_config={
        field: getattr(request, field)
        for field in ("dataset_name", "test_size", "test_seed")
    },
)

results