frugal-ai-challenge-submission

Sleeping

frugal-ai-challenge-submission / tasks / audio.py

Nicolas Denier

update readme

0388c00 5 months ago

3.57 kB

	from fastapi import APIRouter
	from datetime import datetime
	from datasets import load_dataset
	import os
	import torch

	from .utils.evaluation import AudioEvaluationRequest
	from .utils.emissions import tracker, clean_emissions_data, get_space_info
	from .utils.preprocess import get_dataloader
	from .models.model import ChainsawDetector

	from dotenv import load_dotenv
	load_dotenv()

	# Endpoint metadata: both values are passed to the route decorator below and
	# echoed back in the results payload returned by the handler.
	ROUTE = "/audio"
	DESCRIPTION = "ChainsawDetector"

	# Router on which this module registers its audio evaluation endpoint.
	router = APIRouter()


	@router.post(ROUTE, tags=["Audio Task"], description=DESCRIPTION)
	async def evaluate_audio(request: AudioEvaluationRequest):
	    """
	    Evaluate audio classification for rainforest sound detection.

	    Current Model: ChainsawDetector
	    - STFT -> PCEN -> split into small time chunks -> CNN+LSTM for each chunk -> dense -> prediction

	    The ``test`` split of ``request.dataset_name`` is loaded, run through the
	    model batch by batch, and the number of predictions equal to their label
	    is divided by the size of the split. Only the inference loop runs inside
	    the tracked "inference" task; dataset and model loading happen before
	    tracking starts.

	    Args:
	        request: Evaluation request. ``dataset_name`` selects the dataset to
	            load; ``test_size`` and ``test_seed`` are only echoed back in the
	            response and are not used to build the evaluation set here.

	    Returns:
	        A dict with the submission metadata (``username``, ``space_url``,
	        ``submission_timestamp``, ``model_description``, ``api_route``),
	        the ``accuracy``, the measured ``energy_consumed_wh`` and
	        ``emissions_gco2eq``, the cleaned ``emissions_data`` and the
	        ``dataset_config`` taken from the request.
	    """
	    # Get space info
	    username, space_url = get_space_info()

	    # Label mapping for this task: "chainsaw" -> 0, "environment" -> 1.

	    # Load and prepare the dataset
	    # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
	    batch_size = 16
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    split = "test"
	    test_dataset = load_dataset(
	        request.dataset_name,
	        split=split,
	        token=os.getenv("HF_TOKEN"),
	    )
	    dataloader = get_dataloader(test_dataset, device, batch_size=batch_size, shuffle=False)

	    # Load model
	    model = ChainsawDetector(batch_size).to(device, dtype=torch.bfloat16)
	    # NOTE(review): the weights are loaded into the *compiled* wrapper, so the
	    # checkpoint presumably uses the wrapper's parameter names -- do not move
	    # load_state_dict() before torch.compile() without checking the file.
	    model = torch.compile(model)
	    # map_location makes a checkpoint that was saved from a GPU loadable on a
	    # CPU-only host (the device fallback above would otherwise be unusable).
	    state_dict = torch.load(
	        "tasks/models/final-bf16.pth",
	        weights_only=True,
	        map_location=device,
	    )
	    model.load_state_dict(state_dict)
	    model.eval()

	    num_correct = 0
	    num_samples = len(test_dataset)

	    # Start tracking emissions
	    tracker.start()
	    tracker.start_task("inference")

	    try:
	        #--------------------------------------------------------------------------------------------
	        # MODEL INFERENCE STARTS HERE
	        # Everything in this section runs inside the tracked "inference" task.
	        #--------------------------------------------------------------------------------------------

	        with torch.no_grad():
	            for X, y in dataloader:
	                X = X.to(device, dtype=torch.bfloat16)
	                y = y.to(device, dtype=torch.bfloat16)

	                # NOTE(review): the comparison below assumes the model returns
	                # hard class labels with the same shape as y -- TODO confirm.
	                predictions = model(X)
	                num_correct += (y == predictions).sum()  # count correct predictions

	        #--------------------------------------------------------------------------------------------
	        # MODEL INFERENCE STOPS HERE
	        #--------------------------------------------------------------------------------------------
	    finally:
	        # Stop tracking emissions. Done in ``finally`` so a failed inference
	        # pass does not leave the "inference" task open on the shared tracker.
	        emissions_data = tracker.stop_task()

	    # Calculate accuracy; an empty split reports 0.0 instead of raising
	    # ZeroDivisionError.
	    accuracy = float(num_correct) / num_samples if num_samples else 0.0

	    # Prepare results dictionary
	    results = {
	        "username": username,
	        "space_url": space_url,
	        "submission_timestamp": datetime.now().isoformat(),
	        "model_description": DESCRIPTION,
	        "accuracy": accuracy,
	        # The tracker values are scaled by 1000 to match the *_wh / *_gco2eq keys.
	        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
	        "emissions_gco2eq": emissions_data.emissions * 1000,
	        "emissions_data": clean_emissions_data(emissions_data),
	        "api_route": ROUTE,
	        "dataset_config": {
	            "dataset_name": request.dataset_name,
	            "test_size": request.test_size,
	            "test_seed": request.test_seed,
	        },
	    }

	    return results