Spaces:
Sleeping
Sleeping
Updated API submission
Browse files- .gitignore +17 -0
- Dockerfile +16 -0
- app.py +27 -0
- requirements.txt +7 -0
- tasks/__init__.py +0 -0
- tasks/audio.py +23 -0
- tasks/image.py +23 -0
- tasks/text.py +73 -0
- tasks/utils/__init__.py +0 -0
- tasks/utils/emissions.py +28 -0
- tasks/utils/evaluation.py +19 -0
.gitignore
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints/sandbox-checkpoint.ipynb
|
2 |
+
|
3 |
+
auto_evals/
|
4 |
+
venv/
|
5 |
+
__pycache__/
|
6 |
+
.env
|
7 |
+
.ipynb_checkpoints
|
8 |
+
*ipynb
|
9 |
+
.vscode/
|
10 |
+
|
11 |
+
eval-queue/
|
12 |
+
eval-results/
|
13 |
+
eval-queue-bk/
|
14 |
+
eval-results-bk/
|
15 |
+
logs/
|
16 |
+
|
17 |
+
emissions.csv
|
Dockerfile
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Docker image for a Hugging Face Docker Space.
# Reference and Dockerfile guidance: https://huggingface.co/docs/hub/spaces-sdks-docker

FROM python:3.9

# Create an unprivileged account (uid 1000) and run everything after this as it.
RUN useradd -m -u 1000 user
USER user
# Put the user's local bin directory first on PATH
# (presumably where pip places console scripts such as uvicorn for this user).
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy only the requirements file before installing, so the dependency install
# step does not depend on the rest of the source tree.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application code and serve the FastAPI app object `app` from app.py
# on all interfaces, port 7860.
COPY --chown=user . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
from tasks import text, image, audio
|
4 |
+
|
5 |
+
# Load environment variables before the application object is built
# (python-dotenv; presumably reads a local .env file when one exists).
load_dotenv()

app = FastAPI(
    title="Frugal AI Challenge API",
    description="API for the Frugal AI Challenge evaluation endpoints",
)

# Register one router per task module, in the order text, image, audio.
app.include_router(text.router)
app.include_router(image.router)
app.include_router(audio.router)
|
17 |
+
|
18 |
+
@app.get("/")
async def root():
    # Landing route: returns a welcome message plus a short description of
    # each task endpoint. Documented with comments rather than a docstring
    # because FastAPI would publish a docstring as the route description.
    endpoints = {
        "text": "/text - Text classification task",
        "image": "/image - Image classification task (coming soon)",
        "audio": "/audio - Audio classification task (coming soon)",
    }
    return {
        "message": "Welcome to the Frugal AI Challenge API",
        "endpoints": endpoints,
    }
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi>=0.68.0
|
2 |
+
uvicorn>=0.15.0
|
3 |
+
codecarbon>=2.3.1
|
4 |
+
datasets>=2.14.0
|
5 |
+
scikit-learn>=1.0.2
|
6 |
+
pydantic>=1.10.0
|
7 |
+
python-dotenv>=1.0.0
|
tasks/__init__.py
ADDED
File without changes
|
tasks/audio.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
from .utils.evaluation import AudioEvaluationRequest
|
3 |
+
from .utils.emissions import get_space_info
|
4 |
+
|
5 |
+
router = APIRouter()


@router.post("/audio", tags=["Audio Task"])
async def evaluate_audio(request: AudioEvaluationRequest):
    """
    Placeholder for audio task evaluation.
    """
    # Nothing is evaluated yet: the handler reports which Space is running
    # and echoes the validated request fields back to the caller.
    username, space_url = get_space_info()

    received_config = {
        "dataset_name": request.dataset_name,
        "test_size": request.test_size,
        "test_seed": request.test_seed,
        "model_description": request.model_description,
    }

    return {
        "message": "Audio evaluation endpoint not yet implemented",
        "username": username,
        "space_url": space_url,
        "received_config": received_config,
    }
|
tasks/image.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
from .utils.evaluation import ImageEvaluationRequest
|
3 |
+
from .utils.emissions import get_space_info
|
4 |
+
|
5 |
+
router = APIRouter()


@router.post("/image", tags=["Image Task"])
async def evaluate_image(request: ImageEvaluationRequest):
    """
    Placeholder for image task evaluation.
    """
    # Nothing is evaluated yet: the handler reports which Space is running
    # and echoes the validated request fields back to the caller.
    username, space_url = get_space_info()

    received_config = {
        "dataset_name": request.dataset_name,
        "test_size": request.test_size,
        "test_seed": request.test_seed,
        "model_description": request.model_description,
    }

    return {
        "message": "Image evaluation endpoint not yet implemented",
        "username": username,
        "space_url": space_url,
        "received_config": received_config,
    }
|
tasks/text.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
from datetime import datetime
|
3 |
+
from datasets import load_dataset
|
4 |
+
from sklearn.metrics import accuracy_score
|
5 |
+
import random
|
6 |
+
|
7 |
+
from .utils.evaluation import TextEvaluationRequest
|
8 |
+
from .utils.emissions import tracker, clean_emissions_data, get_space_info
|
9 |
+
|
10 |
+
router = APIRouter()


@router.post("/text", tags=["Text Task"])
async def evaluate_text(request: TextEvaluationRequest):
    """
    Evaluate a text classification model for climate disinformation detection.
    """
    # Identify the Space (or local run) that produced this submission.
    username, space_url = get_space_info()

    # Class names in id order: the position of each name is its integer label.
    class_names = [
        "0_not_relevant",
        "1_not_happening",
        "2_not_human",
        "3_not_bad",
        "4_solutions_harmful_unnecessary",
        "5_science_unreliable",
        "6_proponents_biased",
        "7_fossil_fuels_needed",
    ]
    label_to_id = {name: idx for idx, name in enumerate(class_names)}

    def encode_label(example):
        # Replace the string label of one example with its integer id.
        return {"label": label_to_id[example["label"]]}

    # Load the dataset, encode its labels, then carve the evaluation subset
    # out of the "train" split using the size and seed from the request.
    dataset = load_dataset(request.dataset_name).map(encode_label)
    splits = dataset["train"].train_test_split(
        test_size=request.test_size,
        seed=request.test_seed,
    )
    test_dataset = splits["test"]

    # Only the work between start_task and stop_task is measured.
    tracker.start()
    tracker.start_task("inference")

    # Placeholder for real model inference: one uniformly random class id
    # (0..7) per test example.
    true_labels = test_dataset["label"]
    n_samples = len(true_labels)
    predictions = [random.randint(0, 7) for _ in range(n_samples)]

    emissions_data = tracker.stop_task()

    accuracy = accuracy_score(true_labels, predictions)

    dataset_config = {
        "dataset_name": request.dataset_name,
        "test_size": request.test_size,
        "test_seed": request.test_seed,
    }

    return {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": request.model_description,
        "accuracy": float(accuracy),
        # Tracker values are scaled by 1000 to match the key names
        # (presumably kWh -> Wh and kg -> g; confirm against codecarbon units).
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "dataset_config": dataset_config,
    }
|
tasks/utils/__init__.py
ADDED
File without changes
|
tasks/utils/emissions.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from codecarbon import EmissionsTracker
|
2 |
+
import os
|
3 |
+
|
4 |
+
# Module-level CodeCarbon tracker, imported by tasks/text.py to measure the
# inference step. NOTE(review): allow_multiple_runs=True presumably lets more
# than one tracker run on the same machine - confirm in the codecarbon docs.
tracker = EmissionsTracker(allow_multiple_runs=True)
|
6 |
+
|
7 |
+
class EmissionsData:
    """Plain container for the two emission figures of a run.

    Attributes:
        energy_consumed: Energy figure, stored exactly as given.
        emissions: Emissions figure, stored exactly as given.
    """

    def __init__(self, energy_consumed: float, emissions: float):
        # Values are stored unchanged; no validation or unit conversion here.
        self.energy_consumed, self.emissions = energy_consumed, emissions
|
11 |
+
|
12 |
+
def clean_emissions_data(emissions_data, fields_to_remove=None):
    """Return the attributes of *emissions_data* as a dict, minus unwanted fields.

    Args:
        emissions_data: Any object exposing its fields through ``__dict__``
            (tasks/text.py passes the value returned by ``tracker.stop_task()``).
        fields_to_remove: Optional iterable of attribute names to drop. When
            omitted, the default set is used: ``timestamp``, ``project_name``,
            ``experiment_id``, ``latitude`` and ``longitude``.

    Returns:
        A new dict of the remaining attribute names and values. The input
        object is not modified.
    """
    if fields_to_remove is None:
        excluded = {"timestamp", "project_name", "experiment_id", "latitude", "longitude"}
    else:
        # Materialise once so membership tests are O(1) and generators work.
        excluded = set(fields_to_remove)

    return {
        name: value
        for name, value in vars(emissions_data).items()
        if name not in excluded
    }
|
17 |
+
|
18 |
+
def get_space_info():
    """Get the space username and URL from environment variables.

    Reads ``SPACE_ID`` (presumably of the form ``"owner/space-name"`` when
    running on Hugging Face Spaces).

    Returns:
        A ``(username, space_url)`` tuple. ``username`` is the part of
        ``SPACE_ID`` before the first ``/`` (the whole value if there is no
        ``/``), and ``space_url`` is the Space page URL built from the full
        id. When ``SPACE_ID`` is unset, empty or only whitespace, the
        placeholders ``("local-user", "local-development")`` are returned.
    """
    # Strip so a blank value is treated as "not running in a Space" rather
    # than producing a URL made of whitespace.
    space_id = os.getenv("SPACE_ID", "").strip()
    if not space_id:
        return "local-user", "local-development"

    # String splitting and formatting cannot fail here, so no exception
    # handling is needed.
    username = space_id.partition("/")[0]
    return username, f"https://huggingface.co/spaces/{space_id}"
|
tasks/utils/evaluation.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional
|
2 |
+
from pydantic import BaseModel, Field
|
3 |
+
|
4 |
+
class BaseEvaluationRequest(BaseModel):
    # Request fields shared by the text, image and audio evaluation endpoints.
    # (Documented with comments rather than a docstring so the generated
    # schema description is left unchanged.)

    # Fraction of the data held out for evaluation. The bounds are exclusive
    # because tasks/text.py forwards this value to Dataset.train_test_split,
    # which rejects a float test size of exactly 0 or 1; validating here
    # turns that failure into a request validation error.
    test_size: float = Field(0.2, gt=0.0, lt=1.0, description="Size of the test split (strictly between 0 and 1)")
    # Seed passed to the split so the same test subset is drawn every time.
    test_seed: int = Field(42, ge=0, description="Random seed for reproducibility")
    # Free-text description, echoed back in the evaluation results.
    # NOTE(review): pydantic v2 may warn about the "model_" prefix on this
    # field name - confirm against the installed pydantic version.
    model_description: Optional[str] = Field("No description provided", description="Description of the model being evaluated")
|
8 |
+
|
9 |
+
class TextEvaluationRequest(BaseEvaluationRequest):
    # Request body for the text task: the shared fields plus the dataset id,
    # which tasks/text.py passes to load_dataset.
    dataset_name: str = Field(
        default="QuotaClimat/frugalaichallenge-text-train",
        description="The name of the dataset on HuggingFace Hub",
    )
|
12 |
+
|
13 |
+
class ImageEvaluationRequest(BaseEvaluationRequest):
    # Request body for the image task: the shared fields plus a dataset id.
    # The default is a placeholder name; the image endpoint only echoes it.
    dataset_name: str = Field(
        default="placeholder/frugalaichallenge-image-train",
        description="The name of the dataset on HuggingFace Hub",
    )
|
16 |
+
|
17 |
+
class AudioEvaluationRequest(BaseEvaluationRequest):
    # Request body for the audio task: the shared fields plus a dataset id.
    # The default is a placeholder name; the audio endpoint only echoes it.
    dataset_name: str = Field(
        default="placeholder/frugalaichallenge-audio-train",
        description="The name of the dataset on HuggingFace Hub",
    )
|