File size: 5,491 Bytes
6fec0c8 dbb14b6 6fec0c8 482bc3b 6fec0c8 dbb14b6 6fec0c8 dbb14b6 c89b357 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
"""Tools to handle multimodal understandig."""
import os
import io
import re
import requests
import librosa
import soundfile as sf
import pandas as pd
from llama_index.core.tools import FunctionTool
from huggingface_hub import InferenceClient
from transformers import pipeline
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def transcribe_audio(file_id: str) -> str:
    """Transcribe an English audio file identified by its id using a local Whisper model.

    Downloads the file, resamples to 16 kHz when needed, and runs the
    ``openai/whisper-tiny`` ASR pipeline.

    Args:
        file_id: Task/file id understood by the scoring API.

    Returns:
        The transcription text, or an error string when the file cannot
        be read as audio (invalid id or non-audio payload).
    """
    try:
        audio, sr = sf.read(_get_file(file_id))
        # Whisper models expect 16 kHz input.
        if sr != 16000:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
    except Exception:
        # Broad on purpose (download, decode, or resample can each fail),
        # but no longer a bare except that would swallow KeyboardInterrupt.
        return "Error: Invalid file. This file is either not an audio file or the id does not exist."
    asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
    # Whisper handles ~30 s chunks; for longer clips request timestamps so
    # the pipeline can stitch chunked transcriptions together.
    if (len(audio) / 16000) > 25:
        output = asr(audio, return_timestamps=True)
    else:
        output = asr(audio)
    return output["text"].strip()
def transcribe_audio_hf(file_id: str) -> str:
    """Transcribe an audio file identified by its id via the HF Inference API.

    Remote counterpart of :func:`transcribe_audio`; sends the raw bytes to
    ``openai/whisper-small`` hosted on Hugging Face inference.

    Args:
        file_id: Task/file id understood by the scoring API.

    Returns:
        The transcription text, or an error string when the file cannot
        be downloaded.
    """
    try:
        audio_bytes = _get_file(file_id).read()
    except Exception:
        # Download failed: bad id (FileNotFoundError) or a network error.
        return "Error: Invalid file. This file is either not an audio file or the id does not exist."
    client = InferenceClient(
        provider="hf-inference",
        api_key=os.getenv("HF_TOKEN"),
    )
    output = client.automatic_speech_recognition(audio_bytes, model="openai/whisper-small")
    return output
def get_transcription_tool():
    """Build the FunctionTool that exposes :func:`transcribe_audio` to the agent."""
    tool_description = "Transcribes an audio file identified by its id."
    return FunctionTool.from_defaults(fn=transcribe_audio, description=tool_description)
def answer_image_question(question: str, file_id: str) -> str:
    """Answers questions about an image identified by its id.

    Sends a single multimodal chat turn (question text + image URL) to a
    hosted vision-language model and returns its answer with any
    ``<think>`` section stripped.
    """
    image_url = DEFAULT_API_URL + f"/files/{file_id}"
    # One user turn carrying both the question and a pointer to the image.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": question},
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    }
    client = InferenceClient(
        provider="hf-inference",
        api_key=os.getenv("HF_TOKEN"),
    )
    completion = client.chat.completions.create(
        model="Qwen/Qwen2.5-VL-32B-Instruct",
        messages=[user_message],
        max_tokens=512,
    )
    return remove_think(completion.choices[0].message.content)
def get_image_qa_tool():
    """Build the FunctionTool that exposes :func:`answer_image_question` to the agent."""
    tool_description = "Answer a question about a given image. The image is identified by a file id."
    return FunctionTool.from_defaults(fn=answer_image_question, description=tool_description)
def read_excel(file_id: str) -> str:
    """Convert an excel file identified by its file id into a markdown table.

    Args:
        file_id: Task/file id understood by the scoring API.

    Returns:
        The sheet rendered as a markdown table, or an error string when
        the id is invalid or the payload is not a readable excel file
        (same convention as the audio tools).
    """
    try:
        df = pd.read_excel(_get_file(file_id))
    except Exception:
        return "Error: Invalid file. This file is either not an excel file or the id does not exist."
    return df.to_markdown()
def get_excel_tool():
    """Build the FunctionTool that exposes :func:`read_excel` to the agent."""
    tool_description = "Convert an excel file that is identified by its file id into a markdown string."
    return FunctionTool.from_defaults(fn=read_excel, description=tool_description)
def analyse_excel(file_id: str) -> str:
    """Compute summary statistics for an excel file identified by its file id.

    Args:
        file_id: Task/file id understood by the scoring API.

    Returns:
        Per-column statistics (count/mean/std/min/max/quartiles) as a
        markdown table, or an error string when the file cannot be read.
    """
    try:
        df = pd.read_excel(_get_file(file_id))
    except Exception:
        return "Error: Invalid file. This file is either not an excel file or the id does not exist."
    # describe() yields a DataFrame; render it as markdown so the return
    # value actually matches the declared -> str contract.
    return df.describe().to_markdown()
def get_excel_analysis_tool():
    """Build the FunctionTool that exposes :func:`analyse_excel` to the agent."""
    return FunctionTool.from_defaults(
        # Bug fix: previously wrapped read_excel, so the "analysis" tool
        # silently returned the raw table instead of statistics.
        fn=analyse_excel,
        description="Analyse an excel file that is identified by its file id and get common statistics such as mean or max per column."
    )
def read_csv(file_id: str) -> str:
    """Convert a csv file identified by its file id into a markdown table.

    Args:
        file_id: Task/file id understood by the scoring API.

    Returns:
        The csv rendered as a markdown table, or an error string when
        the id is invalid or the payload is not parseable csv
        (same convention as the audio tools).
    """
    try:
        df = pd.read_csv(_get_file(file_id))
    except Exception:
        return "Error: Invalid file. This file is either not a csv file or the id does not exist."
    return df.to_markdown()
def get_csv_tool():
    """Build the FunctionTool that exposes :func:`read_csv` to the agent."""
    return FunctionTool.from_defaults(
        # Bug fix: previously wrapped read_excel, so the csv tool tried to
        # parse csv payloads as excel workbooks.
        fn=read_csv,
        description="Convert a csv file that is identified by its file id into a markdown string."
    )
def analyse_csv(file_id: str) -> str:
    """Compute summary statistics for a csv file identified by its file id.

    Args:
        file_id: Task/file id understood by the scoring API.

    Returns:
        Per-column statistics (count/mean/std/min/max/quartiles) as a
        markdown table, or an error string when the file cannot be read.
    """
    try:
        df = pd.read_csv(_get_file(file_id))
    except Exception:
        return "Error: Invalid file. This file is either not a csv file or the id does not exist."
    # describe() yields a DataFrame; render it as markdown so the return
    # value actually matches the declared -> str contract.
    return df.describe().to_markdown()
def get_csv_analysis_tool():
    """Build the FunctionTool that exposes :func:`analyse_csv` to the agent."""
    return FunctionTool.from_defaults(
        # Bug fix: previously wrapped read_excel, so the csv analysis tool
        # neither read csv nor produced statistics.
        fn=analyse_csv,
        description="Analyse a csv file that is identified by its file id and get common statistics such as mean or max per column."
    )
def watch_video(video_url: str) -> str:
    """Placeholder: video understanding is not implemented yet.

    Always returns a fixed message instructing the agent to answer
    "I don't know"; the url argument is accepted but ignored.
    """
    return (
        "You are not able to watch a Video yet. "
        "Reply with 'I don't know' to the question."
    )
def get_video_tool():
    """Build the FunctionTool that exposes :func:`watch_video` to the agent."""
    tool_description = "Watch a video and get a content description as a string."
    return FunctionTool.from_defaults(fn=watch_video, description=tool_description)
def _get_file(task_id: str) -> io.BytesIO:
    """Download the file for *task_id* from the scoring API.

    Args:
        task_id: Task/file id understood by the scoring API.

    Returns:
        The file payload as an in-memory BytesIO positioned at 0.

    Raises:
        FileNotFoundError: When the server does not answer with HTTP 200.
    """
    # Explicit timeout so a hung server cannot block the agent forever
    # (requests has no default timeout).
    res = requests.get(DEFAULT_API_URL + f"/files/{task_id}", timeout=30)
    if res.status_code != 200:
        raise FileNotFoundError("Invalid file or task id.")
    return io.BytesIO(res.content)
def remove_think(output: str) -> str:
    """Remove every ``<think>...</think>`` section from an LLM output.

    Uses a non-greedy pattern with DOTALL so reasoning blocks that span
    multiple lines are stripped, and multiple blocks do not swallow the
    text between them (the old greedy, line-bound pattern did both).

    Args:
        output: Raw model output; may be ``None`` or empty.

    Returns:
        The cleaned, stripped text; falsy input is returned unchanged.
    """
    if output:
        return re.sub(r"<think>.*?</think>", "", output, flags=re.DOTALL).strip()
    return output
def read_txt_or_py_file(file_id: str) -> str:
    """Read a python or txt file as plain text and return its content.

    Args:
        file_id: Task/file id understood by the scoring API.

    Returns:
        The decoded file content, or an error string when the file
        cannot be downloaded.
    """
    try:
        bytes_io = _get_file(file_id)
    except Exception:
        # Download failed: bad id (FileNotFoundError) or a network error;
        # no bare except so KeyboardInterrupt/SystemExit still propagate.
        return "Error: Invalid file. This file is either not a .py/.txt file or the id does not exist."
    # _get_file returns a fresh BytesIO already at position 0, so no seek
    # is needed. Decode explicitly as UTF-8 (the previous implicit default).
    return bytes_io.read().decode("utf-8")
def get_read_file_tool():
    """Build the FunctionTool that exposes :func:`read_txt_or_py_file` to the agent."""
    tool_description = "Read a python or txt file as plain text and return its content."
    return FunctionTool.from_defaults(fn=read_txt_or_py_file, description=tool_description)
|