from smolagents import tool
from langchain_community.document_loaders import WikipediaLoader


@tool
def multiply(a: int, b: int) -> int:
    """Return the product of two integers.

    Args:
        a: The first factor.
        b: The second factor.
    """
    product = a * b
    return product

@tool
def add(a: int, b: int) -> int:
    """Return the sum of two integers.

    Args:
        a: The first addend.
        b: The second addend.
    """
    total = a + b
    return total

@tool
def subtract(a: int, b: int) -> int:
    """Return the difference between two integers.

    Args:
        a: The value to subtract from.
        b: The value to subtract.
    """
    difference = a - b
    return difference

@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers using true division.

    Args:
        a: The dividend.
        b: The divisor; must not be zero.

    Returns:
        The quotient of a and b as a float, even when the division is exact.

    Raises:
        ValueError: If b is zero.
    """
    # Reject a zero divisor up front so callers get a uniform ValueError
    # instead of Python's ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    # True division always yields a float, hence the float return annotation.
    return a / b

@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder of dividing one integer by another.

    Args:
        a: The dividend.
        b: The divisor.
    """
    remainder = a % b
    return remainder

@tool
def wiki_search(query: str) -> dict[str, str]:
    """Search Wikipedia for a query and return at most 2 results.

    Args:
        query: The search query.

    Returns:
        A dictionary with the single key "wiki_results" whose value is one
        string containing every loaded document, each wrapped in a Document
        tag carrying its source and page metadata, separated by "---" lines.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Missing metadata entries are rendered as empty attributes rather than
    # aborting the whole search with a KeyError.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}

@tool
def read_excel_file(file_path: str, query: str) -> str:
    """
    Load an Excel file with pandas and return a basic textual summary.

    The summary reports the number of rows and columns, the column names, and
    the output of the DataFrame describe method. Failures are reported as an
    error string instead of being raised.

    Args:
        file_path: Path to the Excel file
        query: Question about the data. It is currently not used by the
            analysis and is kept so the tool signature stays stable.

    Returns:
        The summary text, or a message starting with "Error" when pandas is
        missing or the file cannot be read or analyzed.
    """
    # Guard only the import with ImportError: an ImportError raised later
    # (e.g. a missing Excel engine) must not be reported as "pandas missing".
    try:
        import pandas as pd
    except ImportError:
        return "Error: pandas is not installed. Please install it with 'pip install pandas'."

    try:
        df = pd.read_excel(file_path)
        # Column labels may be non-strings (ints, timestamps, ...), so convert
        # each one before joining.
        column_names = ", ".join(map(str, df.columns))
        return (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
            f"Columns: {column_names}\n\n"
            "Summary statistics:\n"
            f"{df.describe()}"
        )
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
    
@tool
def transcribe_audio_file(mp3_file_path: str) -> str:
    """
    Transcribe text from an mp3 file.

    The mp3 is converted to a temporary wav file, loaded with the
    speech_recognition package and transcribed with its recognize_google
    method. Failures are reported as an error string instead of being raised.

    Args:
        mp3_file_path: Path to the mp3 file.

    Returns:
        The text extracted from the mp3 file, or a message starting with
        "Error transcribing mp3 file" when any step fails.
    """
    try:
        import os
        import tempfile

        import speech_recognition as sr
        from pydub import AudioSegment

        audio = AudioSegment.from_mp3(mp3_file_path)
        recognizer = sr.Recognizer()

        # Write the intermediate wav into a throwaway directory so it neither
        # overwrites an existing file next to the mp3 nor lingers on disk.
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_file = os.path.join(tmp_dir, "audio.wav")
            # export() returns the open output handle; close it so the
            # directory can be removed cleanly afterwards.
            audio.export(wav_file, format="wav").close()
            with sr.AudioFile(wav_file) as source:
                audio_data = recognizer.record(source)

        # The recorded audio is held in memory, so the wav is no longer needed.
        text = recognizer.recognize_google(audio_data)
        return text
    except Exception as e:
        return f"Error transcribing mp3 file: {e}"


@tool
def transcribe_from_youtube(youtube_id: str) -> str:
    """
    Fetch the transcript of a youtube video as plain text.

    Failures are reported as an error string instead of being raised.

    Args:
        youtube_id: ID of the youtube video. Not the full URL. Example: "dQw4w9WgXcQ"

    Returns:
        The transcript snippets joined by single spaces, or a message
        starting with "Could not extract transcript from YouTube video."
        followed by the failure reason.
    """
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        ytt_api = YouTubeTranscriptApi()
        fetched_transcript = ytt_api.fetch(youtube_id)
        return " ".join(snippet.text for snippet in fetched_transcript)
    except Exception as e:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and surface the cause to the caller.
        return f"Could not extract transcript from YouTube video. Reason: {e}"